Skip to content

Commit 3e6498d

Browse files
committed
In bigwigaverageoverbed, when a chromosome is not in the bigWig, return an all-zero stats entry instead of an error
1 parent 26ba392 commit 3e6498d

3 files changed

Lines changed: 76 additions & 41 deletions

File tree

bigtools/src/utils/cli/bigwigaverageoverbed.rs

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@ use crossbeam_channel::TryRecvError;
88

99
use crate::bed::bedparser::{parse_bed, BedFileStream, StreamingBedValues};
1010
use crate::utils::file_view::FileView;
11-
use crate::utils::misc::{stats_for_bed_item, Name};
11+
use crate::utils::misc::{name_for_bed_item, stats_for_bed_item, BigWigAverageOverBedEntry, Name};
1212
use crate::utils::reopen::{Reopen, ReopenableFile};
1313
use crate::utils::split_file_into_chunks_by_size;
1414
use crate::utils::streaming_linereader::StreamingLineReader;
15-
use crate::{BBIFileRead, BigWigRead};
15+
use crate::{BBIFileRead, BBIReadError, BigWigRead};
1616

1717
#[derive(Clone, Debug, PartialEq, Parser)]
1818
#[command(
@@ -129,8 +129,24 @@ pub fn bigwigaverageoverbed(
129129
Some(Ok(entry)) => entry,
130130
};
131131

132-
let entry = match stats_for_bed_item(name, chrom, entry, inbigwig) {
132+
let name = match name_for_bed_item(name, chrom, &entry) {
133+
Ok(name) => name,
134+
Err(e) => {
135+
return Err(e.into());
136+
}
137+
};
138+
139+
let entry = match stats_for_bed_item(chrom, entry, inbigwig) {
133140
Ok(stats) => stats,
141+
Err(BBIReadError::InvalidChromosome(..)) => BigWigAverageOverBedEntry {
142+
bases: 0,
143+
max: 0.0,
144+
min: 0.0,
145+
mean: 0.0,
146+
mean0: 0.0,
147+
size: 0,
148+
sum: 0.0,
149+
},
134150
Err(e) => {
135151
return Err(e.into());
136152
}
@@ -152,7 +168,7 @@ pub fn bigwigaverageoverbed(
152168
entry.size, entry.bases, entry.sum, entry.mean0, entry.mean
153169
),
154170
};
155-
writeln!(&mut tmp, "{}\t{}", entry.name, stats)?
171+
writeln!(&mut tmp, "{}\t{}", name, stats)?
156172
}
157173

158174
Ok(tmp)
@@ -259,7 +275,14 @@ pub fn bigwigaverageoverbed(
259275
)
260276
})??;
261277

262-
let entry = match stats_for_bed_item(name, chrom, entry, &mut inbigwig) {
278+
let name = match name_for_bed_item(name, chrom, &entry) {
279+
Ok(name) => name,
280+
Err(e) => {
281+
return Err(e.into());
282+
}
283+
};
284+
285+
let entry = match stats_for_bed_item(chrom, entry, &mut inbigwig) {
263286
Ok(stats) => stats,
264287
Err(e) => return Err(e.into()),
265288
};
@@ -280,7 +303,7 @@ pub fn bigwigaverageoverbed(
280303
entry.size, entry.bases, entry.sum, entry.mean0, entry.mean
281304
),
282305
};
283-
writeln!(&mut bedoutwriter, "{}\t{}", entry.name, stats)?
306+
writeln!(&mut bedoutwriter, "{}\t{}", name, stats)?
284307
}
285308
}
286309

bigtools/src/utils/misc.rs

Lines changed: 43 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ pub enum Name {
1616
}
1717

1818
pub struct BigWigAverageOverBedEntry {
19-
pub name: String,
2019
pub size: u32,
2120
pub bases: u32,
2221
pub sum: f64,
@@ -27,22 +26,44 @@ pub struct BigWigAverageOverBedEntry {
2726
}
2827

2928
#[derive(Error, Debug)]
30-
pub enum StatsError {
31-
#[error("{}", .0)]
32-
BBIReadError(#[from] BBIReadError),
33-
#[error("{}", .0)]
34-
InvalidNameCol(String),
29+
#[error("{}", self.0)]
30+
pub struct InvalidNameColError(String);
31+
32+
pub fn name_for_bed_item(
33+
name: Name,
34+
chrom: &str,
35+
entry: &BedEntry,
36+
) -> Result<String, InvalidNameColError> {
37+
let start = entry.start;
38+
let end = entry.end;
39+
40+
Ok(match name {
41+
Name::Column(col) => match col {
42+
0 => chrom.to_string(),
43+
1 => start.to_string(),
44+
2 => end.to_string(),
45+
_ => {
46+
let mut cols = entry.rest.split('\t');
47+
let v = cols.nth(col - 3);
48+
match v {
49+
Some(v) => v.to_string(),
50+
None => return Err(InvalidNameColError(format!("Invalid name column option. Number of columns ({}) is less than the value specified ({}).", entry.rest.split('\t').collect::<Vec<_>>().len() + 3, col+1))),
51+
}
52+
}
53+
},
54+
Name::Interval => format!("{}:{}-{}", chrom, start, end),
55+
Name::None => format!("{}\t{}\t{}\t{}", chrom, start, end, entry.rest),
56+
})
3557
}
3658

3759
/// Returns a `BigWigAverageOverBedEntry` for a bigWig over a given interval.
3860
/// If there are no values for the given region, then `f64::NAN` is given for
3961
/// `mean`, `min`, and `max`, and `0` is given for `mean0`.
4062
pub fn stats_for_bed_item<R: BBIFileRead>(
41-
name: Name,
4263
chrom: &str,
4364
entry: BedEntry,
4465
bigwig: &mut BigWigRead<R>,
45-
) -> Result<BigWigAverageOverBedEntry, StatsError> {
66+
) -> Result<BigWigAverageOverBedEntry, BBIReadError> {
4667
let start = entry.start;
4768
let end = entry.end;
4869

@@ -73,26 +94,7 @@ pub fn stats_for_bed_item<R: BBIFileRead>(
7394
(sum / f64::from(bases), min, max)
7495
};
7596

76-
let name = match name {
77-
Name::Column(col) => match col {
78-
0 => chrom.to_string(),
79-
1 => start.to_string(),
80-
2 => end.to_string(),
81-
_ => {
82-
let mut cols = entry.rest.split('\t');
83-
let v = cols.nth(col - 3);
84-
match v {
85-
Some(v) => v.to_string(),
86-
None => return Err(StatsError::InvalidNameCol(format!("Invalid name column option. Number of columns ({}) is less than the value specified ({}).", entry.rest.split('\t').collect::<Vec<_>>().len() + 3, col+1))),
87-
}
88-
}
89-
},
90-
Name::Interval => format!("{}:{}-{}", chrom, start, end),
91-
Name::None => format!("{}\t{}\t{}\t{}", chrom, start, end, entry.rest),
92-
};
93-
9497
Ok(BigWigAverageOverBedEntry {
95-
name,
9698
size,
9799
bases,
98100
sum,
@@ -106,21 +108,23 @@ pub fn stats_for_bed_item<R: BBIFileRead>(
106108
#[derive(Error, Debug)]
107109
pub enum BigWigAverageOverBedError {
108110
#[error("{}", .0)]
109-
StatsError(#[from] StatsError),
111+
BBIReadError(#[from] BBIReadError),
110112
#[error("{}", .0)]
111113
BedValueError(#[from] BedValueError),
114+
#[error("{}", .0)]
115+
InvalidNameColError(#[from] InvalidNameColError),
112116
}
113117

114118
pub fn bigwig_average_over_bed<R: BBIFileRead>(
115119
bed: impl BufRead,
116120
mut bigwig: BigWigRead<R>,
117121
name: Name,
118-
) -> impl Iterator<Item = Result<BigWigAverageOverBedEntry, BigWigAverageOverBedError>> {
122+
) -> impl Iterator<Item = Result<(String, BigWigAverageOverBedEntry), BigWigAverageOverBedError>> {
119123
let mut bedstream = StreamingLineReader::new(bed);
120124

121125
let mut error: bool = false;
122126
let iter = std::iter::from_fn(
123-
move || -> Option<Result<BigWigAverageOverBedEntry, BigWigAverageOverBedError>> {
127+
move || -> Option<Result<(String, BigWigAverageOverBedEntry), BigWigAverageOverBedError>> {
124128
if error {
125129
return None;
126130
}
@@ -140,12 +144,19 @@ pub fn bigwig_average_over_bed<R: BBIFileRead>(
140144
Some(Ok(v)) => v,
141145
};
142146

143-
match stats_for_bed_item(name, chrom, entry, &mut bigwig) {
147+
let name = match name_for_bed_item(name, chrom, &entry) {
148+
Ok(name) => name,
149+
Err(e) => {
150+
return Some(Err(e.into()));
151+
}
152+
};
153+
154+
match stats_for_bed_item(chrom, entry, &mut bigwig) {
144155
Err(e) => {
145156
error = true;
146157
Some(Err(e.into()))
147158
}
148-
Ok(v) => Some(Ok(v)),
159+
Ok(v) => Some(Ok((name, v))),
149160
}
150161
},
151162
);

pybigtools/src/lib.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2256,7 +2256,8 @@ impl BigWigAverageOverBedStatistics {
22562256
#[pyclass(module = "pybigtools")]
22572257
struct BigWigAverageOverBedEntriesIterator {
22582258
iter: Box<
2259-
dyn Iterator<Item = Result<BigWigAverageOverBedEntry, BigWigAverageOverBedError>> + Send,
2259+
dyn Iterator<Item = Result<(String, BigWigAverageOverBedEntry), BigWigAverageOverBedError>>
2260+
+ Send,
22602261
>,
22612262
usename: bool,
22622263
stats: Option<Vec<BigWigAverageOverBedStatistics>>,
@@ -2276,7 +2277,7 @@ impl BigWigAverageOverBedEntriesIterator {
22762277
.transpose()
22772278
.map_err(|e| PyErr::new::<exceptions::PyException, _>(format!("{}", e)))?;
22782279

2279-
let Some(v) = v else {
2280+
let Some((name, v)) = v else {
22802281
return Ok(None);
22812282
};
22822283
let stats = match &slf.stats {
@@ -2319,7 +2320,7 @@ impl BigWigAverageOverBedEntriesIterator {
23192320
};
23202321

23212322
match slf.usename {
2322-
true => Ok(Some((v.name, stats).to_object(slf.py()))),
2323+
true => Ok(Some((name, stats).to_object(slf.py()))),
23232324
false => Ok(Some(stats)),
23242325
}
23252326
}

0 commit comments

Comments
 (0)