Skip to content

Commit

Permalink
fix: fix parsing precursor charge in MGF files (#13)
Browse files Browse the repository at this point in the history
* fix parsing precursor charge in MGF files

  This fixes two issues:
  - the charge value in the PEPMASS header can have the same suffixed
    format as the CHARGE header
  - the order of the headers isn't guaranteed, so the charge value should
    still be saved correctly when the CHARGE header comes before the PEPMASS
    header
  • Loading branch information
paretje authored Oct 31, 2024
1 parent 13ffd69 commit 4709c37
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 48 deletions.
92 changes: 45 additions & 47 deletions src/io/mgf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ struct SpectrumBuilder<
pub intensity_array: Vec<f32>,
pub charge_array: Vec<i32>,
pub has_charge: u32,
pub precursor_charge: Option<i32>,
pub detail_level: DetailLevel,
empty_metadata: bool,
centroided_type: PhantomData<C>,
Expand All @@ -114,6 +115,7 @@ impl<C: CentroidPeakAdapting, D: DeconvolutedPeakAdapting> Default for SpectrumB
intensity_array: Default::default(),
charge_array: Default::default(),
has_charge: Default::default(),
precursor_charge: Default::default(),
detail_level: Default::default(),
centroided_type: Default::default(),
deconvoluted_type: Default::default(),
Expand Down Expand Up @@ -341,17 +343,8 @@ impl<R: io::Read, C: CentroidPeakAdapting, D: DeconvolutedPeakAdapting> MGFReade
.map_err(|e| warn!("Failed to parse PEPMASS intensity {value}: {e}"))
.unwrap_or_default();
let charge: Option<i32> = match parts.next() {
Some(c) => match c.parse::<i32>() {
Ok(val) => Some(val),
Err(e) => {
self.state = MGFParserState::Error;
self.error = Some(MGFError::MalformedHeaderLine(format!(
"Malformed charge value in PEPMASS header {value}: {e}"
)));
return false;
}
},
None => None,
Some(c) => self.parse_charge(c),
None => builder.precursor_charge,
};
builder.description.precursor = Some(Precursor {
ions: vec![SelectedIon {
Expand All @@ -364,40 +357,16 @@ impl<R: io::Read, C: CentroidPeakAdapting, D: DeconvolutedPeakAdapting> MGFReade
});
}
"CHARGE" => {
let (sign, value, tail_sign) = if let Some(stripped) = value.strip_suffix('+') {
(1, stripped, true)
} else if let Some(stripped) = value.strip_suffix('-') {
(-1, stripped, true)
} else {
(1, value, false)
};

if tail_sign && (value.starts_with('-') || value.starts_with('+')) {
self.state = MGFParserState::Error;
self.error = Some(MGFError::MalformedHeaderLine(format!(
"Could not parse CHARGE header {value}"
)));
return false;
}

match value.parse::<i32>() {
Ok(z) => {
if let Some(ion) = builder
.description
.precursor
.get_or_insert_with(Precursor::default)
.iter_mut()
.last()
{
ion.charge = Some(sign * z);
}
}
Err(e) => {
self.state = MGFParserState::Error;
self.error = Some(MGFError::MalformedHeaderLine(format!(
"Could not parse CHARGE header {value} : {e}"
)));
return false;
builder.precursor_charge = self.parse_charge(value);
if let Some(ion) = builder
.description
.precursor
.get_or_insert_with(Precursor::default)
.iter_mut()
.last()
{
if ion.charge.is_none() {
ion.charge = builder.precursor_charge
}
}
}
Expand All @@ -418,6 +387,35 @@ impl<R: io::Read, C: CentroidPeakAdapting, D: DeconvolutedPeakAdapting> MGFReade
}
}

/// Parse a charge token as written in a `CHARGE` header or as the trailing
/// field of a `PEPMASS` header.
///
/// Accepted forms are a plain signed integer (`2`, `-2`, `+2`) or an
/// unsigned integer followed by a trailing sign (`2+`, `3-`). A token that
/// carries both a leading and a trailing sign (e.g. `+2-`) is rejected.
///
/// Returns `Some(charge)` on success. On failure, sets `self.state` to
/// `MGFParserState::Error`, stores an `MGFError::MalformedHeaderLine` in
/// `self.error`, and returns `None`.
fn parse_charge(&mut self, value: &str) -> Option<i32> {
    // Split off an optional trailing sign; `digits` is what remains to be
    // parsed as an integer.
    let (sign, digits, tail_sign) = if let Some(stripped) = value.strip_suffix('+') {
        (1, stripped, true)
    } else if let Some(stripped) = value.strip_suffix('-') {
        (-1, stripped, true)
    } else {
        (1, value, false)
    };

    // A sign on both ends is ambiguous, so refuse it rather than guess.
    if tail_sign && (digits.starts_with('-') || digits.starts_with('+')) {
        self.state = MGFParserState::Error;
        // Report the token exactly as it appeared in the file, not the
        // remainder left after stripping the trailing sign.
        self.error = Some(MGFError::MalformedHeaderLine(format!(
            "Could not parse charge value {value}"
        )));
        return None;
    }

    match digits.parse::<i32>() {
        // `sign` is only -1 when `digits` has no leading sign, so `z` is
        // non-negative there and the multiplication cannot overflow.
        Ok(z) => Some(sign * z),
        Err(e) => {
            self.state = MGFParserState::Error;
            self.error = Some(MGFError::MalformedHeaderLine(format!(
                "Could not parse charge value {value} : {e}"
            )));
            None
        }
    }
}

fn handle_peak(&mut self, line: &str, builder: &mut SpectrumBuilder<C, D>) -> bool {
let peak_line = self.parse_peak_from_line(line, builder).unwrap_or(false);
if peak_line {
Expand Down Expand Up @@ -1197,7 +1195,7 @@ mod test {
}
}
assert_eq!(ms1_count, 0);
assert_eq!(msn_count, 34);
assert_eq!(msn_count, 35);
}

#[test]
Expand All @@ -1224,7 +1222,7 @@ mod test {
})
}
assert_eq!(ms1_count, 0);
assert_eq!(msn_count, 34);
assert_eq!(msn_count, 35);
}

#[test]
Expand Down
2 changes: 1 addition & 1 deletion src/io/shorthand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ mod test {
assert_eq!(val.len(), 48);
let val: Vec<_> =
mz_read!("./test/data/small.mgf".as_ref(), reader => { reader.collect() })?;
assert_eq!(val.len(), 34);
assert_eq!(val.len(), 35);
let val = mz_read!("./test/data/small.mzML".as_ref(), reader => { reader.file_description().clone() })?;
assert_eq!(val.source_files.len(), 1);
Ok(())
Expand Down
13 changes: 13 additions & 0 deletions test/data/small.mgf
Original file line number Diff line number Diff line change
Expand Up @@ -25514,3 +25514,16 @@ PEPMASS=882.535034179688 29807.576171875
1743.361206 3.1923613548
END IONS

BEGIN IONS
TITLE=charge before pepmass
CHARGE=2+
PEPMASS=475.137295
ION_MOBILITY=42.42
RTINSECONDS=51.2
72.04439 100
148.06043 600
232.07504 300
263.08737 400
347.10198 500
423.11802 200
END IONS

0 comments on commit 4709c37

Please sign in to comment.