Skip to content

Commit

Permalink
feature: change average_spectra to take an iterator over `SpectrumL…
Browse files Browse the repository at this point in the history
…ike` (#6)

* Allowed average from an iterator and added more documentation

---------

Co-authored-by: Joshua Klein <mobiusklein@gmail.com>
  • Loading branch information
douweschulte and mobiusklein authored Sep 6, 2024
1 parent 980bbab commit 0e0582e
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 66 deletions.
62 changes: 35 additions & 27 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,16 @@
name = "mzdata"
version = "0.29.0"
edition = "2018"
keywords = [
'mass-spectrometry',
'mzml',
'mgf'
]
keywords = ['mass-spectrometry', 'mzml', 'mgf']

categories = [
"science",
"parser-implementations",
"data-structures"
]
categories = ["science", "parser-implementations", "data-structures"]

description = "A library to read mass spectrometry data formats"
license = "Apache-2.0"
repository = "https://github.com/mobiusklein/mzdata"
documentation = "https://docs.rs/mzdata"

exclude = [
"tmp/*",
"test/data/*"
]
exclude = ["tmp/*", "test/data/*"]

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[[bin]]
Expand Down Expand Up @@ -54,8 +43,8 @@ lto = true
debug = true

[features]
# default = ["nalgebra", "parallelism", "mzsignal", "mzmlb", "zlib-ng-compat", ]
default = ["zlib-ng-compat", ]
# default = ["nalgebra", "parallelism", "mzsignal", "zlib-ng-compat"]
default = ["zlib-ng-compat"]

openblas = ["mzsignal", "mzsignal/openblas"]
netlib = ["mzsignal", "mzsignal/netlib"]
Expand All @@ -80,7 +69,11 @@ mzmlb = ["hdf5", "ndarray", "hdf5-sys"]
# but not flate2/zlib
hdf5_static = ["mzmlb", "hdf5-sys/static", "hdf5-sys/zlib", "libz-sys"]

thermo = ["thermorawfilereader", "thermorawfilereader/net8_0", "thermorawfilereader/nethost-download"]
thermo = [
"thermorawfilereader",
"thermorawfilereader/net8_0",
"thermorawfilereader/nethost-download",
]

doc-only = ["thermorawfilereader/doc-only"]

Expand All @@ -91,25 +84,32 @@ regex = "1"
lazy_static = "1.4.0"
serde = { version = "1.0.204", features = ["derive"] }
serde_json = "1.0.120"
quick-xml = { version = "0.30", features = [ "serialize" ] }
flate2 = {version = "1.0.20"}
quick-xml = { version = "0.30", features = ["serialize"] }
flate2 = { version = "1.0.20" }
num-traits = "0.2"
indexmap = { version = "2.0.0", features = [ "serde" ] }
indexmap = { version = "2.0.0", features = ["serde"] }
log = "0.4.20"
mzpeaks = { version = ">=0.20.0,<1.0.0" }
rayon = { version = ">=1.8.0,<2.0", optional = true }
mzsignal = { version = ">=0.22.0,<1.0.0", default-features = false, optional = true}
md5 = "0.7.0"
tokio = {version = "1.32.0", optional = true, features = ["macros", "rt", "fs", "rt-multi-thread"]}

hdf5 = {version = "0.8.1", optional = true, features = ["blosc", "lzf",]}
tokio = { version = "1.32.0", optional = true, features = [
"macros",
"rt",
"fs",
"rt-multi-thread",
] }

hdf5 = { version = "0.8.1", optional = true, features = ["blosc", "lzf"] }
hdf5-sys = { version = "0.8.1", optional = true }
libz-sys = { version = "1.1", default-features = false, features = ["static", ], optional = true }
libz-sys = { version = "1.1", default-features = false, features = [
"static",
], optional = true }
ndarray = { version = "0.15.6", optional = true }
filename = { version = "0.1.1", optional = true }

numpress = { version = "1.1.0", optional = true }
bytemuck = { version = "1.15.0", features = ["extern_crate_alloc"]}
bytemuck = { version = "1.15.0", features = ["extern_crate_alloc"] }
base64-simd = "0.8.0"
thiserror = "1.0.50"

Expand All @@ -120,7 +120,7 @@ chrono = "0.4.37"
bitflags = "2.5.0"

[dev-dependencies]
criterion = { version = "0.5.1", features = [ "html_reports" ] }
criterion = { version = "0.5.1", features = ["html_reports"] }
test-log = "0.2.12 "
env_logger = "0.10.0"
tempfile = "3.10"
Expand All @@ -132,5 +132,13 @@ harness = false


[package.metadata.docs.rs]
features = ["parallelism", "mzsignal", "nalgebra", "mzmlb", "async", "thermorawfilereader", "doc-only"]
features = [
"parallelism",
"mzsignal",
"nalgebra",
"mzmlb",
"async",
"thermorawfilereader",
"doc-only",
]
no-default-features = true
107 changes: 68 additions & 39 deletions src/spectrum/group.rs
Original file line number Diff line number Diff line change
Expand Up @@ -660,7 +660,8 @@ mod mzsignal_impl {

use crate::spectrum::bindata::{to_bytes, BuildArrayMapFrom, BuildFromArrayMap};
use crate::spectrum::{
ArrayType, BinaryArrayMap, BinaryDataArrayType, DataArray, SignalContinuity,
ArrayType, BinaryArrayMap, BinaryDataArrayType, DataArray, RefPeakDataLevel,
SignalContinuity,
};

use super::*;
Expand All @@ -669,7 +670,7 @@ mod mzsignal_impl {
use mzpeaks::{MZLocated, PeakCollection};
use mzsignal::average::{average_signal, SignalAverager};
use mzsignal::reprofile::{reprofile, PeakSetReprofiler, PeakShape, PeakShapeModel};
use mzsignal::{ArrayPair, FittedPeak};
use mzsignal::{ArrayPair, FittedPeak, MZGrid};

impl From<ArrayPair<'_>> for BinaryArrayMap {
fn from(value: ArrayPair<'_>) -> Self {
Expand All @@ -692,47 +693,75 @@ mod mzsignal_impl {
}
}

/// Average a series of [`MultiLayerSpectrum`] together
pub fn average_spectra<C: CentroidLike + Default, D: DeconvolutedCentroidLike + Default>(
spectra: &[MultiLayerSpectrum<C, D>],
/// Average a series of [`SpectrumLike`] together. The supplied dx will be used to [`reprofile`]
/// centroid spectra. The resulting [`ArrayPair`] can be made into a [`BinaryArrayMap`] using
/// `.into()`. Which in turn can be used to create a new [`MultiLayerSpectrum`] or
/// [`RawSpectrum`](crate::spectrum::RawSpectrum). Internally it uses [`SpectrumLike::peaks`]
/// to retrieve the spectrum data. Any deconvoluted spectra will be skipped.
///
/// Note: only available with feature `mzsignal`.
pub fn average_spectra<
'lifetime,
S: SpectrumLike<C, D> + 'lifetime,
C: CentroidLike,
D: DeconvolutedCentroidLike,
>(
spectra: impl IntoIterator<Item = &'lifetime S>,
dx: f64,
) -> ArrayPair<'_> {
let arrays: Vec<_> = spectra
.iter()
.map(|scan| {
if scan.signal_continuity() == SignalContinuity::Profile {
if let Some(array_map) = scan.raw_arrays() {
let mz = array_map.mzs().unwrap().to_vec();
let inten = array_map.intensities().unwrap().to_vec();
Some(ArrayPair::from((mz, inten)))
} else {
warn!("{} did not have raw data arrays", scan.id());
None
}
} else {
if let Some(peaks) = scan.peaks.as_ref() {
let fpeaks: Vec<_> = peaks
) -> ArrayPair<'static> {
let mut to_be_reprofiled = Vec::new();
let mut profiles = Vec::new();

for spectrum in spectra {
match (spectrum.signal_continuity(), spectrum.peaks()) {
(_, RefPeakDataLevel::Missing | RefPeakDataLevel::Deconvoluted(_)) => (),
(SignalContinuity::Centroid, RefPeakDataLevel::RawData(_))
| (_, RefPeakDataLevel::Centroid(_)) => {
to_be_reprofiled.push(
spectrum
.peaks()
.iter()
.map(|p| FittedPeak::from(p.as_centroid()))
.collect();
let signal = reprofile(fpeaks.iter(), dx);
Some(ArrayPair::from((
signal.mz_array.to_vec(),
signal.intensity_array.to_vec(),
)))
} else {
warn!(
"{} was not in profile mode but no centroids found",
scan.id()
);
None
}
.collect::<Vec<_>>(),
);
}
})
.flatten()
.collect();
let arrays = average_signal(&arrays, dx);
arrays
(_, RefPeakDataLevel::RawData(array_map)) => {
let mzs = array_map.mzs().unwrap();
let intensities = array_map.intensities().unwrap();
profiles.push(ArrayPair::from((mzs, intensities)));
}
}
}

if !to_be_reprofiled.is_empty() {
let mz_start = to_be_reprofiled
.iter()
.map(|p| p.first().map_or(f64::MAX, |p| p.mz))
.min_by(f64::total_cmp)
.unwrap_or_default();
let mz_end = to_be_reprofiled
.iter()
.map(|p| p.last().map_or(f64::MIN, |p| p.mz))
.max_by(f64::total_cmp)
.unwrap_or_default();

let reprofiler = PeakSetReprofiler::new(mz_start, mz_end, dx);

for peaks in to_be_reprofiled {
let models: Vec<PeakShapeModel<'_>> = peaks.iter().map(|p| p.into()).collect();
if models.is_empty() {
profiles.push(ArrayPair::from((Vec::new(), Vec::new())));
} else {
let arrays = reprofiler.reprofile_from_models(&models);
profiles.push(ArrayPair::from((
reprofiler.copy_mz_array(),
arrays.intensity_array.into_owned(),
)))
}
}
}

average_signal(&profiles, dx)
}

#[derive(Debug, Clone)]
Expand Down

0 comments on commit 0e0582e

Please sign in to comment.