From 56d67d4cddd1c1a1141026cd951649050c034504 Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Sun, 24 Mar 2024 22:28:25 -0400 Subject: [PATCH] Documentation --- CHANGELOG.md | 25 +- examples/from_stdin.rs | 2 +- examples/msn_target_mapping.rs | 6 +- examples/mzconvert.rs | 141 +++------ examples/readme.rs | 2 +- src/io.rs | 6 +- src/io/infer_format.rs | 541 ++++++++++++++++++++++++++++++-- src/io/mgf.rs | 10 +- src/io/mzml.rs | 2 +- src/io/mzml/async.rs | 4 +- src/io/mzml/reader.rs | 4 +- src/io/mzml/writer.rs | 12 +- src/io/mzmlb.rs | 2 +- src/io/mzmlb/reader.rs | 4 +- src/io/mzmlb/writer.rs | 6 +- src/io/thermo.rs | 6 +- src/io/thermo/reader.rs | 27 +- src/io/traits.rs | 90 ++++-- src/io/utils.rs | 22 +- src/lib.rs | 11 +- src/main.rs | 87 +---- src/meta/file_description.rs | 16 + src/meta/traits.rs | 2 +- src/params.rs | 56 +++- src/prelude.rs | 7 +- src/spectrum/scan_properties.rs | 10 +- src/spectrum/utils.rs | 6 +- 27 files changed, 798 insertions(+), 309 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index caad399..ba5a0df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,11 +10,24 @@ and this project adheres to [Semantic Versioning]. ### Added - `MGFReaderType` and `MGFWriterType` implement `MSDataFileMetadata` - `ThermoRawFileReader` has been added to read Thermo RAW files when the .NET 8 runtime is available, using [`thermorawfilereader`](https://crates.io/crates/thermorawfilereader/0.2.1) +- `Source` and `Sink` algebraic types to represent things that spectra can be read from or written to. +- `mz_read` and `mz_write` are macros to open files for reading and writing in unboxed context, but which + only live within a scoped closure. +- `MassSpectrometryReadWriteProcess` trait for orchestrating reading from a `Source`, writing to a `Sink`, and transforming the + data through an arbitrary function specified as part of the trait implementation. Like `mz_read`/`mz_write`, the scope enclosed + by the trait method. ### Changed - `MGFWriterType` now generates a spectrum title when one is absent, rather than defaulting to the spectrum's native ID. - `CURIE` can now be compared to `Param` +- Renamed `ScanWriter` to `SpectrumWriter` and `ScanSource` to `SpectrumSource` for consistency with other trait naming conventions. +- `MZFileReader::open_file` now returns an `io::Result` in keeping with the idea that reading a `File` might fail as well, + even if it is already open, because it is the wrong type of file. This also allows file formats that cannot be read from + arbitrary `io::Read` objects to signal failure without crashing the whole system. +- `Collator`, `std::sync::mpsc::{Sender, SyncSender}` now implement `SpectrumWriter` when properly parameterized. +- `PeakDataLevel` has been refactored into two types, `PeakDataLevel` is an owning type and `RefPeakDataLevel` + is a borrowing type. ### Deprecated @@ -28,7 +41,7 @@ and this project adheres to [Semantic Versioning]. ## [0.12.0] - 2024-01-29 ### Changed -- Require a newer version of `mzsignal`, fixing the rather embarassing error of swapping FWHM +- Require a newer version of `mzsignal`, fixing the rather embarrassing error of swapping FWHM and SNR during peak picking. - Thicken the use of internal abstraction around `PrecursorSelection` for the future of allowing more than one `SelectedIon` per `Precursor`. @@ -58,7 +71,7 @@ and this project adheres to [Semantic Versioning]. ## [0.8.0] - 2024-01-10 ### Added -- Added `close` to the `ScanWriter` trait which "closes" the formatted structure of the file. As Rust lacks a notion of a "closed" +- Added `close` to the `SpectrumWriter` trait which "closes" the formatted structure of the file. As Rust lacks a notion of a "closed" `io::Write`, the underlying writer isn't actually "closed" until the whole struct is dropped. - Added `Drop` implementation for `MzMLWriterType` and `MzMLbWriterType` which ensures that the `close` method is called to make the resulting file well-formed. @@ -72,7 +85,7 @@ and this project adheres to [Semantic Versioning]. - `SpectrumGroupingIterator` and other such iterator support `RandomAccessSpectrumGroupingIterator`. ### Changed -- `ScanWriter` no longer applies a lifespan requirement on individual writing operations. +- `SpectrumWriter` no longer applies a lifespan requirement on individual writing operations. - `filename` is no longer a required dependency, it is only needed to use `MzMLbReaderType::from_file` which otherwise panics. It introduces unpredictable and difficult to diagnose compilation errors. - `MGFWriterType` skips MS1 spectra automatically. @@ -103,7 +116,7 @@ and this project adheres to [Semantic Versioning]. - Make some window size attributes smaller as they do not require double precision. - Clean up the internal implementation of the various internal `SpectrumBuilder` types. - Factor up `mzdata::spectrum::signal` to be less monolithic and a complete redesign of the traits used to convert `mzpeaks` to and from binary arrays. -- Massive refactoring of `mzdata::io::traits` to make more traits depend upon `ScanSource` instead of `SpectrumIterator` and to make things slightly less verbose. +- Massive refactoring of `mzdata::io::traits` to make more traits depend upon `SpectrumSource` instead of `SpectrumIterator` and to make things slightly less verbose. - Switched the default `mzsignal` backend to `nalgebra` instead of `intel-mkl` for simplicity. ## [0.5.0] - 2021-09-22 @@ -112,13 +125,13 @@ and this project adheres to [Semantic Versioning]. - MzML writing via `mzdata::io::mzml::MzMLWriter` - Added feature flags to allow the user to choose amongst more `flate2` backends (zlib _default_, zlib-ng-compat, miniz_oxide) -- Grouped iteration mode for connecting precursor and product spectra over an iterator stream using the `groups` method of `ScanSource`. +- Grouped iteration mode for connecting precursor and product spectra over an iterator stream using the `groups` method of `SpectrumSource`. ### Changed - Re-structuring and renaming of the various iterator mechanisms for more consistency. `ScanIterator` -> `SpectrumIterator`, et cetera. Minor refactoring - of this sort expected to come for `ScanSource` as responsibilities are worked out. + of this sort expected to come for `SpectrumSource` as responsibilities are worked out. ### Deprecated diff --git a/examples/from_stdin.rs b/examples/from_stdin.rs index 3219808..c409301 100644 --- a/examples/from_stdin.rs +++ b/examples/from_stdin.rs @@ -5,7 +5,7 @@ use std::io::{self, Seek}; use std::time::Instant; use mzdata::io::{ - infer_from_stream, MassSpectrometryFormat, PreBufferedStream, RestartableGzDecoder, ScanSource, + infer_from_stream, MassSpectrometryFormat, PreBufferedStream, RestartableGzDecoder, SpectrumSource, }; use mzdata::{MGFReader, MzMLReader}; diff --git a/examples/msn_target_mapping.rs b/examples/msn_target_mapping.rs index b41fedd..6e05cb3 100644 --- a/examples/msn_target_mapping.rs +++ b/examples/msn_target_mapping.rs @@ -79,7 +79,7 @@ impl SelectedTarget { } -pub struct MSnTargetTrackingIterator { +pub struct MSnTargetTrackingIterator { source: SpectrumGroupingIterator, time_width: f64, error_tolerance: Tolerance, @@ -88,7 +88,7 @@ pub struct MSnTargetTrackingIterator { targets: VecDeque, } -impl MSnTargetTrackingIterator { +impl MSnTargetTrackingIterator { pub fn new( source: SpectrumGroupingIterator, time_width: f64, @@ -232,7 +232,7 @@ impl MSnTargetTrackingIterator { } } -impl Iterator for MSnTargetTrackingIterator { +impl Iterator for MSnTargetTrackingIterator { type Item = (SpectrumGroup, Vec); fn next(&mut self) -> Option { diff --git a/examples/mzconvert.rs b/examples/mzconvert.rs index ef38b4f..e1e5235 100644 --- a/examples/mzconvert.rs +++ b/examples/mzconvert.rs @@ -1,28 +1,21 @@ use std::env; -use std::fs; use std::io; +use std::path::PathBuf; use std::process::exit; use std::thread; use std::time; - use std::sync::mpsc::sync_channel; -#[cfg(feature = "mzmlb")] -use mzdata::io::mzmlb; - -#[cfg(feature = "thermorawfilereader")] -use mzdata::io::ThermoRawReader; - -use mzdata::io::MassSpectrometryReadWriteProcess; use mzdata::io::{ - infer_format, infer_from_path, infer_from_stream, MassSpectrometryFormat, PreBufferedStream, + Sink, Source, MassSpectrometryReadWriteProcess, + checksum_file }; +use mzdata::meta::SourceFile; +use mzdata::params::ControlledVocabulary; use mzdata::prelude::*; -use mzdata::{MGFReader, MGFWriter, MzMLReader, MzMLWriter}; use env_logger; -use mzpeaks::CentroidPeak; -use mzpeaks::DeconvolutedPeak; +use mzpeaks::{CentroidPeak, DeconvolutedPeak}; #[derive(Debug, Clone)] pub struct MZConvert { @@ -36,87 +29,14 @@ impl MZConvert { } pub fn main(&self) -> io::Result<()> { - self.reader_then() - } - - fn reader_then(&self) -> io::Result<()> { - if self.inpath == "-" { - let mut stream = PreBufferedStream::new(io::stdin())?; - let (ms_format, _compressed) = infer_from_stream(&mut stream)?; - match ms_format { - MassSpectrometryFormat::MGF => self.writer_then(MGFReader::new(stream))?, - MassSpectrometryFormat::MzML => { - self.writer_then(MzMLReader::new(io::BufReader::new(stream)))? - } - _ => { - eprintln!("Could not infer input format from STDIN"); - exit(1) - } - } - } else { - let (ms_format, _compressed) = infer_format(&self.inpath)?; - match ms_format { - MassSpectrometryFormat::MGF => { - let reader = MGFReader::open_path(&self.inpath)?; - self.writer_then(reader)?; - } - MassSpectrometryFormat::MzML => { - let reader = MzMLReader::open_path(&self.inpath)?; - self.writer_then(reader)?; - } - #[cfg(feature = "mzmlb")] - MassSpectrometryFormat::MzMLb => { - let reader = mzmlb::MzMLbReader::open_path(&self.inpath)?; - self.writer_then(reader)?; - } - #[cfg(feature = "thermorawfilereader")] - MassSpectrometryFormat::ThermoRaw => { - let reader = ThermoRawReader::open_path(&self.inpath)?; - self.writer_then(reader)?; - } - _ => { - eprintln!("Could not infer input format from {}", self.inpath); - exit(1) - } - } - }; - Ok(()) + let source = if self.inpath == "-" { + Source::Stdin + } else {Source::<_, _>::from(self.inpath.as_ref())}; + let sink = Sink::::from(self.outpath.as_ref()); + self.open_reader(source, sink) } - fn writer_then( - &self, - reader: R, - ) -> io::Result<()> { - match infer_from_path(&self.outpath).0 { - MassSpectrometryFormat::MGF => { - let mut writer = - MGFWriter::new(io::BufWriter::new(fs::File::create(&self.outpath)?)); - writer.copy_metadata_from(&reader); - self.task(reader, writer)?; - } - MassSpectrometryFormat::MzML => { - let mut writer = - MzMLWriter::new(io::BufWriter::new(fs::File::create(&self.outpath)?)); - writer.copy_metadata_from(&reader); - self.task(reader, writer)?; - } - #[cfg(feature = "mzmlb")] - MassSpectrometryFormat::MzMLb => { - let mut writer = mzmlb::MzMLbWriterBuilder::new(&self.outpath) - .with_zlib_compression(9) - .create()?; - writer.copy_metadata_from(&reader); - self.task(reader, writer)?; - } - _ => { - eprintln!("Could not infer output format from {}", self.outpath); - exit(1) - } - } - Ok(()) - } - - fn task( + fn task( &self, reader: R, mut writer: W, @@ -150,10 +70,10 @@ impl MassSpectrometryReadWriteProcess for MZConv fn task< R: RandomAccessSpectrumIterator - + ScanSource + + SpectrumSource + Send + 'static, - W: ScanWriter + Send + 'static, + W: SpectrumWriter + Send + 'static, >( &self, reader: R, @@ -161,6 +81,38 @@ impl MassSpectrometryReadWriteProcess for MZConv ) -> Result<(), Self::ErrorType> { self.task(reader, writer) } + + #[allow(unused)] + fn transform_writer< + R: RandomAccessSpectrumIterator + MSDataFileMetadata + SpectrumSource + Send + 'static, + W: SpectrumWriter + MSDataFileMetadata + Send + 'static, + >( + &self, + reader: R, + reader_format: mzdata::io::MassSpectrometryFormat, + mut writer: W, + writer_format: mzdata::io::MassSpectrometryFormat, + ) -> Result<(R, W), Self::ErrorType> { + if self.inpath != "-" { + let pb: PathBuf = self.inpath.clone().into(); + let checksum = checksum_file(&pb)?; + let has_already = reader.file_description().source_files.iter().flat_map(|f| f.get_param_by_name("SHA-1").map(|c| c.value == checksum)).all(|a| a); + if !has_already { + let mut sf = SourceFile::default(); + sf.location = pb.parent().map(|p| format!("file://{}", p.to_string_lossy())).unwrap_or("file://".to_string()); + sf.name = pb.file_name().map(|p| p.to_string_lossy().to_string()).unwrap_or("".to_string()); + let par = ControlledVocabulary::MS.param_val(1000569u32, "SHA-1", checksum); + sf.add_param(par); + sf.file_format = reader_format.as_param(); + + if let Some(ref_sf) = reader.file_description().source_files.last() { + sf.id_format = ref_sf.id_format.clone() + } + writer.file_description_mut().source_files.push(sf); + } + }; + Ok((reader, writer)) + } } fn main() -> io::Result<()> { @@ -174,7 +126,6 @@ fn main() -> io::Result<()> { eprintln!("Please provide a path to write an MS file to, or '-'"); exit(1) }); - let start = time::Instant::now(); let job = MZConvert::new(inpath, outpath); job.main()?; diff --git a/examples/readme.rs b/examples/readme.rs index 071dc16..7b9abb5 100644 --- a/examples/readme.rs +++ b/examples/readme.rs @@ -1,6 +1,6 @@ use std::fs; use mzdata::prelude::*; -use mzpeaks::{Tolerance, prelude::*}; +use mzpeaks::Tolerance; use mzdata::io::MzMLReader; use mzdata::spectrum::SignalContinuity; diff --git a/src/io.rs b/src/io.rs index b5d4128..ed0ae09 100644 --- a/src/io.rs +++ b/src/io.rs @@ -16,7 +16,7 @@ pub(crate) mod compression; pub use crate::io::infer_format::{ infer_format, infer_from_path, infer_from_stream, open_file, MassSpectrometryFormat, - MassSpectrometryReadWriteProcess + MassSpectrometryReadWriteProcess, Sink, Source }; pub use crate::io::mgf::{MGFError, MGFReader, MGFWriter}; #[cfg(feature = "async")] @@ -26,10 +26,10 @@ pub use crate::io::mzml::{MzMLParserError, MzMLReader, MzMLWriter}; pub use crate::io::mzmlb::{MzMLbError, MzMLbReader}; pub use crate::io::offset_index::OffsetIndex; pub use crate::io::traits::{ - MZFileReader, RandomAccessSpectrumIterator, ScanSource, ScanWriter, SpectrumAccessError, + MZFileReader, RandomAccessSpectrumIterator, SpectrumSource, SpectrumWriter, SpectrumAccessError, SpectrumGrouping, SpectrumIterator, StreamingSpectrumIterator, }; -pub use crate::io::utils::{DetailLevel, PreBufferedStream}; +pub use crate::io::utils::{DetailLevel, PreBufferedStream, checksum_file}; pub use compression::RestartableGzDecoder; #[cfg(feature = "thermorawfilereader")] diff --git a/src/io/infer_format.rs b/src/io/infer_format.rs index 13e1138..f0776b4 100644 --- a/src/io/infer_format.rs +++ b/src/io/infer_format.rs @@ -1,13 +1,15 @@ +use std::fmt::Display; use std::fs; use std::io::{self, prelude::*, BufReader}; use std::path::{self, Path, PathBuf}; -use std::sync::mpsc::{Sender, SyncSender}; +use std::sync::mpsc::{Receiver, Sender, SyncSender}; use flate2::{bufread::GzDecoder, write::GzEncoder}; use mzpeaks::{CentroidLike, CentroidPeak, DeconvolutedCentroidLike, DeconvolutedPeak}; use crate::io::PreBufferedStream; +use crate::params::ControlledVocabulary; #[cfg(feature = "mzmlb")] pub use crate::{ io::mzmlb::{MzMLbReaderType, MzMLbWriterBuilder}, @@ -17,11 +19,11 @@ pub use crate::{ use crate::io::compression::{is_gzipped, is_gzipped_extension, RestartableGzDecoder}; use crate::io::mgf::{is_mgf, MGFReaderType, MGFWriterType}; use crate::io::mzml::{is_mzml, MzMLReaderType, MzMLWriterType}; -use crate::io::traits::{RandomAccessSpectrumIterator, ScanSource, ScanWriter}; +use crate::io::traits::{RandomAccessSpectrumIterator, SpectrumSource, SpectrumWriter}; use crate::meta::MSDataFileMetadata; use crate::spectrum::bindata::{BuildArrayMapFrom, BuildFromArrayMap}; use crate::spectrum::MultiLayerSpectrum; -use crate::{MGFReader, MzMLReader}; +use crate::{MGFReader, MzMLReader, Param}; #[cfg(feature = "thermorawfilereader")] use super::thermo::{ThermoRawReader, ThermoRawReaderType, is_thermo_raw_prefix}; @@ -35,13 +37,31 @@ use super::traits::{SeekRead, SpectrumReceiver, StreamingSpectrumIterator}; pub enum MassSpectrometryFormat { MGF, MzML, - #[cfg(feature = "mzmlb")] MzMLb, - #[cfg(feature = "thermorawfilereader")] ThermoRaw, Unknown, } +impl MassSpectrometryFormat { + + pub fn as_param(&self) -> Option { + let p = match self { + MassSpectrometryFormat::MGF => ControlledVocabulary::MS.const_param_ident("Mascot MGF format", 1001062), + MassSpectrometryFormat::MzML => ControlledVocabulary::MS.const_param_ident("MzML format", 1000584), + MassSpectrometryFormat::MzMLb => ControlledVocabulary::MS.const_param_ident("mzMLb format", 1002838), + MassSpectrometryFormat::ThermoRaw => ControlledVocabulary::MS.const_param_ident("Thermo RAW format", 1000563), + MassSpectrometryFormat::Unknown => return None, + }; + Some(p.into()) + } +} + +impl Display for MassSpectrometryFormat { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self) + } +} + /// Given a path, infer the file format and whether or not the file at that path is /// GZIP compressed pub fn infer_from_path>(path: P) -> (MassSpectrometryFormat, bool) { @@ -123,7 +143,7 @@ pub fn infer_format>(path: P) -> io::Result<(MassSpectrom /// Given a local file system path, infer the file format, and attempt to open it /// for reading. -pub fn open_file>(path: P) -> io::Result> { +pub fn open_file>(path: P) -> io::Result> { let path = path.into(); let (format, is_gzipped) = infer_format(path.clone())?; @@ -163,7 +183,7 @@ pub fn open_file>(path: P) -> io::Result, MassSpectrometryFormat) + Reader(Box, Option) +} + +impl + BuildArrayMapFrom + BuildFromArrayMap + Clone + 'static + Sync + Send, D: DeconvolutedCentroidLike + Default + From + BuildArrayMapFrom + BuildFromArrayMap + Clone + Sync + 'static + Send> Source { + + pub fn index_file_name(&self) -> Option { + match &self { + Self::PathLike(path) => { + if let Some(stem) = path.file_name() { + if let Some(parent) = path.parent() { + let base = parent.join(stem); + let name = base.with_extension("index.json"); + return Some(name); + } + } + None + } + _ => None + } + } + + pub fn has_index_file(&self) -> bool { + match self.index_file_name() { + Some(path) => path.exists(), + None => false, + } + } +} + +impl + BuildArrayMapFrom + BuildFromArrayMap + Clone + 'static + Sync + Send, + D: DeconvolutedCentroidLike + Default + From + BuildArrayMapFrom + BuildFromArrayMap + Clone + Sync + 'static + Send> From<&Path> for Source { + fn from(value: &Path) -> Self { + Self::PathLike(value.into()) + } +} + + +impl + BuildArrayMapFrom + BuildFromArrayMap + Clone + 'static + Sync + Send, + D: DeconvolutedCentroidLike + Default + From + BuildArrayMapFrom + BuildFromArrayMap + Clone + Sync + 'static + Send> From for Source { + fn from(value: String) -> Self { + Self::PathLike(value.into()) + } +} + +impl + BuildArrayMapFrom + BuildFromArrayMap + Clone + 'static + Sync + Send, + D: DeconvolutedCentroidLike + Default + From + BuildArrayMapFrom + BuildFromArrayMap + Clone + Sync + 'static + Send> From>> for Source { + fn from(value: SpectrumReceiver>) -> Self { + Self::Receiver(value.into()) + } } -impl, +impl + BuildArrayMapFrom + BuildFromArrayMap + Clone + 'static + Sync + Send, + D: DeconvolutedCentroidLike + Default + From + BuildArrayMapFrom + BuildFromArrayMap + Clone + Sync + 'static + Send> From>> for Source { + fn from(value: Receiver>) -> Self { + Self::Receiver(value.into()) + } +} + +impl< C: CentroidLike + Default + From + BuildArrayMapFrom + BuildFromArrayMap + Clone + 'static + Sync + Send, - D: DeconvolutedCentroidLike + Default + From + BuildArrayMapFrom + BuildFromArrayMap + Clone + Sync + 'static + Send> From

for Source { - fn from(value: P) -> Self { - Self::PathLike(value.as_ref().into()) + D: DeconvolutedCentroidLike + Default + From + BuildArrayMapFrom + BuildFromArrayMap + Clone + Sync + 'static + Send> From<(Box, MassSpectrometryFormat)> for Source { + fn from(value: (Box, MassSpectrometryFormat)) -> Self { + Self::Reader(value.0, Some(value.1)) } } +impl< + C: CentroidLike + Default + From + BuildArrayMapFrom + BuildFromArrayMap + Clone + 'static + Sync + Send, + D: DeconvolutedCentroidLike + Default + From + BuildArrayMapFrom + BuildFromArrayMap + Clone + Sync + 'static + Send> From> for Source { + fn from(value: Box) -> Self { + Self::Reader(value, None) + } +} /// An abstraction over places to write spectra pub enum Sink, MassSpectrometryFormat) } -impl, - C: CentroidLike + Default + From + BuildArrayMapFrom + BuildFromArrayMap + Clone + 'static + Sync + Send, - D: DeconvolutedCentroidLike + Default + From + BuildArrayMapFrom + BuildFromArrayMap + Clone + Sync + 'static + Send> From

for Sink { - fn from(value: P) -> Self { - Self::PathLike(value.as_ref().into()) +impl + BuildArrayMapFrom + BuildFromArrayMap + Clone + 'static + Sync + Send, + D: DeconvolutedCentroidLike + Default + From + BuildArrayMapFrom + BuildFromArrayMap + Clone + Sync + 'static + Send> + From<(Box, MassSpectrometryFormat)> for Sink { + fn from(value: (Box, MassSpectrometryFormat)) -> Self { + Self::Writer(value.0, value.1) + } +} + +impl + BuildArrayMapFrom + BuildFromArrayMap + Clone + 'static + Sync + Send, + D: DeconvolutedCentroidLike + Default + From + BuildArrayMapFrom + BuildFromArrayMap + Clone + Sync + 'static + Send> From<&Path> for Sink { + fn from(value: &Path) -> Self { + Self::PathLike(value.into()) + } +} + +impl + BuildArrayMapFrom + BuildFromArrayMap + Clone + 'static + Sync + Send, + D: DeconvolutedCentroidLike + Default + From + BuildArrayMapFrom + BuildFromArrayMap + Clone + Sync + 'static + Send> From for Sink { + fn from(value: String) -> Self { + Self::PathLike(value.into()) + } +} + +impl + BuildArrayMapFrom + BuildFromArrayMap + Clone + 'static + Sync + Send, + D: DeconvolutedCentroidLike + Default + From + BuildArrayMapFrom + BuildFromArrayMap + Clone + Sync + 'static + Send> From>> for Sink { + fn from(value: Sender>) -> Self { + Self::Sender(value.into()) + } +} + +impl + BuildArrayMapFrom + BuildFromArrayMap + Clone + 'static + Sync + Send, + D: DeconvolutedCentroidLike + Default + From + BuildArrayMapFrom + BuildFromArrayMap + Clone + Sync + 'static + Send> From>> for Sink { + fn from(value: SyncSender>) -> Self { + Self::SyncSender(value.into()) } } +/// Encapsulate the read-transform-write process for mass spectrometry data sources. +/// +/// This trait handles all the gory details of file format inference with [`open_reader`](MassSpectrometryReadWriteProcess::open_reader) +/// and [`open_writer`](MassSpectrometryReadWriteProcess::open_writer), leaving open the chance to customize those objects after their +/// creation in [`transform_reader`](MassSpectrometryReadWriteProcess::transform_reader) and [`transform_writer`](MassSpectrometryReadWriteProcess::transform_writer) respectively. +/// +/// The only function that must be implemented explicitly is [`task`](MassSpectrometryReadWriteProcess::task) which receives +/// the reader and writer, and must contain the logic to transmit one from the other +/// with whatever transformations you wish to apply between them. pub trait MassSpectrometryReadWriteProcess< C: CentroidLike + Default @@ -263,6 +382,22 @@ pub trait MassSpectrometryReadWriteProcess< { type ErrorType: From; + /// The main entry point that starts the whole system running on a reader [`Source`] + /// and a writer [`Sink`], or equivalent objects. + /// + /// By default this just invokes [`MassSpectrometryReadWriteProcess::open_reader`], but if any additional + /// configuration needs to be done before that happens, it can be done here. + /// Examples include creating a thread pool, temporary files or directories, + /// or some other scoped activity. + fn main>, Q: Into>>( + &self, + read_path: P, + write_path: Q, + ) -> Result<(), Self::ErrorType> { + self.open_reader(read_path, write_path) + } + + /// Opens the reader, transforms it with [`MassSpectrometryReadWriteProcess::transform_reader`], and then passes control to [`MassSpectrometryReadWriteProcess::open_writer`] fn open_reader>, Q: Into>>( &self, read_path: P, @@ -326,7 +461,12 @@ pub trait MassSpectrometryReadWriteProcess< .into()), } }, - Source::Reader(handle, format) => { + Source::Reader(mut handle, format) => { + let (format, _is_gzipped) = if let Some(format) = format { + (format, false) + } else { + infer_from_stream(&mut handle)? + }; match format { MassSpectrometryFormat::MGF => { let handle = io::BufReader::new(handle); @@ -405,9 +545,10 @@ pub trait MassSpectrometryReadWriteProcess< } } + /// Opens the writer, transforms it with [`MassSpectrometryReadWriteProcess::transform_writer`], and then passes control to [`MassSpectrometryReadWriteProcess::task`] fn open_writer< Q: Into>, - R: RandomAccessSpectrumIterator + MSDataFileMetadata + ScanSource + Send + 'static, + R: RandomAccessSpectrumIterator + MSDataFileMetadata + SpectrumSource + Send + 'static, >( &self, reader: R, @@ -531,9 +672,13 @@ pub trait MassSpectrometryReadWriteProcess< } } + /// Customize the reader in some way. The format is passed along to allow each format + /// to be customized explicitly. + /// + /// A no-op by default. #[allow(unused)] fn transform_reader< - R: RandomAccessSpectrumIterator + ScanSource + Send + 'static, + R: RandomAccessSpectrumIterator + MSDataFileMetadata + SpectrumSource + Send + 'static, >( &self, reader: R, @@ -542,10 +687,18 @@ pub trait MassSpectrometryReadWriteProcess< Ok(reader) } + /// Customize the writer in some way. The format is passed along to allow each format + /// to be customized explicitly, and the reader is provided side-by-side to permit additional + /// information to be used. + /// + /// A no-op by default. + /// + /// # Note + /// The caller already invokes [`MSDataFileMetadata::copy_metadata_from`] #[allow(unused)] fn transform_writer< - R: RandomAccessSpectrumIterator + ScanSource + Send + 'static, - W: ScanWriter + Send + 'static, + R: RandomAccessSpectrumIterator + MSDataFileMetadata + SpectrumSource + Send + 'static, + W: SpectrumWriter + MSDataFileMetadata + Send + 'static, >( &self, reader: R, @@ -556,9 +709,11 @@ pub trait MassSpectrometryReadWriteProcess< Ok((reader, writer)) } + /// The place where the work happens to transmit data from `reader` to `writer` with whatever transformations + /// need to take place. fn task< - R: RandomAccessSpectrumIterator + ScanSource + Send + 'static, - W: ScanWriter + Send + 'static, + R: RandomAccessSpectrumIterator + MSDataFileMetadata + SpectrumSource + Send + 'static, + W: SpectrumWriter + Send + 'static, >( &self, reader: R, @@ -566,6 +721,297 @@ pub trait MassSpectrometryReadWriteProcess< ) -> Result<(), Self::ErrorType>; } + +/// A macro that dynamically works out how to get a [`SpectrumSource`]-derived object +/// from a path or [`io::Read`](std::io::Read) + [`io::Seek`](std::io::Seek) boxed object. +/// This is meant to be a convenience for working with a scoped file reader +/// without penalty. +/// +/// `$source` is coerced into a [`Source`] which the macro in turn probes to determine +/// the appropriate file reading type. Unlike [`open_file`], this macro does not actually +/// return the reading type behind an opaque `Box`, but lets you interact +/// with the concrete type intersection without concern with object safety in an anonymous closure: +/// +/// ```no_run +/// let spectra: Vec = mz_read!( +/// "./test/data/small.mzML".as_ref(), +/// reader => { reader.collect() } +/// )?; +/// ``` +/// The closure will return a `std::io::Result` whose success value is inferred from context. The +/// reader's lifetime is bound to the closure, and cannot be extracted without substantial type system +/// torture. +/// +/// If you want to use peak types *other than* the simple defaults, pass them as additional parameters after +/// the closure. +#[macro_export] +macro_rules! mz_read { + ($source:expr, $reader:ident => $impl:tt) => { + $crate::mz_read!($source, $reader => $impl, mzpeaks::CentroidPeak, mzpeaks::DeconvolutedPeak) + }; + ($source:expr, $reader:ident => $impl:tt, $C:ty, $D:ty) => {{ + let source = $crate::io::Source::<_, _>::from($source); + match source { + $crate::io::Source::PathLike(read_path) => { + let (format, is_gzipped) = $crate::io::infer_format(&read_path)?; + match format { + $crate::io::MassSpectrometryFormat::MGF => { + let handle = std::fs::File::open(read_path)?; + if is_gzipped { + let fh = $crate::io::RestartableGzDecoder::new(std::io::BufReader::new(handle)); + #[allow(unused_mut)] + let mut $reader: $crate::io::StreamingSpectrumIterator<$C, $D, _, _> = $crate::io::StreamingSpectrumIterator::new($crate::io::mgf::MGFReaderType::<_, $C, $D>::new(fh)); + Ok($impl) + } else { + #[allow(unused_mut)] + let mut $reader: $crate::io::mgf::MGFReaderType<_, $C, $D> = $crate::io::mgf::MGFReaderType::new_indexed(handle); + Ok($impl) + } + } + $crate::io::MassSpectrometryFormat::MzML => { + let handle = std::fs::File::open(read_path)?; + + if is_gzipped { + let fh = $crate::io::RestartableGzDecoder::new(std::io::BufReader::new(handle)); + #[allow(unused_mut)] + let mut $reader: $crate::io::StreamingSpectrumIterator<$C, $D, _, _> = $crate::io::StreamingSpectrumIterator::new($crate::io::mzml::MzMLReaderType::<_, $C, $D>::new(fh)); + Ok($impl) + } else { + #[allow(unused_mut)] + let mut $reader: $crate::io::mzml::MzMLReaderType<_, $C, $D> = $crate::io::mzml::MzMLReaderType::<_, $C, $D>::new_indexed(handle); + Ok($impl) + } + } + #[cfg(feature = "mzmlb")] + $crate::io::MassSpectrometryFormat::MzMLb => { + #[allow(unused_mut)] + let mut $reader: $crate::io::mzmlb::MzMLbReaderType<$C, $D> = $crate::io::mzmlb::MzMLbReaderType::<$C, $D>::new(&read_path)?; + Ok($impl) + }, + #[cfg(feature = "thermorawfilereader")] + $crate::io::MassSpectrometryFormat::ThermoRaw => { + #[allow(unused_mut)] + let mut $reader: $crate::io::thermo::ThermoRawReaderType<$C, $D> = $crate::io::thermo::ThermoRawReaderType::<$C, $D>::new(&read_path)?; + Ok($impl) + }, + _ => Err(std::io::Error::new( + std::io::ErrorKind::Unsupported, + format!( + "Input file format for {} not supported", + read_path.display() + ), + )), + } + }, + $crate::io::Source::Reader(mut handle, format) => { + let (format, is_gzipped) = if let Some(format) = format { (format, false) } else { $crate::io::infer_from_stream(&mut handle)? }; + match format { + $crate::io::MassSpectrometryFormat::MGF => { + let handle = std::io::BufReader::new(handle); + #[allow(unused_mut)] + if is_gzipped { + let fh = $crate::io::RestartableGzDecoder::new(std::io::BufReader::new(handle)); + #[allow(unused_mut)] + let mut $reader: $crate::io::StreamingSpectrumIterator<$C, $D, _, _> = $crate::io::StreamingSpectrumIterator::new($crate::io::mgf::MGFReaderType::<_, $C, $D>::new(fh)); + Ok($impl) + } else { + #[allow(unused_mut)] + let mut $reader: $crate::io::mgf::MGFReaderType<_, $C, $D> = $crate::io::mgf::MGFReaderType::new_indexed(handle); + Ok($impl) + } + }, + $crate::io::MassSpectrometryFormat::MzML => { + let handle = io::BufReader::new(handle); + #[allow(unused_mut)] + if is_gzipped { + let fh = $crate::io::RestartableGzDecoder::new(std::io::BufReader::new(handle)); + #[allow(unused_mut)] + let mut $reader: $crate::io::StreamingSpectrumIterator<$C, $D, _, _> = $crate::io::StreamingSpectrumIterator::new($crate::io::mzml::MzMLReaderType::<_, $C, $D>::new(fh)); + Ok($impl) + } else { + #[allow(unused_mut)] + let mut $reader: $crate::io::mzml::MzMLReaderType<_, $C, $D> = $crate::io::mzml::MzMLReaderType::<_, $C, $D>::new_indexed(handle); + Ok($impl) + } + }, + _ => Err(std::io::Error::new( + std::io::ErrorKind::Unsupported, + format!( + "Input file format for {:?} not supported from an io::Read", + format + ), + )), + } + }, + $crate::io::Source::Receiver(receiver) => { + #[allow(unused_mut)] + let mut $reader: $crate::io::StreamingSpectrumIterator<$C, $D, _, _> = $crate::io::StreamingSpectrumIterator::new(receiver); + Ok($impl) + }, + $crate::io::Source::Stdin => { + let mut buffered = + $crate::io::PreBufferedStream::new_with_buffer_size(std::io::stdin(), 2usize.pow(20))?; + let (ms_format, compressed) = $crate::io::infer_from_stream(&mut buffered)?; + match ms_format { + $crate::io::MassSpectrometryFormat::MGF => { + if compressed { + #[allow(unused_mut)] + let mut $reader: $crate::io::StreamingSpectrumIterator<$C, $D, _, _> = $crate::io::StreamingSpectrumIterator::new( + $crate::io::mgf::MGFReaderType::new( + $crate::io::RestartableGzDecoder::new(std::io::BufReader::new(buffered)), + )); + Ok($impl) + } else { + #[allow(unused_mut)] + let mut $reader: $crate::io::StreamingSpectrumIterator<$C, $D, _, _> = $crate::io::StreamingSpectrumIterator::new( + $crate::io::mgf::MGFReaderType::new(buffered)); + Ok($impl) + } + } + $crate::io::MassSpectrometryFormat::MzML => { + if compressed { + #[allow(unused_mut)] + let mut $reader: $crate::io::StreamingSpectrumIterator<$C, $D, _, _> = $crate::io::StreamingSpectrumIterator::new( + $crate::io::mzml::MzMLReaderType::new($crate::io::RestartableGzDecoder::new(std::io::BufReader::new(buffered)), + )); + Ok($impl) + } else { + #[allow(unused_mut)] + let mut $reader: $crate::io::StreamingSpectrumIterator<$C, $D, _, _> = $crate::io::StreamingSpectrumIterator::new( + $crate::io::mzml::MzMLReaderType::new(buffered)); + Ok($impl) + } + } + _ => { + return Err(std::io::Error::new( + std::io::ErrorKind::Unsupported, + "{ms_format:?} format is not supported over Stdin", + ).into()) + } + } + }, + } + }}; +} + +/// A macro that dynamically works out how to get a [`SpectrumWriter`](crate::io::SpectrumWriter) from a path +/// or [`io::Write`] boxed object. +/// +/// `$sink` is coerced to a [`Sink`] which in turn the macro probes in order to determine how +/// to create the appropriate writer type. Unlike other uses of [`Sink`], `Sender` and `SyncSender` +/// are not supported. It lets you interact with the concrete type intersection in an anonymous closure: +/// +/// ```no_run +/// mz_read!("./test/data/small.mzML".as_ref(), reader => { +/// mz_write!("./tmp/test.mzML".as_ref(), writer => { +/// writer.copy_metadata_from(&reader); +/// for s in reader { +/// writer.write_owned(s)?; +/// } +/// })?; +/// })?; +/// ``` +/// +/// The closure will return a `std::io::Result` whose success value is inferred from context. The +/// writer's lifetime is bound to the closure, and cannot be extracted without substantial type system +/// torture. +/// +/// If you want to use peak types *other than* the simple defaults, pass them as additional parameters after +/// the closure +#[macro_export] +macro_rules! mz_write { + ($sink:expr, $writer:ident => $impl:tt) => { + mz_write!($sink, $writer => $impl, mzpeaks::CentroidPeak, mzpeaks::DeconvolutedPeak) + }; + ($sink:expr, $writer:ident => $impl:tt, $C:ty, $D:ty) => {{ + let sink = $crate::io::Sink::<$C, $D>::from($sink); + match sink { + $crate::io:: Sink::Sender(_) | $crate::io::Sink::SyncSender(_) => { + Err(std::io::Error::new(std::io::ErrorKind::Unsupported, "Sender writers aren't supported by `mz_write`")) + } + $crate::io::Sink::PathLike(write_path) => { + let (writer_format, is_gzip) = $crate::io::infer_from_path(&write_path); + match writer_format { + $crate::io::MassSpectrometryFormat::MGF => { + let handle = std::io::BufWriter::new(std::fs::File::create(&write_path)?); + if is_gzip { + let handle = flate2::write::GzEncoder::new(handle, flate2::Compression::best()); + let mut $writer: $crate::io::mgf::MGFWriterType<_, $C, $D> = $crate::io::mgf::MGFWriterType::new( + handle, + ); + Ok($impl) + } else { + let mut $writer: $crate::io::mgf::MGFWriterType<_, $C, $D> = $crate::io::mgf::MGFWriterType::new( + handle, + ); + Ok($impl) + + } + } + $crate::io::MassSpectrometryFormat::MzML => { + let handle = std::io::BufWriter::new(std::fs::File::create(&write_path)?); + if is_gzip { + let handle = flate2::write::GzEncoder::new(handle, flate2::Compression::best()); + let mut $writer: $crate::io::mzml::MzMLWriterType<_, $C, $D> = $crate::io::mzml::MzMLWriterType::new( + handle, + ); + Ok($impl) + } else { + let mut $writer: $crate::io::mzml::MzMLWriterType<_, $C, $D> = $crate::io::mzml::MzMLWriterType::new( + handle, + ); + Ok($impl) + } + } + #[cfg(feature = "mzmlb")] + $crate::io::MassSpectrometryFormat::MzMLb => { + let mut $writer = $crate::io::mzmlb::MzMLbWriterBuilder::<$C, $D>::new(&write_path) + .with_zlib_compression(9) + .create()?; + Ok($impl) + } + _ => Err(std::io::Error::new( + std::io::ErrorKind::Unsupported, + format!( + "Output file format {:?} for {} not supported", + writer_format, + write_path.display() + ), + )), + } + }, + $crate::io::Sink::Writer(handle, writer_format) => { + match writer_format { + $crate::io::MassSpectrometryFormat::MGF => { + let handle = std::io::BufWriter::new(handle); + let mut $writer: $crate::io::mgf::MGFWriterType<_, $C, $D> = $crate::io::mgf::MGFWriterType::new( + handle, + ); + Ok($impl) + } + $crate::io::MassSpectrometryFormat::MzML => { + let handle = std::io::BufWriter::new(handle); + let mut $writer: MzMLWriterType<_, $C, $D> = MzMLWriterType::new( + handle, + ); + Ok($impl) + } + _ => { + Err(std::io::Error::new( + std::io::ErrorKind::Unsupported, + format!( + "Output file format for {:?} not supported", + writer_format + ), + )) + } + } + } + } + }}; +} + + #[cfg(test)] mod test { use crate::{ @@ -615,4 +1061,51 @@ mod test { panic!("Failed to open file") } } + + #[test] + fn test_source_conv() -> io::Result<()> { + let s = Source::::from("text/path".as_ref()); + assert!(matches!(s, Source::PathLike(_))); + + let fh = Box::new(io::BufReader::new(fs::File::open("./test/data/small.mgf")?)) as Box; + let rs: Source = (fh, MassSpectrometryFormat::MGF).into(); + assert!(matches!(rs, Source::Reader(_, _))); + + Ok(()) + } + + #[test] + fn test_mz_read() -> io::Result<()> { + let val: Vec<_> = mz_read!("./test/data/small.mzML".as_ref(), reader => { reader.collect() })?; + assert_eq!(val.len(), 48); + let val: Vec<_> = mz_read!("./test/data/small.mgf".as_ref(), reader => { reader.collect() })?; + assert_eq!(val.len(), 34); + let val = mz_read!("./test/data/small.mzML".as_ref(), reader => { reader.file_description().clone() })?; + assert_eq!(val.source_files.len(), 1); + Ok(()) + } + + #[test] + fn test_mz_read_nested() -> io::Result<()> { + mz_read!("./test/data/small.mzML".as_ref(), reader => { + mz_read!("./test/data/small.mzML".as_ref(), reader2 => { + assert_eq!(reader.len(), reader2.len()); + })?; + })?; + + Ok(()) + } + + #[test] + fn test_mz_write() -> io::Result<()> { + mz_read!("./test/data/small.mzML".as_ref(), reader => { + mz_write!("./tmp/test.mzML".as_ref(), writer => { + writer.copy_metadata_from(&reader); + for s in reader { + writer.write_owned(s)?; + } + })?; + })?; + Ok(()) + } } diff --git a/src/io/mgf.rs b/src/io/mgf.rs index 7faea98..e3e3249 100644 --- a/src/io/mgf.rs +++ b/src/io/mgf.rs @@ -26,7 +26,7 @@ use regex::Regex; use super::offset_index::OffsetIndex; use super::traits::{ - MZFileReader, RandomAccessSpectrumIterator, ScanSource, ScanWriter, SeekRead, + MZFileReader, RandomAccessSpectrumIterator, SpectrumSource, SpectrumWriter, SeekRead, SpectrumAccessError, }; use super::utils::DetailLevel; @@ -557,7 +557,7 @@ impl< R: SeekRead, C: CentroidPeakAdapting + From, D: DeconvolutedPeakAdapting + From, - > ScanSource> for MGFReaderType + > SpectrumSource> for MGFReaderType { /// Retrieve a spectrum by it's native ID fn get_spectrum_by_id(&mut self, id: &str) -> Option> { @@ -811,8 +811,8 @@ TITLE="#, .and_then(|p| Some((p.value.clone(), true))) .unwrap_or_else(|| (self.make_title(spectrum), false)); self.handle.write_all(title.as_bytes())?; - self.write_kv("NATIVEID", spectrum.id())?; - self.handle.write_all(b"\nRTINSECONDS=")?; + self.write_kv("\nNATIVEID", spectrum.id())?; + self.handle.write_all(b"RTINSECONDS=")?; self.handle .write_all((spectrum.start_time() * 60.0).to_string().as_bytes())?; self.handle.write_all(b"\n")?; @@ -961,7 +961,7 @@ impl< W: io::Write, C: CentroidPeakAdapting + From + 'static, D: DeconvolutedPeakAdapting + From + 'static, - > ScanWriter for MGFWriterType + > SpectrumWriter for MGFWriterType { fn write + 'static>(&mut self, spectrum: &S) -> io::Result { if spectrum.ms_level() != 1 { diff --git a/src/io/mzml.rs b/src/io/mzml.rs index e96889f..d236004 100644 --- a/src/io/mzml.rs +++ b/src/io/mzml.rs @@ -2,7 +2,7 @@ Implements a parser for the PSI-MS mzML and indexedmzML XML file formats for representing raw and processed mass spectra, providing a [`RandomAccessSpectrumIterator`](crate::io::traits::RandomAccessSpectrumIterator) -interface for reading, and [`ScanWriter`](crate::io::traits::ScanWriter) +interface for reading, and [`SpectrumWriter`](crate::io::traits::SpectrumWriter) interface for writing. The mzML format is standardized by the Proteomics Standards Initiative (PSI), with diff --git a/src/io/mzml/async.rs b/src/io/mzml/async.rs index d2ca732..e459a53 100644 --- a/src/io/mzml/async.rs +++ b/src/io/mzml/async.rs @@ -32,7 +32,7 @@ use crate::spectrum::spectrum::{ use super::super::offset_index::OffsetIndex; // Need to learn more about async traits // use super::super::traits::{ -// MZFileReader, RandomAccessSpectrumIterator, ScanAccessError, ScanSource, +// MZFileReader, RandomAccessSpectrumIterator, ScanAccessError, SpectrumSource, // }; pub trait AsyncReadType: AsyncRead + AsyncReadExt {} @@ -591,7 +591,7 @@ impl IndexedMzMLIndexExtractor { /// an `indexedmzML` document and use the offset map to jump to immediately jump to a specific spectrum. /// /// **Note**: Because async traits are not yet stable, and this is currently the only asynchronous reader -/// in the library, this also re-creates the [`ScanSource`](crate::io::traits::ScanSource) API with +/// in the library, this also re-creates the [`SpectrumSource`](crate::io::traits::SpectrumSource) API with /// asynchronous execution. impl< R: AsyncReadType + AsyncSeek + AsyncSeekExt + Unpin + Sync, diff --git a/src/io/mzml/reader.rs b/src/io/mzml/reader.rs index cb46067..016b803 100644 --- a/src/io/mzml/reader.rs +++ b/src/io/mzml/reader.rs @@ -18,7 +18,7 @@ use crate::prelude::*; use super::super::offset_index::OffsetIndex; use super::super::traits::{ - MZFileReader, RandomAccessSpectrumIterator, ScanSource, SeekRead, SpectrumAccessError, + MZFileReader, RandomAccessSpectrumIterator, SpectrumSource, SeekRead, SpectrumAccessError, }; use super::reading_shared::EntryType; @@ -1537,7 +1537,7 @@ impl< R: SeekRead, C: CentroidPeakAdapting + BuildFromArrayMap, D: DeconvolutedPeakAdapting + BuildFromArrayMap, - > ScanSource> for MzMLReaderType + > SpectrumSource> for MzMLReaderType { /// Retrieve a spectrum by it's native ID fn get_spectrum_by_id(&mut self, id: &str) -> Option> { diff --git a/src/io/mzml/writer.rs b/src/io/mzml/writer.rs index f07990e..02484eb 100644 --- a/src/io/mzml/writer.rs +++ b/src/io/mzml/writer.rs @@ -14,7 +14,7 @@ use quick_xml::Error as XMLError; use quick_xml::Writer; use super::super::offset_index::OffsetIndex; -use super::super::traits::ScanWriter; +use super::super::traits::SpectrumWriter; use super::super::utils::MD5HashingStream; use mzpeaks::{CentroidPeak, DeconvolutedPeak}; @@ -436,7 +436,7 @@ pub struct MzMLWriterType< } impl<'a, W: Write, C: CentroidLike + Default + BuildArrayMapFrom, D: DeconvolutedCentroidLike + Default + BuildArrayMapFrom> - ScanWriter for MzMLWriterType + SpectrumWriter for MzMLWriterType { fn write + 'static>(&mut self, spectrum: &S) -> io::Result { match self.write_spectrum(spectrum) { @@ -466,7 +466,7 @@ impl(&mut self, source: &T) { + fn copy_metadata_from(&mut self, source: &impl MSDataFileMetadata) { *self.data_processings_mut() = source.data_processings().clone(); *self.instrument_configurations_mut() = source.instrument_configurations().clone(); *self.file_description_mut() = source.file_description().clone(); @@ -658,6 +658,12 @@ where attrib!("name", sf.name, tag); attrib!("location", sf.location, tag); self.handle.write_event(Event::Start(tag.borrow()))?; + if let Some(param) = sf.file_format.as_ref() { + self.handle.write_param(param)? + } + if let Some(param) = sf.id_format.as_ref() { + self.handle.write_param(param)? + } for param in sf.params() { self.handle.write_param(param)? } diff --git a/src/io/mzmlb.rs b/src/io/mzmlb.rs index a51bcd6..19a3d72 100644 --- a/src/io/mzmlb.rs +++ b/src/io/mzmlb.rs @@ -3,7 +3,7 @@ Implements a parser for the mzMLb file format for representing raw and processed mass spectra, providing a [`RandomAccessSpectrumIterator`](crate::io::traits::RandomAccessSpectrumIterator) -interface for reading, and [`ScanWriter`](crate::io::traits::ScanWriter) +interface for reading, and [`SpectrumWriter`](crate::io::traits::SpectrumWriter) interface for writing. The mzMLb format embeds a variant of the mzML format within an HDF5 file, storing diff --git a/src/io/mzmlb/reader.rs b/src/io/mzmlb/reader.rs index 62a5bc7..41c7eab 100644 --- a/src/io/mzmlb/reader.rs +++ b/src/io/mzmlb/reader.rs @@ -20,7 +20,7 @@ use crate::io::mzml::{ }; use crate::io::traits::MZFileReader; use crate::io::utils::DetailLevel; -use crate::io::{OffsetIndex, RandomAccessSpectrumIterator, ScanSource, SpectrumAccessError}; +use crate::io::{OffsetIndex, RandomAccessSpectrumIterator, SpectrumSource, SpectrumAccessError}; use crate::prelude::{MSDataFileMetadata, ParamLike}; use crate::meta::{ @@ -1068,7 +1068,7 @@ impl< impl< C: CentroidPeakAdapting + BuildFromArrayMap, D: DeconvolutedPeakAdapting + BuildFromArrayMap, - > ScanSource> for MzMLbReaderType + > SpectrumSource> for MzMLbReaderType { /// Retrieve a spectrum by it's native ID fn get_spectrum_by_id(&mut self, id: &str) -> Option> { diff --git a/src/io/mzmlb/writer.rs b/src/io/mzmlb/writer.rs index b5b0de3..4b6c567 100644 --- a/src/io/mzmlb/writer.rs +++ b/src/io/mzmlb/writer.rs @@ -23,7 +23,7 @@ use mzpeaks::{CentroidLike, CentroidPeak, DeconvolutedCentroidLike, Deconvoluted use quick_xml::events::{BytesStart, Event}; use thiserror::Error; -use crate::io::traits::ScanWriter; +use crate::io::traits::SpectrumWriter; use crate::meta::{ DataProcessing, FileDescription, InstrumentConfiguration, MassSpectrometryRun, Software, }; @@ -124,7 +124,7 @@ where self.mzml_writer.softwares_mut() } - fn copy_metadata_from(&mut self, source: &T) { + fn copy_metadata_from(&mut self, source: &impl MSDataFileMetadata) { self.mzml_writer.copy_metadata_from(source) } @@ -382,7 +382,7 @@ impl io::Write for ByteWriter { pub type WriterResult = Result<(), MzMLbWriterError>; -impl<'a, C: CentroidLike + Default, D: DeconvolutedCentroidLike + Default> ScanWriter +impl<'a, C: CentroidLike + Default, D: DeconvolutedCentroidLike + Default> SpectrumWriter for MzMLbWriterType where C: BuildArrayMapFrom, diff --git a/src/io/thermo.rs b/src/io/thermo.rs index 895b164..45965e5 100644 --- a/src/io/thermo.rs +++ b/src/io/thermo.rs @@ -1,7 +1,9 @@ //! Reader implementation for Thermo RAW files, [`ThermoRawReaderType`]. //! -//! Depends upon the [`thermorawfilereader`] crate which manages the self-hosted `dotnet` -//! runtime. +//! Depends upon the [`thermorawfilereader`] crate which manages the self-hosted `.NET` +//! runtime. You must still have a working [`.NET 8`](https://dotnet.microsoft.com/en-us/download/dotnet/8.0) runtime installed on the machine you +//! wish to run this on until Thermo's library supports .NET ahead-of-time compilation. For scripted installation of the .NET runtime +//! see . //! //! ```no_run //! use std::io; diff --git a/src/io/thermo/reader.rs b/src/io/thermo/reader.rs index cd1e5f1..5d54b4d 100644 --- a/src/io/thermo/reader.rs +++ b/src/io/thermo/reader.rs @@ -1,7 +1,7 @@ -use std::{collections::HashMap, fs, io, marker::PhantomData, mem, path::PathBuf}; +use std::{collections::HashMap, io, marker::PhantomData, mem, path::PathBuf}; use crate::{ - io::{DetailLevel, OffsetIndex}, + io::{DetailLevel, OffsetIndex, utils::checksum_file}, meta::{ Component, ComponentType, DataProcessing, FileDescription, InstrumentConfiguration, Software, SourceFile, @@ -17,7 +17,6 @@ use crate::{ }; use mzpeaks::{peak_set::PeakSetVec, prelude::*, CentroidPeak, DeconvolutedPeak, MZ}; -use sha1::{self, Digest}; use thermorawfilereader::schema::{ IonizationMode, MassAnalyzer, Polarity, SpectrumData, SpectrumMode, @@ -48,21 +47,6 @@ pub fn is_thermo_raw_prefix(buffer: &[u8]) -> bool { prefix == "Finnigan" } -fn checksum_file(path: &PathBuf) -> io::Result { - let mut checksum = sha1::Sha1::new(); - let mut reader = io::BufReader::new(fs::File::open(path)?); - let mut buf = Vec::new(); - buf.resize(2usize.pow(20), 0); - while let Ok(i) = reader.read(&mut buf) { - if i == 0 { - break; - } - checksum.update(&buf[..i]); - } - let x = base16ct::lower::encode_string(&checksum.finalize()); - Ok(x) -} - /** A Thermo Fisher RAW file reader that supports iteration and random access. */ @@ -301,11 +285,6 @@ impl, D: DeconvolutedCentroidLike components_to_instrument_id .insert((vconf.ionization_mode(), vconf.mass_analyzer()), i as u32); config.add_param(model_type.to_param()); - config.add_param(Param::new_key_value( - "instrument model".to_string(), - descr.model().unwrap_or_default().to_string(), - )); - configs.insert(i as u32, config); } (sw, configs, components_to_instrument_id) @@ -590,7 +569,7 @@ impl, D: DeconvolutedCentroidLike } impl, D: DeconvolutedCentroidLike + Default> - ScanSource> for ThermoRawReaderType + SpectrumSource> for ThermoRawReaderType { fn reset(&mut self) { self.index = 0; diff --git a/src/io/traits.rs b/src/io/traits.rs index 0cd19f6..4fc49f9 100644 --- a/src/io/traits.rs +++ b/src/io/traits.rs @@ -27,8 +27,8 @@ impl SeekRead for T {} /// A base trait defining the behaviors of a source of spectra. /// -/// A [`ScanSource`] -pub trait ScanSource< +/// A [`SpectrumSource`] +pub trait SpectrumSource< C: CentroidLike + Default = CentroidPeak, D: DeconvolutedCentroidLike + Default = DeconvolutedPeak, S: SpectrumLike = MultiLayerSpectrum, @@ -140,7 +140,7 @@ pub trait ScanSource< } } -/// A generic iterator over a [`ScanSource`] implementer that assumes the +/// A generic iterator over a [`SpectrumSource`] implementer that assumes the /// source has already been indexed. Otherwise, the source's own iterator /// behavior should be used. pub struct SpectrumIterator< @@ -148,10 +148,10 @@ pub struct SpectrumIterator< C: CentroidLike + Default, D: DeconvolutedCentroidLike + Default, S: SpectrumLike, - R: ScanSource, + R: SpectrumSource, > { source: &'lifespan mut R, - phantom: PhantomData, + spectrum_type: PhantomData, centroid_type: PhantomData, deconvoluted_type: PhantomData, index: usize, @@ -162,7 +162,7 @@ impl< 'lifespan, C: CentroidLike + Default, D: DeconvolutedCentroidLike + Default, - R: ScanSource, + R: SpectrumSource, S: SpectrumLike, > SpectrumIterator<'lifespan, C, D, S, R> { @@ -171,7 +171,7 @@ impl< source, index: 0, back_index: 0, - phantom: PhantomData, + spectrum_type: PhantomData, centroid_type: PhantomData, deconvoluted_type: PhantomData, } @@ -182,7 +182,7 @@ impl< 'lifespan, C: CentroidLike + Default, D: DeconvolutedCentroidLike + Default, - R: ScanSource, + R: SpectrumSource, S: SpectrumLike, > Iterator for SpectrumIterator<'lifespan, C, D, S, R> { @@ -202,7 +202,7 @@ impl< 'lifespan, C: CentroidLike + Default, D: DeconvolutedCentroidLike + Default, - R: ScanSource, + R: SpectrumSource, S: SpectrumLike, > ExactSizeIterator for SpectrumIterator<'lifespan, C, D, S, R> { @@ -215,7 +215,7 @@ impl< 'lifespan, C: CentroidLike + Default, D: DeconvolutedCentroidLike + Default, - R: ScanSource, + R: SpectrumSource, S: SpectrumLike, > DoubleEndedIterator for SpectrumIterator<'lifespan, C, D, S, R> { @@ -235,8 +235,8 @@ impl< C: CentroidLike + Default, D: DeconvolutedCentroidLike + Default, S: SpectrumLike, - R: ScanSource, - > ScanSource for SpectrumIterator<'lifespan, C, D, S, R> + R: SpectrumSource, + > SpectrumSource for SpectrumIterator<'lifespan, C, D, S, R> { fn reset(&mut self) { self.index = 0; @@ -270,7 +270,7 @@ impl< 'lifespan, C: CentroidLike + Default, D: DeconvolutedCentroidLike + Default, - R: ScanSource, + R: SpectrumSource, S: SpectrumLike, > MSDataFileMetadata for SpectrumIterator<'lifespan, C, D, S, R> where @@ -285,7 +285,7 @@ pub trait MZFileReader< C: CentroidLike + Default = CentroidPeak, D: DeconvolutedCentroidLike + Default = DeconvolutedPeak, S: SpectrumLike = MultiLayerSpectrum, ->: ScanSource + Sized +>: SpectrumSource + Sized { /// An on-trait method of constructing an index. Assumed /// to be a trivial wrapper. @@ -408,13 +408,13 @@ impl From for io::Error { } } -/// An extension of [`ScanSource`] that supports relocatable iteration relative to a +/// An extension of [`SpectrumSource`] that supports relocatable iteration relative to a /// specific spectrum coordinate or identifier. pub trait RandomAccessSpectrumIterator< C: CentroidLike + Default = CentroidPeak, D: DeconvolutedCentroidLike + Default = DeconvolutedPeak, S: SpectrumLike = MultiLayerSpectrum, ->: ScanSource +>: SpectrumSource { /// Start iterating from the spectrum whose native ID matches `id` fn start_from_id(&mut self, id: &str) -> Result<&mut Self, SpectrumAccessError>; @@ -431,7 +431,7 @@ impl< C: CentroidLike + Default, D: DeconvolutedCentroidLike + Default, S: SpectrumLike, - R: ScanSource, + R: SpectrumSource, > RandomAccessSpectrumIterator for SpectrumIterator<'lifespan, C, D, S, R> { /// Start iterating from the spectrum whose native ID matches `id` @@ -475,7 +475,39 @@ impl< } } -/// An alternative implementation of [`ScanSource`] for non-rewindable underlying streams +/// A union trait for [`SpectrumSource`] and [`RandomAccessSpectrumIterator`] +pub trait RandomAccessSpectrumSource< + C: CentroidLike + Default = CentroidPeak, + D: DeconvolutedCentroidLike + Default = DeconvolutedPeak, + S: SpectrumLike = MultiLayerSpectrum, +>: SpectrumSource + RandomAccessSpectrumIterator +{ +} + +impl< + C: CentroidLike + Default, + D: DeconvolutedCentroidLike + Default, + S: SpectrumLike, + T: SpectrumSource + RandomAccessSpectrumIterator, + > RandomAccessSpectrumSource for T +{ +} + +pub trait SpectrumSourceWithMetadata< + C: CentroidLike + Default = CentroidPeak, + D: DeconvolutedCentroidLike + Default = DeconvolutedPeak, + S: SpectrumLike = MultiLayerSpectrum, +>: SpectrumSource + MSDataFileMetadata {} + +impl< + C: CentroidLike + Default, + D: DeconvolutedCentroidLike + Default, + S: SpectrumLike, + T: SpectrumSource + MSDataFileMetadata, + > SpectrumSourceWithMetadata for T {} + + +/// An alternative implementation of [`SpectrumSource`] for non-rewindable underlying streams pub struct StreamingSpectrumIterator< C: CentroidLike + Default, D: DeconvolutedCentroidLike + Default, @@ -517,7 +549,7 @@ impl< D: DeconvolutedCentroidLike + Default, S: SpectrumLike, I: Iterator, - > ScanSource for StreamingSpectrumIterator + > SpectrumSource for StreamingSpectrumIterator { fn reset(&mut self) { panic!("Cannot reset StreamingSpectrumIterator") @@ -883,9 +915,9 @@ pub trait RandomAccessSpectrumGroupingIterator< /// A collection of spectra held in memory but providing an interface /// identical to a data file. This structure owns its data, so in order -/// to yield ownership for [`ScanSource`], they are cloned +/// to yield ownership for [`SpectrumSource`], they are cloned #[derive(Debug, Default)] -pub struct MemoryScanSource< +pub struct MemorySpectrumSource< C: CentroidLike + Default = CentroidPeak, D: DeconvolutedCentroidLike + Default = DeconvolutedPeak, S: SpectrumLike = MultiLayerSpectrum, @@ -901,7 +933,7 @@ impl< C: CentroidLike + Default, D: DeconvolutedCentroidLike + Default, S: SpectrumLike + Clone, - > MemoryScanSource + > MemorySpectrumSource { pub fn new(spectra: VecDeque) -> Self { let mut offsets = OffsetIndex::new("spectrum".to_string()); @@ -923,7 +955,7 @@ impl< C: CentroidLike + Default, D: DeconvolutedCentroidLike + Default, S: SpectrumLike + Clone, - > Iterator for MemoryScanSource + > Iterator for MemorySpectrumSource { type Item = S; @@ -943,7 +975,7 @@ impl< C: CentroidLike + Default, D: DeconvolutedCentroidLike + Default, S: SpectrumLike + Clone, - > ScanSource for MemoryScanSource + > SpectrumSource for MemorySpectrumSource { fn reset(&mut self) { self.position = 0; @@ -977,7 +1009,7 @@ impl< C: CentroidLike + Default, D: DeconvolutedCentroidLike + Default, S: SpectrumLike + Clone, - > RandomAccessSpectrumIterator for MemoryScanSource + > RandomAccessSpectrumIterator for MemorySpectrumSource { fn start_from_id(&mut self, id: &str) -> Result<&mut Self, SpectrumAccessError> { match self.offsets.get(id) { @@ -1013,7 +1045,7 @@ impl< C: CentroidLike + Default, D: DeconvolutedCentroidLike + Default, S: SpectrumLike + Clone, - > From> for MemoryScanSource + > From> for MemorySpectrumSource { fn from(value: VecDeque) -> Self { Self::new(value) @@ -1021,7 +1053,7 @@ impl< } /// Common interface for spectrum writing -pub trait ScanWriter< +pub trait SpectrumWriter< C: CentroidLike + Default = CentroidPeak, D: DeconvolutedCentroidLike + Default = DeconvolutedPeak, > @@ -1115,8 +1147,8 @@ mod test { #[test] fn test_object_safe() { - // If `ScanSource` were not object safe, this code + // If `SpectrumSource` were not object safe, this code // couldn't compile. - let _f = |_x: &dyn ScanSource| {}; + let _f = |_x: &dyn SpectrumSource| {}; } } diff --git a/src/io/utils.rs b/src/io/utils.rs index 00c7de8..d7d2d4d 100644 --- a/src/io/utils.rs +++ b/src/io/utils.rs @@ -3,9 +3,12 @@ use std::fs; use std::io; use std::path; +use std::path::PathBuf; +use std::io::prelude::*; use md5::Context as MD5Context; use md5::Digest; +use sha1::{self, Digest as _}; type ByteBuffer = io::Cursor>; @@ -30,7 +33,7 @@ pub enum DetailLevel { } #[derive(Debug, Clone, Default)] -pub struct FileSource { +pub(crate) struct FileSource { pub source: FileWrapper, } @@ -282,10 +285,25 @@ impl PreBufferedStream { } } +/// Compute a SHA-1 digest of a file path +pub fn checksum_file(path: &PathBuf) -> io::Result { + let mut checksum = sha1::Sha1::new(); + let mut reader = io::BufReader::new(fs::File::open(path)?); + let mut buf = Vec::new(); + buf.resize(2usize.pow(20), 0); + while let Ok(i) = reader.read(&mut buf) { + if i == 0 { + break; + } + checksum.update(&buf[..i]); + } + let x = base16ct::lower::encode_string(&checksum.finalize()); + Ok(x) +} + #[cfg(test)] mod test { use super::*; - use std::io::prelude::*; #[test] fn test_from_buffer() { diff --git a/src/lib.rs b/src/lib.rs index 5c2ab71..5a13b13 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,6 +12,11 @@ //! 2. mzML & indexedmzML files using [`MzMLWriter`] in [`mzdata::io::mzml`](crate::io::mzml) //! 3. mzMLb files using [`MzMLbWriter`] in [`mzdata::io::mzmlb`](crate::io::mzmlb), if the `mzmlb` feature is enabled //! +//! This menagerie of different formats and gzip compression or not can be inferred from a path or [`io::Read`](std::io::Read) using [`io::infer_format`] and [`io::infer_from_stream`]. +//! Conventional dispatch requires boxing behind a trait, which is complicated but possible through [`io::open_file`]. The [`mz_read`] macro provides a convenient +//! means of working with an unboxed value, but with a limited scope. The [`mz_write`] macro is the equivalent for opening a writer. +//! There are additional tools for dealing with file format dispatch under development. +//! //! It also includes a set of representation layers for spectra in [`mzdata::spectrum`](crate::spectrum) //! //! # Example @@ -44,7 +49,8 @@ //! //! ## Traits //! The library makes heavy use of traits to abstract over the implementation details of different file formats. -//! These traits are included in [`mzdata::prelude`](crate::prelude). +//! These traits are included in [`mzdata::prelude`](crate::prelude). It also imports [`mzpeaks::prelude`]. +//! //! pub mod io; pub mod meta; @@ -62,6 +68,9 @@ pub use crate::io::mzmlb::{ MzMLbError, MzMLbReader, MzMLbWriter, MzMLbWriterBuilder, MzMLbWriterError, }; +#[cfg(feature = "thermo")] +pub use crate::io::thermo::ThermoRawReader; + pub use crate::params::{Param, ParamList}; pub use crate::spectrum::{CentroidSpectrum, RawSpectrum, Spectrum}; diff --git a/src/main.rs b/src/main.rs index f23de3a..5054fd1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,53 +1,18 @@ use std::env; -use std::fs; use std::io; use std::path; use std::process; use std::thread::spawn; use std::time; - use std::collections::HashMap; use std::sync::mpsc::sync_channel; -use mzdata::io::infer_from_stream; -#[cfg(feature = "mzmlb")] -use mzdata::io::mzmlb; -use mzdata::io::MassSpectrometryFormat; -use mzdata::io::PreBufferedStream; -use mzdata::io::{mgf, mzml}; +use mzdata::io::Source; use mzdata::prelude::*; use mzdata::spectrum::{ DeconvolutedSpectrum, MultiLayerSpectrum, RefPeakDataLevel, SignalContinuity, SpectrumLike, }; -use mzpeaks::PeakCollection; - -fn load_file + Clone>(path: P) -> io::Result> { - let reader = mzml::MzMLReader::open_path(path)?; - Ok(reader) -} - -fn load_mgf_file + Clone>(path: P) -> io::Result> { - let reader = mgf::MGFReader::open_path(path)?; - Ok(reader) -} - -#[cfg(feature = "mzmlb")] -fn load_mzmlb_file + Clone>(path: P) -> io::Result { - let reader = mzmlb::MzMLbReader::open_path(&path.into())?; - let blosc_threads = match std::env::var("BLOSC_NUM_THREADS") { - Ok(val) => match val.parse() { - Ok(nt) => nt, - Err(e) => { - eprintln!("Failed to parse BLOSC_NUM_THREADS env var: {}", e); - 4 - } - }, - Err(_) => 4, - }; - mzmlb::MzMLbReader::set_blosc_nthreads(blosc_threads); - Ok(reader) -} #[derive(Default)] struct MSDataFileSummary { @@ -92,7 +57,7 @@ impl MSDataFileSummary { } } - pub fn _scan_file(&mut self, reader: &mut R) { + pub fn _scan_file(&mut self, reader: &mut R) { let start = time::Instant::now(); reader.enumerate().for_each(|(i, scan)| { if i % 10000 == 0 && i > 0 { @@ -111,11 +76,11 @@ impl MSDataFileSummary { println!("{:0.3} seconds elapsed", elapsed.as_secs_f64()); } - pub fn scan_file(&mut self, reader: R) { + pub fn scan_file(&mut self, reader: R) { self.scan_file_threaded(reader) } - pub fn scan_file_threaded(&mut self, reader: R) { + pub fn scan_file_threaded(&mut self, reader: R) { let start = time::Instant::now(); let (sender, receiver) = sync_channel(2usize.pow(12)); let read_handle = spawn(move || { @@ -196,47 +161,13 @@ fn main() -> io::Result<()> { let mut summarizer = MSDataFileSummary::default(); if path.as_os_str() == "-" { - let mut stream = PreBufferedStream::new(io::stdin())?; - match infer_from_stream(&mut stream)? { - (MassSpectrometryFormat::MGF, false) => { - let reader = mgf::MGFReader::new(io::BufReader::new(stream)); - summarizer.scan_file(reader) - } - (MassSpectrometryFormat::MzML, false) => { - let reader = mzml::MzMLReader::new(stream); - summarizer.scan_file(reader) - } - #[cfg(feature = "mzmlb")] - (MassSpectrometryFormat::MzMLb, _) => { - eprintln!("Cannot read mzMLb files from STDIN"); - process::exit(1); - } - (_, _) => { - eprintln!("Could not infer format from STDIN"); - process::exit(1); - } - } - } else if let Some(ext) = path.extension() { - if ext.to_string_lossy().to_lowercase() == "mzmlb" { - #[cfg(feature = "mzmlb")] - { - let reader = load_mzmlb_file(path)?; - summarizer.scan_file(reader) - } - #[cfg(not(feature = "mzmlb"))] - { - panic!("Cannot read mzMLb file. Recompile enabling the `mzmlb` feature") - } - } else if ext.to_string_lossy().to_lowercase() == "mgf" { - let reader = load_mgf_file(path)?; - summarizer.scan_file(reader) - } else { - let reader = load_file(path)?; + mzdata::mz_read!(Source::Stdin, reader => { summarizer.scan_file(reader) - } + })?; } else { - let reader = load_file(path)?; - summarizer.scan_file(reader) + mzdata::mz_read!(path.as_ref(), reader => { + summarizer.scan_file(reader) + })?; }; summarizer.write_out(); diff --git a/src/meta/file_description.rs b/src/meta/file_description.rs index e0f36fc..9546638 100644 --- a/src/meta/file_description.rs +++ b/src/meta/file_description.rs @@ -1,3 +1,7 @@ +use std::path::Path; +use std::io; + +use crate::io::infer_format; use crate::impl_param_described; use crate::params::{Param, ParamDescribed, ParamList, CURIE, ControlledVocabulary}; @@ -11,6 +15,18 @@ pub struct SourceFile { pub params: ParamList, } +impl SourceFile { + pub fn from_path>(path: P) -> io::Result { + let path = path.as_ref(); + let (format, _gz) = infer_format(path)?; + let mut inst = Self::default(); + inst.name = path.file_name().map(|n| n.to_string_lossy().to_string()).unwrap_or_default(); + inst.location = path.canonicalize()?.parent().map(|s| format!("file://{}", s.to_string_lossy())).unwrap_or_else(|| "file://".to_string()); + inst.file_format = format.as_param(); + Ok(inst) + } +} + /// A description of the file data file and its contents #[derive(Debug, Clone, Default, PartialEq, Eq)] pub struct FileDescription { diff --git a/src/meta/traits.rs b/src/meta/traits.rs index a26bf40..2d30497 100644 --- a/src/meta/traits.rs +++ b/src/meta/traits.rs @@ -31,7 +31,7 @@ pub trait MSDataFileMetadata { /// Copy the metadata from another [`MSDataFileMetadata`] implementation into /// this one. - fn copy_metadata_from(&mut self, source: &T) { + fn copy_metadata_from(&mut self, source: &impl MSDataFileMetadata) { *self.data_processings_mut() = source.data_processings().clone(); *self.instrument_configurations_mut() = source.instrument_configurations().clone(); *self.file_description_mut() = source.file_description().clone(); diff --git a/src/params.rs b/src/params.rs index 85242fb..d71d570 100644 --- a/src/params.rs +++ b/src/params.rs @@ -486,7 +486,33 @@ const UO_CV: &str = "UO"; const MS_CV_BYTES: &[u8] = MS_CV.as_bytes(); const UO_CV_BYTES: &[u8] = UO_CV.as_bytes(); -impl ControlledVocabulary { + +/// Anything that can be converted into an accession code portion of a [`CURIE`] +#[derive(Debug, Clone)] +pub enum AccessionLike<'a> { + Text(Cow<'a, str>), + Number(u32) +} + +impl<'a> From for AccessionLike<'a> { + fn from(value: u32) -> Self { + Self::Number(value) + } +} + +impl<'a> From<&'a str> for AccessionLike<'a> { + fn from(value: &'a str) -> Self { + Self::Text(Cow::Borrowed(value)) + } +} + +impl<'a> From for AccessionLike<'a> { + fn from(value: String) -> Self { + Self::Text(Cow::Owned(value)) + } +} + +impl<'a> ControlledVocabulary { pub const fn prefix(&self) -> Cow<'static, str> { match &self { Self::MS => Cow::Borrowed(MS_CV), @@ -510,17 +536,27 @@ impl ControlledVocabulary { } } - pub fn param, S: Into>(&self, accession: A, name: S) -> Param { + pub fn param>, S: Into>(&self, accession: A, name: S) -> Param { let mut param = Param::new(); param.controlled_vocabulary = Some(*self); param.name = name.into(); - if let Some(nb) = accession.as_ref().split(':').nth(1) { - param.accession = Some(nb.parse().unwrap_or_else(|_| { - panic!( - "Expected accession to be numeric, got {}", - accession.as_ref() - ) - })); + + let accession: AccessionLike = accession.into(); + + match accession { + AccessionLike::Text(s) => { + if let Some(nb) = s.split(":").last() { + param.accession = Some(nb.parse().unwrap_or_else(|_| { + panic!( + "Expected accession to be numeric, got {}", + s + ) + })) + } + }, + AccessionLike::Number(n) => { + param.accession = Some(n) + }, } param } @@ -558,7 +594,7 @@ impl ControlledVocabulary { self.const_param(name, "", accession, unit) } - pub fn param_val, A: AsRef, V: ToString>( + pub fn param_val, A: Into>, V: ToString>( &self, accession: A, name: S, diff --git a/src/prelude.rs b/src/prelude.rs index 70dc2dc..790e049 100644 --- a/src/prelude.rs +++ b/src/prelude.rs @@ -1,7 +1,8 @@ //! A set of foundational traits used throughout the library. pub use crate::io::traits::{ - MZFileReader, RandomAccessSpectrumGroupingIterator, RandomAccessSpectrumIterator, ScanSource, - ScanWriter, SeekRead, SpectrumAccessError, SpectrumGrouping, + MZFileReader, RandomAccessSpectrumGroupingIterator, RandomAccessSpectrumIterator, + RandomAccessSpectrumSource as _, SpectrumSourceWithMetadata as _, SpectrumSource, + SpectrumWriter, SeekRead, SpectrumAccessError, SpectrumGrouping, }; pub use crate::meta::MSDataFileMetadata; @@ -18,3 +19,5 @@ pub use crate::spectrum::group::SpectrumGroupAveraging; pub use std::convert::TryInto; #[doc(hidden)] pub use std::io::prelude::*; +#[doc(hidden)] +pub use mzpeaks::prelude::*; \ No newline at end of file diff --git a/src/spectrum/scan_properties.rs b/src/spectrum/scan_properties.rs index ccfcadb..c5ce1a0 100644 --- a/src/spectrum/scan_properties.rs +++ b/src/spectrum/scan_properties.rs @@ -3,7 +3,7 @@ use std::fmt::Display; use num_traits::Float; use super::spectrum::{CentroidPeakAdapting, DeconvolutedPeakAdapting, SpectrumLike}; -use crate::io::traits::ScanSource; +use crate::io::traits::SpectrumSource; use crate::params::{ControlledVocabulary, Param, ParamLike, Unit, CURIE}; use crate::{impl_param_described, ParamList}; @@ -556,7 +556,7 @@ pub struct Precursor { } impl Precursor { - /// Given a ScanSource object, look up the precursor scan in it. + /// Given a SpectrumSource object, look up the precursor scan in it. /// This is useful when examining the area *around* where the precursor /// ion was or to obtain a snapshot of the retention time when the spectrum /// was scheduled. @@ -565,7 +565,7 @@ impl Precursor { C: CentroidPeakAdapting, D: DeconvolutedPeakAdapting, S: SpectrumLike, - R: ScanSource, + R: SpectrumSource, { match self.precursor_id.as_ref() { Some(id) => source.get_spectrum_by_id(id), @@ -573,7 +573,7 @@ impl Precursor { } } - /// Given a ScanSource object, look up the product scan in it. + /// Given a SpectrumSource object, look up the product scan in it. /// This is rarely needed unless you have manually separated [`Precursor`] /// objects from their spectra. pub fn product_spectrum(&self, source: &mut R) -> Option @@ -581,7 +581,7 @@ impl Precursor { C: CentroidPeakAdapting, D: DeconvolutedPeakAdapting, S: SpectrumLike, - R: ScanSource, + R: SpectrumSource, { match self.product_id.as_ref() { Some(id) => source.get_spectrum_by_id(id), diff --git a/src/spectrum/utils.rs b/src/spectrum/utils.rs index d229404..c1673a7 100644 --- a/src/spectrum/utils.rs +++ b/src/spectrum/utils.rs @@ -212,7 +212,7 @@ impl Collator { impl< C: CentroidLike + Default + Send + BuildArrayMapFrom + BuildFromArrayMap + Clone, D: DeconvolutedCentroidLike + Default + Send + BuildArrayMapFrom + BuildFromArrayMap + Clone, - > ScanWriter for Collator> + > SpectrumWriter for Collator> { fn write + 'static>(&mut self, spectrum: &S) -> std::io::Result { let k = spectrum.index(); @@ -246,7 +246,7 @@ impl< impl< C: CentroidLike + Default + Send + BuildArrayMapFrom + BuildFromArrayMap + Clone, D: DeconvolutedCentroidLike + Default + Send + BuildArrayMapFrom + BuildFromArrayMap + Clone, - > ScanWriter for Sender> { + > SpectrumWriter for Sender> { fn write + 'static>(&mut self, spectrum: &S) -> std::io::Result { let k = spectrum.index(); let peaks = spectrum.peaks().cloned(); @@ -293,7 +293,7 @@ impl< impl< C: CentroidLike + Default + Send + BuildArrayMapFrom + BuildFromArrayMap + Clone, D: DeconvolutedCentroidLike + Default + Send + BuildArrayMapFrom + BuildFromArrayMap + Clone, - > ScanWriter for SyncSender> { + > SpectrumWriter for SyncSender> { fn write + 'static>(&mut self, spectrum: &S) -> std::io::Result { let k = spectrum.index(); let peaks = spectrum.peaks().cloned();