From 6cd260fd6d0277430ecedde7f4aa5fe2bf09c702 Mon Sep 17 00:00:00 2001 From: Johannes Rainer Date: Wed, 25 Sep 2024 08:25:29 +0200 Subject: [PATCH] docs: restructure documentation --- DESCRIPTION | 4 +- R/Spectra.R | 4 + man/addProcessing.Rd | 547 +++++++++++++++++++++++++++++++++ man/combinePeaks.Rd | 110 +++++++ man/combineSpectra.Rd | 240 +++++++++++++++ man/compareSpectra.Rd | 131 ++++++++ man/filterMsLevel.Rd | 689 ++++++++++++++++++++++++++++++++++++++++++ man/spectraData.Rd | 598 ++++++++++++++++++++++++++++++++++++ 8 files changed, 2321 insertions(+), 2 deletions(-) create mode 100644 man/addProcessing.Rd create mode 100644 man/combinePeaks.Rd create mode 100644 man/combineSpectra.Rd create mode 100644 man/compareSpectra.Rd create mode 100644 man/filterMsLevel.Rd create mode 100644 man/spectraData.Rd diff --git a/DESCRIPTION b/DESCRIPTION index a04e4ac3..0270d5db 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -40,9 +40,9 @@ Authors@R: c(person(given = "RforMassSpectrometry Package Maintainer", Depends: R (>= 4.0.0), S4Vectors, - BiocParallel, - ProtGenerics (>= 1.37.1) + BiocParallel Imports: + ProtGenerics (>= 1.37.1), methods, IRanges, MsCoreUtils (>= 1.7.5), diff --git a/R/Spectra.R b/R/Spectra.R index 179ee58c..045cf88a 100644 --- a/R/Spectra.R +++ b/R/Spectra.R @@ -1053,6 +1053,8 @@ asDataFrame <- function(object, i = seq_along(object), } #' @rdname spectraData +#' +#' @export setMethod("acquisitionNum", "Spectra", function(object) acquisitionNum(object@backend)) @@ -1195,6 +1197,8 @@ setMethod("mz", "Spectra", function(object, f = processingChunkFactor(object), }) #' @rdname spectraData +#' +#' @export setMethod( "peaksData", "Spectra", function(object, columns = c("mz", "intensity"), diff --git a/man/addProcessing.Rd b/man/addProcessing.Rd new file mode 100644 index 00000000..787aeabe --- /dev/null +++ b/man/addProcessing.Rd @@ -0,0 +1,547 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Spectra-functions.R, R/Spectra.R +\name{applyProcessing} +\alias{applyProcessing} +\alias{processingLog} +\alias{scalePeaks} +\alias{addProcessing} +\alias{bin} +\alias{containsMz} +\alias{containsNeutralLoss} +\alias{entropy} +\alias{pickPeaks} +\alias{replaceIntensitiesBelow} +\alias{reset} +\alias{smooth} +\alias{spectrapply} +\alias{addProcessing,Spectra-method} +\alias{bin,Spectra-method} +\alias{containsMz,Spectra-method} +\alias{containsNeutralLoss,Spectra-method} +\alias{entropy,Spectra-method} +\alias{entropy,ANY-method} +\alias{pickPeaks,Spectra-method} +\alias{replaceIntensitiesBelow,Spectra-method} +\alias{reset,Spectra-method} +\alias{smooth,Spectra-method} +\alias{spectrapply,Spectra-method} +\title{Data manipulation and analysis methods} +\usage{ +applyProcessing( + object, + f = processingChunkFactor(object), + BPPARAM = bpparam(), + ... +) + +processingLog(x) + +scalePeaks(x, by = sum, msLevel. = uniqueMsLevels(x)) + +\S4method{addProcessing}{Spectra}(object, FUN, ..., spectraVariables = character()) + +\S4method{bin}{Spectra}( + x, + binSize = 1L, + breaks = NULL, + msLevel. = uniqueMsLevels(x), + FUN = sum, + zero.rm = TRUE +) + +\S4method{containsMz}{Spectra}( + object, + mz = numeric(), + tolerance = 0, + ppm = 20, + which = c("any", "all"), + BPPARAM = bpparam() +) + +\S4method{containsNeutralLoss}{Spectra}( + object, + neutralLoss = 0, + tolerance = 0, + ppm = 20, + BPPARAM = bpparam() +) + +\S4method{entropy}{Spectra}(object, normalized = TRUE) + +\S4method{entropy}{ANY}(object, ...) 
+ +\S4method{pickPeaks}{Spectra}( + object, + halfWindowSize = 2L, + method = c("MAD", "SuperSmoother"), + snr = 0, + k = 0L, + descending = FALSE, + threshold = 0, + msLevel. = uniqueMsLevels(object), + ... +) + +\S4method{replaceIntensitiesBelow}{Spectra}( + object, + threshold = min, + value = 0, + msLevel. = uniqueMsLevels(object) +) + +\S4method{reset}{Spectra}(object, ...) + +\S4method{smooth}{Spectra}( + x, + halfWindowSize = 2L, + method = c("MovingAverage", "WeightedMovingAverage", "SavitzkyGolay"), + msLevel. = uniqueMsLevels(x), + ... +) + +\S4method{spectrapply}{Spectra}( + object, + FUN, + ..., + chunkSize = integer(), + f = factor(), + BPPARAM = SerialParam() +) +} +\arguments{ +\item{object}{A \code{Spectra} object.} + +\item{f}{For \code{spectrapply()} and \code{applyProcessing()}: \code{factor} defining +how \code{object} should be splitted for eventual parallel processing. +Defaults to \code{factor()} for \code{spectrapply()} hence the object is not +splitted while it defaults to \code{f = processingChunkSize(object)} for +\code{applyProcessing()} splitting thus the object by default into chunks +depending on \code{\link[=processingChunkSize]{processingChunkSize()}}.} + +\item{BPPARAM}{Parallel setup configuration. See \code{\link[=bpparam]{bpparam()}} for more +information. This is passed directly to the \code{\link[=backendInitialize]{backendInitialize()}} method +of the \linkS4class{MsBackend}. See also \code{\link[=processingChunkSize]{processingChunkSize()}} for +additional information on parallel processing.} + +\item{...}{Additional arguments passed to internal and downstream functions.} + +\item{x}{A \code{Spectra}.} + +\item{by}{For \code{scalePeaks()}: function to calculate a single \code{numeric} from +intensity values of a spectrum by which all intensities (of +that spectrum) should be divided by. The default \code{by = sum} will +divide intensities of each spectrum by the sum of intensities of that +spectrum.} + +\item{msLevel.}{\code{integer} defining the MS level(s) of the spectra to which +the function should be applied (defaults to all MS levels of \code{object}.} + +\item{FUN}{For \code{addProcessing()}: function to be applied to the peak matrix +of each spectrum in \code{object}. +For \code{bin()}: function to aggregate intensity values of peaks falling +into the same bin. Defaults to \code{FUN = sum} thus summing up intensities. +For \code{spectrapply()} and \code{chunkapply()}: function to be applied to +each individual or each chunk of \code{Spectra}.} + +\item{spectraVariables}{For \code{addProcessing()}: \code{character} with additional +spectra variables that should be passed along to the function defined +with \code{FUN}. See function description for details.} + +\item{binSize}{For \code{bin()}: \code{numeric(1)} defining the size for the m/z bins. +Defaults to \code{binSize = 1}.} + +\item{breaks}{For \code{bin()}: \code{numeric} defining the m/z breakpoints between +bins.} + +\item{zero.rm}{For \code{bin()}: \code{logical(1)} indicating whether to remove bins +with zero intensity. 
Defaults to \code{TRUE}, meaning the function will
+discard bins created with an intensity of 0 to enhance memory
+efficiency.}
+
+\item{mz}{For \code{containsMz()}: \code{numeric} with the m/z value(s) of the mass
+peaks to check.}
+
+\item{tolerance}{For \code{containsMz()} and \code{neutralLoss()}:
+\code{numeric(1)} allowing to define a constant maximal accepted difference
+between m/z values for peaks to be matched.}
+
+\item{ppm}{For \code{containsMz()} and \code{neutralLoss()}: \code{numeric(1)} defining a
+relative, m/z-dependent, maximal accepted difference between m/z values
+for peaks to be matched.}
+
+\item{which}{For \code{containsMz()}: either \code{"any"} or \code{"all"} defining whether
+any (the default) or all provided \code{mz} have to be present in the
+spectrum.}
+
+\item{neutralLoss}{for \code{containsNeutralLoss()}: \code{numeric(1)} defining the
+value which should be subtracted from the spectrum's precursor m/z.}
+
+\item{normalized}{for \code{entropy()}: \code{logical(1)} whether the normalized
+entropy should be calculated (default). See also \code{\link[=nentropy]{nentropy()}} for
+details.}
+
+\item{halfWindowSize}{For \code{pickPeaks()}: \code{integer(1)}, used in the
+identification of the mass peaks: a local maximum has to be the
+maximum in the window from \code{(i - halfWindowSize):(i + halfWindowSize)}.
+For \code{smooth()}: \code{integer(1)}, used in the smoothing algorithm, the
+window reaches from \code{(i - halfWindowSize):(i + halfWindowSize)}.}
+
+\item{method}{For \code{pickPeaks()}: \code{character(1)}, the noise estimators that
+should be used, currently the \emph{M}edian \emph{A}bsolute \emph{D}eviation
+(\code{method = "MAD"}) and Friedman's Super Smoother
+(\code{method = "SuperSmoother"}) are supported.
+For \code{smooth()}: \code{character(1)}, the smoothing function that should be
+used; currently, the Moving-Average- (\code{method = "MovingAverage"}),
+Weighted-Moving-Average- (\code{method = "WeightedMovingAverage"}) and
+Savitzky-Golay-Smoothing (\code{method = "SavitzkyGolay"}) are supported.}
+
+\item{snr}{For \code{pickPeaks()}: \code{double(1)} defining the
+\emph{S}ignal-to-\emph{N}oise-\emph{R}atio. The intensity of a local maximum has to be
+higher than \code{snr * noise} to be considered as a peak.}
+
+\item{k}{For \code{pickPeaks()}: \code{integer(1)}, number of values left and right of
+the peak that should be considered in the weighted mean calculation.}
+
+\item{descending}{For \code{pickPeaks()}: \code{logical}, if \code{TRUE} only values
+between the nearest valleys around the peak centroids are used.}
+
+\item{threshold}{For \code{pickPeaks()}: a \code{numeric(1)} defining the proportion
+of the maximal peak intensity. Only values above the threshold are
+used for the weighted mean calculation.
+For \code{replaceIntensitiesBelow()}: a \code{numeric(1)} defining the threshold
+or a \code{function} to calculate the threshold for each spectrum on its
+intensity values. Defaults to \code{threshold = min}.}
+
+\item{value}{For \code{replaceIntensitiesBelow()}: \code{numeric(1)} defining the
+value with which intensities should be replaced.}
+
+\item{chunkSize}{For \code{spectrapply()}: size of the chunks into which the
+\code{Spectra} should be split. This parameter overrides parameters
+\code{f} and \code{BPPARAM}.}
+}
+\value{
+See the documentation of the individual functions for a description of the
+return value.
+}
+\description{
+Various data analysis functions are available for \code{Spectra} objects.
These
+can be categorized into functions that either return a \code{Spectra} object
+(with the manipulated data) or functions that directly return the
+result from the calculation. For the former category, the data manipulations
+are cached in the result object's \emph{processing queue} and only executed
+on-the-fly when the respective data gets extracted from the \code{Spectra} (see
+section \emph{The processing queue} for more information).
+
+For the second category, the calculations are directly executed and the
+result, usually one value per spectrum, returned. Generally, to reduce
+memory demand, a chunk-wise processing of the data is performed.
+}
+\section{Data analysis methods returning a \code{Spectra}}{
+
+
+The methods listed here return a \code{Spectra} object as a result.
+\itemize{
+\item \code{addProcessing()}: adds an arbitrary function that should be applied to the
+peaks matrix of every spectrum in \code{object}. The function (can be passed
+with parameter \code{FUN}) is expected to take a peaks matrix as input and to
+return a peaks matrix. A peaks matrix is a numeric matrix with two columns,
+the first containing the m/z values of the peaks and the second the
+corresponding intensities. The function has to have \code{...} in its
+definition. Additional arguments can be passed with \code{...}. With parameter
+\code{spectraVariables} it is possible to define additional spectra variables
+from \code{object} that should be passed to the function \code{FUN}. These will be
+passed by their name (e.g. specifying \code{spectraVariables = "precursorMz"}
+will pass the spectra's precursor m/z as a parameter named \code{precursorMz}
+to the function). The only exception is the spectra's MS level, which will
+be passed to the function as a parameter called \code{spectrumMsLevel} (i.e.
+with \code{spectraVariables = "msLevel"} the MS levels of each spectrum will be
+submitted to the function as a parameter called \code{spectrumMsLevel}).
+Examples are provided in the package vignette.
+\item \code{bin()}: aggregates individual spectra into discrete (m/z) bins. Binning is
+performed only on spectra of the specified MS level(s) (parameter
+\code{msLevel}, by default all MS levels of \code{x}). The bins can be defined with
+parameter \code{breaks} which by default are equally sized bins, with size
+being defined by parameter \code{binSize}, from the minimal to the maximal m/z
+of all spectra (of MS level \code{msLevel}) within \code{x}. The same bins are used
+for all spectra in \code{x}. All intensity values for peaks falling into the
+same bin are aggregated using the function provided with parameter \code{FUN}
+(defaults to \code{FUN = sum}, i.e. all intensities are summed up). Note that
+the binning operation is applied to the peak data on-the-fly upon data
+access and it is possible to \emph{revert} the operation with the \code{reset()}
+function (see description of \code{reset()} below).
+\item \code{countIdentifications()}: counts the number of identifications each scan has
+led to. See \code{\link[=countIdentifications]{countIdentifications()}} for more details.
+\item \code{pickPeaks()}: picks peaks on individual spectra using a moving
+window-based approach (window size = \code{2 * halfWindowSize}).
For noisy +spectra there are currently two different noise estimators available, +the \emph{M}edian \emph{A}bsolute \emph{D}eviation (\code{method = "MAD"}) and +Friedman's Super Smoother (\code{method = "SuperSmoother"}), +as implemented in the \code{\link[MsCoreUtils:noise]{MsCoreUtils::noise()}}. +The method supports also to optionally \emph{refine} the m/z value of +the identified centroids by considering data points that belong (most +likely) to the same mass peak. Therefore the m/z value is calculated as an +intensity weighted average of the m/z values within the peak region. +The peak region is defined as the m/z values (and their respective +intensities) of the \code{2 * k} closest signals to the centroid or the closest +valleys (\code{descending = TRUE}) in the \code{2 * k} region. For the latter the \code{k} +has to be chosen general larger. See \code{\link[MsCoreUtils:refineCentroids]{MsCoreUtils::refineCentroids()}} for +details. +If the ratio of the signal to the highest intensity of the peak is below +\code{threshold} it will be ignored for the weighted average. +\item \code{replaceIntensitiesBelow()}: replaces intensities below a specified +threshold with the provided \code{value}. Parameter \code{threshold} can be either +a single numeric value or a function which is applied to all non-\code{NA} +intensities of each spectrum to determine a threshold value for each +spectrum. The default is \code{threshold = min} which replaces all values +which are <= the minimum intensity in a spectrum with \code{value} (the +default for \code{value} is \code{0}). Note that the function specified with +\code{threshold} is expected to have a parameter \code{na.rm} since \code{na.rm = TRUE} +will be passed to the function. If the spectrum is in profile mode, +ranges of successive non-0 peaks <= \code{threshold} are set to 0. +Parameter \code{msLevel.} allows to apply this to only spectra of certain MS +level(s). +\item \code{scalePeaks()}: scales intensities of peaks within each spectrum depending +on parameter \code{by}. With \code{by = sum} (the default) peak intensities are +divided by the sum of peak intensities within each spectrum. The sum of +intensities is thus 1 for each spectrum after scaling. Parameter +\code{msLevel.} allows to apply the scaling of spectra of a certain MS level. +By default (\code{msLevel. = uniqueMsLevels(x)}) intensities for all +spectra will be scaled. +\item \code{smooth()}: smooths individual spectra using a moving window-based approach +(window size = \code{2 * halfWindowSize}). Currently, the +Moving-Average- (\code{method = "MovingAverage"}), +Weighted-Moving-Average- (\verb{method = "WeightedMovingAverage")}, +weights depending on the distance of the center and calculated +\code{1/2^(-halfWindowSize:halfWindowSize)}) and +Savitzky-Golay-Smoothing (\code{method = "SavitzkyGolay"}) are supported. +For details how to choose the correct \code{halfWindowSize} please see +\code{\link[MsCoreUtils:smooth]{MsCoreUtils::smooth()}}. +} +} + +\section{Data analysis methods returning the result from the calculation}{ + + +The functions listed in this section return immediately the result from the +calculation. To reduce memory demand (and allow parallel processing) the +calculations a chunk-wise processing is generally performed. +\itemize{ +\item \code{chunkapply()}: apply an arbitrary function to chunks of spectra. See +\code{\link[=chunkapply]{chunkapply()}} for details and examples. 
+\item \code{containsMz()}: checks for each of the spectra whether they contain mass +peaks with an m/z equal to \code{mz} (given acceptable difference as defined by +parameters \code{tolerance} and \code{ppm} - see \code{\link[=common]{common()}} for details). Parameter +\code{which} allows to define whether any (\code{which = "any"}, the default) or +all (\code{which = "all"}) of the \code{mz} have to match. The function returns +\code{NA} if \code{mz} is of length 0 or is \code{NA}. +\item \code{containsNeutralLoss()}: checks for each spectrum in \code{object} if it has a +peak with an m/z value equal to its precursor m/z - \code{neutralLoss} (given +acceptable difference as defined by parameters \code{tolerance} and \code{ppm}). +Returns \code{NA} for MS1 spectra (or spectra without a precursor m/z). +\item \code{entropy()}: calculates the entropy of each spectra based on the metrics +suggested by Li et al. (https://doi.org/10.1038/s41592-021-01331-z). +See also \code{\link[=nentropy]{nentropy()}} in the \emph{MsCoreUtils} package for details. +\item \code{estimatePrecursorIntensity()}: defines the precursor intensities for MS2 +spectra using the intensity of the matching MS1 peak from the +closest MS1 spectrum (i.e. the last MS1 spectrum measured before the +respective MS2 spectrum). With \code{method = "interpolation"} it is also +possible to calculate the precursor intensity based on an interpolation of +intensity values (and retention times) of the matching MS1 peaks from the +previous and next MS1 spectrum. See \code{\link[=estimatePrecursorIntensity]{estimatePrecursorIntensity()}} for +examples and more details. +\item \code{estimatePrecursorMz()}: \strong{for DDA data}: allows to estimate a fragment +spectra's precursor m/z based on the reported precursor m/z and the data +from the previous MS1 spectrum. See \code{\link[=estimatePrecursorMz]{estimatePrecursorMz()}} for details. +\item \code{neutralLoss()}: calculates neutral loss spectra for fragment spectra. See +\code{\link[=neutralLoss]{neutralLoss()}} for detailed documentation. +\item \code{spectrapply()}: applies a given function to each individual spectrum or +sets of a \code{Spectra} object. By default, the \code{Spectra} is split into +individual spectra (i.e. \code{Spectra} of length 1) and the function \code{FUN} +is applied to each of them. An alternative splitting can be defined with +parameter \code{f}. Parameters for \code{FUN} can be passed using \code{...}. +The returned result and its order depend on the function \code{FUN} and how +\code{object} is split (hence on \code{f}, if provided). Parallel processing is +supported and can be configured with parameter \code{BPPARAM}, is however only +suggested for computational intense \code{FUN}. +As an alternative to the (eventual parallel) processing of the full +\code{Spectra}, \code{spectrapply()} supports also a chunk-wise processing. For this, +parameter \code{chunkSize} needs to be specified. \code{object} is then split into +chunks of size \code{chunkSize} which are then (stepwise) processed by \code{FUN}. +This guarantees a lower memory demand (especially for on-disk backends) +since only the data for one chunk needs to be loaded into memory in each +iteration. Note that by specifying \code{chunkSize}, parameters \code{f} and +\code{BPPARAM} will be ignored. +See also \code{chunkapply()} above or examples below for details on chunk-wise +processing. +} +} + +\section{The processing queue}{ + + +Operations that modify mass peak data, i.e. 
the m/z and intensity values of +a \code{Spectra} are generally not applied immediately to the data but are +\emph{cached} within the object's \emph{processing queue}. These operations are then +applied to the data only upon request, for example when m/z and/or +intensity values are extracted. This lazy execution guarantees that the +same functionality can be applied to any \code{Spectra} object, regardless of +the type of backend that is used. Thus, data manipulation operations can +also be applied to data that is \emph{read only}. As a side effect, this enables +also to \emph{undo} operations using the \code{reset()} function. + +Functions related to the processing queue are: +\itemize{ +\item \code{applyProcessing()}: for \code{Spectra} objects that use a \strong{writeable} backend +only: apply all steps from the lazy processing queue to the peak data and +write it back to the data storage. Parameter \code{f} allows to specify how +\code{object} should be split for parallel processing. This should either be +equal to the \code{dataStorage}, or \code{f = rep(1, length(object))} to disable +parallel processing alltogether. Other partitionings might result in +errors (especially if a \code{MsBackendHdf5Peaks} backend is used). +\item \code{processingLog()}: returns a \code{character} vector with the processing log +messages. +\item \code{reset()}: restores the data to its original state (as much as possible): +removes any processing steps from the lazy processing queue and calls +\code{reset()} on the backend which, depending on the backend, can also undo +e.g. data filtering operations. Note that a \verb{reset*(} call after +\code{applyProcessing()} will not have any effect. See examples below for more +information. +} +} + +\examples{ + +## Load a `Spectra` object with LC-MS/MS data. +fl <- system.file("TripleTOF-SWATH", "PestMix1_DDA.mzML", + package = "msdata") +sps_dda <- Spectra(fl) +sps_dda + + +## -------- FUNCTIONS RETURNING A SPECTRA -------- + +## Replace peak intensities below 40 with a value of 1 +sps_mod <- replaceIntensitiesBelow(sps_dda, threshold = 20, value = 1) +sps_mod + +## Get the intensities of the first spectrum before and after the +## operation +intensity(sps_dda[1]) +intensity(sps_mod[1]) + +## Remove all peaks with an intensity below 5. +sps_mod <- filterIntensity(sps_dda, intensity = c(5, Inf)) + +intensity(sps_mod) + +## In addition it is possible to pass a function to `filterIntensity()`: in +## the example below we want to keep only peaks that have an intensity which +## is larger than one third of the maximal peak intensity in that spectrum. +keep_peaks <- function(x, prop = 3) { + x > max(x, na.rm = TRUE) / prop +} +sps_mod <- filterIntensity(sps_dda, intensity = keep_peaks) +intensity(sps_mod) + +## We can also change the proportion by simply passing the `prop` parameter +## to the function. To keep only peaks that have an intensity which is +## larger than half of the maximum intensity: +sps_mod <- filterIntensity(sps_dda, intensity = keep_peaks, prop = 2) +intensity(sps_mod) + +## With the `scalePeaks()` function we can alternatively scale the +## intensities of mass peaks per spectrum to relative intensities. This +## is specifically useful for fragment (MS2) spectra. We below thus +## scale the intensities per spectrum by the total sum of intensities +## (such that the sum of all intensities per spectrum is 1). +## Below we scale the intensities of all MS2 spectra in our data set. 
+sps_mod <- scalePeaks(sps_dda, msLevel = 2L) + +## MS1 spectra were not affected +sps_mod |> + filterMsLevel(1L) |> + intensity() + +## Intensities of MS2 spectra were scaled +sps_mod |> + filterMsLevel(2L) |> + intensity() + +## Since data manipulation operations are by default not directly applied to +## the data but only cached in the internal processing queue, it is also +## possible to remove these data manipulations with the `reset()` function: +tmp <- reset(sps_mod) +tmp +lengths(sps_dda) |> head() +lengths(sps_mod) |> head() +lengths(tmp) |> head() + +## Data manipulation operations cached in the processing queue can also be +## applied to the mass peaks data with the `applyProcessing()` function, if +## the `Spectra` uses a backend that supports that (i.e. allows replacing +## the mass peaks data). Below we first change the backend to a +## `MsBackendMemory()` and then use the `applyProcessing()` to modify the +## mass peaks data +sps_dda <- setBackend(sps_dda, MsBackendMemory()) +sps_mod <- filterIntensity(sps_dda, intensity = c(5, Inf)) +sps_mod <- applyProcessing(sps_mod) +sps_mod + +## While we can't *undo* this filtering operation now using the `reset()` +## function, accessing the data would now be faster, because the operation +## does no longer to be applied to the original data before returning to the +## user. + + +## -------- FUNCTIONS RETURNING THE RESULT -------- + +## With the `spectrapply()` function it is possible to apply an +## arbitrary function to each spectrum in a Spectra. +## In the example below we calculate the mean intensity for each spectrum +## in a subset of the sciex_im data. Note that we can access all variables +## of each individual spectrum either with the `$` operator or the +## corresponding method. +res <- spectrapply(sps_dda[1:20], FUN = function(x) mean(x$intensity[[1]])) +head(res) + +## As an alternative, applying a function `FUN` to a `Spectra` can be +## performed *chunk-wise*. The advantage of this is, that only the data for +## one chunk at a time needs to be loaded into memory reducing the memory +## demand. This type of processing can be performed by specifying the size +## of the chunks (i.e. number of spectra per chunk) with the `chunkSize` +## parameter +spectrapply(sps_dda[1:20], lengths, chunkSize = 5L) + +## Precursor intensity estimation. Some manufacturers don't report the +## precursor intensity for MS2 spectra: +sps_dda |> + filterMsLevel(2L) |> + precursorIntensity() + +## This intensity can however be estimated from the previously measured +## MS1 scan with the `estimatePrecursorIntensity()` function: +pi <- estimatePrecursorIntensity(sps_dda) + +## This function returned the result as a `numeric` vector with one +## value per spectrum: +pi + +## We can replace the precursor intensity values of the originating +## object: +sps_dda$precursorIntensity <- pi +sps_dda |> + filterMsLevel(2L) |> + precursorIntensity() + +} +\seealso{ +\itemize{ +\item \code{\link[=compareSpectra]{compareSpectra()}} for calculation of spectra similarity scores. +\item \code{\link[=processingChunkSize]{processingChunkSize()}} for information on parallel and chunk-wise data +processing. +\item \link{Spectra} for a general description of the \code{Spectra} object. 
+} +} +\author{ +Sebastian Gibb, Johannes Rainer, Laurent Gatto, Philippine Louail, Nir Shahaf, Mar Garcia-Aloy +} diff --git a/man/combinePeaks.Rd b/man/combinePeaks.Rd new file mode 100644 index 00000000..a59b8f24 --- /dev/null +++ b/man/combinePeaks.Rd @@ -0,0 +1,110 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Spectra.R +\name{combinePeaks} +\alias{combinePeaks} +\alias{combinePeaks,Spectra-method} +\title{Aggregating and combining mass peaks data} +\usage{ +\S4method{combinePeaks}{Spectra}( + object, + tolerance = 0, + ppm = 20, + intensityFun = base::mean, + mzFun = base::mean, + weighted = TRUE, + msLevel. = uniqueMsLevels(object), + ... +) +} +\arguments{ +\item{object}{A \code{Spectra} object.} + +\item{tolerance}{\code{numeric(1)} allowing to define a constant maximal +accepted difference between m/z values for peaks to be grouped. Default +is \code{tolerance = 0}.} + +\item{ppm}{\code{numeric(1)} defining a relative, m/z-dependent, maximal +accepted difference between m/z values for peaks to be grouped. Default +is \code{ppm = 20}.} + +\item{intensityFun}{Function to aggregate intensities for all peaks in +each peak group into a single intensity value.} + +\item{mzFun}{Function to aggregate m/z values for all mass peaks within +each peak group into a single m/z value. This parameter is ignored if +\code{weighted = TRUE} (the default).} + +\item{weighted}{\code{logical(1)} whether m/z values of peaks within each peak +group should be aggregated into a single m/z value using an +intensity-weighted mean. Defaults to \code{weighted = TRUE}.} + +\item{msLevel.}{\code{integer} defining the MS level(s) of the spectra to which +the function should be applied (defaults to all MS levels of \code{object}.} + +\item{...}{ignored.} +} +\description{ +In addition to aggregating content of spectra variables (describe in +\code{\link[=combineSpectra]{combineSpectra()}}) it is also possible to aggregate and combine mass peaks +data from individual spectra within a \code{Spectra}. These \code{combinePeaks()} +function combines mass peaks \strong{within each spectrum} with a difference in +their m/z values that is smaller than the maximal acceptable difference +defined by \code{ppm} and \code{tolerance}. Parameters \code{intensityFun} and \code{mzFun} +allow to define functions to aggregate the intensity and m/z values for +each such group of peaks. With \code{weighted = TRUE} (the default), the m/z +value of the combined peak is calculated using an intensity-weighted mean +and parameter \code{mzFun} is ignored. The \code{\link[MsCoreUtils:group]{MsCoreUtils::group()}} function is +used for the grouping of mass peaks. Parameter \code{msLevel.} allows to define +selected MS levels for which peaks should be combined. This function +returns a \code{Spectra} with the same number of spectra than the input object, +but with possibly combined peaks within each spectrum. +Additional peak variables (other than \code{"mz"} and \code{"intensity"}) are +dropped (i.e. their values are replaced with \code{NA}) for combined peaks +unless they are constant across the combined peaks. See also +\code{\link[=reduceSpectra]{reduceSpectra()}} for a function to select a single \emph{representative} +mass peak for each peak group. +} +\examples{ + +## Create a Spectra from mzML files and use the `MsBackendMzR` on-disk +## backend. 
+sciex_file <- dir(system.file("sciex", package = "msdata"), + full.names = TRUE) +sciex <- Spectra(sciex_file, backend = MsBackendMzR()) + +## Combine mass peaks per spectrum with a difference in their m/z value +## that is smaller than 20 ppm. The intensity values of such peaks are +## combined by summing their values, while for the m/z values the median +## is reported +sciex_comb <- combinePeaks(sciex, ppm = 20, + intensityFun = sum, mzFun = median) + +## Comparing the number of mass peaks before and after aggregation +lengths(sciex) |> head() +lengths(sciex_comb) |> head() + +## Plotting the first spectrum before and after aggregation +par(mfrow = c(1, 2)) +plotSpectra(sciex[2L]) +plotSpectra(sciex_comb[2L]) + +## Using `reduceSpectra()` to keep for each group of mass peaks with a +## difference in their m/z values < 20ppm the one with the highest intensity. +sciex_red <- reduceSpectra(sciex, ppm = 20) + +## Comparing the number of mass peaks before and after the operation +lengths(sciex) |> head() +lengths(sciex_red) |> head() +} +\seealso{ +\itemize{ +\item \code{\link[=combineSpectra]{combineSpectra()}} for functions to combine or aggregate \code{Spectra}'s +spectra data. +\item \code{\link[=combinePeaksData]{combinePeaksData()}} for the function to combine the mass peaks data. +\item \code{\link[=reduceSpectra]{reduceSpectra()}} and similar functions to filter mass peaks data. +\item \link{Spectra} for a general description of the \code{Spectra} object. +} +} +\author{ +Sebastian Gibb, Johannes Rainer, Laurent Gatto +} diff --git a/man/combineSpectra.Rd b/man/combineSpectra.Rd new file mode 100644 index 00000000..d4f7bdb0 --- /dev/null +++ b/man/combineSpectra.Rd @@ -0,0 +1,240 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Spectra-functions.R, R/Spectra.R +\name{concatenateSpectra} +\alias{concatenateSpectra} +\alias{combineSpectra} +\alias{joinSpectraData} +\alias{split} +\alias{c,Spectra-method} +\alias{split,Spectra,ANY-method} +\title{Merging, aggregating and splitting Spectra} +\usage{ +concatenateSpectra(x, ...) + +combineSpectra( + x, + f = x$dataStorage, + p = x$dataStorage, + FUN = combinePeaksData, + ..., + BPPARAM = bpparam() +) + +joinSpectraData(x, y, by.x = "spectrumId", by.y, suffix.y = ".y") + +\S4method{c}{Spectra}(x, ...) + +\S4method{split}{Spectra,ANY}(x, f, drop = FALSE, ...) +} +\arguments{ +\item{x}{A \code{Spectra} object.} + +\item{...}{Additional arguments.} + +\item{f}{For \code{split()}: factor defining how to split \code{x}. See \code{\link[base:split]{base::split()}} +for details. +For \code{combineSpectra()}: \code{factor} defining the grouping of the spectra +that should be combined. Defaults to \code{x$dataStorage}.} + +\item{p}{For \code{combineSpectra()}: \code{factor} defining how to split the input +\code{Spectra} for parallel processing. Defaults to \code{x$dataStorage}, i.e., +depending on the used backend, per-file parallel processing will be +performed.} + +\item{FUN}{For \code{combineSpectra()}: function to combine the (peak matrices) +of the spectra. Defaults to \code{\link[=combinePeaksData]{combinePeaksData()}}.} + +\item{BPPARAM}{Parallel setup configuration. See \code{\link[=bpparam]{bpparam()}} for more +information. This is passed directly to the \code{\link[=backendInitialize]{backendInitialize()}} method +of the \linkS4class{MsBackend}.} + +\item{y}{A \code{DataFrame} with the spectra variables to join/add.} + +\item{by.x}{A \code{character(1)} specifying the spectra variable used +for merging. 
Default is \code{"spectrumId"}.}
+
+\item{by.y}{A \code{character(1)} specifying the column used for
+merging. Set to \code{by.x} if missing.}
+
+\item{suffix.y}{A \code{character(1)} specifying the suffix to be used
+for making the names of columns in the merged spectra variables
+unique. This suffix will be used to amend \code{names(y)}, while
+\code{spectraVariables(x)} will remain unchanged.}
+
+\item{drop}{For \code{split()}: not considered.}
+}
+\description{
+Various functions are available to combine, aggregate or split data from one
+or more \code{Spectra} objects. These are:
+\itemize{
+\item \code{c()} and \code{concatenateSpectra()}: combine several \code{Spectra} objects into
+a single object. The resulting \code{Spectra} contains all data from all
+individual \code{Spectra}, i.e. the union of all their spectra variables.
+Concatenation will fail if the processing queue of any of the \code{Spectra}
+objects is not empty or if different backends are used for the \code{Spectra}
+objects. In such cases it is suggested to first change the backends of
+all \code{Spectra} to the same type of backend (using the \code{\link[=setBackend]{setBackend()}}
+function) and to eventually (if needed) apply the processing queue using
+the \code{\link[=applyProcessing]{applyProcessing()}} function.
+\item \code{combineSpectra()}: combines sets of spectra (defined with parameter \code{f})
+into a single spectrum per set aggregating their MS data (i.e. their
+\emph{peaks data} matrices with the \emph{m/z} and intensity values of their
+mass peaks). The spectra variable values of the first spectrum per set
+are reported for the combined spectrum. The peak matrices of the spectra
+per set are combined using the function specified with parameter \code{FUN}
+which uses by default the \code{\link[=combinePeaksData]{combinePeaksData()}} function. See the
+documentation of \code{\link[=combinePeaksData]{combinePeaksData()}} for details on the aggregation of
+the peak data and the package vignette for examples.
+The sets of spectra can be specified with parameter \code{f} which is expected
+to be a \code{factor} or \code{vector} of length equal to the length of the
+\code{Spectra} specifying the set to which each spectrum belongs. The function
+returns a \code{Spectra} of length equal to the unique levels of \code{f}. The
+optional parameter \code{p} allows to define how the \code{Spectra} should be
+split for potential parallel processing. The default is
+\code{p = x$dataStorage} and hence per-storage-file parallel processing is
+applied for \code{Spectra} with on-disk data representations (such as the
+\code{\link[=MsBackendMzR]{MsBackendMzR()}}). This also prevents spectra from different data
+files/samples from being combined (eventually use e.g. \code{p = x$dataOrigin} or any
+other spectra variables defining the originating samples for a spectrum).
+Before combining the peaks data, all eventually present processing steps are
+applied (by calling \code{\link[=applyProcessing]{applyProcessing()}} on the \code{Spectra}). This function
+will replace the original \emph{m/z} and intensity values of a \code{Spectra}, hence
+it cannot be called on a \code{Spectra} with a \emph{read-only} backend. In such
+cases, the backend should first be changed to a \emph{writeable} backend
+using the \code{\link[=setBackend]{setBackend()}} function (e.g. to a \code{\link[=MsBackendMemory]{MsBackendMemory()}} backend).
+\item \code{joinSpectraData()}: Individual spectra variables can be directly +added with the \verb{$<-} or \verb{[[<-} syntax. The \code{joinSpectraData()} +function allows to merge a \code{DataFrame} to the existing spectra +data of a \code{Spectra}. This function diverges from the \code{\link[=merge]{merge()}} method in +two main ways: +\itemize{ +\item The \code{by.x} and \code{by.y} column names must be of length 1. +\item If variable names are shared in \code{x} and \code{y}, the spectra +variables of \code{x} are not modified. It's only the \code{y} +variables that are appended with the suffix defined in +\code{suffix.y}. This is to avoid modifying any core spectra +variables that would lead to an invalid object. +\item Duplicated Spectra keys (i.e. \code{x[[by.x]]}) are not +allowed. Duplicated keys in the \code{DataFrame} (i.e \code{y[[by.y]]}) +throw a warning and only the last occurrence is kept. These +should be explored and ideally be removed using for +\code{QFeatures::reduceDataFrame()}, \code{PMS::reducePSMs()} or similar +functions. +} +\item \code{split()}: splits the \code{Spectra} object based on parameter \code{f} into a \code{list} +of \code{Spectra} objects. +} +} +\examples{ + +## Create a Spectra providing a `DataFrame` containing a MS data. + +spd <- DataFrame(msLevel = c(1L, 2L), rtime = c(1.1, 1.2)) +spd$mz <- list(c(100, 103.2, 104.3, 106.5), c(45.6, 120.4, 190.2)) +spd$intensity <- list(c(200, 400, 34.2, 17), c(12.3, 15.2, 6.8)) + +s <- Spectra(spd) +s + +## Create a second Spectra from mzML files and use the `MsBackendMzR` +## on-disk backend. +sciex_file <- dir(system.file("sciex", package = "msdata"), + full.names = TRUE) +sciex <- Spectra(sciex_file, backend = MsBackendMzR()) +sciex + +## Subset to the first 100 spectra to reduce running time of the examples +sciex <- sciex[1:100] + + +## -------- COMBINE SPECTRA -------- + +## Combining the `Spectra` object `s` with the MS data from `sciex`. +## Calling directly `c(s, sciex)` would result in an error because +## both backends use a different backend. We thus have to first change +## the backends to the same backend. We change the backend of the `sciex` +## `Spectra` to a `MsBackendMemory`, the backend used by `s`. + +sciex <- setBackend(sciex, MsBackendMemory()) + +## Combine the two `Spectra` +all <- c(s, sciex) +all + +## The new `Spectra` objects contains the union of spectra variables from +## both: +spectraVariables(all) + +## The spectra variables that were not present in `s`: +setdiff(spectraVariables(all), spectraVariables(s)) + +## The values for these were filled with missing values for spectra from +## `s`: +all$peaksCount |> head() + + +## -------- AGGREGATE SPECTRA -------- + +## Sets of spectra can be combined into a single, representative spectrum +## per set using `combineSpectra()`. This aggregates the peaks data (i.e. +## the spectra's m/z and intensity values) while using the values for all +## spectra variables from the first spectrum per set. Below we define the +## sets as all spectra measured in the *same second*, i.e. rounding their +## retention time to the next closer integer value. 
+f <- round(rtime(sciex)) +head(f) + +cmp <- combineSpectra(sciex, f = f) + +## The length of `cmp` is now equal to the length of unique levels in `f`: +length(cmp) + +## The spectra variable value from the first spectrum per set is used in +## the representative/combined spectrum: +cmp$rtime + +## The peaks data was aggregated: the number of mass peaks of the first six +## spectra from the original `Spectra`: +lengths(sciex) |> head() + +## and for the first aggreagated spectra: +lengths(cmp) |> head() + +## The default peaks data aggregation method joins all mass peaks. See +## documentation of the `combinePeaksData()` function for more options. + + +## -------- SPLITTING DATA -------- + +## A `Spectra` can be split into a `list` of `Spectra` objects using the +## `split()` function defining the sets into which the `Spectra` should +## be splitted into with parameter `f`. +sciex_split <- split(sciex, f) + +length(sciex_split) +sciex_split |> head() + + +## -------- ADDING SPECTRA DATA -------- + +## Adding new spectra variables +sciex1 <- filterDataOrigin(sciex, dataOrigin(sciex)[1]) +spv <- DataFrame(spectrumId = sciex1$spectrumId[3:12], ## used for merging + var1 = rnorm(10), + var2 = sample(letters, 10)) +spv + +sciex2 <- joinSpectraData(sciex1, spv, by.y = "spectrumId") + +spectraVariables(sciex2) +spectraData(sciex2)[1:13, c("spectrumId", "var1", "var2")] +} +\seealso{ +\itemize{ +\item \code{\link[=combinePeaks]{combinePeaks()}} for functions to aggregate mass peaks data. +\item \link{Spectra} for a general description of the \code{Spectra} object. +} +} +\author{ +Sebastian Gibb, Johannes Rainer, Laurent Gatto +} diff --git a/man/compareSpectra.Rd b/man/compareSpectra.Rd new file mode 100644 index 00000000..375671c4 --- /dev/null +++ b/man/compareSpectra.Rd @@ -0,0 +1,131 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Spectra.R +\name{compareSpectra} +\alias{compareSpectra} +\alias{compareSpectra,Spectra,Spectra-method} +\alias{compareSpectra,Spectra,missing-method} +\title{Spectra similarity calculations} +\usage{ +\S4method{compareSpectra}{Spectra,Spectra}( + x, + y, + MAPFUN = joinPeaks, + tolerance = 0, + ppm = 20, + FUN = ndotproduct, + ..., + SIMPLIFY = TRUE +) + +\S4method{compareSpectra}{Spectra,missing}( + x, + y = NULL, + MAPFUN = joinPeaks, + tolerance = 0, + ppm = 20, + FUN = ndotproduct, + ..., + SIMPLIFY = TRUE +) +} +\arguments{ +\item{x}{A \code{Spectra} object.} + +\item{y}{A \code{Spectra} object.} + +\item{MAPFUN}{For \code{compareSpectra()}: function to map/match peaks between +the two compared spectra. See \code{\link[=joinPeaks]{joinPeaks()}} for more information and +possible functions. Defaults to \code{\link[=joinPeaks]{joinPeaks()}}.} + +\item{tolerance}{\code{numeric(1)} allowing to define a constant maximal +accepted difference between m/z values for peaks to be matched. This +parameter is directly passed to \code{MAPFUN}.} + +\item{ppm}{\code{numeric(1)} defining a relative, m/z-dependent, maximal +accepted difference between m/z values for peaks to be matched. This +parameter is directly passed to \code{MAPFUN}.} + +\item{FUN}{function to compare intensities of peaks between two spectra. +Defaults to \code{\link[=ndotproduct]{ndotproduct()}}.} + +\item{...}{Additional arguments passed to the internal functions.} + +\item{SIMPLIFY}{\code{logical(1)} defining whether the result matrix should be +\emph{simplified} to a \code{numeric} if possible (i.e. 
if either \code{x} or \code{y} is +of length 1).} +} +\description{ +\code{compareSpectra()} compares each spectrum in \code{x} with each spectrum in \code{y} +using the function provided with \code{FUN} (defaults to \code{\link[=ndotproduct]{ndotproduct()}}). If +\code{y} is missing, each spectrum in \code{x} is compared with each other spectrum +in \code{x}. +The matching/mapping of peaks between the compared spectra is done with the +\code{MAPFUN} function. The default \code{\link[=joinPeaks]{joinPeaks()}} matches peaks of both spectra +and allows to keep all peaks from the first spectrum (\code{type = "left"}), +from the second (\code{type = "right"}), from both (\code{type = "outer"}) and to +keep only matching peaks (\code{type = "inner"}); see \code{\link[=joinPeaks]{joinPeaks()}} for more +information and examples). The \code{MAPFUN} function should have parameters +\code{x}, \code{y}, \code{xPrecursorMz} and \code{yPrecursorMz} as these values are passed to +the function. + +In addition to \code{joinPeaks()} also \code{\link[=joinPeaksGnps]{joinPeaksGnps()}} is supported for +GNPS-like similarity score calculations. Note that \code{joinPeaksGnps()} should +only be used in combination with \code{FUN = MsCoreUtils::gnps} +(see \code{\link[=joinPeaksGnps]{joinPeaksGnps()}} for more information and details). Use +\code{MAPFUN = joinPeaksNone} to disable internal peak matching/mapping if a +similarity scoring function is used that performs the matching internally. + +\code{FUN} is supposed to be a function to compare intensities of (matched) +peaks of the two spectra that are compared. The function needs to take two +matrices with columns \code{"mz"} and \code{"intensity"} as input and is supposed +to return a single numeric as result. In addition to the two peak matrices +the spectra's precursor m/z values are passed to the function as parameters +\code{xPrecursorMz} (precursor m/z of the \code{x} peak matrix) and \code{yPrecursorMz} +(precursor m/z of the \code{y} peak matrix). Additional parameters to functions +\code{FUN} and \code{MAPFUN} can be passed with \code{...}. Parameters \code{ppm} and +\code{tolerance} are passed to both \code{MAPFUN} and \code{FUN}. +The function returns a \code{matrix} with the results of \code{FUN} for each +comparison, number of rows equal to \code{length(x)} and number of columns +equal \code{length(y)} (i.e. element in row 2 and column 3 is the result from +the comparison of \code{x[2]} with \code{y[3]}). If \code{SIMPLIFY = TRUE} the \code{matrix} +is \emph{simplified} to a \code{numeric} if length of \code{x} or \code{y} is one. See also +the vignette for additional examples, such as using spectral entropy +similarity in the scoring. +} +\examples{ + +## Load a `Spectra` object with LC-MS/MS data. +fl <- system.file("TripleTOF-SWATH", "PestMix1_DDA.mzML", + package = "msdata") +sps_dda <- Spectra(fl) +sps_dda + +## Restrict to MS2 (fragment) spectra: +sps_ms2 <- filterMsLevel(sps_dda, msLevel = 2L) + +## Compare spectra: comparing spectra 2 and 3 against spectra 10:20 using +## the normalized dotproduct method. 
+res <- compareSpectra(sps_ms2[2:3], sps_ms2[10:20]) +## first row contains comparisons of spectrum 2 with spectra 10 to 20 and +## the second row comparisons of spectrum 3 with spectra 10 to 20 +res + +## We next calculate the pairwise similarity for the first 10 spectra +compareSpectra(sps_ms2[1:10]) + +## Use compareSpectra to determine the number of common (matching) peaks +## with a ppm of 10: +## type = "inner" uses a *inner join* to match peaks, i.e. keeps only +## peaks that can be mapped betwen both spectra. The provided FUN returns +## simply the number of matching peaks. +compareSpectra(sps_ms2[2:3], sps_ms2[10:20], ppm = 10, type = "inner", + FUN = function(x, y, ...) nrow(x)) + +## We repeat this calculation between all pairwise combinations +## of the first 20 spectra +compareSpectra(sps_ms2[1:20], ppm = 10, type = "inner", + FUN = function(x, y, ...) nrow(x)) +} +\author{ +Sebastian Gibb, Johannes Rainer, Laurent Gatto +} diff --git a/man/filterMsLevel.Rd b/man/filterMsLevel.Rd new file mode 100644 index 00000000..0ea3698b --- /dev/null +++ b/man/filterMsLevel.Rd @@ -0,0 +1,689 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Spectra-functions.R, R/Spectra.R +\name{deisotopeSpectra} +\alias{deisotopeSpectra} +\alias{reduceSpectra} +\alias{filterPrecursorMaxIntensity} +\alias{filterPrecursorIsotopes} +\alias{filterPrecursorPeaks} +\alias{filterMsLevel} +\alias{[,Spectra-method} +\alias{filterAcquisitionNum} +\alias{filterDataOrigin} +\alias{filterDataStorage} +\alias{filterEmptySpectra} +\alias{filterIsolationWindow} +\alias{filterPolarity} +\alias{filterPrecursorCharge} +\alias{filterPrecursorMzRange} +\alias{filterPrecursorMzValues} +\alias{filterPrecursorScan} +\alias{filterRanges} +\alias{filterRt} +\alias{filterValues} +\alias{dropNaSpectraVariables} +\alias{selectSpectraVariables} +\alias{filterIntensity} +\alias{filterMzRange} +\alias{filterMzValues} +\alias{dropNaSpectraVariables,Spectra-method} +\alias{selectSpectraVariables,Spectra-method} +\alias{filterAcquisitionNum,Spectra-method} +\alias{filterEmptySpectra,Spectra-method} +\alias{filterDataOrigin,Spectra-method} +\alias{filterDataStorage,Spectra-method} +\alias{filterFourierTransformArtefacts,Spectra-method} +\alias{filterIntensity,Spectra-method} +\alias{filterIsolationWindow,Spectra-method} +\alias{filterMsLevel,Spectra-method} +\alias{filterMzRange,Spectra-method} +\alias{filterMzValues,Spectra-method} +\alias{filterPolarity,Spectra-method} +\alias{filterPrecursorMz,Spectra-method} +\alias{filterPrecursorMzRange,Spectra-method} +\alias{filterPrecursorMzValues,Spectra-method} +\alias{filterPrecursorCharge,Spectra-method} +\alias{filterPrecursorScan,Spectra-method} +\alias{filterRt,Spectra-method} +\alias{filterRanges,Spectra-method} +\alias{filterValues,Spectra-method} +\title{Filter and subset Spectra objects} +\usage{ +deisotopeSpectra( + x, + substDefinition = isotopicSubstitutionMatrix("HMDB_NEUTRAL"), + tolerance = 0, + ppm = 20, + charge = 1 +) + +reduceSpectra(x, tolerance = 0, ppm = 20) + +filterPrecursorMaxIntensity(x, tolerance = 0, ppm = 20) + +filterPrecursorIsotopes( + x, + tolerance = 0, + ppm = 20, + substDefinition = isotopicSubstitutionMatrix("HMDB_NEUTRAL") +) + +filterPrecursorPeaks( + object, + tolerance = 0, + ppm = 20, + mz = c("==", ">="), + msLevel. 
= uniqueMsLevels(object) +) + +\S4method{dropNaSpectraVariables}{Spectra}(object) + +\S4method{selectSpectraVariables}{Spectra}( + object, + spectraVariables = union(spectraVariables(object), peaksVariables(object)) +) + +\S4method{[}{Spectra}(x, i, j, ..., drop = FALSE) + +\S4method{filterAcquisitionNum}{Spectra}( + object, + n = integer(), + dataStorage = character(), + dataOrigin = character() +) + +\S4method{filterEmptySpectra}{Spectra}(object) + +\S4method{filterDataOrigin}{Spectra}(object, dataOrigin = character()) + +\S4method{filterDataStorage}{Spectra}(object, dataStorage = character()) + +\S4method{filterFourierTransformArtefacts}{Spectra}( + object, + halfWindowSize = 0.05, + threshold = 0.2, + keepIsotopes = TRUE, + maxCharge = 5, + isotopeTolerance = 0.005 +) + +\S4method{filterIntensity}{Spectra}( + object, + intensity = c(0, Inf), + msLevel. = uniqueMsLevels(object), + ... +) + +\S4method{filterIsolationWindow}{Spectra}(object, mz = numeric()) + +\S4method{filterMsLevel}{Spectra}(object, msLevel. = integer()) + +\S4method{filterMzRange}{Spectra}( + object, + mz = numeric(), + msLevel. = uniqueMsLevels(object), + keep = TRUE +) + +\S4method{filterMzValues}{Spectra}( + object, + mz = numeric(), + tolerance = 0, + ppm = 20, + msLevel. = uniqueMsLevels(object), + keep = TRUE +) + +\S4method{filterPolarity}{Spectra}(object, polarity = integer()) + +\S4method{filterPrecursorMz}{Spectra}(object, mz = numeric()) + +\S4method{filterPrecursorMzRange}{Spectra}(object, mz = numeric()) + +\S4method{filterPrecursorMzValues}{Spectra}(object, mz = numeric(), ppm = 20, tolerance = 0) + +\S4method{filterPrecursorCharge}{Spectra}(object, z = integer()) + +\S4method{filterPrecursorScan}{Spectra}(object, acquisitionNum = integer(), f = dataOrigin(object)) + +\S4method{filterRt}{Spectra}(object, rt = numeric(), msLevel. = uniqueMsLevels(object)) + +\S4method{filterRanges}{Spectra}( + object, + spectraVariables = character(), + ranges = numeric(), + match = c("all", "any") +) + +\S4method{filterValues}{Spectra}( + object, + spectraVariables = character(), + values = numeric(), + ppm = 0, + tolerance = 0, + match = c("all", "any") +) +} +\arguments{ +\item{x}{\code{Spectra} object.} + +\item{substDefinition}{For \code{deisotopeSpectra()} and +\code{filterPrecursorIsotopes()}: \code{matrix} or \code{data.frame} with definitions +of isotopic substitutions. Uses by default isotopic substitutions +defined from all compounds in the Human Metabolome Database (HMDB). See +\code{\link[=isotopologues]{isotopologues()}} or \code{\link[=isotopicSubstitutionMatrix]{isotopicSubstitutionMatrix()}} in the +\emph{MetaboCoreUtils} for details.} + +\item{tolerance}{For \code{filterMzValues()} and \code{reduceSpectra()}: +\code{numeric(1)} allowing to define a constant maximal accepted difference +between m/z values for peaks to be matched (or grouped). For +\code{containsMz()} it can also be of length equal \code{mz} to specify a different +tolerance for each m/z value. +For \code{filterPrecursorMaxIntensity()}: \code{numeric(1)} defining the +(constant) maximal accepted difference of precursor m/z values of +spectra for grouping them into \emph{precursor groups}. For +\code{filterPrecursorIsotopes()}: passed directly to the \code{\link[=isotopologues]{isotopologues()}} +function. For \code{filterValues()}: \code{numeric} of any length allowing to +define a maximal accepted difference between user input \code{values} and the +\code{spectraVariables} values. 
If it is not equal to the length of the +value provided with parameter \code{spectraVariables}, \code{tolerance[1]} will be +recycled. Default is \code{tolerance = 0}.} + +\item{ppm}{For \code{filterMzValues()} and \code{reduceSpectra()}: \code{numeric(1)} +defining a relative, m/z-dependent, maximal accepted difference between +m/z values for peaks to be matched (or grouped). +For \code{filterPrecursorMaxIntensity()}: \code{numeric(1)} defining the relative +maximal accepted difference of precursor m/z values of spectra for +grouping them into \emph{precursor groups}. For \code{filterPrecursorIsotopes()}: +passed directly to the \code{\link[=isotopologues]{isotopologues()}} function. +For \code{filterValues()}: \code{numeric} of any length allowing to define +a maximal accepted difference between user input \code{values} and the +\code{spectraVariables} values. If it is not equal to the length of the +value provided with parameter \code{spectraVariables}, \code{ppm[1]} will be +recycled.} + +\item{charge}{For \code{deisotopeSpectra()}: expected charge of the ionized +compounds. See \code{\link[=isotopologues]{isotopologues()}} for details.} + +\item{object}{\code{Spectra} object.} + +\item{mz}{For \code{filterIsolationWindow()}: \code{numeric(1)} with the m/z value to +filter the object. For \code{filterPrecursorMz()} and \code{filterMzRange()}: +\code{numeric(2)} defining the lower and upper m/z boundary. +For \code{filterMzValues()} and \code{filterPrecursorMzValues()}: \code{numeric} with +the m/z values to match peaks or precursor m/z against. +For \code{filterPrecursorPeaks()}: \code{character(1)} defining whether mass peaks +with an m/z matching the spectrum's precursor m/z (\code{mz = "=="}, +the default) or mass peaks with a m/z that is equal or larger +(\code{mz = ">="}) should be removed.} + +\item{msLevel.}{\code{integer} defining the MS level(s) of the spectra to which +the function should be applied (defaults to all MS levels of \code{object}. +For \code{filterMsLevel()}: the MS level to which \code{object} should be +subsetted.} + +\item{spectraVariables}{For \code{selectSpectraVariables()}: \code{character} with the +names of the spectra variables to which the backend should be +subsetted. For \code{filterRanges()} and \code{filterValues()}: \code{character} +vector specifying the column(s) from \code{spectraData(object)} on which +to filter the data and that correspond to the the names of the +spectra variables that should be used for the filtering.} + +\item{i}{For \code{[}: \code{integer}, \code{logical} or \code{character} to subset the +object.} + +\item{j}{For \code{[}: not supported.} + +\item{...}{Additional arguments.} + +\item{drop}{For \code{[}: not considered.} + +\item{n}{for \code{filterAcquisitionNum()}: \code{integer} with the acquisition +numbers to filter for.} + +\item{dataStorage}{For \code{filterDataStorage()}: \code{character} to define which +spectra to keep. +For \code{filterAcquisitionNum()}: optionally specify if filtering should +occur only for spectra of selected \code{dataStorage}.} + +\item{dataOrigin}{For \code{filterDataOrigin()}: \code{character} to define which +spectra to keep. 
+For \code{filterAcquisitionNum()}: optionally specify if filtering should +occur only for spectra of selected \code{dataOrigin}.} + +\item{halfWindowSize}{For \code{filterFourierTransformArtefacts()}: \code{numeric(1)} +defining the m/z window left and right of a peak in which to remove +Fourier transform artefacts.} + +\item{threshold}{For \code{filterFourierTransformArtefacts()}: the relative +intensity (to a peak) below which peaks are considered Fourier +artefacts. Defaults to \code{threshold = 0.2} hence removing peaks that +have an intensity below 0.2 times the intensity of the tested peak +(within the selected \code{halfWindowSize}).} + +\item{keepIsotopes}{For \code{filterFourierTransformArtefacts()}: whether isotope +peaks should not be removed as Fourier artefacts.} + +\item{maxCharge}{For \code{filterFourierTransformArtefacts()}: the maximum charge +to be considered for isotopes.} + +\item{isotopeTolerance}{For \code{filterFourierTransformArtefacts()}: the m/z +\code{tolerance} to be used to define whether peaks might be isotopes of +the currently tested peak.} + +\item{intensity}{For \code{filterIntensity()}: \code{numeric} of length 1 or 2 +defining either the lower or the lower and upper intensity limit for the +filtering, or a \code{function} that takes the intensities as input and +returns a \code{logical} (of the same length as the number of peaks in the +spectrum) defining whether the peak should be retained or not. Defaults to +\code{intensity = c(0, Inf)} thus only peaks with \code{NA} intensity are removed.} + +\item{keep}{For \code{filterMzValues()} and \code{filterMzRange()}: \code{logical(1)} +whether the matching peaks should be retained (\code{keep = TRUE}, the +default) or dropped (\code{keep = FALSE}).} + +\item{polarity}{For \code{filterPolarity()}: \code{integer} specifying the polarity to +which \code{object} should be subsetted.} + +\item{z}{For \code{filterPrecursorCharge()}: \code{integer()} with the precursor +charges to be used as filter.} + +\item{acquisitionNum}{For \code{filterPrecursorScan()}: \code{integer} with the +acquisition number of the spectra to which the object should be +subsetted.} + +\item{f}{For \code{filterPrecursorScan()}: defining which spectra +belong to the same original data file (sample). Defaults to +\code{f = dataOrigin(x)}.} + +\item{rt}{For \code{filterRt()}: \code{numeric(2)} defining the retention time range to +be used to subset/filter \code{object}.} + +\item{ranges}{For \code{filterRanges()}: a \code{numeric} vector of paired values +(lower and upper boundary) defining the ranges to filter the \code{object}. +These paired values need to be in the same order as the +\code{spectraVariables} parameter (see below).} + +\item{match}{For \code{filterRanges()} and \code{filterValues()}: \code{character(1)} +defining whether the condition has to match for all provided +\code{ranges}/\code{values} (\code{match = "all"}; the default), or for any of them +(\code{match = "any"}) for spectra to be retained.} + +\item{values}{For \code{filterValues()}: a \code{numeric} vector defining the +values to filter the \code{Spectra} data. These values need to be in the same +order as the \code{spectraVariables} parameter.} +} +\description{ +A variety of functions to filter or subset \code{Spectra} objects are available. +These can be generally separated into two main classes: I) \emph{classical} +subset operations that immediately reduce the number of spectra in the +object and II) filters that reduce the \strong{content} of the object without +changing its length (i.e.
the number of spectra). The latter can be further +subdivided into functions that affect the content of the \code{spectraData} (i.e. +the general spectrum metadata) and those that reduce the content of the +object's \code{peaksData} (i.e. the m/z and intensity values of a spectrum's +mass peaks). + +A description of functions from these 3 different categories are given below +in sections \emph{Subset \code{Spectra}}, \emph{Filter content of \code{spectraData()}} and +\emph{Filter content of \code{peaksData()}}, respectively. +} +\section{Subset \code{Spectra}}{ + + +These functions affect the number of spectra in a \code{Spectra} object creating +a subset of the original object without affecting its content. +\itemize{ +\item \code{[}: subsets the spectra keeping only selected elements (\code{i}). The method +\strong{always} returns a \code{Spectra} object. +\item \code{filterAcquisitionNum()}: filters the object keeping only spectra matching +the provided acquisition numbers (argument \code{n}). If \code{dataOrigin} or +\code{dataStorage} is also provided, \code{object} is subsetted to the spectra with +an acquisition number equal to \code{n} \strong{in spectra with matching dataOrigin +or dataStorage values} retaining all other spectra. +Returns the filtered \code{Spectra}. +\item \code{filterDataOrigin()}: filters the object retaining spectra matching the +provided \code{dataOrigin}. Parameter \code{dataOrigin} has to be of type +\code{character} and needs to match exactly the data origin value of the +spectra to subset. +Returns the filtered \code{Spectra} object (with spectra ordered according to +the provided \code{dataOrigin} parameter). +\item \code{filterDataStorage()}: filters the object retaining spectra stored in the +specified \code{dataStorage}. Parameter \code{dataStorage} has to be of type +\code{character} and needs to match exactly the data storage value of the +spectra to subset. +Returns the filtered \code{Spectra} object (with spectra ordered according to +the provided \code{dataStorage} parameter). +\item \code{filterEmptySpectra()}: removes empty spectra (i.e. spectra without peaks). +Returns the filtered \code{Spectra} object (with spectra in their +original order). +\item \code{filterIsolationWindow()}: retains spectra that contain \code{mz} in their +isolation window m/z range (i.e. with an \code{isolationWindowLowerMz} <= \code{mz} +and \code{isolationWindowUpperMz} >= \code{mz}. Returns the filtered \code{Spectra} +object (with spectra in their original order). +\item \code{filterMsLevel()}: filters object by MS level keeping only spectra matching +the MS level specified with argument \code{msLevel}. Returns the filtered +\code{Spectra} (with spectra in their original order). +\item \code{filterPolarity()}: filters the object keeping only spectra matching the +provided polarity. Returns the filtered \code{Spectra} (with spectra in their +original order). +\item \code{filterPrecursorCharge()}: retains spectra with the defined precursor +charge(s). +\item \code{filterPrecursorIsotopes()}: groups MS2 spectra based on their precursor +m/z and precursor intensity into predicted isotope groups and keep for each +only the spectrum representing the monoisotopic precursor. MS1 spectra +are returned as is. See documentation for \code{deisotopeSpectra()} below for +details on isotope prediction and parameter description. 
+\item \code{filterPrecursorMaxIntensity()}: filters the \code{Spectra} keeping for groups +of (MS2) spectra with similar precursor m/z values (given parameters +\code{ppm} and \code{tolerance}) the one with the highest precursor intensity. The +function filters only MS2 spectra and returns all MS1 spectra. If +precursor intensities are \code{NA} for all spectra within a spectra group, the +first spectrum of that groups is returned. +Note: some manufacturers don't provide precursor intensities. These can +however also be estimated with \code{\link[=estimatePrecursorIntensity]{estimatePrecursorIntensity()}}. +\item \code{filterPrecursorMzRange()} (previously \code{filterPrecursorMz()} which is now +deprecated): retains spectra with a precursor m/z within the +provided m/z range. See examples for details on selecting spectra with +a precursor m/z for a target m/z accepting a small difference in \emph{ppm}. +\item \code{filterPrecursorMzValues()}: retains spectra with precursor m/z matching +any of the provided m/z values (given \code{ppm} and \code{tolerance}). Spectra with +missing precursor m/z value (e.g. MS1 spectra) are dropped. +\item \code{filterPrecursorScan()}: retains parent (e.g. MS1) and children scans (e.g. +MS2) of acquisition number \code{acquisitionNum}. Returns the filtered +\code{Spectra} (with spectra in their original order). Parameter \code{f} allows to +define which spectra belong to the same sample or original data file ( +defaults to \code{f = dataOrigin(object)}). +\item \code{filterRanges()}: allows filtering of the \code{Spectra} object based on user +defined \emph{numeric} ranges (parameter \code{ranges}) for one or more available +spectra variables in object (spectra variable names can be specified with +parameter \code{spectraVariables}). Spectra for which the value of a spectra +variable is within it's defined range are retained. If multiple +ranges/spectra variables are defined, the \code{match} parameter can be used +to specify whether all conditions (\code{match = "all"}; the default) or if +any of the conditions must match (\code{match = "any"}; all spectra for which +values are within any of the provided ranges are retained). +\item \code{filterRt()}: retains spectra of MS level \code{msLevel} with retention +times (in seconds) within (\code{>=}) \code{rt[1]} and (\code{<=}) +\code{rt[2]}. Returns the filtered \code{Spectra} (with spectra in their +original order). +\item \code{filterValues()}: allows filtering of the \code{Spectra} object based on +similarities of \emph{numeric} values of one or more \code{spectraVariables(object)} +(parameter \code{spectraVariables}) to provided values (parameter \code{values}) +given acceptable differences (parameters tolerance and ppm). If multiple +values/spectra variables are defined, the \code{match} parameter can be used +to specify whether all conditions (\code{match = "all"}; the default) or if +any of the conditions must match (\code{match = "any"}; all spectra for which +values are within any of the provided ranges are retained). +} +} + +\section{Filter content of \code{spectraData()}}{ + + +The functions described in this section filter the content from a +\code{Spectra}'s spectra data, i.e. affect values of, or complete, spectra +variables. None of these functions reduces the object's number of spectra. +\itemize{ +\item \code{dropNaSpectraVariables()}: removes spectra variables (i.e. columns in the +object's \code{spectraData} that contain only missing values (\code{NA}). 
Note that +while columns with only \code{NA}s are removed, a \code{spectraData()} call after +\code{dropNaSpectraVariables()} might still show columns containing \code{NA} values +for \emph{core} spectra variables. The total number of spectra is not changed +by this function. +\item \code{selectSpectraVariables()}: reduces the information within the object to +the selected spectra variables: all data for variables not specified will +be dropped. For mandatory columns (i.e., those listed by +\code{\link[=coreSpectraVariables]{coreSpectraVariables()}}, such as \emph{msLevel}, \emph{rtime} ...) only +the values will be dropped but not the variable itself. Additional (or +user defined) spectra variables will be completely removed. +Returns the filtered \code{Spectra}. +} +} + +\section{Filter content of \code{peaksData()}}{ + + +The functions described in this section filter the content of the +\code{Spectra}'s peaks data, i.e. either the number or the values (\emph{m/z} or +intensity values) of the mass peaks. Also, the actual operation is only +executed once peaks data is accessed (through \code{peaksData()}, +\code{mz()} or \code{intensity()}) or \code{applyProcessing()} is called. +These operations don't affect the number of spectra in the \code{Spectra} object. +\itemize{ +\item \code{deisotopeSpectra()}: \emph{deisotopes} each spectrum keeping only the +monoisotopic peak for groups of isotopologues. Isotopologues are +estimated using the \code{\link[=isotopologues]{isotopologues()}} function from the +\emph{MetaboCoreUtils} package. Note that +the default parameters for isotope prediction/detection have been +determined using data from the Human Metabolome Database (HMDB) and +isotopes for elements other than CHNOPS might not be detected. See +parameter \code{substDefinition} in the documentation of \code{\link[=isotopologues]{isotopologues()}} for +more information. The approach and code to define the parameters for +isotope prediction is described +\href{https://github.com/EuracBiomedicalResearch/isotopologues}{here}. +\item \code{filterFourierTransformArtefacts()}: removes (Orbitrap) fast Fourier +transform artefact peaks from spectra (see examples below). The function +iterates through all intensity-ordered peaks in a spectrum and removes all +peaks with an m/z within +/- \code{halfWindowSize} of the current peak if their +intensity is lower than \code{threshold} times the current peak's intensity. +Additional parameters \code{keepIsotopes}, \code{maxCharge} and \code{isotopeTolerance} +help to avoid removing potential \verb{[13]C} isotope peaks (\code{maxCharge} +being the maximum charge that should be considered and \code{isotopeTolerance} +the absolute acceptable tolerance for matching their m/z). +See \code{\link[=filterFourierTransformArtefacts]{filterFourierTransformArtefacts()}} for details and background and +\code{deisotopeSpectra()} for an alternative. +\item \code{filterIntensity()}: filters mass peaks in each spectrum keeping only +those with intensities that are within the provided range or match the +criteria of the provided function. For the former, parameter \code{intensity} +has to be a \code{numeric} defining the intensity range, for the latter a +\code{function} that takes the intensity values of the spectrum and returns +a \code{logical} whether the peak should be retained or not (see examples +below for details) - additional parameters to the function can be passed +with \code{...}.
+To remove only peaks with intensities below a certain threshold, say +100, use \code{intensity = c(100, Inf)}. Note that also a single value can be +passed with the \code{intensity} parameter in which case an upper limit of +\code{Inf} is used. +Note that this function also removes peaks with missing intensities +(i.e. an intensity of \code{NA}). Parameter \code{msLevel.} allows to restrict the +filtering to spectra of the specified MS level(s). +\item \code{filterMzRange()}: filters mass peaks in the object keeping or removing +those in each spectrum that are within the provided m/z range. Whether +peaks are retained or removed can be configured with parameter \code{keep} +(default \code{keep = TRUE}). +\item \code{filterMzValues()}: filters mass peaks in the object keeping all +peaks in each spectrum that match the provided m/z value(s) (for +\code{keep = TRUE}, the default) or removing all of them (for \code{keep = FALSE}). +The m/z matching considers also the absolute \code{tolerance} and m/z-relative +\code{ppm} values. \code{tolerance} and \code{ppm} have to be of length 1. +\item \code{filterPeaksRanges()}: filters mass peaks of a \code{Spectra} object using any +set of range-based filters on numeric spectra or peaks variables. See +\code{\link[=filterPeaksRanges]{filterPeaksRanges()}} for more information. +\item \code{filterPrecursorPeaks()}: removes peaks from each spectrum in \code{object} with +an m/z equal to or larger than the m/z of the precursor, depending on the +value of parameter \code{mz}: for \code{mz = "=="} (the default) peaks with matching +m/z (considering an absolute and relative acceptable difference depending +on \code{tolerance} and \code{ppm}, respectively) are removed. For \code{mz = ">="} all +peaks with an m/z larger or equal to the precursor m/z (minus \code{tolerance} +and the \code{ppm} of the precursor m/z) are removed. Parameter \code{msLevel.} +allows to restrict the filter to certain MS levels (by default the filter +is applied to all MS levels). Note that no peaks are removed if the +precursor m/z is \code{NA} (e.g. typically for MS1 spectra). +\item \code{reduceSpectra()}: keeps, for groups of mass peaks with similar m/z values +(given \code{ppm} and \code{tolerance}), only the mass peak with the highest +intensity in each spectrum, removing all other peaks and hence +\emph{reducing} each spectrum to the highest intensity peak per \emph{peak group}. +Peak groups are defined using the \code{\link[=group]{group()}} function from the +\emph{MsCoreUtils} package. See also the \code{\link[=combinePeaks]{combinePeaks()}} function for an +alternative function to combine peaks within each spectrum. +} +} + +\examples{ + +## Load a `Spectra` object with LC-MS/MS data. +fl <- system.file("TripleTOF-SWATH", "PestMix1_DDA.mzML", + package = "msdata") +sps_dda <- Spectra(fl) +sps_dda
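+ +## As a quick, purely illustrative overview of the experiment, count the +## number of spectra per MS level using the `msLevel()` accessor: +table(msLevel(sps_dda))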
+ + +## -------- SUBSET SPECTRA -------- + +## Subset to the first 3 spectra +tmp <- sps_dda[1:3] +tmp +length(tmp) + +## Subset to all MS2 spectra; this could be done with [, or, more +## efficiently, with the `filterMsLevel()` function: +sps_dda[msLevel(sps_dda) == 2L] +filterMsLevel(sps_dda, 2L) + +## Filter the object keeping only MS2 spectra with a precursor m/z value +## within a specified range: +filterPrecursorMzRange(sps_dda, c(80, 90)) + +## Filter the object to MS2 spectra with a precursor m/z matching a +## pre-defined value (given ppm and tolerance): +filterPrecursorMzValues(sps_dda, 85, ppm = 5, tolerance = 0.1) + +## The `filterRanges()` function allows to filter a `Spectra` based on +## numerical ranges of any of its (numerical) spectra variables. +## First, determine the variable(s) on which to base the filtering: +sv <- c("rtime", "precursorMz", "peaksCount") +## Note that ANY variables can be chosen here, and as many as wanted. + +## Define the ranges (pairs of values with lower and upper boundary) to be +## used for the individual spectra variables. The first two values will be +## used for the first spectra variable (e.g., `"rtime"` here), the next two +## for the second (e.g. `"precursorMz"` here) and so on: +ranges <- c(30, 350, 200, 500, 350, 600) + +## Pass these parameters to the `filterRanges()` function: +filt_spectra <- filterRanges(sps_dda, spectraVariables = sv, + ranges = ranges) +filt_spectra + +## `filterRanges()` can also be used to filter a `Spectra` object with +## multiple ranges for the same `spectraVariable` (e.g., here `"rtime"`): +sv <- c("rtime", "rtime") +ranges <- c(30, 100, 200, 300) +filt_spectra <- filterRanges(sps_dda, spectraVariables = sv, + ranges = ranges, match = "any") +filt_spectra + +## While `filterRanges()` filtered on numeric ranges, `filterValues()` +## allows to filter an object matching spectra variable values to user +## provided values (allowing to configure allowed differences using the +## `ppm` and `tolerance` parameters). +## First determine the variable(s) on which to base the filtering: +sv <- c("rtime", "precursorMz") +## Note that ANY variables can be chosen here, and as many as wanted. + +## Define the values that will be used to filter the spectra based on their +## similarities to their respective `spectraVariables`. +## The first values in the parameters values, tolerance and ppm will be +## used for the first spectra variable (e.g. `"rtime"` here), the next for +## the second (e.g. `"precursorMz"` here) and so on: +values <- c(350, 80) +tolerance <- c(100, 0.1) +ppm <- c(0, 50) + +## Pass these parameters to the `filterValues()` function: +filt_spectra <- filterValues(sps_dda, spectraVariables = sv, + values = values, tolerance = tolerance, ppm = ppm) +filt_spectra
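+ +## Filter functions can also be combined. As a small illustration, the +## calls below first restrict the data set to a retention time window and +## then keep only the MS2 spectra from that window: +sps_rt_ms2 <- filterRt(sps_dda, rt = c(200, 600)) +sps_rt_ms2 <- filterMsLevel(sps_rt_ms2, 2L) +sps_rt_ms2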
+ + +## -------- FILTER SPECTRA DATA -------- + +## Remove spectra variables without content (i.e. with only missing values) +sps_noNA <- dropNaSpectraVariables(sps_dda) + +## This reduced the size of the object slightly +print(object.size(sps_dda), units = "MB") +print(object.size(sps_noNA), units = "MB") + +## With the `selectSpectraVariables()` function it is in addition possible +## to subset the data of a `Spectra` to the selected columns/variables, +## keeping only their data: +tmp <- selectSpectraVariables(sps_dda, c("msLevel", "mz", "intensity", + "scanIndex")) +print(object.size(tmp), units = "MB") + +## All data except the selected variables is now removed. Accessing +## core spectra variables still works, but returns only NA +rtime(tmp) |> head() + + +## -------- FILTER PEAKS DATA -------- + +## `filterMzValues()` filters the mass peaks data of a `Spectra` retaining +## only those mass peaks with an m/z value matching the provided value(s). +sps_sub <- filterMzValues(sps_dda, mz = c(103, 104), tolerance = 0.3) + +## The filtered `Spectra` has the same length +length(sps_dda) +length(sps_sub) + +## But the number of mass peaks changed +lengths(sps_dda) |> head() +lengths(sps_sub) |> head() + +## This function can also be used to remove specific peaks from a spectrum +## by setting `keep = FALSE`. +sps_sub <- filterMzValues(sps_dda, mz = c(103, 104), + tolerance = 0.3, keep = FALSE) +lengths(sps_sub) |> head() + +## With the `filterMzRange()` function it is possible to keep (or remove) +## mass peaks with m/z values within a specified numeric range. +sps_sub <- filterMzRange(sps_dda, mz = c(100, 150)) +lengths(sps_sub) |> head() + +## See also the `filterPeaksRanges()` function for a more flexible framework +## to filter mass peaks. + + +## Removing Fourier transform artefacts seen in Orbitrap data. + +## Loading an Orbitrap spectrum with artefacts. +data(fft_spectrum) +plotSpectra(fft_spectrum, xlim = c(264.5, 265.5)) +plotSpectra(fft_spectrum, xlim = c(264.5, 265.5), ylim = c(0, 5e6)) + +fft_spectrum <- filterFourierTransformArtefacts(fft_spectrum) +fft_spectrum +plotSpectra(fft_spectrum, xlim = c(264.5, 265.5), ylim = c(0, 5e6)) + +## Using a few example peaks in your data you can optimize the parameters +fft_spectrum_filtered <- filterFourierTransformArtefacts(fft_spectrum, + halfWindowSize = 0.2, + threshold = 0.005, + keepIsotopes = TRUE, + maxCharge = 5, + isotopeTolerance = 0.005 + ) + +fft_spectrum_filtered +length(mz(fft_spectrum_filtered)[[1]]) +plotSpectra(fft_spectrum_filtered, xlim = c(264.5, 265.5), ylim = c(0, 5e6)) + + +## *Reducing* a `Spectra` keeping for groups of mass peaks (characterized +## by similarity of their m/z values) only one representative peak. This +## function helps to clean fragment spectra. +## Filter the data set to MS2 spectra +ms2 <- filterMsLevel(sps_dda, 2L) + +## For groups of fragment peaks with a difference in m/z < 0.1, keep only +## the largest one. +ms2_red <- reduceSpectra(ms2, ppm = 0, tolerance = 0.1) +lengths(ms2) |> tail() +lengths(ms2_red) |> tail() +} +\seealso{ +\itemize{ +\item \code{\link[=combineSpectra]{combineSpectra()}} for functions to combine or aggregate \code{Spectra}.
+\item \code{\link[=combinePeaks]{combinePeaks()}} for functions to combine or aggregate a \code{Spectra}'s +\code{peaksData()} +} +} +\author{ +Sebastian Gibb, Johannes Rainer, Laurent Gatto, Philippine Louail, Nir Shahaf +} diff --git a/man/spectraData.Rd b/man/spectraData.Rd new file mode 100644 index 00000000..49d2bee3 --- /dev/null +++ b/man/spectraData.Rd @@ -0,0 +1,598 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Spectra.R +\name{spectraData} +\alias{spectraData} +\alias{acquisitionNum} +\alias{centroided} +\alias{collisionEnergy} +\alias{dataOrigin} +\alias{dataStorage} +\alias{intensity} +\alias{ionCount} +\alias{isCentroided} +\alias{isEmpty} +\alias{isolationWindowLowerMz} +\alias{isolationWindowUpperMz} +\alias{isolationWindowTargetMz} +\alias{lengths} +\alias{msLevel} +\alias{mz} +\alias{peaksData} +\alias{peaksVariables} +\alias{polarity} +\alias{precursorCharge} +\alias{precursorIntensity} +\alias{precursorMz} +\alias{rtime} +\alias{scanIndex} +\alias{smoothed} +\alias{spectraNames} +\alias{spectraVariables} +\alias{tic} +\alias{uniqueMsLevels} +\alias{asDataFrame} +\alias{acquisitionNum,Spectra-method} +\alias{centroided,Spectra-method} +\alias{centroided<-,Spectra-method} +\alias{collisionEnergy,Spectra-method} +\alias{collisionEnergy<-,Spectra-method} +\alias{coreSpectraVariables} +\alias{dataOrigin,Spectra-method} +\alias{dataOrigin<-,Spectra-method} +\alias{dataStorage,Spectra-method} +\alias{intensity,Spectra-method} +\alias{ionCount,Spectra-method} +\alias{isCentroided,Spectra-method} +\alias{isEmpty,Spectra-method} +\alias{isolationWindowLowerMz,Spectra-method} +\alias{isolationWindowLowerMz<-,Spectra-method} +\alias{isolationWindowTargetMz,Spectra-method} +\alias{isolationWindowTargetMz<-,Spectra-method} +\alias{isolationWindowUpperMz,Spectra-method} +\alias{isolationWindowUpperMz<-,Spectra-method} +\alias{length,Spectra-method} +\alias{lengths,Spectra-method} +\alias{msLevel,Spectra-method} +\alias{mz,Spectra-method} +\alias{peaksData,Spectra-method} +\alias{peaksVariables,Spectra-method} +\alias{polarity,Spectra-method} +\alias{polarity<-,Spectra-method} +\alias{precScanNum,Spectra-method} +\alias{precursorCharge,Spectra-method} +\alias{precursorIntensity,Spectra-method} +\alias{precursorMz,Spectra-method} +\alias{rtime,Spectra-method} +\alias{rtime<-,Spectra-method} +\alias{scanIndex,Spectra-method} +\alias{smoothed,Spectra-method} +\alias{smoothed<-,Spectra-method} +\alias{spectraData,Spectra-method} +\alias{spectraData<-,Spectra-method} +\alias{spectraNames,Spectra-method} +\alias{spectraNames<-,Spectra-method} +\alias{spectraVariables,Spectra-method} +\alias{tic,Spectra-method} +\alias{uniqueMsLevels,Spectra-method} +\alias{$,Spectra-method} +\alias{$<-,Spectra-method} +\alias{[[,Spectra-method} +\alias{[[<-,Spectra-method} +\title{Accessing mass spectrometry data} +\usage{ +asDataFrame( + object, + i = seq_along(object), + spectraVars = spectraVariables(object) +) + +\S4method{acquisitionNum}{Spectra}(object) + +\S4method{centroided}{Spectra}(object) + +\S4method{centroided}{Spectra}(object) <- value + +\S4method{collisionEnergy}{Spectra}(object) + +\S4method{collisionEnergy}{Spectra}(object) <- value + +coreSpectraVariables() + +\S4method{dataOrigin}{Spectra}(object) + +\S4method{dataOrigin}{Spectra}(object) <- value + +\S4method{dataStorage}{Spectra}(object) + +\S4method{intensity}{Spectra}(object, f = processingChunkFactor(object), ...) 
+ +\S4method{ionCount}{Spectra}(object) + +\S4method{isCentroided}{Spectra}(object, ...) + +\S4method{isEmpty}{Spectra}(x) + +\S4method{isolationWindowLowerMz}{Spectra}(object) + +\S4method{isolationWindowLowerMz}{Spectra}(object) <- value + +\S4method{isolationWindowTargetMz}{Spectra}(object) + +\S4method{isolationWindowTargetMz}{Spectra}(object) <- value + +\S4method{isolationWindowUpperMz}{Spectra}(object) + +\S4method{isolationWindowUpperMz}{Spectra}(object) <- value + +\S4method{length}{Spectra}(x) + +\S4method{lengths}{Spectra}(x, use.names = FALSE) + +\S4method{msLevel}{Spectra}(object) + +\S4method{mz}{Spectra}(object, f = processingChunkFactor(object), ...) + +\S4method{peaksData}{Spectra}( + object, + columns = c("mz", "intensity"), + f = processingChunkFactor(object), + ..., + BPPARAM = bpparam() +) + +\S4method{peaksVariables}{Spectra}(object) + +\S4method{polarity}{Spectra}(object) + +\S4method{polarity}{Spectra}(object) <- value + +\S4method{precScanNum}{Spectra}(object) + +\S4method{precursorCharge}{Spectra}(object) + +\S4method{precursorIntensity}{Spectra}(object) + +\S4method{precursorMz}{Spectra}(object) + +\S4method{rtime}{Spectra}(object) + +\S4method{rtime}{Spectra}(object) <- value + +\S4method{scanIndex}{Spectra}(object) + +\S4method{smoothed}{Spectra}(object) + +\S4method{smoothed}{Spectra}(object) <- value + +\S4method{spectraData}{Spectra}(object, columns = spectraVariables(object)) + +\S4method{spectraData}{Spectra}(object) <- value + +\S4method{spectraNames}{Spectra}(object) + +\S4method{spectraNames}{Spectra}(object) <- value + +\S4method{spectraVariables}{Spectra}(object) + +\S4method{tic}{Spectra}(object, initial = TRUE) + +\S4method{uniqueMsLevels}{Spectra}(object, ...) + +\S4method{$}{Spectra}(x, name) + +\S4method{$}{Spectra}(x, name) <- value + +\S4method{[[}{Spectra}(x, i, j, ...) + +\S4method{[[}{Spectra}(x, i, j, ...) <- value +} +\arguments{ +\item{object}{A \code{Spectra} object.} + +\item{i}{For \code{asDataFrame()}: A \code{numeric} indicating which scans to coerce +to a \code{DataFrame} (default is \code{seq_along(object)}).} + +\item{spectraVars}{\code{character()} indicating what spectra variables to add to +the \code{DataFrame}. Default is \code{spectraVariables(object)}, i.e. all +available variables.} + +\item{value}{A vector with values to replace the respective spectra +variable. Needs to be of the correct data type for the spectra variable.} + +\item{f}{For \code{intensity()}, \code{mz()} and \code{peaksData()}: factor defining how +data should be loaded and processed chunk-wise. Defaults to +\code{\link[=processingChunkFactor]{processingChunkFactor()}}.} + +\item{...}{Additional arguments.} + +\item{x}{A \code{Spectra} object.} + +\item{use.names}{For \code{lengths()}: ignored.} + +\item{columns}{For \code{spectraData()} accessor: optional \code{character} with +column names (spectra variables) that should be included in the +returned \code{DataFrame}. By default, all columns are returned. +For \code{peaksData()} accessor: optional \code{character} with requested columns +in the individual \code{matrix} of the returned \code{list}. Defaults to +\code{c("mz", "intensity")} but any values returned by \code{peaksVariables(object)} +with \code{object} being the \code{Spectra} object are supported.} + +\item{BPPARAM}{Parallel setup configuration. See \code{\link[=bpparam]{bpparam()}} for more +information.
See also \code{\link[=processingChunkSize]{processingChunkSize()}} for more information +on parallel processing.} + +\item{initial}{For \code{tic()}: \code{logical(1)} whether the initially +reported total ion current should be reported, or whether the +total ion current should be (re)calculated on the actual data +(\code{initial = FALSE}, same as \code{ionCount()}).} + +\item{name}{For \code{$} and \verb{$<-}: the name of the spectra variable to return +or set.} + +\item{j}{For \code{[}: not supported.} +} +\description{ +As detailed in the documentation of the \link{Spectra} class, a \code{Spectra} object +is a container for mass spectrometry (MS) data that includes both the mass +peaks data (or \emph{peaks data}, generally \emph{m/z} and intensity values) as well +as spectra metadata (so called \emph{spectra variables}). Spectra variables +generally define one value per spectrum, while for peaks variables one value +per mass peak is defined and hence multiple values per spectrum (depending +on the number of mass peaks of a spectrum). + +Data can be extracted from a \code{Spectra} object using dedicated accessor +functions or also using the \code{$} operator. Depending on the backend class +used by the \code{Spectra} to represent the data, data can also be added or +replaced (again, using dedicated functions or using \verb{$<-}). +} +\section{Spectra variables}{ + + +A common set of \emph{core spectra variables} are defined for \code{Spectra}. These +have a pre-defined data type and each \code{Spectra} will return a value for +these if requested. If no value for a spectra variable is defined, a missing +value (of the correct data type) is returned. The list of core spectra +variables and their respective data type is: +\itemize{ +\item \emph{acquisitionNum} \code{integer(1)}: the index of acquisition of a spectrum +during an MS run. +\item \emph{centroided} \code{logical(1)}: whether the spectrum is in profile or centroid +mode. +\item \emph{collisionEnergy} \code{numeric(1)}: collision energy used to create an MSn +spectrum. +\item \emph{dataOrigin} \code{character(1)}: the \emph{origin} of the spectrum's data, e.g. the +mzML file from which it was read. +\item \emph{dataStorage} \code{character(1)}: the (current) storage location of the +spectrum data. This value depends on the backend used to handle and +provide the data. For an \emph{in-memory} backend like the \code{MsBackendDataFrame} +this will be \code{""}, for an on-disk backend such as the +\code{MsBackendHdf5Peaks} it will be the name of the HDF5 file where the +spectrum's peak data is stored. +\item \emph{isolationWindowLowerMz} \code{numeric(1)}: lower m/z for the isolation +window in which the (MSn) spectrum was measured. +\item \emph{isolationWindowTargetMz} \code{numeric(1)}: the target m/z for the isolation +window in which the (MSn) spectrum was measured. +\item \emph{isolationWindowUpperMz} \code{numeric(1)}: upper m/z for the isolation window +in which the (MSn) spectrum was measured. +\item \emph{msLevel} \code{integer(1)}: the MS level of the spectrum. +\item \emph{polarity} \code{integer(1)}: the polarity of the spectrum (\code{0} and \code{1} +representing negative and positive polarity, respectively). +\item \emph{precScanNum} \code{integer(1)}: the scan (acquisition) number of the precursor +for an MSn spectrum. +\item \emph{precursorCharge} \code{integer(1)}: the charge of the precursor of an MSn +spectrum. +\item \emph{precursorIntensity} \code{numeric(1)}: the intensity of the precursor of an +MSn spectrum. 
+\item \emph{precursorMz} \code{numeric(1)}: the m/z of the precursor of an MSn spectrum. +\item \emph{rtime} \code{numeric(1)}: the retention time of a spectrum. +\item \emph{scanIndex} \code{integer(1)}: the index of a spectrum within a (raw) file. +\item \emph{smoothed} \code{logical(1)}: whether the spectrum was smoothed. +} + +For each of these spectra variables a dedicated accessor function is defined +(such as \code{msLevel()} or \code{rtime()}) that allows to extract the values of +that spectra variable for all spectra in a \code{Spectra} object. Also, +replacement functions are defined, but not all backends might support +replacing values for spectra variables. As described above, additional +spectra variables can be defined or added. The \code{spectraVariables()} function +can be used to list the names of all spectra variables available in a +\code{Spectra} object. + +Values for multiple spectra variables, or for all of them, can be +extracted with the \code{spectraData()} function. +} + +\section{Peaks variables}{ + + +A \code{Spectra} object also provides mass peak data, with the \emph{m/z} and +intensity values being the \emph{core} peaks variables: +\itemize{ +\item \emph{intensity} \code{numeric}: intensity values for the spectrum's peaks. +\item \emph{mz} \code{numeric}: the m/z values for the spectrum's peaks. +} + +Values for these can be extracted with the \code{mz()} and \code{intensity()} +functions, or the \code{peaksData()} function. The former functions return a +\code{NumericList} with the respective values, while the latter returns a \code{List} +with \code{numeric} two-column matrices. The list of peaks matrices can also +be extracted using \code{as(x, "list")} or \code{as(x, "SimpleList")} with \code{x} being +a \code{Spectra} object. + +Some \code{Spectra}/backends also provide values for additional peaks variables. +The set of available peaks variables can be extracted with the +\code{peaksVariables()} function. +} + +\section{Functions to access MS data}{ + + +The set of available functions to extract data from, or set data in, a +\code{Spectra} object is listed below (in alphabetical order). Note that there +are also other functions to extract information from a \code{Spectra} object +documented in \code{\link[=addProcessing]{addProcessing()}}. +\itemize{ +\item \code{$}, \verb{$<-}: gets (or sets) a spectra variable for all spectra in \code{object}. +See examples for details. Note that replacing values of a peaks variable +is not supported with a non-empty processing queue, i.e. if any filtering +or data manipulations on the peaks data were performed. In these cases +\code{\link[=applyProcessing]{applyProcessing()}} needs to be called first to apply all cached data +operations. +\item \code{[[}, \verb{[[<-}: access or set/add a single spectra variable (column) in the +backend. +\item \code{acquisitionNum()}: returns the acquisition number of each +spectrum. Returns an \code{integer} of length equal to the number of +spectra (with \code{NA_integer_} if not available). +\item \code{asDataFrame()}: converts the \code{Spectra} to a \code{DataFrame} (in long format) +containing all data. Returns a \code{DataFrame}. +\item \code{centroided()}, \verb{centroided<-}: gets or sets the centroiding +information of the spectra. \code{centroided()} returns a \code{logical} +vector of length equal to the number of spectra with \code{TRUE} if a +spectrum is centroided, \code{FALSE} if it is in profile mode and \code{NA} +if it is undefined. See also \code{isCentroided()} for estimating from +the spectrum data whether the spectrum is centroided.
\code{value} +for \verb{centroided<-} is either a single \code{logical} or a \code{logical} of +length equal to the number of spectra in \code{object}. +\item \code{collisionEnergy()}, \verb{collisionEnergy<-}: gets or sets the +collision energy for all spectra in \code{object}. \code{collisionEnergy()} +returns a \code{numeric} with length equal to the number of spectra +(\code{NA_real_} if not present/defined), \verb{collisionEnergy<-} takes a +\code{numeric} of length equal to the number of spectra in \code{object}. +\item \code{coreSpectraVariables()}: returns the \emph{core} spectra variables along with +their expected data type. +\item \code{dataOrigin()}, \verb{dataOrigin<-}: gets or sets the \emph{data origin} for each +spectrum. \code{dataOrigin()} returns a \code{character} vector (same length than +\code{object}) with the origin of the spectra. \verb{dataOrigin<-} expects a +\code{character} vector (same length than \code{object}) with the replacement +values for the data origin of each spectrum. +\item \code{dataStorage()}: returns a \code{character} vector (same length than \code{object}) +with the data storage location of each spectrum. +\item \code{intensity()}: gets the intensity values from the spectra. Returns +a \code{\link[=NumericList]{NumericList()}} of \code{numeric} vectors (intensity values for each +spectrum). The length of the list is equal to the number of +\code{spectra} in \code{object}. +\item \code{ionCount()}: returns a \code{numeric} with the sum of intensities for +each spectrum. If the spectrum is empty (see \code{isEmpty()}), +\code{NA_real_} is returned. +\item \code{isCentroided()}: a heuristic approach assessing if the spectra in +\code{object} are in profile or centroided mode. The function takes +the \code{qtl}th quantile top peaks, then calculates the difference +between adjacent m/z value and returns \code{TRUE} if the first +quartile is greater than \code{k}. (See \code{Spectra:::.isCentroided()} for +the code.) +\item \code{isEmpty()}: checks whether a spectrum in \code{object} is empty +(i.e. does not contain any peaks). Returns a \code{logical} vector of +length equal number of spectra. +\item \code{isolationWindowLowerMz()}, \verb{isolationWindowLowerMz<-}: gets or sets the +lower m/z boundary of the isolation window. +\item \code{isolationWindowTargetMz()}, \verb{isolationWindowTargetMz<-}: gets or sets the +target m/z of the isolation window. +\item \code{isolationWindowUpperMz()}, \verb{isolationWindowUpperMz<-}: gets or sets the +upper m/z boundary of the isolation window. +\item \code{length()}: gets the number of spectra in the object. +\item \code{lengths()}: gets the number of peaks (m/z-intensity values) per +spectrum. Returns an \code{integer} vector (length equal to the +number of spectra). For empty spectra, \code{0} is returned. +\item \code{msLevel()}: gets the spectra's MS level. Returns an integer vector (names +being spectrum names, length equal to the number of spectra) with the MS +level for each spectrum. +\item \code{mz()}: gets the mass-to-charge ratios (m/z) from the +spectra. Returns a \code{\link[=NumericList]{NumericList()}} or length equal to the number of +spectra, each element a \code{numeric} vector with the m/z values of +one spectrum. +\item \code{peaksData()}: gets the \emph{peaks} data for all spectra in \code{object}. Peaks +data consist of the m/z and intensity values as well as possible additional +annotations (variables) of all peaks of each spectrum. 
The function +returns a \code{\link[=SimpleList]{SimpleList()}} of two-dimensional arrays (either \code{matrix} or +\code{data.frame}), with each array providing the values for the requested +\emph{peak variables} (by default \code{"mz"} and \code{"intensity"}). Optional parameter +\code{columns} is passed to the backend's \code{peaksData()} function to allow +the selection of specific (or additional) peaks variables (columns) that +should be extracted (if available). Importantly, +it is \strong{not} guaranteed that each backend supports this parameter (while +each backend must support extraction of \code{"mz"} and \code{"intensity"} columns). +Parameter \code{columns} defaults to \code{c("mz", "intensity")} but any value +returned by \code{peaksVariables(object)} is supported. +Note also that it is possible to extract the peak data with +\code{as(x, "list")} and \code{as(x, "SimpleList")} as a \code{list} and \code{SimpleList}, +respectively. Note however that, in contrast to \code{peaksData()}, \code{as()} +does not support the parameter \code{columns}. +\item \code{peaksVariables()}: lists the available variables for mass peaks provided +by the backend. Default peak variables are \code{"mz"} and \code{"intensity"} (which +all backends need to support and provide), but some backends might provide +additional variables. +These variables correspond to the column names of the peak data array +returned by \code{peaksData()}. +\item \code{polarity()}, \verb{polarity<-}: gets or sets the polarity for each +spectrum. \code{polarity()} returns an \code{integer} vector (length equal +to the number of spectra), with \code{0} and \code{1} representing negative +and positive polarities, respectively. \verb{polarity<-} expects an +\code{integer} vector of length 1 or equal to the number of spectra. +\item \code{precursorCharge()}, \code{precursorIntensity()}, \code{precursorMz()}, +\code{precScanNum()}, \code{precAcquisitionNum()}: gets the charge (\code{integer}), +intensity (\code{numeric}), m/z (\code{numeric}), scan index (\code{integer}) +and acquisition number (\code{integer}) of the precursor for MSn spectra +(MS level 2 and above) from the object. Returns a vector of length equal to +the number of spectra in \code{object}. \code{NA} is reported for MS1 +spectra or if no precursor information is available. +\item \code{rtime()}, \verb{rtime<-}: gets or sets the retention times (in seconds) +for each spectrum. \code{rtime()} returns a \code{numeric} vector (length +equal to the number of spectra) with the retention time for each +spectrum. \verb{rtime<-} expects a numeric vector with length equal +to the number of spectra. +\item \code{scanIndex()}: returns an \code{integer} vector with the \emph{scan index} +for each spectrum. This represents the relative index of the +spectrum within each file. Note that this can be different from the +\code{acquisitionNum} of the spectrum which represents the index of the +spectrum during acquisition/measurement (as reported in the mzML file). +\item \code{smoothed()}, \verb{smoothed<-}: gets or sets whether a spectrum is +\emph{smoothed}. \code{smoothed()} returns a \code{logical} vector of length equal +to the number of spectra. \verb{smoothed<-} takes a \code{logical} vector +of length 1 or equal to the number of spectra in \code{object}. +\item \code{spectraData()}: gets general spectrum metadata (annotation, also called +header). \code{spectraData()} returns a \code{DataFrame}. Note that this +method does by default \strong{not} return m/z or intensity values.
+\item \verb{spectraData<-}: \strong{replaces} the full spectra data of the \code{Spectra} +object with the one provided with \code{value}. The \verb{spectraData<-} function +expects a \code{DataFrame} to be passed as value with the same number of rows +as there are spectra in \code{object}. Note that replacing values of +peaks variables is not supported with a non-empty processing queue, i.e. +if any filtering or data manipulations on the peaks data were performed. +In these cases \code{\link[=applyProcessing]{applyProcessing()}} needs to be called first to apply all +cached data operations and empty the processing queue. +\item \code{spectraNames()}, \verb{spectraNames<-}: gets or sets the spectra names. +\item \code{spectraVariables()}: returns a \code{character} vector with the +available spectra variables (columns, fields or attributes of each +spectrum) available in \code{object}. Note that \code{spectraVariables()} does not +list the \emph{peak variables} (\code{"mz"}, \code{"intensity"} and any additional +annotations for each MS peak). Peak variables are returned by +\code{peaksVariables()}. +\item \code{tic()}: gets the total ion current/count (sum of signal of a +spectrum) for all spectra in \code{object}. By default, the value +reported in the original raw data file is returned. For an empty +spectrum, \code{0} is returned. +\item \code{uniqueMsLevels()}: gets the unique MS levels available in \code{object}. This +function is supposed to be more efficient than \code{unique(msLevel(object))}. +} +} + +\examples{ + +## Create a Spectra from mzML files and use the `MsBackendMzR` on-disk +## backend. +sciex_file <- dir(system.file("sciex", package = "msdata"), + full.names = TRUE) +sciex <- Spectra(sciex_file, backend = MsBackendMzR()) +sciex + +## Get the number of spectra in the data set +length(sciex) + +## Get the number of mass peaks per spectrum - limit to the first 6 +lengths(sciex) |> head() + +## Get the MS level for each spectrum - limit to the first 6 spectra +msLevel(sciex) |> head() + +## Alternatively, we could also use $ to access a specific spectra variable. +## This could also be used to add additional spectra variables to the +## object (see further below). +sciex$msLevel |> head() + +## Get the intensity and m/z values. +intensity(sciex) +mz(sciex) + +## Convert a subset of the Spectra object to a long DataFrame. +asDataFrame(sciex, i = 1:3, spectraVars = c("rtime", "msLevel")) + +## Create a Spectra providing a `DataFrame` containing the spectrum data. + +spd <- DataFrame(msLevel = c(1L, 2L), rtime = c(1.1, 1.2)) +spd$mz <- list(c(100, 103.2, 104.3, 106.5), c(45.6, 120.4, 190.2)) +spd$intensity <- list(c(200, 400, 34.2, 17), c(12.3, 15.2, 6.8)) + +s <- Spectra(spd) +s + +## List all available spectra variables (i.e. spectrum data and metadata). +spectraVariables(s) + +## For all *core* spectra variables accessor functions are available. These +## return NA if the variable was not set. +centroided(s) +dataStorage(s) +rtime(s) +precursorMz(s) + +## The core spectra variables are: +coreSpectraVariables() + +## Add an additional metadata column. +s$spectrum_id <- c("sp_1", "sp_2") + +## List spectra variables, "spectrum_id" is now also listed +spectraVariables(s) + +## Get the values for the new spectra variable +s$spectrum_id + +## Extract specific spectra variables. +spectraData(s, columns = c("spectrum_id", "msLevel"))
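+ +## Values of spectra variables can also be replaced (provided the backend +## supports it; the default in-memory backend used here does). As an +## illustration, update the retention times of the two spectra: +rtime(s) <- c(2.1, 2.2) +rtime(s)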
+ + +## -------- PEAKS VARIABLES AND DATA -------- + +## Get the peak data (m/z and intensity values). +pks <- peaksData(s) +pks +pks[[1]] +pks[[2]] + +## Note that we could get the same result by coercing the `Spectra` to +## a `list` or `SimpleList`: +as(s, "list") +as(s, "SimpleList") + +## Or use `mz()` and `intensity()` to extract the m/z and intensity values +## separately +mz(s) +intensity(s) + +## Some `MsBackend` classes provide support for arbitrary peaks variables +## (in addition to the mandatory `"mz"` and `"intensity"` values). Below +## we create a simple data frame with an additional peak variable `"pk_ann"` +## and create a `Spectra` with an `MsBackendMemory` for that data. +## Importantly, the number of values (per spectrum) needs to be the same +## for all peak variables. + +tmp <- data.frame(msLevel = c(2L, 2L), rtime = c(123.2, 123.5)) +tmp$mz <- list(c(103.1, 110.4, 303.1), c(343.2, 453.1)) +tmp$intensity <- list(c(130.1, 543.1, 40), c(0.9, 0.45)) +tmp$pk_ann <- list(c(NA_character_, "A", "P"), c("B", "P")) + +## Create the Spectra. With parameter `peaksVariables` we can define +## the columns in `tmp` that contain peaks variables. +sps <- Spectra(tmp, source = MsBackendMemory(), + peaksVariables = c("mz", "intensity", "pk_ann")) +peaksVariables(sps) + +## Extract just the m/z and intensity values +peaksData(sps)[[1L]] + +## Extract the full peaks data +peaksData(sps, columns = peaksVariables(sps))[[1L]] + +## Access just the pk_ann variable +sps$pk_ann + + +} +\seealso{ +\itemize{ +\item \code{\link[=addProcessing]{addProcessing()}} for functions to analyze \code{Spectra}. +\item \link{Spectra} for a general description of the \code{Spectra} object. +} +} +\author{ +Sebastian Gibb, Johannes Rainer, Laurent Gatto, Philippine Louail +}