diff --git a/DESCRIPTION b/DESCRIPTION index 6608f837..5e2617d8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Spectra Title: Spectra Infrastructure for Mass Spectrometry Data -Version: 1.15.0 +Version: 1.13.8 Description: The Spectra package defines an efficient infrastructure for storing and handling mass spectrometry spectra and functionality to subset, process, visualize and compare spectra data. It provides different @@ -73,7 +73,7 @@ BugReports: https://github.com/RforMassSpectrometry/Spectra/issues URL: https://github.com/RforMassSpectrometry/Spectra biocViews: Infrastructure, Proteomics, MassSpectrometry, Metabolomics Encoding: UTF-8 -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.1 Roxygen: list(markdown=TRUE) Collate: 'hidden_aliases.R' diff --git a/NAMESPACE b/NAMESPACE index 3e9d518a..aef9e98e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -70,7 +70,6 @@ exportMethods(backendMerge) exportMethods(backendParallelFactor) exportMethods(bin) exportMethods(c) -exportMethods(cbind2) exportMethods(centroided) exportMethods(collisionEnergy) exportMethods(combinePeaks) @@ -297,5 +296,4 @@ importMethodsFrom(S4Vectors,extractROWS) importMethodsFrom(S4Vectors,isEmpty) importMethodsFrom(S4Vectors,lapply) importMethodsFrom(S4Vectors,split) -importMethodsFrom(methods,cbind2) importMethodsFrom(methods,show) diff --git a/NEWS.md b/NEWS.md index e76fb0bb..0f67f490 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,9 +1,4 @@ -# Spectra 1.15 - -## Changes in 1.15.0 - -- Add `cbind2()` method to easily add multiple `spectraVariables` to the - `spectraData` +# Spectra 1.13 ## Changes in 1.13.8 diff --git a/R/MsBackend.R b/R/MsBackend.R index 74945cbf..9528c628 100644 --- a/R/MsBackend.R +++ b/R/MsBackend.R @@ -178,10 +178,6 @@ #' values to filter the `object`. `values` needs to be of same length than #' parameter `spectraVariables` and in the same order. #' -#' @param y for `cbind2()`: A `data.frame` or `DataFrame` with the -#' spectra variables to be added to the backend. 
Need to be of the same -#' length as the number of spectra in the backend. -#' #' @param x Object extending `MsBackend`. #' #' @param ... Additional arguments. @@ -290,11 +286,6 @@ #' `dropNaSpectraVariables()` might still show columns containing `NA` values #' for *core* spectra variables. #' -#' - `cbind2()`: allows to appends multiple spectra variables to the backend at -#' once. It does so *blindly* and is therefore at the risk of the user. For a -#' more controlled way of adding spectra variables, the `joinSpectraData()` -#' should be used. -#' #' - `centroided()`, `centroided<-`: gets or sets the centroiding #' information of the spectra. `centroided()` returns a `logical` #' vector of length equal to the number of spectra with `TRUE` if a @@ -969,26 +960,6 @@ setMethod("peaksVariables", "MsBackend", function(object) { c("mz", "intensity") }) - -setClassUnion("dataframeOrDataFrameOrmatrix", c("data.frame", "DataFrame", "matrix")) -#' @exportMethod cbind2 -#' -#' @importMethodsFrom methods cbind2 -#' -#' @rdname MsBackend -setMethod("cbind2", signature = c("MsBackend", "dataframeOrDataFrameOrmatrix"), - function(x, y = data.frame(), ...) 
{ - if (is(y, "matrix")) - y <- as.data.frame(y) - if (nrow(y) != length(x)) - stop("Length of 'y' does not match the number of spectra in 'x'") - for (i in colnames(y)) { - x[[i]] <- y[, i] - } - x -}) - - #' @exportMethod centroided #' #' @aliases centroided<-,MsBackend-method diff --git a/R/MsBackendDataFrame.R b/R/MsBackendDataFrame.R index a97a36fc..424e64da 100644 --- a/R/MsBackendDataFrame.R +++ b/R/MsBackendDataFrame.R @@ -548,25 +548,6 @@ setMethod("[", "MsBackendDataFrame", function(x, i, j, ..., drop = FALSE) { .subset_backend_data_frame(x, i) }) -setClassUnion("dataframeOrDataFrameOrmatrix", - c("data.frame", "DataFrame", "matrix")) -#' @exportMethod cbind2 -#' -#' @importMethodsFrom methods cbind2 -#' -#' @rdname hidden_aliases -setMethod("cbind2", signature = c("MsBackendDataFrame", - "dataframeOrDataFrameOrmatrix"), - function(x, y = data.frame(), ...) { - if (is(y, "matrix")) - y <- as.data.frame(y) - if (nrow(y) != length(x)) - stop("Length of 'y' does not match the number of spectra in 'x'") - x@spectraData <- cbind(x@spectraData, y) - validObject(x) - x - }) - #' @rdname hidden_aliases setMethod("split", "MsBackendDataFrame", function(x, f, drop = FALSE, ...) { if (!is.factor(f)) diff --git a/R/MsBackendMemory.R b/R/MsBackendMemory.R index 3f6770c2..d38722ab 100644 --- a/R/MsBackendMemory.R +++ b/R/MsBackendMemory.R @@ -651,25 +651,6 @@ setMethod("[", "MsBackendMemory", function(x, i, j, ..., drop = FALSE) { .df_subset(x, i) }) -setClassUnion("dataframeOrDataFrameOrmatrix", - c("data.frame", "DataFrame", "matrix")) -#' @exportMethod cbind2 -#' -#' @importMethodsFrom methods cbind2 -#' -#' @rdname hidden_aliases -setMethod("cbind2", signature = c("MsBackendMemory", - "dataframeOrDataFrameOrmatrix"), - function(x, y = data.frame(), ...) 
{ - if (is(y, "matrix")) - y <- as.data.frame(y) - if (nrow(y) != length(x)) - stop("Length of 'y' does not match the number of spectra in 'x'") - x@spectraData <- cbind(x@spectraData, y) - validObject(x) - x - }) - #' @rdname hidden_aliases setMethod("split", "MsBackendMemory", function(x, f, drop = FALSE, ...) { if (!is.factor(f)) diff --git a/R/Spectra.R b/R/Spectra.R index 0e78b954..291739f5 100644 --- a/R/Spectra.R +++ b/R/Spectra.R @@ -330,15 +330,6 @@ NULL #' - `[`: subsets the spectra keeping only selected elements (`i`). The method #' **always** returns a `Spectra` object. #' -#' - `cbind2()`: Appends multiple spectra variables from a `data.frame`, -#' `DataFrame` or `matrix` to the `Spectra` object at once. It does so -#' *blindly* (e.g. do not check rownames compatibility) and is therefore at -#' the risk of the user. For a more controlled way of adding spectra -#' variables, the `joinSpectraData()` should be used. It will return a -#' `Spectra` object with the appended spectra variables. `cbind2()` does -#' check however that the number of rows of the `data.frame` or `DataFrame` -#' matches the number of spectra in the `Spectra` object. -#' #' - `deisotopeSpectra()`: *deisotopes* each spectrum keeping only the #' monoisotopic peak for groups of isotopologues. Isotopologues are #' estimated using the [isotopologues()] function from the @@ -542,8 +533,11 @@ NULL #' should be explored and ideally be removed using for #' `QFeatures::reduceDataFrame()`, `PMS::reducePSMs()` or similar #' functions. +<<<<<<< HEAD #' For a more general function that allows to append `data.frame`, #' `DataFrame` and `matrix` see `cbind2()`. +======= +>>>>>>> parent of d063996 (Addition of cbind2()) #' #' Several `Spectra` objects can be concatenated into a single object with the #' `c()` or the `concatenateSpectra()` function. Concatenation will fail if the @@ -1099,9 +1093,7 @@ NULL #' #' @param x A `Spectra` object. #' -#' @param y A `Spectra` object. 
-#' - For `joinSpectraData()`: a `DataFrame`. -#' - For `cbind2()` a `data.frame`, `DataFrame` or `matrix`. +#' @param y A `Spectra` object. A `DataFrame` for `joinSpectraData()`. #' #' @param z For `filterPrecursorCharge()`: `integer()` with the precursor #' charges to be used as filter. @@ -1255,10 +1247,6 @@ NULL #' ## Subset to all MS2 spectra. #' data[msLevel(data) == 2] #' -#' ## Append new `spectraVariables` to the `spectraData` -#' df <- data.frame(cola = 4:5, colb = "b") -#' data_append <- cbind2(data, df) -#' #' ## Same with the filterMsLevel function #' filterMsLevel(data, 2) #' @@ -2229,16 +2217,6 @@ setMethod("[", "Spectra", function(x, i, j, ..., drop = FALSE) { x }) -setClassUnion("dataframeOrDataFrame", c("data.frame", "DataFrame")) -#' @rdname Spectra -#' -#' @export -setMethod("cbind2", signature(x = "Spectra", - y = "dataframeOrDataFrame"), function(x, y, ...) { - x@backend <- cbind2(x@backend, y, ...) - x - }) - #' @rdname Spectra setMethod("filterAcquisitionNum", "Spectra", function(object, n = integer(), dataStorage = character(), diff --git a/inst/test_backends/test_MsBackend/test_spectra_subsetting.R b/inst/test_backends/test_MsBackend/test_spectra_subsetting.R index 84a69f60..fe10f10c 100644 --- a/inst/test_backends/test_MsBackend/test_spectra_subsetting.R +++ b/inst/test_backends/test_MsBackend/test_spectra_subsetting.R @@ -51,19 +51,6 @@ test_that("[", { expect_true(length(res) == 0L) }) -test_that("cbind2 works", { - seql <- length(be) - df <- data.frame(cola = seq_len(seql), colb = "b", colz = "z") - res <- cbind2(be, df) - expect_true(validObject(res)) - expect_equal(ncol(spectraData(res)), length(spectraVariables(be)) + 3) - expect_equal(res$cola, seq_len(seql)) - expect_equal(res$colb, rep("b", seql)) - expect_equal(res$colz, rep("z", seql)) - df2 <- data.frame(cola = 3:6, colb = "b", colz = "z") - expect_error(cbind2(be, df2), "does not match") -}) - #' dropNASpectraVariables: only for not read-only #' core spectra variables don't 
get removed, even if only NA. test_that("dropNaSpectraVariables", { diff --git a/man/MsBackend.Rd b/man/MsBackend.Rd index dc410ae4..1ee1d331 100644 --- a/man/MsBackend.Rd +++ b/man/MsBackend.Rd @@ -26,7 +26,6 @@ \alias{acquisitionNum,MsBackend-method} \alias{peaksData,MsBackend-method} \alias{peaksVariables,MsBackend-method} -\alias{cbind2,MsBackend,dataframeOrDataFrameOrmatrix-method} \alias{centroided,MsBackend-method} \alias{centroided<-,MsBackend-method} \alias{collisionEnergy,MsBackend-method} @@ -120,8 +119,6 @@ \S4method{peaksVariables}{MsBackend}(object) -\S4method{cbind2}{MsBackend,dataframeOrDataFrameOrmatrix}(x, y = data.frame(), ...) - \S4method{centroided}{MsBackend}(object) \S4method{centroided}{MsBackend}(object) <- value @@ -302,12 +299,6 @@ in the individual \code{matrix} of the returned \code{list}. Defaults to \code{peaksVariables(object)} and depends on what \emph{peaks variables} the backend provides.} -\item{x}{Object extending \code{MsBackend}.} - -\item{y}{for \code{cbind2()}: A \code{data.frame} or \code{DataFrame} with the -spectra variables to be added to the backend. Need to be of the same -length as the number of spectra in the backend.} - \item{value}{replacement value for \verb{<-} methods. See individual method description or expected data type.} @@ -395,6 +386,8 @@ to be used to subset/filter \code{object}.} values to filter the \code{object}. \code{values} needs to be of same length than parameter \code{spectraVariables} and in the same order.} +\item{x}{Object extending \code{MsBackend}.} + \item{use.names}{For \code{lengths()}: whether spectrum names should be used.} \item{drop}{For \code{[}: not considered.} @@ -571,10 +564,6 @@ object's \code{spectraData} that contain only missing values (\code{NA}). Note t while columns with only \code{NA}s are removed, a \code{spectraData()} call after \code{dropNaSpectraVariables()} might still show columns containing \code{NA} values for \emph{core} spectra variables. 
-\item \code{cbind2()}: allows to appends multiple spectra variables to the backend at -once. It does so \emph{blindly} and is therefore at the risk of the user. For a -more controlled way of adding spectra variables, the \code{joinSpectraData()} -should be used. \item \code{centroided()}, \verb{centroided<-}: gets or sets the centroiding information of the spectra. \code{centroided()} returns a \code{logical} vector of length equal to the number of spectra with \code{TRUE} if a diff --git a/man/Spectra.Rd b/man/Spectra.Rd index f6a2ebc3..b82feee9 100644 --- a/man/Spectra.Rd +++ b/man/Spectra.Rd @@ -81,7 +81,6 @@ \alias{$<-,Spectra-method} \alias{[[,Spectra-method} \alias{[[<-,Spectra-method} -\alias{cbind2,Spectra,dataframeOrDataFrame-method} \alias{filterAcquisitionNum,Spectra-method} \alias{filterEmptySpectra,Spectra-method} \alias{filterDataOrigin,Spectra-method} @@ -223,6 +222,7 @@ filterPrecursorPeaks( \S4method{dataStorageBasePath}{Spectra}(object) <- value +<<<<<<< HEAD \S4method{cbind2}{Spectra,dataframeOrDataFrame}(x, y, ...) <<<<<<< HEAD } @@ -232,6 +232,8 @@ object and initialize the with data.. See section on creation of \code{Spectra} objects for details. For all other methods a \code{Spectra} object.} ======= +======= +>>>>>>> parent of d063996 (Addition of cbind2()) \S4method{filterAcquisitionNum}{Spectra}( object, n = integer(), @@ -436,9 +438,7 @@ into the same bin. Defaults to \code{FUN = sum} thus summing up intensities. For \code{spectrapply()} and \code{chunkapply()}: function to be applied to \code{Spectra}.} -\item{y}{A \code{Spectra} object. -- For \code{joinSpectraData()}: a \code{DataFrame}. -- For \code{cbind2()} a \code{data.frame}, \code{DataFrame} or \code{matrix}.} +\item{y}{A \code{Spectra} object. A \code{DataFrame} for \code{joinSpectraData()}.} \item{by.x}{A \code{character(1)} specifying the spectra variable used for merging. Default is \code{"spectrumId"}.} @@ -846,6 +846,577 @@ parameter \code{backend}. 
} } +<<<<<<< HEAD +======= +\section{Accessing spectra data}{ + +\itemize{ +\item \code{$}, \verb{$<-}: gets (or sets) a spectra variable for all spectra in \code{object}. +See examples for details. Note that replacing values of a peaks variable +is not supported with a non-empty processing queue, i.e. if any filtering +or data manipulations on the peaks data was performed. In these cases +\code{\link[=applyProcessing]{applyProcessing()}} needs to be called first to apply all cached data +operations. +\item \code{[[}, \verb{[[<-}: access or set/add a single spectrum variable (column) in the +backend. +\item \code{acquisitionNum()}: returns the acquisition number of each +spectrum. Returns an \code{integer} of length equal to the number of +spectra (with \code{NA_integer_} if not available). +\item \code{centroided()}, \verb{centroided<-}: gets or sets the centroiding +information of the spectra. \code{centroided()} returns a \code{logical} +vector of length equal to the number of spectra with \code{TRUE} if a +spectrum is centroided, \code{FALSE} if it is in profile mode and \code{NA} +if it is undefined. See also \code{isCentroided()} for estimating from +the spectrum data whether the spectrum is centroided. \code{value} +for \verb{centroided<-} is either a single \code{logical} or a \code{logical} of +length equal to the number of spectra in \code{object}. +\item \code{collisionEnergy()}, \verb{collisionEnergy<-}: gets or sets the +collision energy for all spectra in \code{object}. \code{collisionEnergy()} +returns a \code{numeric} with length equal to the number of spectra +(\code{NA_real_} if not present/defined), \verb{collisionEnergy<-} takes a +\code{numeric} of length equal to the number of spectra in \code{object}. +\item \code{coreSpectraVariables()}: returns the \emph{core} spectra variables along with +their expected data type. +\item \code{dataOrigin()}, \verb{dataOrigin<-}: gets or sets the \emph{data origin} for each +spectrum. 
\code{dataOrigin()} returns a \code{character} vector (same length than +\code{object}) with the origin of the spectra. \verb{dataOrigin<-} expects a +\code{character} vector (same length than \code{object}) with the replacement +values for the data origin of each spectrum. +\item \code{dataStorage()}: returns a \code{character} vector (same length than \code{object}) +with the data storage location of each spectrum. +\item \code{intensity()}: gets the intensity values from the spectra. Returns +a \code{\link[=NumericList]{NumericList()}} of \code{numeric} vectors (intensity values for each +spectrum). The length of the list is equal to the number of +\code{spectra} in \code{object}. +\item \code{ionCount()}: returns a \code{numeric} with the sum of intensities for +each spectrum. If the spectrum is empty (see \code{isEmpty()}), +\code{NA_real_} is returned. +\item \code{isCentroided()}: a heuristic approach assessing if the spectra in +\code{object} are in profile or centroided mode. The function takes +the \code{qtl}th quantile top peaks, then calculates the difference +between adjacent m/z value and returns \code{TRUE} if the first +quartile is greater than \code{k}. (See \code{Spectra:::.isCentroided()} for +the code.) +\item \code{isEmpty()}: checks whether a spectrum in \code{object} is empty +(i.e. does not contain any peaks). Returns a \code{logical} vector of +length equal number of spectra. +\item \code{isolationWindowLowerMz()}, \verb{isolationWindowLowerMz<-}: gets or sets the +lower m/z boundary of the isolation window. +\item \code{isolationWindowTargetMz()}, \verb{isolationWindowTargetMz<-}: gets or sets the +target m/z of the isolation window. +\item \code{isolationWindowUpperMz()}, \verb{isolationWindowUpperMz<-}: gets or sets the +upper m/z boundary of the isolation window. 
+\item \code{containsMz()}: checks for each of the spectra whether they contain mass +peaks with an m/z equal to \code{mz} (given acceptable difference as defined by +parameters \code{tolerance} and \code{ppm} - see \code{\link[=common]{common()}} for details). Parameter +\code{which} allows to define whether any (\code{which = "any"}, the default) or +all (\code{which = "all"}) of the \code{mz} have to match. The function returns +\code{NA} if \code{mz} is of length 0 or is \code{NA}. +\item \code{containsNeutralLoss()}: checks for each spectrum in \code{object} if it has a +peak with an m/z value equal to its precursor m/z - \code{neutralLoss} (given +acceptable difference as defined by parameters \code{tolerance} and \code{ppm}). +Returns \code{NA} for MS1 spectra (or spectra without a precursor m/z). +\item \code{length()}: gets the number of spectra in the object. +\item \code{lengths()}: gets the number of peaks (m/z-intensity values) per +spectrum. Returns an \code{integer} vector (length equal to the +number of spectra). For empty spectra, \code{0} is returned. +\item \code{msLevel()}: gets the spectra's MS level. Returns an integer vector (names +being spectrum names, length equal to the number of spectra) with the MS +level for each spectrum. +\item \code{mz()}: gets the mass-to-charge ratios (m/z) from the +spectra. Returns a \code{\link[=NumericList]{NumericList()}} of length equal to the number of +spectra, each element a \code{numeric} vector with the m/z values of +one spectrum. +\item \code{peaksData()}: gets the \emph{peaks} data for all spectra in \code{object}. Peaks +data consist of the m/z and intensity values as well as possible additional +annotations (variables) of all peaks of each spectrum. 
The function +returns a \code{\link[=SimpleList]{SimpleList()}} of two dimensional arrays (either \code{matrix} or +\code{data.frame}), with each array providing the values for the requested +\emph{peak variables} (by default \code{"mz"} and \code{"intensity"}). Optional parameter +\code{columns} is passed to the backend's \code{peaksData()} function to allow +the selection of specific (or additional) peaks variables (columns) that +should be extracted (if available). Importantly, +it is \strong{not} guaranteed that each backend supports this parameter (while +each backend must support extraction of \code{"mz"} and \code{"intensity"} columns). +Parameter \code{columns} defaults to \code{c("mz", "intensity")} but any value +returned by \code{peaksVariables(object)} is supported. +Note also that it is possible to extract the peak data with +\code{as(x, "list")} and \code{as(x, "SimpleList")} as a \code{list} and \code{SimpleList}, +respectively. Note however that, in contrast to \code{peaksData()}, \code{as()} +does not support the parameter \code{columns}. +\item \code{peaksVariables()}: lists the available variables for mass peaks provided +by the backend. Default peak variables are \code{"mz"} and \code{"intensity"} (which +all backends need to support and provide), but some backends might provide +additional variables. +These variables correspond to the column names of the peak data array +returned by \code{peaksData()}. +\item \code{polarity()}, \verb{polarity<-}: gets or sets the polarity for each +spectrum. \code{polarity()} returns an \code{integer} vector (length equal +to the number of spectra), with \code{0} and \code{1} representing negative +and positive polarities, respectively. \verb{polarity<-} expects an +\code{integer} vector of length 1 or equal to the number of spectra. 
+\item \code{precursorCharge()}, \code{precursorIntensity()}, \code{precursorMz()}, +\code{precScanNum()}, \code{precAcquisitionNum()}: gets the charge (\code{integer}), +intensity (\code{numeric}), m/z (\code{numeric}), scan index (\code{integer}) +and acquisition number (\code{integer}) of the precursor for MS level >= +2 spectra from the object. Returns a vector of length equal to +the number of spectra in \code{object}. \code{NA} are reported for MS1 +spectra or if no precursor information is available. +\item \code{rtime()}, \verb{rtime<-}: gets or sets the retention times (in seconds) +for each spectrum. \code{rtime()} returns a \code{numeric} vector (length +equal to the number of spectra) with the retention time for each +spectrum. \verb{rtime<-} expects a numeric vector with length equal +to the number of spectra. +\item \code{scanIndex()}: returns an \code{integer} vector with the \emph{scan index} +for each spectrum. This represents the relative index of the +spectrum within each file. Note that this can be different to the +\code{acquisitionNum} of the spectrum which represents the index of the +spectrum during acquisition/measurement (as reported in the mzML file). +\item \code{smoothed()}, \verb{smoothed<-}: gets or sets whether a spectrum is +\emph{smoothed}. \code{smoothed()} returns a \code{logical} vector of length equal +to the number of spectra. \verb{smoothed<-} takes a \code{logical} vector +of length 1 or equal to the number of spectra in \code{object}. +\item \code{spectraData()}: gets general spectrum metadata (annotation, also called +header). \code{spectraData()} returns a \code{DataFrame}. Note that this +method does by default \strong{not} return m/z or intensity values. +\item \verb{spectraData<-}: \strong{replaces} the full spectra data of the \code{Spectra} +object with the one provided with \code{value}. 
The \verb{spectraData<-} function +expects a \code{DataFrame} to be passed as value with the same number of rows +as there are spectra in \code{object}. Note that replacing values of +peaks variables is not supported with a non-empty processing queue, i.e. +if any filtering or data manipulations on the peaks data was performed. +In these cases \code{\link[=applyProcessing]{applyProcessing()}} needs to be called first to apply all +cached data operations and empty the processing queue. +\item \code{spectraNames()}, \verb{spectraNames<-}: gets or sets the spectra names. +\item \code{spectraVariables()}: returns a \code{character} vector with the +available spectra variables (columns, fields or attributes of each +spectrum) available in \code{object}. Note that \code{spectraVariables()} does not +list the \emph{peak variables} (\code{"mz"}, \code{"intensity"} and eventual additional +annotations for each MS peak). Peak variables are returned by +\code{peaksVariables()}. +\item \code{tic()}: gets the total ion current/count (sum of signal of a +spectrum) for all spectra in \code{object}. By default, the value +reported in the original raw data file is returned. For an empty +spectrum, \code{0} is returned. +\item \code{uniqueMsLevels()}: gets the unique MS levels available in \code{object}. This +function is supposed to be more efficient than \code{unique(msLevel(object))}. +} +} + +\section{Data subsetting, filtering and merging}{ + + +Subsetting and filtering of \code{Spectra} objects can be performed with the below +listed methods. +\itemize{ +\item \code{[}: subsets the spectra keeping only selected elements (\code{i}). The method +\strong{always} returns a \code{Spectra} object. +\item \code{deisotopeSpectra()}: \emph{deisotopes} each spectrum keeping only the +monoisotopic peak for groups of isotopologues. Isotopologues are +estimated using the \code{\link[=isotopologues]{isotopologues()}} function from the +\emph{MetaboCoreUtils} package. 
Note that +the default parameters for isotope prediction/detection have been +determined using data from the Human Metabolome Database (HMDB) and +isotopes for elements other than CHNOPS might not be detected. See +parameter \code{substDefinition} in the documentation of \code{\link[=isotopologues]{isotopologues()}} for +more information. The approach and code to define the parameters for +isotope prediction are described +\href{https://github.com/EuracBiomedicalResearch/isotopologues}{here}. +\item \code{dropNaSpectraVariables()}: removes spectra variables (i.e. columns in the +object's \code{spectraData}) that contain only missing values (\code{NA}). Note that +while columns with only \code{NA}s are removed, a \code{spectraData()} call after +\code{dropNaSpectraVariables()} might still show columns containing \code{NA} values +for \emph{core} spectra variables. +\item \code{filterAcquisitionNum()}: filters the object keeping only spectra matching +the provided acquisition numbers (argument \code{n}). If \code{dataOrigin} or +\code{dataStorage} is also provided, \code{object} is subsetted to the spectra with +an acquisition number equal to \code{n} \strong{in spectra with matching dataOrigin +or dataStorage values} retaining all other spectra. +Returns the filtered \code{Spectra}. +\item \code{filterDataOrigin()}: filters the object retaining spectra matching the +provided \code{dataOrigin}. Parameter \code{dataOrigin} has to be of type +\code{character} and needs to match exactly the data origin value of the +spectra to subset. +Returns the filtered \code{Spectra} object (with spectra ordered according to +the provided \code{dataOrigin} parameter). +\item \code{filterDataStorage()}: filters the object retaining spectra stored in the +specified \code{dataStorage}. Parameter \code{dataStorage} has to be of type +\code{character} and needs to match exactly the data storage value of the +spectra to subset. 
+Returns the filtered \code{Spectra} object (with spectra ordered according to +the provided \code{dataStorage} parameter). +\item \code{filterEmptySpectra()}: removes empty spectra (i.e. spectra without peaks). +Returns the filtered \code{Spectra} object (with spectra in their +original order). +\item \code{filterFourierTransformArtefacts()}: removes (Orbitrap) fast Fourier +artefact peaks from spectra (see examples below). The function iterates +through all intensity ordered peaks in a spectrum and removes all peaks +with an m/z within +/- \code{halfWindowSize} of the current peak if their +intensity is lower than \code{threshold} times the current peak's intensity. +Additional parameters \code{keepIsotopes}, \code{maxCharge} and \code{isotopeTolerance} +allow to avoid removing of potential \verb{[13]C} isotope peaks (\code{maxCharge} +being the maximum charge that should be considered and \code{isotopeTolerance} +the absolute acceptable tolerance for matching their m/z). +See \code{\link[=filterFourierTransformArtefacts]{filterFourierTransformArtefacts()}} for details and background and +\code{deisotopeSpectra()} for an alternative. +\item \code{filterIntensity()}: filters each spectrum keeping only peaks with +intensities that are within the provided range or match the criteria of +the provided function. For the former, parameter \code{intensity} has to be a +\code{numeric} defining the intensity range, for the latter a \code{function} that +takes the intensity values of the spectrum and returns a \code{logical} whether +the peak should be retained or not (see examples below for details) - +additional parameters to the function can be passed with \code{...}. To +remove only peaks with intensities below a certain threshold, say 100, use +\code{intensity = c(100, Inf)}. Note: also a single value can be passed with +the \code{intensity} parameter in which case an upper limit of \code{Inf} is used. 
+Note that this function removes also peaks with missing intensities +(i.e. an intensity of \code{NA}). Parameter \code{msLevel.} allows to restrict the +filtering to spectra of the specified MS level(s). +\item \code{filterIsolationWindow()}: retains spectra that contain \code{mz} in their +isolation window m/z range (i.e. with an \code{isolationWindowLowerMz} <= \code{mz} +and \code{isolationWindowUpperMz} >= \code{mz}). Returns the filtered \code{Spectra} +object (with spectra in their original order). +\item \code{filterMsLevel()}: filters object by MS level keeping only spectra matching +the MS level specified with argument \code{msLevel}. Returns the filtered +\code{Spectra} (with spectra in their original order). +\item \code{filterMzRange()}: filters the object keeping or removing peaks in each +spectrum that are within the provided m/z range. Whether peaks are +retained or removed can be configured with parameter \code{keep} (default +\code{keep = TRUE}). +\item \code{filterMzValues()}: filters the object keeping \strong{all} peaks in each +spectrum that match the provided m/z value(s) (for \code{keep = TRUE}, the +default) or removing \strong{all} of them (for \code{keep = FALSE}). The m/z +matching considers also the absolute \code{tolerance} and m/z-relative +\code{ppm} values. \code{tolerance} and \code{ppm} have to be of length 1. +\item \code{filterPolarity()}: filters the object keeping only spectra matching the +provided polarity. Returns the filtered \code{Spectra} (with spectra in their +original order). +\item \code{filterPrecursorCharge()}: retains spectra with the defined precursor +charge(s). +\item \code{filterPrecursorIsotopes()}: groups MS2 spectra based on their precursor +m/z and precursor intensity into predicted isotope groups and keeps for each +only the spectrum representing the monoisotopic precursor. MS1 spectra +are returned as is. 
See documentation for \code{deisotopeSpectra()} below for +details on isotope prediction and parameter description. +\item \code{filterPrecursorMaxIntensity()}: filters the \code{Spectra} keeping for groups +of (MS2) spectra with similar precursor m/z values (given parameters +\code{ppm} and \code{tolerance}) the one with the highest precursor intensity. The +function filters only MS2 spectra and returns all MS1 spectra. If +precursor intensities are \code{NA} for all spectra within a spectra group, the +first spectrum of that group is returned. +Note: some manufacturers don't provide precursor intensities. These can +however also be estimated with \code{\link[=estimatePrecursorIntensity]{estimatePrecursorIntensity()}}. +\item \code{filterPrecursorMzRange()} (previously \code{filterPrecursorMz()} which is now +deprecated): retains spectra with a precursor m/z within the +provided m/z range. See examples for details on selecting spectra with +a precursor m/z for a target m/z accepting a small difference in \emph{ppm}. +\item \code{filterPrecursorMzValues()}: retains spectra with precursor m/z matching +any of the provided m/z values (given \code{ppm} and \code{tolerance}). Spectra with +missing precursor m/z value (e.g. MS1 spectra) are dropped. +\item \code{filterPrecursorPeaks()}: removes peaks from each spectrum in \code{object} with +an m/z equal or larger than the m/z of the precursor, depending on the +value of parameter \code{mz}: for \code{mz = "=="} (the default) peaks with matching m/z (considering an absolute and relative acceptable difference depending on \code{tolerance} and \code{ppm}, respectively) are removed. For \code{mz = ">="} all peaks with an m/z larger or equal to the precursor m/z (minus \code{tolerance} and the \code{ppm} of the precursor m/z) are removed. Parameter \code{msLevel.} allows to restrict the filter to certain MS levels (by default the filter is applied to all MS levels). Note that no peaks are removed if the precursor m/z is \code{NA} (e.g. 
typically for MS1 spectra). +\item \code{filterPrecursorScan()}: retains parent (e.g. MS1) and children scans (e.g. +MS2) of acquisition number \code{acquisitionNum}. Returns the filtered +\code{Spectra} (with spectra in their original order). Parameter \code{f} allows to +define which spectra belong to the same sample or original data file ( +defaults to \code{f = dataOrigin(object)}). +\item \code{filterRt()}: retains spectra of MS level \code{msLevel} with retention +times (in seconds) within (\code{>=}) \code{rt[1]} and (\code{<=}) +\code{rt[2]}. Returns the filtered \code{Spectra} (with spectra in their +original order). +\item \code{filterRanges()}: allows filtering of the \code{Spectra} object based on user +defined \emph{numeric} ranges (parameter \code{ranges}) for one or more available +spectra variables in object (spectra variable names can be specified with +parameter \code{spectraVariables}). Spectra for which the value of a spectra +variable is within its defined range are retained. If multiple +ranges/spectra variables are defined, the \code{match} parameter can be used +to specify whether all conditions (\code{match = "all"}; the default) or if +any of the conditions must match (\code{match = "any"}; all spectra for which +values are within any of the provided ranges are retained). +\item \code{filterValues()}: allows filtering of the \code{Spectra} object based on +similarities of \emph{numeric} values of one or more \code{spectraVariables(object)} +(parameter \code{spectraVariables}) to provided values (parameter \code{values}) +given acceptable differences (parameters tolerance and ppm). If multiple +values/spectra variables are defined, the \code{match} parameter can be used +to specify whether all conditions (\code{match = "all"}; the default) or if +any of the conditions must match (\code{match = "any"}; all spectra for which +values are within any of the provided ranges are retained). 
+\item \code{reduceSpectra()}: for groups of peaks with highly similar m/z values +within each spectrum (given \code{ppm} and \code{tolerance}), this function keeps +only the peak with the highest intensity removing all other peaks hence +\emph{reducing} each spectrum to the highest intensity peaks per \emph{peak group}. +Peak groups are defined using the \code{\link[=group]{group()}} function from the +\emph{MsCoreUtils} package. +\item \code{reset()}: restores the data to its original state (as much as possible): +removes any processing steps from the lazy processing queue and calls +\code{reset()} on the backend which, depending on the backend, can also undo +e.g. data filtering operations. Note that a \code{reset()} call after +\code{applyProcessing()} will not have any effect. See examples below for more +information. +\item \code{selectSpectraVariables()}: reduces the information within the object to +the selected spectra variables: all data for variables not specified will +be dropped. For mandatory columns (i.e., those listed by +\code{\link[=coreSpectraVariables]{coreSpectraVariables()}}, such as \emph{msLevel}, \emph{rtime} ...) only +the values will be dropped but not the variable itself. Additional (or +user defined) spectra variables will be completely removed. +Returns the filtered \code{Spectra}. +\item \code{split()}: splits the \code{Spectra} object based on parameter \code{f} into a \code{list} +of \code{Spectra} objects. +\item \code{joinSpectraData()}: Individual spectra variables can be directly +added with the \verb{$<-} or \verb{[[<-} syntax. The \code{joinSpectraData()} +function allows to merge a \code{DataFrame} to the existing spectra +data. This function diverges from the \code{\link[=merge]{merge()}} method in two +main ways: +\itemize{ +\item The \code{by.x} and \code{by.y} column names must be of length 1. +\item If variable names are shared in \code{x} and \code{y}, the spectra +variables of \code{x} are not modified.
It's only the \code{y} +variables that are appended the suffix defined in +\code{suffix.y}. This is to avoid modifying any core spectra +variables that would lead to an invalid object. +\item Duplicated Spectra keys (i.e. \code{x[[by.x]]}) are not +allowed. Duplicated keys in the \code{DataFrame} (i.e. \code{y[[by.y]]}) +throw a warning and only the last occurrence is kept. These +should be explored and ideally be removed using for example +\code{QFeatures::reduceDataFrame()}, \code{PMS::reducePSMs()} or similar +functions. +} +} + +Several \code{Spectra} objects can be concatenated into a single object with the +\code{c()} or the \code{concatenateSpectra()} function. Concatenation will fail if the +processing queue of any of the \code{Spectra} objects is not empty or if +different backends are used in the \code{Spectra} objects. The spectra variables +of the resulting \code{Spectra} object are the union of the spectra variables of +the individual \code{Spectra} objects. +} + +\section{Data manipulation and analysis methods}{ + + +Many data manipulation operations, such as those listed in this section, are +not applied immediately to the spectra, but added to a +\emph{lazy processing/manipulation queue}. Operations stored in this queue are +applied on-the-fly to spectra data each time it is accessed. This lazy +execution guarantees the same functionality for \code{Spectra} objects with +any backend, i.e. backends supporting to save changes to spectrum data +(\code{\link[=MsBackendMemory]{MsBackendMemory()}}, \code{\link[=MsBackendDataFrame]{MsBackendDataFrame()}} or \code{\link[=MsBackendHdf5Peaks]{MsBackendHdf5Peaks()}}) as +well as read-only backends (such as the \code{\link[=MsBackendMzR]{MsBackendMzR()}}). +Note that for the former it is possible to apply the processing queue and +write the modified peak data back to the data storage with the +\code{applyProcessing()} function.
+\itemize{ +\item \code{addProcessing()}: adds an arbitrary function that should be applied to the +peaks matrix of every spectrum in \code{object}. The function (can be passed +with parameter \code{FUN}) is expected to take a peaks matrix as input and to +return a peaks matrix. A peaks matrix is a numeric matrix with two columns, +the first containing the m/z values of the peaks and the second the +corresponding intensities. The function has to have \code{...} in its +definition. Additional arguments can be passed with \code{...}. With parameter +\code{spectraVariables} it is possible to define additional spectra variables +from \code{object} that should be passed to the function \code{FUN}. These will be +passed by their name (e.g. specifying \code{spectraVariables = "precursorMz"} +will pass the spectra's precursor m/z as a parameter named \code{precursorMz} +to the function). The only exception is the spectra's MS level, which will +be passed to the function as a parameter called \code{spectrumMsLevel} (i.e. +with \code{spectraVariables = "msLevel"} the MS levels of each spectrum will be +submitted to the function as a parameter called \code{spectrumMsLevel}). +Examples are provided in the package vignette. +\item \code{applyProcessing()}: for \code{Spectra} objects that use a \strong{writeable} backend +only: apply all steps from the lazy processing queue to the peak data and +write it back to the data storage. Parameter \code{f} allows to specify how +\code{object} should be split for parallel processing. This should either be +equal to the \code{dataStorage}, or \code{f = rep(1, length(object))} to disable +parallel processing altogether. Other partitionings might result in +errors (especially if a \code{MsBackendHdf5Peaks} backend is used). +\item \code{bin()}: aggregates individual spectra into discrete (m/z) bins. Binning is +performed only on spectra of the specified MS level(s) (parameter +\code{msLevel}, by default all MS levels of \code{x}).
The bins can be defined with +parameter \code{breaks} which by default are equally sized bins, with size +being defined by parameter \code{binSize}, from the minimal to the maximal m/z +of all spectra (of MS level \code{msLevel}) within \code{x}. The same bins are used +for all spectra in \code{x}. All intensity values for peaks falling into the +same bin are aggregated using the function provided with parameter \code{FUN} +(defaults to \code{FUN = sum}, i.e. all intensities are summed up). Note that +the binning operation is applied to the peak data on-the-fly upon data +access and it is possible to \emph{revert} the operation with the \code{reset()} +function (see description of \code{reset()} above). +\item \code{combinePeaks()}: combines mass peaks within each spectrum with a +difference in their m/z values that is smaller than the maximal +acceptable difference defined by \code{ppm} and \code{tolerance}. Parameters +\code{intensityFun} and \code{mzFun} allow to define functions to aggregate the +intensity and m/z values for each such group of peaks. With +\code{weighted = TRUE} (the default), the m/z value of the combined peak is +calculated using an intensity-weighted mean and parameter \code{mzFun} is +ignored. The \code{\link[MsCoreUtils:group]{MsCoreUtils::group()}} function is used for the grouping of +mass peaks. Parameter \code{msLevel.} allows to define selected MS levels for +which peaks should be combined. This function returns a \code{Spectra} with +the same number of spectra as the input object, but with possibly +combined peaks within each spectrum. +Additional peak variables (other than m/z and intensity) are dropped (i.e. their values are replaced with \code{NA}) for combined peaks +unless they are constant across the combined peaks. See also +\code{reduceSpectra()} for a function to select a single \emph{representative} +mass peak for each peak group. +\item \code{combineSpectra()}: combines sets of spectra into a single spectrum per +set.
For each spectrum group (set), spectra variables from the first +spectrum are used and the peak matrices are combined using the function +specified with \code{FUN}, which defaults to \code{\link[=combinePeaksData]{combinePeaksData()}}. Please +refer to the \code{\link[=combinePeaksData]{combinePeaksData()}} help page for details and options of +the actual combination of peaks across the sets of spectra and to the +package vignette for examples and alternative ways to aggregate spectra. +The sets of spectra can be specified with parameter \code{f}. +In addition it is possible to define, with parameter \code{p}, if and how to +split the input data for parallel processing. +This defaults to \code{p = x$dataStorage} and hence a per-file parallel +processing is applied for \code{Spectra} with file-based backends (such as the +\code{\link[=MsBackendMzR]{MsBackendMzR()}}). +Prior to the combination of the spectra, all processing steps queued in the lazy +evaluation queue are applied. Be aware that calling \code{combineSpectra()} on a +\code{Spectra} object with certain backends that allow modifications might +\strong{overwrite} the original data. This does not happen with a +\code{MsBackendMemory} or \code{MsBackendDataFrame} backend, but with a +\code{MsBackendHdf5Peaks} backend the m/z and intensity values in the original +hdf5 file(s) will be overwritten. +The function returns a \code{Spectra} of length equal to the number of unique levels +of \code{f}. +\item \code{compareSpectra()}: compares each spectrum in \code{x} with each spectrum in \code{y} +using the function provided with \code{FUN} (defaults to \code{\link[=ndotproduct]{ndotproduct()}}). If +\code{y} is missing, each spectrum in \code{x} is compared with each other spectrum +in \code{x}. +The matching/mapping of peaks between the compared spectra is done with the +\code{MAPFUN} function.
The default \code{\link[=joinPeaks]{joinPeaks()}} matches peaks of both spectra +and allows to keep all peaks from the first spectrum (\code{type = "left"}), +from the second (\code{type = "right"}), from both (\code{type = "outer"}) and to +keep only matching peaks (\code{type = "inner"}); see \code{\link[=joinPeaks]{joinPeaks()}} for more +information and examples). The \code{MAPFUN} function should have parameters +\code{x}, \code{y}, \code{xPrecursorMz} and \code{yPrecursorMz} as these values are passed to +the function. In addition to \code{joinPeaks()} also \code{\link[=joinPeaksGnps]{joinPeaksGnps()}} is +supported for GNPS-like similarity score calculations. Note that +\code{joinPeaksGnps()} should only be used in combination with +\code{FUN = MsCoreUtils::gnps} (see \code{\link[=joinPeaksGnps]{joinPeaksGnps()}} for more information and +details). Use \code{MAPFUN = joinPeaksNone} to disable internal peak +matching/mapping if a similarity scoring function is used that performs +the matching internally. +\code{FUN} is supposed to be a function to compare intensities of (matched) +peaks of the two spectra that are compared. The function needs to take two +matrices with columns \code{"mz"} and \code{"intensity"} as input and is supposed +to return a single numeric as result. In addition to the two peak matrices +the spectra's precursor m/z values are passed to the function as parameters +\code{xPrecursorMz} (precursor m/z of the \code{x} peak matrix) and \code{yPrecursorMz} +(precursor m/z of the \code{y} peak matrix). Additional parameters to functions +\code{FUN} and \code{MAPFUN} can be passed with \code{...}. Parameters \code{ppm} and +\code{tolerance} are passed to both \code{MAPFUN} and \code{FUN}. +The function returns a \code{matrix} with the results of \code{FUN} for each +comparison, number of rows equal to \code{length(x)} and number of columns +equal \code{length(y)} (i.e. 
element in row 2 and column 3 is the result from +the comparison of \code{x[2]} with \code{y[3]}). If \code{SIMPLIFY = TRUE} the \code{matrix} +is \emph{simplified} to a \code{numeric} if length of \code{x} or \code{y} is one. See also +the vignette for additional examples, such as using spectral entropy +similarity in the scoring. +\item \code{deisotopeSpectra()}: \emph{deisotopes} each spectrum keeping only the +monoisotopic peak for groups of isotopologues. Isotopologues are +estimated using the \code{\link[=isotopologues]{isotopologues()}} function from the \emph{MetaboCoreUtils} +package. Note that the default parameters for isotope +prediction/detection have been determined using data from the Human +Metabolome Database (HMDB) and isotopes for elements other than CHNOPS +might not be detected. See parameter \code{substDefinition} in the +documentation of \code{\link[=isotopologues]{isotopologues()}} for more information. The approach +and code to define the parameters for isotope prediction are described +\href{https://github.com/EuracBiomedicalResearch/isotopologues}{here}. +\item \code{entropy()}: calculates the entropy of each spectrum based on the metrics +suggested by Li et al. (https://doi.org/10.1038/s41592-021-01331-z). +See also \code{\link[=nentropy]{nentropy()}} in the \emph{MsCoreUtils} package for details. +\item \code{estimatePrecursorIntensity()}: defines the precursor intensities for MS2 +spectra using the intensity of the matching MS1 peak from the +closest MS1 spectrum (i.e. the last MS1 spectrum measured before the +respective MS2 spectrum). With \code{method = "interpolation"} it is also +possible to calculate the precursor intensity based on an interpolation of +intensity values (and retention times) of the matching MS1 peaks from the +previous and next MS1 spectrum. See \code{\link[=estimatePrecursorIntensity]{estimatePrecursorIntensity()}} for +examples and more details.
+\item \code{estimatePrecursorMz()}: \strong{for DDA data}: allows to estimate a fragment +spectrum's precursor m/z based on the reported precursor m/z and the data +from the previous MS1 spectrum. See \code{\link[=estimatePrecursorMz]{estimatePrecursorMz()}} for details. +\item \code{neutralLoss()}: calculates neutral loss spectra for fragment spectra. See +\code{\link[=neutralLoss]{neutralLoss()}} for detailed documentation. +\item \code{processingLog()}: returns a \code{character} vector with the processing log +messages. +\item \code{reduceSpectra()}: keeps for groups of peaks with similar m/z values +(given \code{ppm} and \code{tolerance}) in each spectrum only the peak with the +highest intensity removing all other peaks hence \emph{reducing} each +spectrum to the highest intensity peaks per \emph{peak group}. +Peak groups are defined using the \code{\link[=group]{group()}} function from the +\emph{MsCoreUtils} package. See also the \code{combinePeaks()} function for an +alternative function to combine peaks within each spectrum. +\item \code{scalePeaks()}: scales intensities of peaks within each spectrum depending +on parameter \code{by}. With \code{by = sum} (the default) peak intensities are +divided by the sum of peak intensities within each spectrum. The sum of +intensities is thus 1 for each spectrum after scaling. Parameter +\code{msLevel.} allows to apply the scaling to spectra of a certain MS level. +By default (\code{msLevel. = uniqueMsLevels(x)}) intensities for all +spectra will be scaled. +\item \code{spectrapply()}: applies a given function to each individual spectrum or +sets of a \code{Spectra} object. By default, the \code{Spectra} is split into +individual spectra (i.e. \code{Spectra} of length 1) and the function \code{FUN} +is applied to each of them. An alternative splitting can be defined with +parameter \code{f}. Parameters for \code{FUN} can be passed using \code{...}.
+The returned result and its order depend on the function \code{FUN} and how +\code{object} is split (hence on \code{f}, if provided). Parallel processing is +supported and can be configured with parameter \code{BPPARAM}, is however only +suggested for computationally intense \code{FUN}. +As an alternative to the (possibly parallel) processing of the full +\code{Spectra}, \code{spectrapply()} supports also a chunk-wise processing. For this, +parameter \code{chunkSize} needs to be specified. \code{object} is then split into +chunks of size \code{chunkSize} which are then (stepwise) processed by \code{FUN}. +This guarantees a lower memory demand (especially for on-disk backends) +since only the data for one chunk needs to be loaded into memory in each +iteration. Note that by specifying \code{chunkSize}, parameters \code{f} and +\code{BPPARAM} will be ignored. +See also \code{\link[=chunkapply]{chunkapply()}} or examples below for details on chunk-wise +processing. +\item \code{smooth()}: smooths individual spectra using a moving window-based approach +(window size = \code{2 * halfWindowSize}). Currently, the +Moving-Average- (\code{method = "MovingAverage"}), +Weighted-Moving-Average- (\code{method = "WeightedMovingAverage"}, +weights depending on the distance from the center and calculated +\code{1/2^(-halfWindowSize:halfWindowSize)}) and +Savitzky-Golay-Smoothing (\code{method = "SavitzkyGolay"}) are supported. +For details on how to choose the correct \code{halfWindowSize} please see +\code{\link[MsCoreUtils:smooth]{MsCoreUtils::smooth()}}. +\item \code{pickPeaks()}: picks peaks on individual spectra using a moving +window-based approach (window size = \code{2 * halfWindowSize}).
For noisy +spectra there are currently two different noise estimators available, +the \emph{M}edian \emph{A}bsolute \emph{D}eviation (\code{method = "MAD"}) and +Friedman's Super Smoother (\code{method = "SuperSmoother"}), +as implemented in \code{\link[MsCoreUtils:noise]{MsCoreUtils::noise()}}. +The method supports also to optionally \emph{refine} the m/z value of +the identified centroids by considering data points that belong (most +likely) to the same mass peak. For this, the m/z value is calculated as an +intensity weighted average of the m/z values within the peak region. +The peak region is defined as the m/z values (and their respective +intensities) of the \code{2 * k} closest signals to the centroid or the closest +valleys (\code{descending = TRUE}) in the \code{2 * k} region. For the latter the \code{k} +has to be chosen generally larger. See \code{\link[MsCoreUtils:refineCentroids]{MsCoreUtils::refineCentroids()}} for +details. +If the ratio of the signal to the highest intensity of the peak is below +\code{threshold} it will be ignored for the weighted average. +\item \code{replaceIntensitiesBelow()}: replaces intensities below a specified +threshold with the provided \code{value}. Parameter \code{threshold} can be either +a single numeric value or a function which is applied to all non-\code{NA} +intensities of each spectrum to determine a threshold value for each +spectrum. The default is \code{threshold = min} which replaces all values +which are <= the minimum intensity in a spectrum with \code{value} (the +default for \code{value} is \code{0}). Note that the function specified with +\code{threshold} is expected to have a parameter \code{na.rm} since \code{na.rm = TRUE} +will be passed to the function. If the spectrum is in profile mode, +ranges of successive non-0 peaks <= \code{threshold} are set to 0. +Parameter \code{msLevel.} allows to apply this to only spectra of certain MS +level(s).
+} +} + +>>>>>>> parent of d063996 (Addition of cbind2()) \examples{ ## Create a Spectra providing a `DataFrame` containing the spectrum data. @@ -984,10 +1555,6 @@ spectraData(data) ## Subset to all MS2 spectra. data[msLevel(data) == 2] -## Append new `spectraVariables` to the `spectraData` -df <- data.frame(cola = 4:5, colb = "b") -data_append <- cbind2(data, df) - ## Same with the filterMsLevel function filterMsLevel(data, 2) diff --git a/man/hidden_aliases.Rd b/man/hidden_aliases.Rd index de5a31b8..a203f8c6 100644 --- a/man/hidden_aliases.Rd +++ b/man/hidden_aliases.Rd @@ -66,7 +66,6 @@ \alias{tic,MsBackendDataFrame-method} \alias{$,MsBackendDataFrame-method} \alias{$<-,MsBackendDataFrame-method} -\alias{cbind2,MsBackendDataFrame,dataframeOrDataFrameOrmatrix-method} \alias{split,MsBackendDataFrame,ANY-method} \alias{filterAcquisitionNum,MsBackendDataFrame-method} \alias{backendInitialize,MsBackendHdf5Peaks-method} @@ -137,7 +136,6 @@ \alias{$,MsBackendMemory-method} \alias{$<-,MsBackendMemory-method} \alias{[,MsBackendMemory-method} -\alias{cbind2,MsBackendMemory,dataframeOrDataFrameOrmatrix-method} \alias{split,MsBackendMemory,ANY-method} \alias{filterAcquisitionNum,MsBackendMemory-method} \alias{backendInitialize,MsBackendMzR-method} @@ -290,8 +288,6 @@ selectSpectraVariables(object, ...) \S4method{[}{MsBackendDataFrame}(x, i, j, ..., drop = FALSE) -\S4method{cbind2}{MsBackendDataFrame,dataframeOrDataFrameOrmatrix}(x, y = data.frame(), ...) - \S4method{split}{MsBackendDataFrame,ANY}(x, f, drop = FALSE, ...) \S4method{filterAcquisitionNum}{MsBackendDataFrame}( @@ -444,8 +440,6 @@ selectSpectraVariables(object, ...) \S4method{[}{MsBackendMemory}(x, i, j, ..., drop = FALSE) -\S4method{cbind2}{MsBackendMemory,dataframeOrDataFrameOrmatrix}(x, y = data.frame(), ...) - \S4method{split}{MsBackendMemory,ANY}(x, f, drop = FALSE, ...) 
\S4method{filterAcquisitionNum}{MsBackendMemory}( diff --git a/tests/testthat/test_MsBackendDataFrame.R b/tests/testthat/test_MsBackendDataFrame.R index ec3aeec6..3b8fe64c 100644 --- a/tests/testthat/test_MsBackendDataFrame.R +++ b/tests/testthat/test_MsBackendDataFrame.R @@ -613,22 +613,6 @@ test_that("[,MsBackendDataFrame works", { expect_equal(res@spectraData$file, c("b", "a")) }) -test_that("cbind2, MsBackendDataFrame works", { - be <- MsBackendDataFrame() - df <- DataFrame(scanIndex = 1:2, a = "a", b = "b") - be <- backendInitialize(be, df) - df2 <- data.frame(cola = 3:4, colb = "b", colz = "z") - res <- cbind2(be, df2) - expect_true(validObject(res)) - expect_equal(ncol(spectraData(res)), ncol(spectraData(be)) +3) - expect_equal(res$cola, c(3, 4)) - expect_equal(res$colb, c("b", "b")) - expect_equal(res$colz, c("z", "z")) - expect_equal(res$scanIndex, 1:2) - df3 <- data.frame(colv = 1:6, colw = "b") - expect_error(cbind2(be, df3), "does not match") -}) - test_that("selectSpectraVariables,MsBackendDataFrame works", { be <- MsBackendDataFrame() res <- selectSpectraVariables(be, c("dataStorage", "msLevel")) diff --git a/tests/testthat/test_MsBackendMemory.R b/tests/testthat/test_MsBackendMemory.R index 2cdacc37..59776143 100644 --- a/tests/testthat/test_MsBackendMemory.R +++ b/tests/testthat/test_MsBackendMemory.R @@ -538,22 +538,6 @@ test_that("[,MsBackendMemory works", { expect_equal(res@spectraData$file, c("b", "a")) }) -test_that("cbind2, MsBackendMemory works", { - be <- new("MsBackendMemory") - df <- data.frame(scanIndex = 1:2, a = "a", b = "b") - be <- backendInitialize(be, df) - df2 <- data.frame(cola = 3:4, colb = "b", colz = "z") - res <- cbind2(be, df2) - expect_true(validObject(res)) - expect_equal(ncol(spectraData(res)), ncol(spectraData(be)) +3) - expect_equal(res$cola, c(3, 4)) - expect_equal(res$colb, c("b", "b")) - expect_equal(res$colz, c("z", "z")) - expect_equal(res$scanIndex, 1:2) - df3 <- data.frame(colv = 1:6, colw = "b") - 
expect_error(cbind2(be, df3), "does not match") -}) - test_that("split,MsBackendMemory works", { be <- new("MsBackendMemory") be <- backendInitialize(be, test_df)