diff --git a/.editorconfig b/.editorconfig index 71842659..0cebcc70 100644 --- a/.editorconfig +++ b/.editorconfig @@ -6,7 +6,7 @@ root = true charset = utf-8 end_of_line = lf trim_trailing_whitespace = true -insert_final_newline = false +insert_final_newline = true [*.R] indent_style = space @@ -22,4 +22,4 @@ indent_style = tab [*.yml] indent_style = space -indent_size = 2 +indent_size = 2 \ No newline at end of file diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml index 85b29ffa..5c1ebc66 100644 --- a/.github/workflows/check-bioc.yml +++ b/.github/workflows/check-bioc.yml @@ -53,7 +53,7 @@ jobs: fail-fast: false matrix: config: - - { os: ubuntu-latest, r: '4.4', bioc: '3.20', cont: "bioconductor/bioconductor_docker:devel", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" } + - { os: ubuntu-latest, r: 'devel', bioc: 'devel', cont: "bioconductor/bioconductor_docker:devel", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" } - { os: macOS-latest, r: '4.4', bioc: '3.20'} - { os: windows-latest, r: '4.4', bioc: '3.20'} env: @@ -239,7 +239,7 @@ jobs: - name: Test coverage if: github.ref == 'refs/heads/main' && env.run_covr == 'true' && runner.os == 'Linux' run: | - covr::codecov() + covr::codecov(token = "${{ secrets.CODECOV_TOKEN }}") shell: Rscript {0} - name: Install package diff --git a/DESCRIPTION b/DESCRIPTION index 7727f9b8..2057828e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Spectra Title: Spectra Infrastructure for Mass Spectrometry Data -Version: 1.15.13 +Version: 1.17.1 Description: The Spectra package defines an efficient infrastructure for storing and handling mass spectrometry spectra and functionality to subset, process, visualize and compare spectra data. 
It provides different diff --git a/NEWS.md b/NEWS.md index b3d0404d..f498b62d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# Spectra 1.17 + +## Changes in 1.17.1 + +- Refactor `containsMz()` to support chunk-wise processing. + # Spectra 1.15 ## Changes in 1.15.13 diff --git a/R/Spectra-functions.R b/R/Spectra-functions.R index 99014163..93d9f2db 100644 --- a/R/Spectra-functions.R +++ b/R/Spectra-functions.R @@ -63,7 +63,13 @@ NULL #' @description #' #' This function applies the processing queue and an arbitrary function to -#' the peaks matrix of each spectrum of the `Spectra` object `object`. +#' the peaks matrix of each spectrum of the `Spectra` object `object`. It has +#' built-in parallel and/or chunk-wise processing enabled through parameter +#' `f`, which defines how the `Spectra` (or rather its backend) needs +#' to be split. The default `f = .parallel_processing_factor(object)` splits +#' the backend by chunk (if a finite chunk size is defined for the `Spectra`) +#' or by its optimal parallel processing factor. See the description of +#' the `.parallel_processing_factor()` function below for more information. #' #' @param object `Spectra` object. #' @@ -78,7 +84,8 @@ NULL #' #' @param f `factor` or `vector` that can be coerced to one defining how the #' data should be split for parallel processing. Set to `NULL` or -#' `factor()` to disable splitting and parallel processing. +#' `factor()` to disable splitting and parallel processing. See the function +#' description above for details. #' #' @param columns `character` defining the columns that should be returned. #' This will be passed to the backend's `peaksData` function. @@ -571,39 +578,8 @@ combineSpectra <- function(x, f = x$dataStorage, p = x$dataStorage, #' @description #' -#' Internal function to check if any (or all) of the provided `mz` values are -#' in the spectras' m/z. 
-#' -#' @param x `Spectra` object -#' -#' @param mz `numeric` of m/z value(s) to check in each spectrum of `x`. -#' -#' @param tolarance `numeric(1)` with the tolerance. -#' -#' @param ppm `numeric(1)` with the ppm. -#' -#' @param condFun `function` such as `any` or `all`. -#' -#' @param parallel `BiocParallel` parameter object. -#' -#' @return `logical` same length than `x`. -#' -#' @author Johannes Rainer -#' -#' @importFrom MsCoreUtils common -#' -#' @noRd -.has_mz <- function(x, mz = numeric(), tolerance = 0, ppm = 20, condFun = any, - parallel = SerialParam()) { - mzs <- mz(x, BPPARAM = parallel) - vapply(mzs, FUN = function(z) - condFun(common(mz, z, tolerance = tolerance, ppm = ppm)), logical(1)) -} - -#' @description -#' -#' Same as `.has_mz` only that a different `mz` is used for each spectrum in -#' `x`. Length of `mz` is thus expected to be equal to length of `x`. +#' Check for presence of an m/z value in each spectrum. Each spectrum gets +#' its own m/z. #' #' @param mz `numeric` **same length as `x`**. #' diff --git a/R/Spectra.R b/R/Spectra.R index 14ebbf2c..73520422 100644 --- a/R/Spectra.R +++ b/R/Spectra.R @@ -3278,23 +3278,18 @@ setMethod("containsMz", "Spectra", function(object, mz = numeric(), tolerance = 0, ppm = 20, which = c("any", "all"), BPPARAM = bpparam()) { - cond_fun <- match.fun(match.arg(which)) - if (all(is.na(mz))) - return(rep(NA, length(object))) - mz <- unique(sort(mz)) - BPPARAM <- backendBpparam(object@backend, BPPARAM) - ## TODO: fix to use .peaksapply instead. 
- if (is(BPPARAM, "SerialParam")) - .has_mz(object, mz, tolerance = tolerance, ppm = ppm, - condFun = cond_fun, parallel = BPPARAM) - else { - sp <- SerialParam() - f <- as.factor(dataStorage(object)) - res <- .lapply(object, FUN = .has_mz, mz = mz, tolerance = tolerance, - condFun = cond_fun, parallel = sp, f = f, - BPPARAM = BPPARAM) - unsplit(res, f = f) - } + if (length(object)) { + cond_fun <- match.fun(match.arg(which)) + if (all(is.na(mz))) + return(rep(NA, length(object))) + mz <- unique(sort(mz)) + BPPARAM <- backendBpparam(object@backend, BPPARAM) + unlist(.peaksapply( + object, FUN = .peaks_contain_mz, mz = mz, tolerance = tolerance, + ppm = ppm, condFun = cond_fun, BPPARAM = BPPARAM), + use.names = FALSE + ) + } else logical() }) #' @rdname addProcessing @@ -3327,12 +3322,12 @@ setMethod("containsNeutralLoss", "Spectra", function(object, neutralLoss = 0, #' @export setMethod("entropy", "Spectra", function(object, normalized = TRUE) { if (length(object)) { - if (normalized) entropy_fun <- nentropy - else entropy_fun <- entropy - unlist(.peaksapply( - object, FUN = function(pks, ...) entropy_fun(pks[, "intensity"])), - use.names = FALSE - ) + if (normalized) entropy_fun <- nentropy + else entropy_fun <- entropy + unlist(.peaksapply( + object, FUN = function(pks, ...) entropy_fun(pks[, "intensity"])), + use.names = FALSE + ) } else numeric() }) #' @rdname addProcessing diff --git a/R/peaks-functions.R b/R/peaks-functions.R index f34adde9..dc19e353 100644 --- a/R/peaks-functions.R +++ b/R/peaks-functions.R @@ -737,3 +737,13 @@ joinPeaksNone <- function(x, y, ...) { if (keep) x[sel, , drop = FALSE] else x[!sel, , drop = FALSE] } + +#' Check for presence of peaks defined by their m/z value. Note that this +#' function does **not** return a peak matrix, but only a logical of length 1! +#' +#' @return `logical(1)` +#' @noRd +.peaks_contain_mz <- function(x, mz = numeric(), tolerance = 0, ppm = 20, + condFun = any, ...) 
{ + condFun(common(mz, x[, "mz"], tolerance = tolerance, ppm = ppm)) +} diff --git a/tests/testthat/_snaps/plotMzDelta/plotmzdelta-1000.svg b/tests/testthat/_snaps/plotMzDelta/plotmzdelta-1000.svg index e16506da..e041fc61 100644 --- a/tests/testthat/_snaps/plotMzDelta/plotmzdelta-1000.svg +++ b/tests/testthat/_snaps/plotMzDelta/plotmzdelta-1000.svg @@ -1,579 +1,254 @@ - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + +Histogram of Mass Delta Distributions +M/Z delta +Frequency + + + + + +50 +100 +150 +200 + + + + + + + +0 +500 +1000 +1500 +2000 +2500 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + +peg +A +R +N +D +C +E +Q/K +G +H +I/L +M +F +P +S +T +W +Y +V diff --git a/tests/testthat/test_Spectra-functions.R b/tests/testthat/test_Spectra-functions.R index 8df50d71..2dbcf372 100644 --- a/tests/testthat/test_Spectra-functions.R +++ b/tests/testthat/test_Spectra-functions.R @@ -352,32 +352,6 @@ test_that("dropNaSpectraVariables works", { function(z) !any(is.na(z))))) }) -test_that(".has_mz works", { - sps <- Spectra(sciex_mzr)[1:10] - sps <- setBackend(sps, MsBackendDataFrame()) - mzs <- mz(sps) - x <- c(mzs[[2]][5], mzs[[3]][8]) - - res <- .has_mz(sps, mz = x, ppm = 0) - expect_true(length(res) == length(sps)) - expect_true(is.logical(res)) - - spd <- DataFrame(msLevel = c(2L, 2L, 2L), rtime = c(1, 2, 3)) - spd$mz <- list(c(12, 14, 45, 56), c(14.1, 34, 56.1), c(12.1, 14.15, 34.1)) - spd$intensity <- list(c(10, 20, 30, 40), c(11, 21, 31), c(12, 22, 32)) - sps <- Spectra(spd) - - res <- .has_mz(sps, mz = c(14, 34)) - expect_equal(res, c(TRUE, TRUE, FALSE)) - res <- .has_mz(sps, mz = c(14, 34), tolerance = 0.15) - expect_equal(res, c(TRUE, TRUE, TRUE)) - - res <- .has_mz(sps, mz = c(14, 34), condFun = all) - expect_true(all(!res)) - res <- .has_mz(sps, mz = c(14, 34), condFun = all, tolerance = 0.15) - expect_equal(res, c(FALSE, TRUE, TRUE)) -}) - test_that(".has_mz_each works", { spd <- DataFrame(msLevel = c(2L, 2L, 2L), rtime = c(1, 2, 3)) spd$mz <- list(c(12, 14, 45, 56), c(14.1, 34, 56.1), c(12.1, 14.15, 34.1)) diff --git a/tests/testthat/test_peaks-functions.R b/tests/testthat/test_peaks-functions.R index f28452dd..ef0978c8 100644 --- a/tests/testthat/test_peaks-functions.R +++ b/tests/testthat/test_peaks-functions.R @@ -722,3 +722,16 @@ test_that(".peaks_filter_ranges works", { ranges = ranges, keep = FALSE) expect_equal(res, x) }) + +test_that(".peaks_contain_mz works", { + pks <- cbind(mz = c(1.3, 1.5, 32.1, 45.6), c(1, 2, 3, 4)) + + expect_false(.peaks_contain_mz(pks)) + 
expect_true(.peaks_contain_mz(pks, 1.5)) + expect_false(.peaks_contain_mz(pks, c(1.5, 32.2), condFun = all)) + expect_true(.peaks_contain_mz(pks, c(1.5, 32.2), condFun = any)) + expect_true(.peaks_contain_mz(pks, c(1.5, 32.2), condFun = any, + tolerance = 0.1)) + expect_true(.peaks_contain_mz(pks, c(1.5, 32.2), condFun = all, + tolerance = 0.1)) +})