Skip to content

Commit

Permalink
feat: add fillCoreSpectraVariables function
Browse files Browse the repository at this point in the history
- Add a `fillCoreSpectraVariables()` function that allows to fill a spectra data
  frame with eventually missing core spectra variables (with the correct data
  type). This function will be particularly helpful for newly implemented
  `MsBackend` classes.
  • Loading branch information
jorainer committed Feb 19, 2025
1 parent 31ab50b commit 506783b
Show file tree
Hide file tree
Showing 14 changed files with 190 additions and 93 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: Spectra
Title: Spectra Infrastructure for Mass Spectrometry Data
Version: 1.17.5
Version: 1.17.6
Description: The Spectra package defines an efficient infrastructure
for storing and handling mass spectrometry spectra and functionality to
subset, process, visualize and compare spectra data. It provides different
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ export(coreSpectraVariables)
export(countIdentifications)
export(deisotopeSpectra)
export(estimatePrecursorMz)
export(fillCoreSpectraVariables)
export(filterPeaksRanges)
export(filterPrecursorIsotopes)
export(filterPrecursorMaxIntensity)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Spectra 1.17

## Change in 1.17.6

- Add new `fillCoreSpectraVariables()` function that allows to add eventually
missing *core* spectra variables (with the correct data type) to a data frame.

## Change in 1.17.5

- Move generics `processingChunkSize()`, `processingChunkFactor()` and
Expand Down
54 changes: 54 additions & 0 deletions R/MsBackend-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,57 @@ NULL
" not found"))
NULL
}

#' @title Fill spectra data with columns for missing core variables
#'
#' @description
#'
#' `fillCoreSpectraVariables()` fills a provided `data.frame`
#' with columns for eventually missing *core* spectra variables.
#' The missing core variables are added as new columns with missing values
#' (`NA`) of the correct data type.
#' Use [coreSpectraVariables()] to list the set of core variables and their
#' data types.
#'
#' @param x `data.frame` or `DataFrame` with potentially present core
#' variable columns.
#'
#' @param columns `character` with the names of the (core) spectra variables
#' that should be added if not already present in `x`. Defaults to
#' `columns = names(coreSpectraVariables())`.
#'
#' @return input data frame `x` with missing core variables added (with the
#' correct data type).
#'
#' @importFrom methods as
#'
#' @export
#'
#' @examples
#'
#' ## Define a data frame
#' a <- data.frame(msLevel = c(1L, 1L, 2L), other_column = "b")
#'
#' ## Add missing core chromatogram variables to this data frame
#' fillCoreSpectraVariables(a)
#'
#' ## The data.frame thus contains columns for all core spectra
#' ## variables in the respective expected data type (but filled with
#' ## missing values).
fillCoreSpectraVariables <- function(x = data.frame(),
columns = names(coreSpectraVariables())) {
nr <- nrow(x)
cv <- .SPECTRA_DATA_COLUMNS[names(.SPECTRA_DATA_COLUMNS) %in% columns]
miss <- cv[setdiff(names(cv), c(colnames(x), c("mz", "intensity")))]
if (length(miss))
x <- cbind(x, lapply(miss, function(z, n) rep(as(NA, z), n), nr))
if (any(columns == "mz")) {
a <- numeric()
x$mz <- NumericList(replicate(nr, a), compress = FALSE)
}
if (any(columns == "intensity")) {
a <- numeric()
x$intensity <- NumericList(replicate(nr, a), compress = FALSE)
}
x
}
4 changes: 3 additions & 1 deletion R/MsBackend.R
Original file line number Diff line number Diff line change
Expand Up @@ -595,7 +595,9 @@
#' a `DataFrame`, `spectraData<-` expects a `DataFrame` with the same number
#' of rows as there are spectra in `object`. Note that `spectraData()` has to
#' return the full data, i.e. also the m/z and intensity values (as a `list`
#' or `SimpleList` in columns `"mz"` and `"intensity"`.
#' or `SimpleList` in columns `"mz"` and `"intensity"`. See also
#' [fillCoreSpectraVariables()] for a function that can *complete* a spectra
#' data data frame with eventually missing *core* spectra variables.
#'
#' - `spectraNames()`: returns a `character` vector with the names of
#' the spectra in `object` or `NULL` if not set. `spectraNames<-` allows to
Expand Down
45 changes: 23 additions & 22 deletions R/MsBackendCached.R
Original file line number Diff line number Diff line change
Expand Up @@ -323,30 +323,31 @@ setMethod("spectraVariables", "MsBackendCached", function(object) {
columns <- columns[!columns %in% x@spectraVariables]
core_cols <- intersect(columns, names(.SPECTRA_DATA_COLUMNS))
core_cols <- core_cols[!core_cols %in% c(local_cols, x@spectraVariables)]
res <- NULL
res <- base::as.data.frame(matrix(ncol = 0, nrow = x@nspectra))
if (length(local_cols))
res <- as(x@localData[, local_cols, drop = FALSE], "DataFrame")
res <- x@localData[, local_cols, drop = FALSE]
if (length(core_cols)) {
mzvals <- NULL
intvals <- NULL
lst <- NumericList(numeric(), compress = FALSE)
if (any(core_cols == "mz")) {
core_cols <- core_cols[core_cols != "mz"]
mzvals <- lst[rep(1, times = length(x))]
}
if (any(core_cols == "intensity")) {
core_cols <- core_cols[core_cols != "intensity"]
intvals <- lst[rep(1, times = length(x))]
}
if (length(core_cols))
tmp <- DataFrame(lapply(.SPECTRA_DATA_COLUMNS[core_cols],
function(z, n) rep(as(NA, z), n), length(x)))
else tmp <- make_zero_col_DFrame(x@nspectra)
tmp$mz <- mzvals
tmp$intensity <- intvals
if (length(res))
res <- cbind(res, tmp)
else res <- tmp
res <- fillCoreSpectraVariables(res, columns = core_cols)
## mzvals <- NULL
## intvals <- NULL
## lst <- NumericList(numeric(), compress = FALSE)
## if (any(core_cols == "mz")) {
## core_cols <- core_cols[core_cols != "mz"]
## mzvals <- lst[rep(1, times = length(x))]
## }
## if (any(core_cols == "intensity")) {
## core_cols <- core_cols[core_cols != "intensity"]
## intvals <- lst[rep(1, times = length(x))]
## }
## if (length(core_cols))
## tmp <- DataFrame(lapply(.SPECTRA_DATA_COLUMNS[core_cols],
## function(z, n) rep(as(NA, z), n), length(x)))
## else tmp <- make_zero_col_DFrame(x@nspectra)
## tmp$mz <- mzvals
## tmp$intensity <- intvals
## if (length(res))
## res <- cbind(res, tmp)
## else res <- tmp
if (any(core_cols == "dataStorage"))
res$dataStorage <- dataStorage(x)
}
Expand Down
31 changes: 18 additions & 13 deletions R/MsBackendDataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -468,20 +468,25 @@ setMethod("spectraData", "MsBackendDataFrame",
res <- object@spectraData[, df_columns, drop = FALSE]
other_columns <- setdiff(columns,colnames(object@spectraData))
if (length(other_columns)) {
other_res <- lapply(other_columns, .get_column,
x = object@spectraData)
names(other_res) <- other_columns
is_mz_int <- names(other_res) %in% c("mz", "intensity")
if (!all(is_mz_int))
res <- cbind(res, as(other_res[!is_mz_int], "DataFrame"))
if (any(names(other_res) == "mz"))
res$mz <- if (length(other_res$mz)) other_res$mz
else NumericList(compress = FALSE)
if (any(names(other_res) == "intensity"))
res$intensity <- if (length(other_res$intensity))
other_res$intensity
else NumericList(compress = FALSE)
res <- fillCoreSpectraVariables(res, columns = other_columns)
## other_res <- lapply(other_columns, .get_column,
## x = object@spectraData)
## names(other_res) <- other_columns
## is_mz_int <- names(other_res) %in% c("mz", "intensity")
## if (!all(is_mz_int))
## res <- cbind(res, as(other_res[!is_mz_int], "DataFrame"))
## if (any(names(other_res) == "mz"))
## res$mz <- if (length(other_res$mz)) other_res$mz
## else NumericList(compress = FALSE)
## if (any(names(other_res) == "intensity"))
## res$intensity <- if (length(other_res$intensity))
## other_res$intensity
## else NumericList(compress = FALSE)
}
if (!all(columns %in% colnames(res)))
stop("Column(s) ", paste0(columns[!columns %in% names(res)],
collapse = ", "), " not available.",
call. = FALSE)
res[, columns, drop = FALSE]
})

Expand Down
11 changes: 6 additions & 5 deletions R/MsBackendMemory-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,11 @@ MsBackendMemory <- function() {
## Get missing core variables.
other_columns <- setdiff(sp_vars, colnames(object@spectraData))
if (length(other_columns)) {
other_res <- lapply(other_columns, .get_column,
x = object@spectraData)
names(other_res) <- other_columns
res <- cbind(res, as.data.frame(other_res))
res <- fillCoreSpectraVariables(res, other_columns)
## other_res <- lapply(other_columns, .get_column,
## x = object@spectraData)
## names(other_res) <- other_columns
## res <- cbind(res, as.data.frame(other_res))
}
if (any(columns == "mz"))
res$mz <- mz(object)
Expand Down Expand Up @@ -132,4 +133,4 @@ MsBackendMemory <- function() {
if (lcn != length(cur_cn) || !all(cn == cur_cn))
stop("provided matrices don't have the same column names")
})
}
}
24 changes: 13 additions & 11 deletions R/MsBackendMzR-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -103,14 +103,15 @@ MsBackendMzR <- function() {
sp_cols <- columns[columns %in% cn]
res <- x@spectraData[, sp_cols, drop = FALSE]
if(!nrow(x@spectraData)) {
res$mz <- NumericList(compress = FALSE)
res$intensity <- NumericList(compress = FALSE)
other_cols <- setdiff(columns, c(sp_cols, "mz", "intensity"))
if (length(other_cols)) {
res_add <- lapply(.SPECTRA_DATA_COLUMNS[other_cols],
do.call, args = list())
res <- cbind(res, res_add)
}
## res$mz <- NumericList(compress = FALSE)
## res$intensity <- NumericList(compress = FALSE)
## other_cols <- setdiff(columns, c(sp_cols, "mz", "intensity"))
## if (length(other_cols)) {
## res_add <- lapply(.SPECTRA_DATA_COLUMNS[other_cols],
## do.call, args = list())
## res <- cbind(res, res_add)
## }
res <- fillCoreSpectraVariables(res, setdiff(columns, c(sp_cols)))
return(res[, columns, drop = FALSE])
}
any_mz <- any(columns == "mz")
Expand All @@ -125,9 +126,10 @@ MsBackendMzR <- function() {
}
other_cols <- setdiff(columns, c(sp_cols, "mz", "intensity"))
if (length(other_cols)) {
other_res <- lapply(other_cols, .get_column, x = x@spectraData)
names(other_res) <- other_cols
res <- cbind(res, as(other_res, "DataFrame"))
res <- fillCoreSpectraVariables(res, other_cols)
## other_res <- lapply(other_cols, .get_column, x = x@spectraData)
## names(other_res) <- other_cols
## res <- cbind(res, as(other_res, "DataFrame"))
}
res[, columns, drop = FALSE]
}
Expand Down
4 changes: 3 additions & 1 deletion man/MsBackend.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions tests/testthat/test_MsBackend-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,23 @@ test_that(".valid_ms_backend_files_exist", {
expect_null(.valid_ms_backend_files_exist(character()))
expect_null(.valid_ms_backend_files_exist(NA_character_))
})

test_that("fillCoreSpectraVariables works", {
a <- data.frame()
res <- fillCoreSpectraVariables(a)
expect_true(is.data.frame(res))
expect_true(ncol(res) == length(coreSpectraVariables()))
expect_equal(sort(colnames(res)), sort(names(coreSpectraVariables())))

a <- data.frame(other_col = 1:3, msLevel = 1L)
res <- fillCoreSpectraVariables(a)
expect_true(all(c(names(coreSpectraVariables()), "other_col") %in%
colnames(res)))
expect_equal(a$other_col, res$other_col)
expect_equal(a$msLevel, res$msLevel)

res <- fillCoreSpectraVariables(a, c("rtime", "precursorMz"))
expect_equal(colnames(res),
c("other_col", "msLevel", "rtime", "precursorMz"))
expect_true(all(is.na(res$rtime)))
})
19 changes: 18 additions & 1 deletion tests/testthat/test_MsBackendCached.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ test_that(".spectra_data MsBackendCached works", {

res <- .spectra_data(be)
expect_true(nrow(res) == 0)
expect_equal(sort(colnames(res)), sort(names(Spectra:::.SPECTRA_DATA_COLUMNS)))
expect_equal(sort(colnames(res)), sort(names(.SPECTRA_DATA_COLUMNS)))

df <- data.frame(a = 1:4, msLevel = c(1L, 2L, 1L, 3L))
be <- backendInitialize(be, data = df)
Expand Down Expand Up @@ -80,6 +80,23 @@ test_that(".spectra_data MsBackendCached works", {
expect_true(is.null(res))
})

test_that("spectraData,MsBackendCached works", {
be <- new("MsBackendCached")
be@nspectra <- 14L
res <- spectraData(be)
expect_s4_class(res, "DataFrame")
expect_equal(.valid_column_datatype(res), NULL)
expect_true(all(names(.SPECTRA_DATA_COLUMNS) %in% colnames(res)))

expect_error(spectraData(be, "other_col"), "not available")

res <- spectraData(be, "smoothed")
expect_s4_class(res, "DataFrame")
expect_equal(colnames(res), "smoothed")
expect_true(all(is.na(res$smoothed)))
expect_true(is.logical(res$smoothed))
})

test_that("[,MsBackendCached works", {
be <- MsBackendCached()

Expand Down
14 changes: 14 additions & 0 deletions tests/testthat/test_MsBackendMemory.R
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,9 @@ test_that("spectraData,MsBackendMemory works", {
expect_equal(res$mz, IRanges::NumericList(test_df$mz, compress = FALSE))
expect_equal(res$intensity,
IRanges::NumericList(test_df$intensity, compress = FALSE))
expect_true(is.integer(res$acquisitionNum))
expect_true(all(is.na(res$acquisitionNum)))
expect_equal(.valid_column_datatype(res), NULL)

tmp <- test_df
tmp$pk_anno <- list(c("a", "b", "c"), c("", "d"), letters[12:15])
Expand All @@ -231,6 +234,7 @@ test_that("spectraData,MsBackendMemory works", {
expect_equal(res$intensity,
IRanges::NumericList(test_df$intensity, compress = FALSE))
expect_equal(res$pk_anno, tmp$pk_anno)
expect_equal(.valid_column_datatype(res), NULL)

tmp$add_anno <- list(c(1:3), 1:2, 1:4)
be <- backendInitialize(be, tmp)
Expand All @@ -254,6 +258,16 @@ test_that("spectraData,MsBackendMemory works", {
expect_s4_class(res, "DataFrame")
expect_equal(colnames(res), "pk_anno")
expect_equal(res$pk_anno, tmp$pk_anno)
res <- spectraData(be, "acquisitionNum")
expect_s4_class(res, "DataFrame")
expect_equal(colnames(res), "acquisitionNum")
expect_true(is.integer(res$acquisitionNum))
expect_true(all(is.na(res$acquisitionNum)))
res <- spectraData(be, "smoothed")
expect_s4_class(res, "DataFrame")
expect_equal(colnames(res), "smoothed")
expect_true(is.logical(res$smoothed))
expect_true(all(is.na(res$smoothed)))
})

test_that("spectraData<-,MsBackendMemory works", {
Expand Down
Loading

0 comments on commit 506783b

Please sign in to comment.