diff --git a/R/preprocessing.R b/R/preprocessing.R
index 1de879390..069a3b002 100644
--- a/R/preprocessing.R
+++ b/R/preprocessing.R
@@ -3120,6 +3120,15 @@ RelativeCounts <- function(data, scale.factor = 1, verbose = TRUE) {
   if (verbose) {
     cat("Performing relative-counts-normalization\n", file = stderr())
   }
+
+  # Use the median of the per-column totals when scale.factor is the string "median"
+  if (identical(x = scale.factor, y = "median")) {
+    if (verbose) {
+      cat("Calculating median scale factor\n", file = stderr())
+    }
+    scale.factor <- median(Matrix::colSums(data))
+  }
+
   norm.data <- data
   norm.data@x <- norm.data@x / rep.int(Matrix::colSums(norm.data), diff(norm.data@p)) * scale.factor
   return(norm.data)
@@ -4336,6 +4345,15 @@ LogNormalize.V3Matrix <- function(
   if (verbose) {
     cat("Performing log-normalization\n", file = stderr())
   }
+
+  # Use the median of the per-column totals when scale.factor is the string "median"
+  if (identical(x = scale.factor, y = "median")) {
+    if (verbose) {
+      cat("Calculating median scale factor\n", file = stderr())
+    }
+    scale.factor <- median(Matrix::colSums(data))
+  }
+
   norm.data <- LogNorm(data, scale_factor = scale.factor, display_progress = verbose)
   colnames(x = norm.data) <- colnames(x = data)
   rownames(x = norm.data) <- rownames(x = data)
diff --git a/R/preprocessing5.R b/R/preprocessing5.R
index a767ab24b..f50f08037 100644
--- a/R/preprocessing5.R
+++ b/R/preprocessing5.R
@@ -256,6 +256,20 @@ LogNormalize.default <- function(
   if (isTRUE(x = verbose)) {
     pb <- txtProgressBar(file = stderr(), style = 3)
   }
+
+  # Use the median of the per-row (margin == 1L) or per-column totals when scale.factor is "median"
+  if (identical(x = scale.factor, y = "median")) {
+    if (isTRUE(x = verbose)) {
+      cat("Calculating median scale factor\n", file = stderr())
+    }
+    sums <- if (margin == 1L) {
+      rowSums(data) # margin 1L: the loop below reads one cell per row (data[i, ])
+    } else {
+      colSums(data) # any other margin: total each column
+    }
+    scale.factor <- median(sums)
+  }
+
   for (i in seq_len(length.out = ncells)) {
     x <- if (margin == 1L) {
       data[i, ]
@@ -288,6 +302,15 @@ LogNormalize.IterableMatrix <- function(
   verbose = TRUE,
   ...
 ) {
+
+  # Use the median of the per-column totals when scale.factor is the string "median"
+  if (identical(x = scale.factor, y = "median")) {
+    if (isTRUE(x = verbose)) {
+      cat("Calculating median scale factor\n", file = stderr())
+    }
+    scale.factor <- median(colSums(data))
+  }
+
   data <- BPCells::t(BPCells::t(data) / colSums(data))
   # Log normalization
   data <- log1p(data * scale.factor)
@@ -860,6 +883,34 @@ DISP <- function(
     p <- p + 1L
   }
   np <- length(x = p) - 1L
+
+  # Announce the median computation and open its own progress bar if verbose is TRUE
+  if (identical(x = scale.factor, y = "median") && isTRUE(x = verbose)) {
+    cat("Calculating median scale factor\n", file = stderr())
+    pb_median <- txtProgressBar(style = 3L, file = stderr())
+  }
+
+  # Use the median of the per-pointer-group totals when scale.factor is the string "median"
+  if (identical(x = scale.factor, y = "median")) {
+    col_sums <- numeric(np)
+    for (i in seq_len(length.out = np)) {
+      # seq_len() gives an empty index when p[i] == p[i + 1], so an empty group sums to 0
+      idx <- seq_len(length.out = p[i + 1] - p[i]) + p[i] - 1L
+      xidx <- slot(object = data, name = entryname)[idx]
+      col_sums[i] <- sum(xidx)
+
+      if (isTRUE(x = verbose)) {
+        setTxtProgressBar(pb_median, value = i / np)
+      }
+    }
+
+    if (isTRUE(x = verbose)) {
+      close(pb_median)
+    }
+
+    scale.factor <- median(col_sums)
+  }
+
   if (isTRUE(x = verbose)) {
     pb <- txtProgressBar(style = 3L, file = stderr())
   }
diff --git a/tests/testthat/test_preprocessing.R b/tests/testthat/test_preprocessing.R
index a28af6ddd..a5db73b06 100644
--- a/tests/testthat/test_preprocessing.R
+++ b/tests/testthat/test_preprocessing.R
@@ -97,6 +97,21 @@ test_that("Relative count normalization returns expected values", {
   expect_equal(rc.counts[2, 1], 14285.71, tolerance = 1e-6)
 })
 
+denseMatrix <- as.matrix(pbmc.test) # Dense input for the LogNormalize.V3Matrix and RelativeCounts tests
+test_that("LogNormalize.V3Matrix computes median scale factor correctly", {
+  expectedMedian <- median(colSums(denseMatrix))
+  resultFromExpectedMedian <- LogNormalize.V3Matrix(data = denseMatrix, scale.factor = expectedMedian, margin = 2L, verbose = FALSE)
+  resultFromScaleFactorSetToMedian <- LogNormalize.V3Matrix(data = denseMatrix, scale.factor = "median", margin = 2L, verbose = FALSE)
+  expect_equal(as.matrix(resultFromExpectedMedian), as.matrix(resultFromScaleFactorSetToMedian), tolerance = 1e-6)
+})
+
+test_that("RelativeCounts computes median scale factor correctly", {
+  expectedMedian <- median(colSums(denseMatrix))
+  resultFromExpectedMedian <- RelativeCounts(data = denseMatrix, scale.factor = expectedMedian, verbose = FALSE)
+  resultFromScaleFactorSetToMedian <- RelativeCounts(data = denseMatrix, scale.factor = "median", verbose = FALSE)
+  expect_equal(as.matrix(resultFromExpectedMedian), as.matrix(resultFromScaleFactorSetToMedian), tolerance = 1e-6)
+})
+
 # Tests for v5 NormalizeData
 # --------------------------------------------------------------------------------
 context("v5 NormalizeData")
@@ -175,6 +190,44 @@ test_that("LogNormalize normalizes properly for BPCells", {
   )
 })
 
+test_that("LogNormalize.IterableMatrix computes median scale factor correctly", {
+  skip_on_cran()
+  library(Matrix)
+  skip_if_not_installed("BPCells")
+  library(BPCells)
+  mat_bpcells <- t(as(t(object[['RNA']]$counts), "IterableMatrix"))
+  expectedMedian <- median(colSums(mat_bpcells))
+  resultFromExpectedMedian <- LogNormalize.IterableMatrix(data = mat_bpcells, scale.factor = expectedMedian, margin = 2L, verbose = FALSE)
+  resultFromScaleFactorSetToMedian <- LogNormalize.IterableMatrix(data = mat_bpcells, scale.factor = "median", margin = 2L, verbose = FALSE)
+  expect_equal(as.matrix(resultFromExpectedMedian), as.matrix(resultFromScaleFactorSetToMedian), tolerance = 1e-6)
+})
+
+denseMatrix <- as.matrix(pbmc.test) # Dense input for the LogNormalize.default tests with scale.factor = "median"
+test_that("LogNormalize.default computes median scale factor correctly for both margin values", {
+  expectedMedianForMargin1L <- median(rowSums(denseMatrix))
+  expectedMedianForMargin2L <- median(colSums(denseMatrix))
+
+  resultFromExpectedMedianForMargin1L <- LogNormalize.default(data = denseMatrix, scale.factor = expectedMedianForMargin1L, margin = 1L, verbose = FALSE)
+  resultFromExpectedMedianForMargin2L <- LogNormalize.default(data = denseMatrix, scale.factor = expectedMedianForMargin2L, margin = 2L, verbose = FALSE)
+
+  resultsFromScaleFactorSetToMedianForMargin1L <- LogNormalize.default(data = denseMatrix, scale.factor = "median", margin = 1L, verbose = FALSE) # margin = 1L: median of the row totals
+  resultsFromScaleFactorSetToMedianForMargin2L <- LogNormalize.default(data = denseMatrix, scale.factor = "median", margin = 2L, verbose = FALSE) # margin = 2L: median of the column totals
+
+  expect_equal(as.matrix(resultFromExpectedMedianForMargin1L), as.matrix(resultsFromScaleFactorSetToMedianForMargin1L), tolerance = 1e-6)
+  expect_equal(as.matrix(resultFromExpectedMedianForMargin2L), as.matrix(resultsFromScaleFactorSetToMedianForMargin2L), tolerance = 1e-6)
+})
+
+theSparseMatrix <- as.sparse(denseMatrix) # Sparse input for the .SparseNormalize test with scale.factor = "median"
+test_that(".SparseNormalize computes median scale factor correctly", {
+  expectedMedian <- median(colSums(theSparseMatrix))
+
+  resultFromExpectedMedian <- .SparseNormalize(data = theSparseMatrix, scale.factor = expectedMedian, verbose = FALSE)
+  resultsFromScaleFactorSetToMedian <- .SparseNormalize(data = theSparseMatrix, scale.factor = "median", verbose = FALSE)
+
+  expect_equal(resultFromExpectedMedian, resultsFromScaleFactorSetToMedian, tolerance = 1e-6)
+})
+
+
 # Tests for ScaleData
 # --------------------------------------------------------------------------------
 context("ScaleData")