diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index 6009ec248..7eda71f8e 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -776,7 +776,10 @@ RunLSI.Seurat <- function( #' the number for the dimension names. PC by default #' @param seed.use Set a random seed. By default, sets the seed to 42. Setting #' NULL will not set a seed. -#' @param approx Use truncated singular value decomposition to approximate PCA +#' @param approx Use truncated singular value decomposition to approximate PCA. See \code{\link[irlba]{irlba}} +#' @param center logical(1) or numeric. Center features (cells if \code{rev.pca=TRUE}) before computing PCs (recommended). See \code{\link[stats]{prcomp}} +#' @param scale logical(1) or numeric. Scale features (cells if \code{rev.pca=TRUE}) before computing PCs (recommended). See \code{\link[stats]{prcomp}} +#' @param scale. Deprecated. Alias for \code{scale} #' #' @importFrom irlba irlba #' @importFrom stats prcomp @@ -797,46 +800,93 @@ RunPCA.default <- function( reduction.key = "PC_", seed.use = 42, approx = TRUE, + center = !approx, # to retain previous (non-sensible) defaults + scale = if(!missing(scale.)) scale. else FALSE, # to retain previous defaults + scale., ... ) { + if(!approx) { + pcs.name <- if(rev.pca) "`gene.loadings`" else "`cell.embeddings`" + # Stage 1: + Seurat.RunPCA.use.correct.scaling <- getOption(x = "Seurat.RunPCA.use.correct.scaling") + if (is.null(Seurat.RunPCA.use.correct.scaling)) { + warning("If used with `approx=FALSE`, RunPCA does not scale ", pcs.name, " correctly. This will change", + " in a future version.\nTo switch to the new, correct behavior now and get rid of this", # to be changed to + " warning message, use `options(Seurat.RunPCA.use.correct.scaling = TRUE)` or use `approx=TRUE` (recommended).", + "\nTo maintain the current (wrong) behavior and get rid of this warning message,", + " set `options(Seurat.RunPCA.use.correct.scaling = FALSE)` (not recommended).", + "\nSee https://github.com/satijalab/seurat/issues/2237 for further information.") + Seurat.RunPCA.use.correct.scaling <- FALSE + } + # # Stage 2: # fill in current date for yyyyy in stage 4 warning + # Seurat.RunPCA.use.correct.scaling <- getOption(x = "Seurat.RunPCA.use.correct.scaling", default = TRUE) + # # Stage 3: # fill in current date for xxxxx in stage 4 warnings + # Seurat.RunPCA.use.correct.scaling <- getOption(x = "Seurat.RunPCA.use.correct.scaling", default = TRUE) + # if (!Seurat.RunPCA.use.correct.scaling) { + # warning("Using `options(Seurat.RunPCA.use.correct.scaling = FALSE)` is deprecated and will cause an error in future versions of Seurat.", + # "\nSee https://github.com/satijalab/seurat/issues/2237 for further information.") + # } + # # Stage 4: remove if clause below + # Seurat.RunPCA.use.correct.scaling <- getOption(x = "Seurat.RunPCA.use.correct.scaling", default = TRUE) + # if (!is.null(Seurat.RunPCA.use.correct.scaling)) { + # if (Seurat.RunPCA.use.correct.scaling) { + # warning("Using `options(Seurat.RunPCA.use.correct.scaling = FALSE)` is deprecated since xxxxx.", + # " Specifying `options(Seurat.RunPCA.use.correct.scaling = TRUE)` is no longer needed since yyyyy, please do not set it anymore.", + # "\nSee https://github.com/satijalab/seurat/issues/2237 for further information.") + # } else { + # stop("`options(Seurat.RunPCA.use.correct.scaling = FALSE)` has been deprecated since xxxxx and is no longer supported.", + # "\nSee https://github.com/satijalab/seurat/issues/2237 for further information.") + # } + #
} + # # Stage 5: remove this if clause + } + if(!missing(scale.) && !missing(scale)) stop("Either `scale` or `scale.` can be specified, not both!") + if(is.null(scale)) scale <- FALSE + if(is.null(center)) center <- FALSE + do.scale <- !is.logical(scale) || scale + calc.scale <- is.logical(scale) && scale + do.center <- !is.logical(center) || center + calc.center <- is.logical(center) && center + if (!is.null(x = seed.use)) { set.seed(seed = seed.use) } - if (rev.pca) { - npcs <- min(npcs, ncol(x = object) - 1) - pca.results <- irlba(A = object, nv = npcs, ...) - total.variance <- sum(RowVar(x = t(x = object))) - sdev <- pca.results$d/sqrt(max(1, nrow(x = object) - 1)) - if (weight.by.var) { - feature.loadings <- pca.results$u %*% diag(pca.results$d) - } else{ - feature.loadings <- pca.results$u - } - cell.embeddings <- pca.results$v - } - else { - total.variance <- sum(RowVar(x = object)) - if (approx) { - npcs <- min(npcs, nrow(x = object) - 1) - pca.results <- irlba(A = t(x = object), nv = npcs, ...) - feature.loadings <- pca.results$v - sdev <- pca.results$d/sqrt(max(1, ncol(object) - 1)) - if (weight.by.var) { - cell.embeddings <- pca.results$u %*% diag(pca.results$d) - } else { - cell.embeddings <- pca.results$u - } - } else { - npcs <- min(npcs, nrow(x = object)) - pca.results <- prcomp(x = t(object), rank. = npcs, ...) - feature.loadings <- pca.results$rotation - sdev <- pca.results$sdev - if (weight.by.var) { - cell.embeddings <- pca.results$x %*% diag(pca.results$sdev[1:npcs]^2) - } else { - cell.embeddings <- pca.results$x - } - } + t.object <- if(rev.pca) object else t(x = object) + feature.means <- Matrix::colMeans(t.object) + feature.sq.means <- Matrix::colMeans(t.object^2) + + center_ <- if (calc.center) feature.means else if (do.center) center else 0 + # equivalent to feature.vars <- colMeans(t(t(t.object) - center_)^2), via E[(x - c)^2] = E[x^2] - 2*c*E[x] + c^2 + feature.vars <- feature.sq.means - 2*feature.means*center_ + center_^2 + feature.vars <- feature.vars * 1/(1-1/nrow(t.object)) # Bessel correction: multiply by n/(n-1) + scale_ <- if (calc.scale) sqrt(feature.vars) else if(do.scale) scale else 1 + total.variance <- sum(feature.vars/scale_^2) + + if (approx){ + if(!do.scale) scale_ <- NULL + if(!do.center) center_ <- NULL + npcs <- min(npcs, dim(t.object) - 1L) # irlba does not allow computation of all PCs + pca.results <- irlba(A = t.object, nv = npcs, scale = scale_, center = center_, ...) + sdev <- pca.results$d/sqrt(nrow(t.object) - 1) + feature.loadings <- pca.results$v + cell.embeddings <- pca.results$u %*% diag(pca.results$d) + } else { + pca.results <- prcomp(x = t.object, rank. = npcs, center = center, scale. = scale, ...) + sdev <- pca.results$sdev + feature.loadings <- pca.results$rotation + cell.embeddings <- pca.results$x + } + if (!approx && !Seurat.RunPCA.use.correct.scaling ) { # to be removed at stage 4 + if (weight.by.var) { # + cell.embeddings <- cell.embeddings %*% diag(sdev[1:npcs]^2) # + } # + } else if (!weight.by.var) { + cell.embeddings <- cell.embeddings %*% diag(1/sdev[1:npcs]/sqrt(nrow(t.object) - 1)) + } + if (rev.pca){ + tmp <- feature.loadings + feature.loadings <- cell.embeddings + cell.embeddings <- tmp } rownames(x = feature.loadings) <- rownames(x = object) colnames(x = feature.loadings) <- paste0(reduction.key, 1:npcs) @@ -865,6 +915,7 @@ RunPCA.default <- function( #' using the variable features for the Assay. Note that the features must be present #' in the scaled data. Any requested features that are not scaled or have 0 variance #' will be dropped, and the PCA will be run using the remaining features.
+#' @param slot Name of slot of assay PCA is being run on #' #' @rdname RunPCA #' @export @@ -882,12 +933,19 @@ RunPCA.Assay <- function( nfeatures.print = 30, reduction.key = "PC_", seed.use = 42, - ... + approx = TRUE, + center = if(slot=="scale.data") !approx else TRUE, # for backwards compatibility. `slot!="scale.data"` would be more sensible + scale = if(!missing(scale.)) scale. else slot!="scale.data", # for backwards compatibility + scale., + ..., + slot = "scale.data" ) { + if(!missing(scale.) && !missing(scale)) stop("Either `scale` or `scale.` can be specified, not both!") data.use <- PrepDR( object = object, features = features, - verbose = verbose + verbose = verbose, + slot = slot ) reduction.data <- RunPCA( object = data.use, @@ -900,8 +958,10 @@ RunPCA.Assay <- function( nfeatures.print = nfeatures.print, reduction.key = reduction.key, seed.use = seed.use, - ... - + approx = approx, + ..., + center = center, + scale = scale ) return(reduction.data) } @@ -925,7 +985,9 @@ RunPCA.Seurat <- function( reduction.name = "pca", reduction.key = "PC_", seed.use = 42, - ... + approx = TRUE, + ..., + slot = "scale.data" ) { assay <- assay %||% DefaultAssay(object = object) assay.data <- GetAssay(object = object, assay = assay) @@ -941,7 +1003,9 @@ RunPCA.Seurat <- function( nfeatures.print = nfeatures.print, reduction.key = reduction.key, seed.use = seed.use, - ... + approx = approx, + ..., + slot = slot ) object[[reduction.name]] <- reduction.data object <- LogSeuratCommand(object = object) @@ -1935,31 +1999,36 @@ L2Norm <- function(vec) { # @param object Assay object # @param features Features to use as input for the dimensional reduction technique. # Default is variable features -# @ param verbose Print messages and warnings +# @param verbose Print messages and warnings +# @param slot Slot to use as input for the dimensional reduction technique # # PrepDR <- function( object, features = NULL, - verbose = TRUE + verbose = TRUE, + slot = "scale.data" ) { if (length(x = VariableFeatures(object = object)) == 0 && is.null(x = features)) { - stop("Variable features haven't been set. Run FindVariableFeatures() or provide a vector of feature names.") + stop("Variable features haven't been set. Run FindVariableFeatures() or use the `features` argument to specify which features to use.") } - data.use <- GetAssayData(object = object, slot = "scale.data") + data.use <- GetAssayData(object = object, slot = slot) if (nrow(x = data.use ) == 0) { - stop("Data has not been scaled. Please run ScaleData and retry") + switch(slot, + "scale.data" = stop("Data has not been scaled. Please run ScaleData and retry"), + "data" = stop("Data has not been normalized.
Please run NormalizeData and retry"), + stop(paste0("Slot '", slot, "' not found in assay '", object@key, "'."))) } features <- features %||% VariableFeatures(object = object) features.keep <- unique(x = features[features %in% rownames(x = data.use)]) if (length(x = features.keep) < length(x = features)) { features.exclude <- setdiff(x = features, y = features.keep) if (verbose) { - warning(paste0("The following ", length(x = features.exclude), " features requested have not been scaled (running reduction without them): ", paste0(features.exclude, collapse = ", "))) + warning(paste0("The following ", length(x = features.exclude), " features requested have not been found in slot '", slot, "' of assay '", object@key, "' (running reduction without them): ", paste0(features.exclude, collapse = ", "))) } } features <- features.keep - features.var <- apply(X = data.use[features, ], MARGIN = 1, FUN = var) + features.var <- Matrix::rowMeans(data.use[features, ]^2) - Matrix::rowMeans(data.use[features, ])^2 features.keep <- features[features.var > 0] if (length(x = features.keep) < length(x = features)) { features.exclude <- setdiff(x = features, y = features.keep) diff --git a/man/RunPCA.Rd b/man/RunPCA.Rd index a719ea2f1..7bb166aa8 100644 --- a/man/RunPCA.Rd +++ b/man/RunPCA.Rd @@ -10,49 +10,58 @@ RunPCA(object, ...) \method{RunPCA}{default}( - object, - assay = NULL, + object, + assay = NULL, npcs = 50, - rev.pca = FALSE, - weight.by.var = TRUE, + rev.pca = FALSE, + weight.by.var = TRUE, verbose = TRUE, - ndims.print = 1:5, - nfeatures.print = 30, + ndims.print = 1:5, + nfeatures.print = 30, reduction.key = "PC_", - seed.use = 42, - approx = TRUE, - ... -) + seed.use = 42, + approx = TRUE, + center = !approx, + scale = if(!missing(scale.)) scale. else FALSE, + scale., + ...) \method{RunPCA}{Assay}( - object, - assay = NULL, + object, + assay = NULL, features = NULL, - npcs = 50, - rev.pca = FALSE, - weight.by.var = TRUE, + npcs = 50, + rev.pca = FALSE, + weight.by.var = TRUE, verbose = TRUE, - ndims.print = 1:5, - nfeatures.print = 30, + ndims.print = 1:5, + nfeatures.print = 30, reduction.key = "PC_", - seed.use = 42, - ... + seed.use = 42, + approx = TRUE, + center = if (slot == "scale.data") !approx else TRUE, + scale = if (!missing(scale.)) scale. else slot != "scale.data", + scale., + ..., + slot = "scale.data" ) \method{RunPCA}{Seurat}( - object, - assay = NULL, + object, + assay = NULL, features = NULL, - npcs = 50, - rev.pca = FALSE, - weight.by.var = TRUE, + npcs = 50, + rev.pca = FALSE, + weight.by.var = TRUE, verbose = TRUE, - ndims.print = 1:5, - nfeatures.print = 30, + ndims.print = 1:5, + nfeatures.print = 30, reduction.name = "pca", - reduction.key = "PC_", - seed.use = 42, - ... + reduction.key = "PC_", + seed.use = 42, + approx = TRUE, + ..., + slot = "scale.data" ) } \arguments{ @@ -83,13 +92,21 @@ the number for the dimension names. PC by default} \item{seed.use}{Set a random seed. By default, sets the seed to 42. Setting NULL will not set a seed.} -\item{approx}{Use truncated singular value decomposition to approximate PCA} +\item{approx}{Use truncated singular value decomposition to approximate PCA. See \code{\link[irlba]{irlba}}} -\item{features}{Features to compute PCA on. If features=NULL, PCA will be run +\item{center}{logical(1) or numeric. Center features (cells if \code{rev.pca=TRUE}) before computing PCs (recommended). See \code{\link[stats]{prcomp}}} + +\item{scale}{logical(1) or numeric. 
Scale features (cells if \code{rev.pca=TRUE}) before computing PCs (recommended). See \code{\link[stats]{prcomp}}} + +\item{scale.}{Deprecated. Alias for \code{scale}} + +\item{features}{Features to compute PCA on. If features=NULL, PCA will be run using the variable features for the Assay. Note that the features must be present in the scaled data. Any requested features that are not scaled or have 0 variance will be dropped, and the PCA will be run using the remaining features.} +\item{slot}{Name of slot of assay PCA is being run on} + \item{reduction.name}{dimensional reduction name, pca by default} } \value{ diff --git a/tests/testthat/pcalist.approx...FALSE..rds b/tests/testthat/pcalist.approx...FALSE..rds new file mode 100644 index 000000000..c30e7b0e5 Binary files /dev/null and b/tests/testthat/pcalist.approx...FALSE..rds differ diff --git a/tests/testthat/pcalist.approx...FALSE..weight.by.var...FALSE..rds b/tests/testthat/pcalist.approx...FALSE..weight.by.var...FALSE..rds new file mode 100644 index 000000000..58e7f45cb Binary files /dev/null and b/tests/testthat/pcalist.approx...FALSE..weight.by.var...FALSE..rds differ diff --git a/tests/testthat/pcalist.approx...FALSE..weight.by.var...TRUE..rds b/tests/testthat/pcalist.approx...FALSE..weight.by.var...TRUE..rds new file mode 100644 index 000000000..c30e7b0e5 Binary files /dev/null and b/tests/testthat/pcalist.approx...FALSE..weight.by.var...TRUE..rds differ diff --git a/tests/testthat/pcalist.approx...TRUE..rds b/tests/testthat/pcalist.approx...TRUE..rds new file mode 100644 index 000000000..a1e0dced1 Binary files /dev/null and b/tests/testthat/pcalist.approx...TRUE..rds differ diff --git a/tests/testthat/pcalist.approx...TRUE..weight.by.var...FALSE..rds b/tests/testthat/pcalist.approx...TRUE..weight.by.var...FALSE..rds new file mode 100644 index 000000000..16119bb61 Binary files /dev/null and b/tests/testthat/pcalist.approx...TRUE..weight.by.var...FALSE..rds differ diff --git a/tests/testthat/pcalist.approx...TRUE..weight.by.var...TRUE..rds b/tests/testthat/pcalist.approx...TRUE..weight.by.var...TRUE..rds new file mode 100644 index 000000000..a1e0dced1 Binary files /dev/null and b/tests/testthat/pcalist.approx...TRUE..weight.by.var...TRUE..rds differ diff --git a/tests/testthat/test_dimensional_reduction.R b/tests/testthat/test_dimensional_reduction.R index 9fadd6ea6..d5aa31e9c 100644 --- a/tests/testthat/test_dimensional_reduction.R +++ b/tests/testthat/test_dimensional_reduction.R @@ -43,18 +43,297 @@ test_that("pca returns total variance (see #982)", { # Scale and compute PCA, using RunPCA obj <- ScaleData(object = obj, verbose = FALSE) - pca_result <- suppressWarnings(expr = RunPCA( + for (scale in c(TRUE, FALSE)) { + pca_result <- suppressWarnings(expr = RunPCA( + object = obj, + features = rownames(x = obj), + verbose = FALSE, + scale = scale + )) + + # Using stats::prcomp + scaled_data <- Seurat::GetAssayData(object = obj, slot = "scale.data") + prcomp_result <- stats::prcomp(scaled_data, center = FALSE, scale. 
= scale) + + # Compare + expect_equivalent( + slot(object = pca_result[["pca"]], name = "misc")$total.variance, + sum(prcomp_result$sdev^2), + label = paste0("RunPCA with `scale=", scale, "`")) + } + +}) + +expect_point_symmetric <- function(object, expected, ..., info = NULL, label = NULL, expected.label = NULL) { + act <- testthat::quasi_label(rlang::enquo(object), label, arg = "object") + exp <- testthat::quasi_label(rlang::enquo(expected), expected.label, arg = "expected") + object_mirrored_if_needed <- t(t(act$val) * sign(act$val[1,]) * sign(exp$val[1,])) + comp <- testthat::compare(object_mirrored_if_needed, exp$val, ...) + expect(comp$equal, sprintf("%s not point symmetric to %s.\n%s", + act$lab, exp$lab, comp$message), info = info) + invisible(act$val) +} + +test_that("PCA of scaled data with and without scaling is identical", { + #test new behavior + Seurat.RunPCA.use.correct.scaling.bak <- options(Seurat.RunPCA.use.correct.scaling = TRUE) + on.exit(options(Seurat.RunPCA.use.correct.scaling.bak)) + + # Generate dummy data exp matrix + set.seed(seed = 1) + dummyexpMat <- matrix( + data = stats::rexp(n = 1e4, rate = 1), + ncol = 100, nrow = 100 + ) + npcs <- 10 + colnames(x = dummyexpMat) <- paste0("cell", seq(ncol(x = dummyexpMat))) + row.names(x = dummyexpMat) <- paste0("gene", seq(nrow(x = dummyexpMat))) + + # Create Seurat object for testing + obj <- CreateSeuratObject(counts = dummyexpMat) + + # Normalize and compute PCA + obj <- NormalizeData(object = obj, verbose = FALSE) + + # Scale + obj <- ScaleData(object = obj, verbose = FALSE) + + # compute PCA with different values of related parameters + for (approx in list(list(approx=TRUE), list(approx=FALSE))) { + pars <- c(approx) + pars.str <- paste0(deparse(pars), collapse="") + pca.res.rescale <- do.call( + what = RunPCA, + args = c( + list(object = obj, features = rownames(x = obj), npcs = npcs, verbose = FALSE), + pars, + list(scale=TRUE, center=TRUE, slot="scale.data") + ) + )[["pca"]] + pca.res.onscale <- do.call( + what = RunPCA, + args = c( + list(object = obj, features = rownames(x = obj), npcs = npcs, verbose = FALSE), + pars, + list(scale=FALSE, center=FALSE, slot="scale.data") + ) + )[["pca"]] + + expect_point_symmetric( + object = slot(object = pca.res.rescale, name = "cell.embeddings"), + expected = slot(object = pca.res.onscale, name = "cell.embeddings"), + label = paste0("RunPCA rescale with ", pars.str), + tolerance = 1E-7 + ) + expect_point_symmetric( + object = slot(object = pca.res.rescale, name = "feature.loadings"), + expected = slot(object = pca.res.onscale, name = "feature.loadings"), + label = paste0("RunPCA rescale with ", pars.str), + tolerance = 1E-7 + ) + expect_equal( + object = slot(object = pca.res.rescale, name = "stdev"), + expected = slot(object = pca.res.onscale, name = "stdev"), + label = paste0("RunPCA rescale with ", pars.str), + tolerance = 1E-7 + ) + } +}) + +test_that("pca with implicit scaling yields identical result", { + #test new behavior + Seurat.RunPCA.use.correct.scaling.bak <- options(Seurat.RunPCA.use.correct.scaling = TRUE) + on.exit(options(Seurat.RunPCA.use.correct.scaling.bak)) + # Generate dummy data exp matrix + set.seed(seed = 1) + dummyexpMat <- matrix( + data = stats::rexp(n = 1e4, rate = 1), + ncol = 100, nrow = 100 + ) + npcs <- 10 + colnames(x = dummyexpMat) <- paste0("cell", seq(ncol(x = dummyexpMat))) + row.names(x = dummyexpMat) <- paste0("gene", seq(nrow(x = dummyexpMat))) + + # Create Seurat object for testing + obj <- CreateSeuratObject(counts = dummyexpMat) + +
# Normalize and compute PCA + obj <- NormalizeData(object = obj, verbose = FALSE) + + + # scale and compute PCA with different values of related parameters + for (approx in c(TRUE, FALSE)) { + for (scale in c(TRUE, FALSE)) { + for (center in c(TRUE, FALSE)) { + pars <- list(approx = approx, scale = scale, center = center) + pars.str <- paste0(deparse(pars), collapse="") + + # Scale + obj <- ScaleData(object = obj, do.scale = scale, do.center = center, verbose = FALSE) + + pca.res.onnorm <- RunPCA(object = obj, features = rownames(x = obj), npcs = npcs, + verbose = FALSE, scale = scale, center = center, slot="data")[["pca"]] + + pca.res.onscale <- RunPCA(object = obj, features = rownames(x = obj), npcs = npcs, + verbose = FALSE, scale = FALSE, center = FALSE, slot="scale.data")[["pca"]] + + expect_point_symmetric( + object = slot(object = pca.res.onnorm, name = "cell.embeddings"), + expected = slot(object = pca.res.onscale, name = "cell.embeddings"), + label = paste0("RunPCA with ", pars.str), + tolerance = 1E-7 + ) + expect_point_symmetric( + object = slot(object = pca.res.onnorm, name = "feature.loadings"), + expected = slot(object = pca.res.onscale, name = "feature.loadings"), + label = paste0("RunPCA with ", pars.str), + tolerance = 1E-7 + ) + expect_equal( + object = slot(object = pca.res.onnorm, name = "stdev"), + expected = slot(object = pca.res.onscale, name = "stdev"), + label = paste0("RunPCA with ", pars.str), + tolerance = 1E-7 + ) + expect_equal( + object = slot(object = pca.res.onnorm, name = "misc"), + expected = slot(object = pca.res.onscale, name = "misc"), + label = paste0("RunPCA with ", pars.str), + tolerance = 1E-7 + ) + } + } + } +}) + +test_that("pca embedding weighting works", { + #test new behavior + Seurat.RunPCA.use.correct.scaling.bak <- options(Seurat.RunPCA.use.correct.scaling = TRUE) + on.exit(options(Seurat.RunPCA.use.correct.scaling.bak)) + # Generate dummy data exp matrix + set.seed(seed = 1) + npcs <- 50 + dummyexpMat <- matrix( + data = stats::rexp(n = 2e4, rate = 1), + ncol = 200, nrow = 100 + ) + colnames(x = dummyexpMat) <- paste0("cell", seq(ncol(x = dummyexpMat))) + row.names(x = dummyexpMat) <- paste0("gene", seq(nrow(x = dummyexpMat))) + + # Create Seurat object for testing + obj <- CreateSeuratObject(counts = dummyexpMat) + + # Normalize + obj <- NormalizeData(object = obj, verbose = FALSE) + # Scale + obj <- ScaleData(object = obj, verbose = FALSE) + + # un(weighted/scaled) + # compute PCA + obj <- suppressWarnings(expr = RunPCA( object = obj, features = rownames(x = obj), - verbose = FALSE + verbose = FALSE, + reduction.name = "pca.prcomp.unscaled", + npcs = npcs, + weight.by.var = FALSE, + approx = FALSE )) - # Using stats::prcomp - scaled_data <- Seurat::GetAssayData(object = obj, slot = "scale.data") - prcomp_result <- stats::prcomp(scaled_data, center = FALSE, scale. 
= FALSE) + obj <- suppressWarnings(expr = RunPCA( + object = obj, + features = rownames(x = obj), + verbose = FALSE, + reduction.name = "pca.irlba.unscaled", + npcs = npcs, + weight.by.var = FALSE, + approx = TRUE + )) # Compare - expect_equivalent(slot(object = pca_result[["pca"]], name = "misc")$total.variance, - sum(prcomp_result$sdev^2)) + expect_equivalent( + diag(x = cov(x = slot(object = obj[["pca.prcomp.unscaled"]], name = "cell.embeddings"))), + rep(x = 1/(ncol(x = obj)-1), times=npcs) + ) + expect_equivalent( + diag(x = cov(x = slot(object = obj[["pca.irlba.unscaled"]], name = "cell.embeddings"))), + rep(x = 1/(ncol(x = obj)-1), times=npcs) + ) + # weighted/scaled + # compute PCA + obj <- suppressWarnings(expr = RunPCA( + object = obj, + features = rownames(x = obj), + verbose = FALSE, + reduction.name = "pca.prcomp.var_scaled", + npcs = npcs, + weight.by.var = TRUE, + approx = FALSE + )) + + obj <- suppressWarnings(expr = RunPCA( + object = obj, + features = rownames(x = obj), + verbose = FALSE, + reduction.name = "pca.irlba.var_scaled", + npcs = npcs, + weight.by.var = TRUE, + approx = TRUE + )) + + # Compare + expect_equivalent( + diag(x = cov(x = slot(object = obj[["pca.prcomp.var_scaled"]], name = "cell.embeddings"))), + slot(object = obj[["pca.prcomp.var_scaled"]], name = "stdev")[1:npcs]^2 + ) + expect_equivalent( + diag(x = cov(x = slot(object = obj[["pca.irlba.var_scaled"]], name = "cell.embeddings"))), + slot(object = obj[["pca.prcomp.var_scaled"]], name = "stdev")[1:npcs]^2 + ) +}) + +test_that("pca reduction behaves as previously", { + #test default behavior + Seurat.RunPCA.use.correct.scaling.bak <- options(Seurat.RunPCA.use.correct.scaling = NULL) # change to FALSE, when changing default to TRUE + on.exit(options(Seurat.RunPCA.use.correct.scaling.bak)) + # Generate dummy data exp matrix + set.seed(seed = 1) + npcs <- 3 + dummyexpMat <- matrix( + data = stats::rexp(n = 2e4, rate = 1), + ncol = 200, nrow = 100 + ) + colnames(x = dummyexpMat) <- paste0("cell", seq(ncol(x = dummyexpMat))) + row.names(x = dummyexpMat) <- paste0("gene", seq(nrow(x = dummyexpMat))) + + # Create Seurat object for testing + obj <- CreateSeuratObject(counts = dummyexpMat) + + # Normalize + obj <- NormalizeData(object = obj, verbose = FALSE) + # Scale + obj <- ScaleData(object = obj, verbose = FALSE) + + # compute PCA with different values of related parameters + for (approx in list(list(approx=TRUE), list(approx=FALSE))) { + for (weight.by.var in list(list(weight.by.var=TRUE), list(weight.by.var=FALSE), list())) { + pars <- c(approx, weight.by.var) + pars.str <- paste0(deparse(pars), collapse="") + expect_known_value( + object = suppressWarnings( + expr = do.call( + what = RunPCA, + args = c( + list(object = obj, features = rownames(x = obj), npcs = npcs), + pars + ) + ) + )[["pca"]], + file = paste0("pca", make.names(pars.str), ".rds"), + label = paste0("RunPCA with ", pars.str) + ) + } + } }) +
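For reference, a minimal usage sketch of the interface added by this patch (the `slot`, `center`, and `scale` arguments and the `Seurat.RunPCA.use.correct.scaling` option). The object setup mirrors the dummy data used in the tests above and assumes the patched package is loaded; it is an illustration, not part of the patch.

library(Seurat)

# Build a small test object, as in tests/testthat/test_dimensional_reduction.R
set.seed(1)
mat <- matrix(
  data = stats::rexp(n = 1e4, rate = 1), nrow = 100, ncol = 100,
  dimnames = list(paste0("gene", 1:100), paste0("cell", 1:100))
)
obj <- CreateSeuratObject(counts = mat)
obj <- NormalizeData(object = obj, verbose = FALSE)

# New: run PCA directly on the normalized "data" slot and let RunPCA center and
# scale the features itself, instead of requiring ScaleData() beforehand.
obj <- RunPCA(object = obj, features = rownames(x = obj), npcs = 10,
              slot = "data", center = TRUE, scale = TRUE, verbose = FALSE)

# Opt in to the corrected `approx = FALSE` embedding scaling (see the Stage 1 warning above).
options(Seurat.RunPCA.use.correct.scaling = TRUE)
obj <- ScaleData(object = obj, verbose = FALSE)
obj <- RunPCA(object = obj, features = rownames(x = obj), npcs = 10,
              approx = FALSE, verbose = FALSE)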