From 30385869f95f968ea5317ab1100940ad6d28b2ce Mon Sep 17 00:00:00 2001 From: Stephen Williams Date: Wed, 11 Dec 2024 12:50:04 -0800 Subject: [PATCH 1/2] fix little formatting --- R/preprocessing.R | 127 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) diff --git a/R/preprocessing.R b/R/preprocessing.R index f7524278d..cc238aaac 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -545,6 +545,133 @@ Load10X_Spatial <- function( object[[slice]] <- image return(object) } +#' Add 10X Cell Types to a Seurat Object +#' +#' This function reads cell type annotations from a CSV file and adds them to the metadata of a Seurat object. +#' If the cell type file does not exist, the original Seurat object is returned unchanged. +#' +#' @param data.dir A string specifying the directory containing the "cell_types" folder with the "cell_types.csv" file. +#' @param object A Seurat object to which the cell type annotations will be added. +#' +#' @return A Seurat object with updated metadata including cell type annotations if the file is found. +#' +#' @details +#' The function searches for a CSV file named "cell_types.csv" in the "cell_types" subdirectory within `data.dir`. +#' The CSV file should contain at least a "barcode" column that matches the cell barcodes in the Seurat object. +#' Additional columns in the CSV file will be merged into the Seurat object's metadata. 
#'
#' @note The metadata keeps the cell order and row count of \code{object}.
#' Cells without an entry in the CSV receive \code{NA}; when a barcode occurs
#' more than once in the CSV, its first occurrence is used. A CSV column whose
#' name already exists in the metadata is added with a \code{.y} suffix and the
#' existing column gets a \code{.x} suffix.
#'
#' @importFrom utils read.csv
#'
#' @examples
#' \dontrun{
#' # Specify the data directory containing the "cell_types" folder
#' data.dir <- "/path/to/data"
#'
#' # Create a Seurat object (example)
#' seurat_obj <- CreateSeuratObject(counts = some_counts_matrix)
#'
#' # Add cell type annotations to the Seurat object
#' seurat_obj <- Add_10X_CellTypes(data.dir, seurat_obj)
#' }
Add_10X_CellTypes <- function(data.dir, object) {
  cell_types_path <- file.path(data.dir, "cell_types", "cell_types.csv")

  # Annotations are optional: without the file the object is returned as is.
  if (!file.exists(cell_types_path)) {
    return(object)
  }

  cell.types <- read.csv(cell_types_path)
  if (!"barcode" %in% colnames(cell.types)) {
    stop(
      "No 'barcode' column found in '", cell_types_path, "'",
      call. = FALSE
    )
  }

  meta.data <- object@meta.data
  annotation.cols <- setdiff(colnames(cell.types), "barcode")

  # Look every cell up in the CSV. match() yields one index (or NA) per cell,
  # so the result has exactly one row per cell, in the object's own cell order.
  # A sorting or fanning-out join would break the alignment between metadata
  # rows and cells.
  csv.rows <- match(rownames(meta.data), cell.types[["barcode"]])
  annotations <- cell.types[csv.rows, annotation.cols, drop = FALSE]
  rownames(annotations) <- rownames(meta.data)

  # Disambiguate clashing column names with the usual join suffixes.
  shared <- intersect(colnames(meta.data), annotation.cols)
  if (length(shared) > 0) {
    meta.idx <- match(shared, colnames(meta.data))
    colnames(meta.data)[meta.idx] <- paste0(shared, ".x")
    annotation.idx <- match(shared, colnames(annotations))
    colnames(annotations)[annotation.idx] <- paste0(shared, ".y")
  }

  object@meta.data <- cbind(meta.data, annotations)
  object
}

#' Load a 10x Genomics Single Cell Experiment into a \code{Seurat} object
#'
#' @inheritParams Read10X
#' @inheritParams SeuratObject::CreateSeuratObject
#' @param data.dir Directory containing the H5 file specified by \code{filename}
#' @param filename Name of H5 file containing the feature barcode matrix
#' @param to.upper Converts all feature names to upper case. This can provide an
#' approximate conversion of mouse to human gene names which can be useful in an
#' explorative analysis. For cross-species comparisons, orthologous genes should
#' be identified across species and used instead.
#' @param ...
#' Arguments passed to \code{\link{Read10X_h5}}
#'
#' @return A \code{Seurat} object
#'
#' @details
#' If \code{filename} does not exist verbatim inside \code{data.dir}, it is
#' used as a regular expression and has to match exactly one file there.
#'
#' When \code{Read10X_h5} returns a list of matrices, one object is created per
#' list element (assay and project are named after the element) and the
#' objects are merged; \code{assay} is only used when a single matrix is
#' returned. Cell type annotations found under \code{data.dir} are added to
#' objects whose assay is named "Gene Expression" or "RNA".
#'
#' @export
#' @concept preprocessing
#'
#' @examples
#' \dontrun{
#' data_dir <- 'path/to/data/directory'
#' list.files(data_dir) # Should show filtered_feature_bc_matrix.h5
#' Load10X(data.dir = data_dir)
#' }
#'
Load10X <- function(data.dir, filename = "filtered_feature_bc_matrix.h5",
                    assay = "RNA", to.upper = FALSE, ...) {
  if (length(data.dir) > 1) {
    stop("`data.dir` expects a single directory path but received multiple values.")
  }
  if (!dir.exists(data.dir)) {
    stop("No such file or directory: '", data.dir, "'")
  }

  # Prefer the exact file name. Only fall back to pattern matching when it is
  # absent, and insist on a single hit so that a scalar path is always passed
  # on (zero or several hits used to break the `if` condition itself).
  counts.path <- file.path(data.dir, filename)
  if (!file.exists(counts.path)) {
    matches <- list.files(
      data.dir,
      pattern = filename,
      full.names = FALSE,
      recursive = FALSE
    )
    if (length(matches) == 0) {
      stop("File not found: '", counts.path, "'")
    }
    if (length(matches) > 1) {
      stop(
        "Multiple files in '", data.dir, "' match '", filename, "': ",
        paste(matches, collapse = ", ")
      )
    }
    counts.path <- file.path(data.dir, matches)
  }

  counts <- Read10X_h5(counts.path, ...)

  if (to.upper) {
    upper.features <- function(mat) {
      rownames(mat) <- toupper(rownames(mat))
      mat
    }
    # `counts` is either one matrix or a named list of matrices.
    counts <- if (is.list(counts)) {
      lapply(counts, upper.features)
    } else {
      upper.features(counts)
    }
  }

  # Assays for which cell type annotations are looked up.
  annotated.assays <- c("Gene Expression", "RNA")

  if (is.list(counts)) {
    # Every list element is visited, so each modality gets its own object and
    # each is considered for annotation.
    seurat.list <- lapply(names(counts), function(name) {
      modality <- CreateSeuratObject(
        counts = counts[[name]],
        assay = name,
        project = name
      )
      if (any(Assays(modality) %in% annotated.assays)) {
        modality <- Add_10X_CellTypes(data.dir, modality)
      }
      modality
    })

    merged.object <- merge(
      x = seurat.list[[1]],
      y = seurat.list[-1],
      add.cell.ids = names(counts),
      merge.data = FALSE
    )
    return(merged.object)
  }

  object <- CreateSeuratObject(counts, assay = assay)
  if (any(Assays(object) %in% annotated.assays)) {
    object <- Add_10X_CellTypes(data.dir, object)
  }
  object
}

#' Load STARmap data #' From 1d575a15d44e95b5a85af8484eaef9fd60901c2d Mon Sep 17 00:00:00 2001 From: Stephen Williams Date: Thu, 12 Dec 2024 10:32:12 -0800 Subject: [PATCH 2/2] use base r and small changes --- R/preprocessing.R | 21 +++++++++++++-------- 1 file
changed, 13 insertions(+), 8 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 38a680f9e..5c6123c89 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -680,9 +680,14 @@ Add_10X_CellTypes <- function(data.dir, object) { cell_types_path <- file.path(data.dir, "cell_types", "cell_types.csv") if (file.exists(cell_types_path)) { cell.types <- read.csv(cell_types_path) - object@meta.data <- dplyr::left_join(tibble::rownames_to_column(object@meta.data, "barcode"), - cell.types, by = "barcode") %>% - tibble::column_to_rownames("barcode") + meta_data_with_barcodes <- tibble::rownames_to_column(object@meta.data, "barcode") + merged_meta_data <- merge( + x = meta_data_with_barcodes, + y = cell.types, + by = "barcode", + all.x = TRUE + ) + object@meta.data <- tibble::column_to_rownames(merged_meta_data, "barcode") return(object) } else { return(object) @@ -692,9 +697,9 @@ Add_10X_CellTypes <- function(data.dir, object) { #' Load a 10x Genomics Single Cell Experiment into a \code{Seurat} object #' #' @inheritParams Read10X -#' @inheritParams SeuratObject::CreateSeuratObject +#' @inheritParams SeuratObject::CreateSeuratObject +#' @param assay Name of the assay for single-modality data. For multiome 10x data this argument is not used; the name of each assay contained in the matrix is used instead. #' @param data.dir Directory containing the H5 file specified by \code{filename} -#' and the image data in a subdirectory called \code{spatial} #' @param filename Name of H5 file containing the feature barcode matrix #' @param to.upper Converts all feature names to upper case. 
This can provide an #' approximate conversion of mouse to human gene names which can be useful in an @@ -722,14 +727,14 @@ Load10X <- function(data.dir, filename = "filtered_feature_bc_matrix.h5", stop("`data.dir` expects a single directory path but received multiple values.") } if (!file.exists(data.dir)) { - stop(paste0("No such file or directory: '", data.dir, "'")) + stop("No such file or directory: '", data.dir, "'") } filename <- list.files(data.dir, filename, full.names = FALSE, recursive = FALSE) counts.path <- file.path(data.dir, filename) if (!file.exists(counts.path)) { - stop(paste0("File not found: '", counts.path, "'")) + stop("File not found: '", counts.path, "'") } counts <- Read10X_h5(counts.path, ...) @@ -750,7 +755,7 @@ Load10X <- function(data.dir, filename = "filtered_feature_bc_matrix.h5", ) }) - for (i in 1:seq_along(seurat.list)) { + for (i in seq_along(seurat.list)) { if (Assays(seurat.list[[i]]) %in% c("Gene Expression", "RNA")) { seurat.list[[i]] <- Add_10X_CellTypes(data.dir, seurat.list[[i]]) }