Skip to content

Commit e2802af

Browse files
authored
Merge pull request #26 from becavin-lab/rpy2
Rpy2
2 parents baed76b + 79fd5eb commit e2802af

14 files changed

+856
-239
lines changed

checkatlas/__main__.py

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import argparse # pragma: no cover
22
import logging
33
import os.path
4-
54
import yaml
65

76
from . import checkatlas # pragma: no cover
@@ -40,7 +39,8 @@ def main() -> None: # pragma: no cover
4039
parser.add_argument(
4140
"path",
4241
type=str,
43-
help="Required argument: Your folder containing Scanpy, CellRanger and Seurat atlasesv",
42+
help="Required argument: Your folder containing "
43+
"Scanpy, CellRanger and Seurat atlasesv",
4444
default=".",
4545
)
4646
parser.add_argument(
@@ -212,26 +212,9 @@ def main() -> None: # pragma: no cover
212212
# Save all arguments to yaml (only run it when
213213
# generating example file config.yaml
214214
# save_arguments(args, 'config/default_config.yaml')
215-
# test_r()
216215
checkatlas.run(args)
217216

218217

219-
def test_r():
220-
seurat_path = "/Users/christophebecavin/Documents/testatlas/PAH_675093.rds"
221-
import rpy2
222-
223-
print(seurat_path)
224-
import rpy2.robjects as robjects
225-
226-
from . import atlas_seurat
227-
228-
# atlas_seurat.install_library()
229-
rcode = f'readRDS("{seurat_path}")'
230-
print(rcode)
231-
seurat = robjects.r(rcode)
232-
print(seurat)
233-
234-
235218
def load_arguments(args, yaml_name):
236219
"""
237220
Load all args from a yaml file.

checkatlas/atlas.py

Lines changed: 86 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import numpy as np
66
import pandas as pd
77
import scanpy as sc
8+
import anndata
89

910
from . import checkatlas, folders
1011
from .metrics import metrics
@@ -35,6 +36,7 @@
3536
"CellType",
3637
"celltype",
3738
"ann_finest_level",
39+
"cellranger_graphclust",
3840
"seurat_clusters",
3941
"louvain",
4042
"leiden",
@@ -73,28 +75,58 @@
7375
logger = logging.getLogger("checkatlas")
7476

7577

76-
def convert_atlas(atlas_path, atlas_name) -> None:
77-
"""
78-
Convert a atlas to Scanpy
79-
:param atlas_path:
80-
:return:
81-
"""
82-
logger.info(f"Convert Seurat object to Scanpy: {atlas_path}")
83-
rscript_cmd = (
84-
"Rscript "
85-
+ checkatlas.RSCRIPT
86-
+ " "
87-
+ os.path.dirname(atlas_path)
88-
+ " "
89-
+ atlas_name
78+
def read_atlas(atlas_path, atlas_info):
79+
logger.info(f"Load {atlas_info[0]} in {atlas_info[-1]}")
80+
try:
81+
if atlas_path.endswith(".h5"):
82+
logger.debug(f"Read Cellranger results {atlas_path}")
83+
adata = read_cellranger(atlas_path)
84+
else:
85+
logger.debug(f"Read Scanpy file {atlas_path}")
86+
adata = sc.read_h5ad(atlas_path)
87+
return adata
88+
except anndata._io.utils.AnnDataReadError:
89+
logger.warning(f"AnnDataReadError, cannot read: {atlas_info[0]}")
90+
return None
91+
92+
93+
def read_cellranger(atlas_path):
94+
cellranger_path = atlas_path.replace(checkatlas.CELLRANGER_FILE, "")
95+
cellranger_path = os.path.join(cellranger_path, "outs")
96+
clust_path = os.path.join(
97+
cellranger_path, "analysis", "clustering", "graphclust", "clusters.csv"
9098
)
91-
logger.debug(f"Run: {rscript_cmd}")
92-
os.system(rscript_cmd)
99+
rna_umap = os.path.join(
100+
cellranger_path, "analysis", "umap", "2_components", "projection.csv"
101+
)
102+
rna_tsne = os.path.join(
103+
cellranger_path, "analysis", "tsne", "2_components", "projection.csv"
104+
)
105+
rna_pca = os.path.join(
106+
cellranger_path, "analysis", "pca", "10_components", "projection.csv"
107+
)
108+
adata = sc.read_10x_h5(atlas_path)
109+
adata.var_names_make_unique()
110+
# Add cluster
111+
if os.path.exists(clust_path):
112+
df_cluster = pd.read_csv(clust_path, index_col=0)
113+
adata.obs["cellranger_graphclust"] = df_cluster["Cluster"]
114+
# Add reduction
115+
if os.path.exists(rna_umap):
116+
df_umap = pd.read_csv(rna_umap, index_col=0)
117+
adata.obsm["X_umap"] = df_umap
118+
if os.path.exists(rna_tsne):
119+
df_tsne = pd.read_csv(rna_tsne, index_col=0)
120+
adata.obsm["X_tsne"] = df_tsne
121+
if os.path.exists(rna_pca):
122+
df_pca = pd.read_csv(rna_pca, index_col=0)
123+
adata.obsm["X_pca"] = df_pca
124+
return adata
93125

94126

95127
def clean_scanpy_atlas(adata, atlas_info) -> bool:
96128
"""
97-
Clean the Scanpy object to be suyre to get all information out of it
129+
Clean the Scanpy object to be sure to get all information out of it
98130
:param adata:
99131
:return:
100132
"""
@@ -104,7 +136,10 @@ def clean_scanpy_atlas(adata, atlas_info) -> bool:
104136
for obs_key in adata.obs_keys():
105137
for obs_key_celltype in OBS_CLUSTERS:
106138
if obs_key_celltype in obs_key:
107-
if adata.obs[obs_key].dtype == np.int32:
139+
if (
140+
adata.obs[obs_key].dtype == np.int32
141+
or adata.obs[obs_key].dtype == np.int64
142+
):
108143
adata.obs[obs_key] = pd.Categorical(adata.obs[obs_key])
109144
return adata
110145

@@ -300,6 +335,7 @@ def create_qc_plots(adata, atlas_path, atlas_info, args) -> None:
300335
"""
301336
atlas_name = atlas_info[0]
302337
sc.settings.figdir = folders.get_workingdir(args.path)
338+
sc.set_figure_params(dpi_save=80)
303339
qc_path = os.sep + atlas_name + checkatlas.QC_FIG_EXTENSION
304340
logger.debug(f"Create QC violin plot for {atlas_name}")
305341
# mitochondrial genes
@@ -339,6 +375,7 @@ def create_umap_fig(adata, atlas_path, atlas_info, args) -> None:
339375
:return:
340376
"""
341377
atlas_name = atlas_info[0]
378+
sc.set_figure_params(dpi_save=150)
342379
# Search if tsne reduction exists
343380
r = re.compile(".*umap*.")
344381
if len(list(filter(r.match, adata.obsm_keys()))) > 0:
@@ -366,6 +403,7 @@ def create_tsne_fig(adata, atlas_path, atlas_info, args) -> None:
366403
"""
367404
# Search if tsne reduction exists
368405
atlas_name = atlas_info[0]
406+
sc.set_figure_params(dpi_save=150)
369407
r = re.compile(".*tsne*.")
370408
if len(list(filter(r.match, adata.obsm_keys()))) > 0:
371409
logger.debug(f"Create t-SNE figure for {atlas_name}")
@@ -384,10 +422,11 @@ def create_tsne_fig(adata, atlas_path, atlas_info, args) -> None:
384422

385423
def metric_cluster(adata, atlas_path, atlas_info, args) -> None:
386424
"""
387-
Main function of checkatlas
388-
For every atlas create summary tables with all attributes of the atlas
389-
Calc UMAP, tSNE, andd all metrics
425+
Calc clustering metrics
426+
:param adata:
390427
:param atlas_path:
428+
:param atlas_info:
429+
:param args:
391430
:return:
392431
"""
393432
atlas_name = atlas_info[0]
@@ -398,16 +437,22 @@ def metric_cluster(adata, atlas_path, atlas_info, args) -> None:
398437
header = ["Sample", "obs"] + args.metric_cluster
399438
df_cluster = pd.DataFrame(columns=header)
400439
obs_keys = get_viable_obs_annot(adata, args)
440+
obsm_key_representation = "X_umap"
401441
if len(obs_keys) > 0:
402442
logger.debug(f"Calc clustering metrics for {atlas_name}")
403443
else:
404444
logger.debug(f"No viable obs_key was found for {atlas_name}")
405445
for obs_key in obs_keys:
406446
dict_line = {"Sample": [atlas_name + "_" + obs_key], "obs": [obs_key]}
407447
for metric in args.metric_cluster:
408-
logger.debug(f"Calc {metric} for {atlas_name} with obs {obs_key}")
448+
logger.debug(
449+
f"Calc {metric} for {atlas_name} "
450+
f"with obs {obs_key} and obsm {obsm_key_representation}"
451+
)
452+
annotation = adata.obs[obs_key]
453+
count_representation = adata.obsm[obsm_key_representation]
409454
metric_value = metrics.calc_metric_cluster(
410-
metric, adata, obs_key, "X_umap"
455+
metric, count_representation, annotation
411456
)
412457
dict_line[metric] = metric_value
413458
df_line = pd.DataFrame(dict_line)
@@ -420,10 +465,11 @@ def metric_cluster(adata, atlas_path, atlas_info, args) -> None:
420465

421466
def metric_annot(adata, atlas_path, atlas_info, args) -> None:
422467
"""
423-
Main function of checkatlas
424-
For every atlas create summary tables with all attributes of the atlas
425-
Calc UMAP, tSNE, and all metrics
468+
Calc annotation metrics
469+
:param adata:
426470
:param atlas_path:
471+
:param atlas_info:
472+
:param args:
427473
:return:
428474
"""
429475
atlas_name = atlas_info[0]
@@ -449,10 +495,13 @@ def metric_annot(adata, atlas_path, atlas_info, args) -> None:
449495
}
450496
for metric in args.metric_annot:
451497
logger.debug(
452-
f"Calc {metric} for {atlas_name} with obs {obs_key} vs ref_obs {ref_obs}"
498+
f"Calc {metric} for {atlas_name} "
499+
f"with obs {obs_key} vs ref_obs {ref_obs}"
453500
)
501+
annotation = adata.obs[obs_key]
502+
ref_annotation = adata.obs[ref_obs]
454503
metric_value = metrics.calc_metric_annot(
455-
metric, adata, obs_key, ref_obs
504+
metric, annotation, ref_annotation
456505
)
457506
dict_line[metric] = metric_value
458507
df_line = pd.DataFrame(dict_line)
@@ -465,10 +514,11 @@ def metric_annot(adata, atlas_path, atlas_info, args) -> None:
465514

466515
def metric_dimred(adata, atlas_path, atlas_info, args) -> None:
467516
"""
468-
Main function of checkatlas
469-
For every atlas create summary tables with all attributes of the atlas
470-
Calc UMAP, tSNE, andd all metrics
517+
Calc dimensionality reduction metrics
518+
:param adata:
471519
:param atlas_path:
520+
:param atlas_info:
521+
:param args:
472522
:return:
473523
"""
474524
atlas_name = atlas_info[0]
@@ -492,7 +542,11 @@ def metric_dimred(adata, atlas_path, atlas_info, args) -> None:
492542
logger.debug(
493543
f"Calc {metric} for {atlas_name} with obsm {obsm_key}"
494544
)
495-
metric_value = metrics.calc_metric_dimred(metric, adata, obsm_key)
545+
high_dim_counts = adata.X
546+
low_dim_counts = adata.obsm[obsm_key]
547+
metric_value = metrics.calc_metric_dimred(
548+
metric, high_dim_counts, low_dim_counts
549+
)
496550
dict_line[metric] = metric_value
497551
df_line = pd.DataFrame(dict_line)
498552
df_dimred = pd.concat([df_dimred, df_line], ignore_index=True, axis=0)

0 commit comments

Comments
 (0)