5
5
import numpy as np
6
6
import pandas as pd
7
7
import scanpy as sc
8
+ import anndata
8
9
9
10
from . import checkatlas , folders
10
11
from .metrics import metrics
35
36
"CellType" ,
36
37
"celltype" ,
37
38
"ann_finest_level" ,
39
+ "cellranger_graphclust" ,
38
40
"seurat_clusters" ,
39
41
"louvain" ,
40
42
"leiden" ,
73
75
logger = logging .getLogger ("checkatlas" )
74
76
75
77
76
- def convert_atlas (atlas_path , atlas_name ) -> None :
77
- """
78
- Convert a atlas to Scanpy
79
- :param atlas_path:
80
- :return:
81
- """
82
- logger .info (f"Convert Seurat object to Scanpy: { atlas_path } " )
83
- rscript_cmd = (
84
- "Rscript "
85
- + checkatlas .RSCRIPT
86
- + " "
87
- + os .path .dirname (atlas_path )
88
- + " "
89
- + atlas_name
78
+ def read_atlas (atlas_path , atlas_info ):
79
+ logger .info (f"Load { atlas_info [0 ]} in { atlas_info [- 1 ]} " )
80
+ try :
81
+ if atlas_path .endswith (".h5" ):
82
+ logger .debug (f"Read Cellranger results { atlas_path } " )
83
+ adata = read_cellranger (atlas_path )
84
+ else :
85
+ logger .debug (f"Read Scanpy file { atlas_path } " )
86
+ adata = sc .read_h5ad (atlas_path )
87
+ return adata
88
+ except anndata ._io .utils .AnnDataReadError :
89
+ logger .warning (f"AnnDataReadError, cannot read: { atlas_info [0 ]} " )
90
+ return None
91
+
92
+
93
+ def read_cellranger (atlas_path ):
94
+ cellranger_path = atlas_path .replace (checkatlas .CELLRANGER_FILE , "" )
95
+ cellranger_path = os .path .join (cellranger_path , "outs" )
96
+ clust_path = os .path .join (
97
+ cellranger_path , "analysis" , "clustering" , "graphclust" , "clusters.csv"
90
98
)
91
- logger .debug (f"Run: { rscript_cmd } " )
92
- os .system (rscript_cmd )
99
+ rna_umap = os .path .join (
100
+ cellranger_path , "analysis" , "umap" , "2_components" , "projection.csv"
101
+ )
102
+ rna_tsne = os .path .join (
103
+ cellranger_path , "analysis" , "tsne" , "2_components" , "projection.csv"
104
+ )
105
+ rna_pca = os .path .join (
106
+ cellranger_path , "analysis" , "pca" , "10_components" , "projection.csv"
107
+ )
108
+ adata = sc .read_10x_h5 (atlas_path )
109
+ adata .var_names_make_unique ()
110
+ # Add cluster
111
+ if os .path .exists (clust_path ):
112
+ df_cluster = pd .read_csv (clust_path , index_col = 0 )
113
+ adata .obs ["cellranger_graphclust" ] = df_cluster ["Cluster" ]
114
+ # Add reduction
115
+ if os .path .exists (rna_umap ):
116
+ df_umap = pd .read_csv (rna_umap , index_col = 0 )
117
+ adata .obsm ["X_umap" ] = df_umap
118
+ if os .path .exists (rna_tsne ):
119
+ df_tsne = pd .read_csv (rna_tsne , index_col = 0 )
120
+ adata .obsm ["X_tsne" ] = df_tsne
121
+ if os .path .exists (rna_pca ):
122
+ df_pca = pd .read_csv (rna_pca , index_col = 0 )
123
+ adata .obsm ["X_pca" ] = df_pca
124
+ return adata
93
125
94
126
95
127
def clean_scanpy_atlas (adata , atlas_info ) -> bool :
96
128
"""
97
- Clean the Scanpy object to be suyre to get all information out of it
129
+ Clean the Scanpy object to be sure to get all information out of it
98
130
:param adata:
99
131
:return:
100
132
"""
@@ -104,7 +136,10 @@ def clean_scanpy_atlas(adata, atlas_info) -> bool:
104
136
for obs_key in adata .obs_keys ():
105
137
for obs_key_celltype in OBS_CLUSTERS :
106
138
if obs_key_celltype in obs_key :
107
- if adata .obs [obs_key ].dtype == np .int32 :
139
+ if (
140
+ adata .obs [obs_key ].dtype == np .int32
141
+ or adata .obs [obs_key ].dtype == np .int64
142
+ ):
108
143
adata .obs [obs_key ] = pd .Categorical (adata .obs [obs_key ])
109
144
return adata
110
145
@@ -300,6 +335,7 @@ def create_qc_plots(adata, atlas_path, atlas_info, args) -> None:
300
335
"""
301
336
atlas_name = atlas_info [0 ]
302
337
sc .settings .figdir = folders .get_workingdir (args .path )
338
+ sc .set_figure_params (dpi_save = 80 )
303
339
qc_path = os .sep + atlas_name + checkatlas .QC_FIG_EXTENSION
304
340
logger .debug (f"Create QC violin plot for { atlas_name } " )
305
341
# mitochondrial genes
@@ -339,6 +375,7 @@ def create_umap_fig(adata, atlas_path, atlas_info, args) -> None:
339
375
:return:
340
376
"""
341
377
atlas_name = atlas_info [0 ]
378
+ sc .set_figure_params (dpi_save = 150 )
342
379
# Search if tsne reduction exists
343
380
r = re .compile (".*umap*." )
344
381
if len (list (filter (r .match , adata .obsm_keys ()))) > 0 :
@@ -366,6 +403,7 @@ def create_tsne_fig(adata, atlas_path, atlas_info, args) -> None:
366
403
"""
367
404
# Search if tsne reduction exists
368
405
atlas_name = atlas_info [0 ]
406
+ sc .set_figure_params (dpi_save = 150 )
369
407
r = re .compile (".*tsne*." )
370
408
if len (list (filter (r .match , adata .obsm_keys ()))) > 0 :
371
409
logger .debug (f"Create t-SNE figure for { atlas_name } " )
@@ -384,10 +422,11 @@ def create_tsne_fig(adata, atlas_path, atlas_info, args) -> None:
384
422
385
423
def metric_cluster (adata , atlas_path , atlas_info , args ) -> None :
386
424
"""
387
- Main function of checkatlas
388
- For every atlas create summary tables with all attributes of the atlas
389
- Calc UMAP, tSNE, andd all metrics
425
+ Calc clustering metrics
426
+ :param adata:
390
427
:param atlas_path:
428
+ :param atlas_info:
429
+ :param args:
391
430
:return:
392
431
"""
393
432
atlas_name = atlas_info [0 ]
@@ -398,16 +437,22 @@ def metric_cluster(adata, atlas_path, atlas_info, args) -> None:
398
437
header = ["Sample" , "obs" ] + args .metric_cluster
399
438
df_cluster = pd .DataFrame (columns = header )
400
439
obs_keys = get_viable_obs_annot (adata , args )
440
+ obsm_key_representation = "X_umap"
401
441
if len (obs_keys ) > 0 :
402
442
logger .debug (f"Calc clustering metrics for { atlas_name } " )
403
443
else :
404
444
logger .debug (f"No viable obs_key was found for { atlas_name } " )
405
445
for obs_key in obs_keys :
406
446
dict_line = {"Sample" : [atlas_name + "_" + obs_key ], "obs" : [obs_key ]}
407
447
for metric in args .metric_cluster :
408
- logger .debug (f"Calc { metric } for { atlas_name } with obs { obs_key } " )
448
+ logger .debug (
449
+ f"Calc { metric } for { atlas_name } "
450
+ f"with obs { obs_key } and obsm { obsm_key_representation } "
451
+ )
452
+ annotation = adata .obs [obs_key ]
453
+ count_representation = adata .obsm [obsm_key_representation ]
409
454
metric_value = metrics .calc_metric_cluster (
410
- metric , adata , obs_key , "X_umap"
455
+ metric , count_representation , annotation
411
456
)
412
457
dict_line [metric ] = metric_value
413
458
df_line = pd .DataFrame (dict_line )
@@ -420,10 +465,11 @@ def metric_cluster(adata, atlas_path, atlas_info, args) -> None:
420
465
421
466
def metric_annot (adata , atlas_path , atlas_info , args ) -> None :
422
467
"""
423
- Main function of checkatlas
424
- For every atlas create summary tables with all attributes of the atlas
425
- Calc UMAP, tSNE, and all metrics
468
+ Calc annotation metrics
469
+ :param adata:
426
470
:param atlas_path:
471
+ :param atlas_info:
472
+ :param args:
427
473
:return:
428
474
"""
429
475
atlas_name = atlas_info [0 ]
@@ -449,10 +495,13 @@ def metric_annot(adata, atlas_path, atlas_info, args) -> None:
449
495
}
450
496
for metric in args .metric_annot :
451
497
logger .debug (
452
- f"Calc { metric } for { atlas_name } with obs { obs_key } vs ref_obs { ref_obs } "
498
+ f"Calc { metric } for { atlas_name } "
499
+ f"with obs { obs_key } vs ref_obs { ref_obs } "
453
500
)
501
+ annotation = adata .obs [obs_key ]
502
+ ref_annotation = adata .obs [ref_obs ]
454
503
metric_value = metrics .calc_metric_annot (
455
- metric , adata , obs_key , ref_obs
504
+ metric , annotation , ref_annotation
456
505
)
457
506
dict_line [metric ] = metric_value
458
507
df_line = pd .DataFrame (dict_line )
@@ -465,10 +514,11 @@ def metric_annot(adata, atlas_path, atlas_info, args) -> None:
465
514
466
515
def metric_dimred (adata , atlas_path , atlas_info , args ) -> None :
467
516
"""
468
- Main function of checkatlas
469
- For every atlas create summary tables with all attributes of the atlas
470
- Calc UMAP, tSNE, andd all metrics
517
+ Calc dimensionality reduction metrics
518
+ :param adata:
471
519
:param atlas_path:
520
+ :param atlas_info:
521
+ :param args:
472
522
:return:
473
523
"""
474
524
atlas_name = atlas_info [0 ]
@@ -492,7 +542,11 @@ def metric_dimred(adata, atlas_path, atlas_info, args) -> None:
492
542
logger .debug (
493
543
f"Calc { metric } for { atlas_name } with obsm { obsm_key } "
494
544
)
495
- metric_value = metrics .calc_metric_dimred (metric , adata , obsm_key )
545
+ high_dim_counts = adata .X
546
+ low_dim_counts = adata .obsm [obsm_key ]
547
+ metric_value = metrics .calc_metric_dimred (
548
+ metric , high_dim_counts , low_dim_counts
549
+ )
496
550
dict_line [metric ] = metric_value
497
551
df_line = pd .DataFrame (dict_line )
498
552
df_dimred = pd .concat ([df_dimred , df_line ], ignore_index = True , axis = 0 )
0 commit comments