7
7
import numpy as np
8
8
import pandas as pd
9
9
import yaml
10
+ from tqdm import tqdm
10
11
11
- sys .path .append (".." )
12
+ from dance .settings import DANCEDIR , SIMILARITYDIR
13
+
14
+ sys .path .append (str (DANCEDIR ))
12
15
import ast
13
16
14
17
from get_result_web import get_sweep_url , spilt_web
15
18
16
19
from dance import logger
20
+ from dance .settings import entity , project
17
21
from dance .utils import try_import
18
22
19
23
file_root = str (Path (__file__ ).resolve ().parent .parent )
@@ -70,8 +74,6 @@ def is_match(config_str):
70
74
71
75
72
76
wandb = try_import ("wandb" )
73
- entity = "xzy11632"
74
- project = "dance-dev"
75
77
76
78
77
79
def is_matching_dict (yaml_str , target_dict ):
@@ -156,18 +158,20 @@ def get_ans_from_cache(query_dataset, method):
156
158
# Get best method from step2 of atlas datasets
157
159
# Search accuracy according to best method (all values should exist)
158
160
ans = pd .DataFrame (index = [method ], columns = [f"{ atlas_dataset } _from_cache" for atlas_dataset in atlas_datasets ])
159
-
160
- sweep_url = re .search (r"step2:([^|]+)" ,
161
- conf_data [conf_data ["dataset_id" ] == query_dataset ][method ].iloc [0 ]).group (1 )
161
+ step_str = conf_data [conf_data ["dataset_id" ] == query_dataset ][method ].iloc [0 ]
162
+ if pd .isna (step_str ):
163
+ logger .warning (f"{ query_dataset } is nan in { method } " )
164
+ return ans
165
+ sweep_url = re .search (r"step2:([^|]+)" , step_str ).group (1 )
162
166
_ , _ , sweep_id = spilt_web (sweep_url )
163
167
sweep = wandb .Api ().sweep (f"{ entity } /{ project } /{ sweep_id } " )
164
-
165
- for atlas_dataset in atlas_datasets :
166
- best_yaml = conf_data [conf_data ["dataset_id" ] == atlas_dataset ][f"{ method } _best_yaml " ].iloc [0 ]
168
+ runs = sweep . runs
169
+ for atlas_dataset in tqdm ( atlas_datasets ) :
170
+ best_yaml = conf_data [conf_data ["dataset_id" ] == atlas_dataset ][f"{ method } _step2_best_yaml " ].iloc [0 ]
167
171
match_run = None
168
172
169
173
# Find matching run configuration
170
- for run in sweep . runs :
174
+ for run in tqdm ( runs , leave = False ) :
171
175
if isinstance (best_yaml , float ) and np .isnan (best_yaml ):
172
176
continue
173
177
if is_matching_dict (best_yaml , run .config ):
@@ -188,7 +192,7 @@ def get_ans_from_cache(query_dataset, method):
188
192
parser = argparse .ArgumentParser (formatter_class = argparse .ArgumentDefaultsHelpFormatter )
189
193
parser .add_argument ("--methods" , default = ["cta_actinn" , "cta_celltypist" , "cta_scdeepsort" , "cta_singlecellnet" ],
190
194
nargs = "+" )
191
- parser .add_argument ("--tissue" , type = str , default = "blood " )
195
+ parser .add_argument ("--tissue" , type = str , default = "pancreas " )
192
196
args = parser .parse_args ()
193
197
methods = args .methods
194
198
tissue = args .tissue
@@ -208,7 +212,7 @@ def get_ans_from_cache(query_dataset, method):
208
212
# "738942eb-ac72-44ff-a64b-8943b5ecd8d9", "a5d95a42-0137-496f-8a60-101e17f263c8",
209
213
# "71be997d-ff75-41b9-8a9f-1288c865f921"
210
214
# ]
211
- conf_data = pd .read_excel (" Cell Type Annotation Atlas.xlsx" , sheet_name = tissue )
215
+ conf_data = pd .read_excel (SIMILARITYDIR / "data/ Cell Type Annotation Atlas.xlsx" , sheet_name = tissue )
212
216
# conf_data = pd.read_csv(f"results/{tissue}_result.csv", index_col=0)
213
217
atlas_datasets = list (conf_data [conf_data ["queryed" ] == False ]["dataset_id" ])
214
218
query_datasets = list (conf_data [conf_data ["queryed" ] == True ]["dataset_id" ])
@@ -219,8 +223,9 @@ def get_ans_from_cache(query_dataset, method):
219
223
ans .append (get_ans_from_cache (query_dataset , method ))
220
224
ans = pd .concat (ans )
221
225
ans_all [query_dataset ] = ans
222
- for k , v in ans_all .items ():
223
- file_path = f"in_atlas_datas/{ tissue } /{ str (methods )} _{ k } _in_atlas.csv"
226
+ print (query_dataset )
227
+ # for k, v in ans_all.items():
228
+ file_path = SIMILARITYDIR / f"data/in_atlas_datas/{ tissue } /{ str (methods )} _{ query_dataset } _in_atlas.csv"
224
229
folder_path = Path (file_path ).parent
225
230
folder_path .mkdir (parents = True , exist_ok = True )
226
- v .to_csv (file_path )
231
+ ans .to_csv (file_path )
0 commit comments