
Commit 0c2b37d

Commit message: update ans

1 parent: 1f242f4

17 files changed: +890 −659 lines

dance/atlas/sc_similarity/anndata_similarity.py (+1 −1)
@@ -288,7 +288,7 @@ def wasserstein_dist(self) -> float:
         b = np.ones((Y.shape[0], )) / Y.shape[0]
         M = ot.dist(X, Y, metric='euclidean')
         wasserstein_dist = ot.emd2(a, b, M)
-        return 1 / 1 + wasserstein_dist
+        return 1 / (1 + wasserstein_dist)
 
     def get_Hausdorff(self):
         X = self.X
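
The one-line fix matters because of operator precedence: 1 / 1 + wasserstein_dist evaluates as (1 / 1) + wasserstein_dist, i.e. 1 + wasserstein_dist, a value that grows with distance, while the intended similarity 1 / (1 + wasserstein_dist) shrinks toward 0 as the distributions diverge and equals 1 when they coincide. A minimal self-contained sketch of the corrected score, on toy data with assumed shapes:

import numpy as np
import ot  # POT: Python Optimal Transport

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 10))               # toy point cloud standing in for self.X
Y = rng.normal(size=(60, 10))               # toy point cloud standing in for Y
a = np.ones((X.shape[0], )) / X.shape[0]    # uniform weights on X
b = np.ones((Y.shape[0], )) / Y.shape[0]    # uniform weights on Y
M = ot.dist(X, Y, metric='euclidean')       # pairwise cost matrix
d = ot.emd2(a, b, M)                        # exact Wasserstein distance
similarity = 1 / (1 + d)                    # in (0, 1]; larger means more similar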

examples/atlas/__init__.py

Whitespace-only changes.

examples/atlas/sc_similarity_examples/cal_w1_w2.py (−165)

This file was deleted.

New file (+18):
@@ -0,0 +1,18 @@
+tissues = ["blood", "brain", "heart", "intestine", "kidney", "lung", "pancreas"]
+import pandas as pd
+
+from dance.settings import ATLASDIR, SIMILARITYDIR
+
+if __name__ == "__main__":
+    for tissue in tissues:
+        metadata_df = pd.read_csv(ATLASDIR / f"metadatas/{tissue}_metadata.csv")
+        sweep_result_df = pd.read_csv(ATLASDIR / f"sweep_results/{tissue.capitalize()}_ans.csv")
+        sweep_result_df = sweep_result_df.rename(columns={"Dataset_id": "dataset_id"})
+        sweep_result_df["dataset_id"] = sweep_result_df["dataset_id"].str.split('(').str[0]
+        result_df = metadata_df.merge(sweep_result_df, how="outer", on="dataset_id")
+        # result_df.to_csv(SIMILARITYDIR / f"data/results/{tissue}_result.csv")
+        # for tissue in tissues:
+        #     df=pd.read_csv(SIMILARITYDIR / f"data/results/{tissue}_result.csv")
+        with pd.ExcelWriter(SIMILARITYDIR / "data/Cell Type Annotation Atlas.xlsx", mode='a',
+                            if_sheet_exists='replace') as writer:
+            result_df.to_excel(writer, sheet_name=tissue)
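
For context on the merge logic above, a standalone sketch with hypothetical toy frames: the str.split('(').str[0] step strips a parenthesized suffix from sweep result ids, and how="outer" keeps datasets that appear in only one of the two sources (their missing columns become NaN). Note also that mode='a' with if_sheet_exists='replace' in pd.ExcelWriter needs the openpyxl engine and an existing workbook.

import pandas as pd

# hypothetical stand-ins for metadata_df and sweep_result_df
metadata_df = pd.DataFrame({"dataset_id": ["d1", "d2"], "organ": ["blood", "blood"]})
sweep_df = pd.DataFrame({"dataset_id": ["d1(run3)", "d3"], "acc": [0.91, 0.87]})

sweep_df["dataset_id"] = sweep_df["dataset_id"].str.split('(').str[0]  # "d1(run3)" -> "d1"
merged = metadata_df.merge(sweep_df, how="outer", on="dataset_id")     # d2 and d3 both survive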

examples/atlas/sc_similarity_examples/process_similarity.py (−79)

This file was deleted.

New file (+8):
@@ -0,0 +1,8 @@
+[scGPT->metadatas]+[get_result_web->sweep_results]+[data_processing/merge_result_metadata.py]->[data/cell_type_annotation_atlas.xlsx]
+[data/cell_type_annotation_atlas.xlsx]+[similarity/analyze_atlas_accuracy.py]->[data/in_atlas_datas]
+[similarity/example_usage_anndata.py]+[data/in_atlas_datas]+[data/cell_type_annotation_atlas.xlsx]->[data/dataset_similarity]
+[data/dataset_similarity]+[similarity/process_tissue_similarity_matrices.py]->[data/new_sim]
+
+#run_similarity_optimization.sh
+[data/new_sim]+[similarity/optimize_similarity_weights.py]+[cache/sweep_cache.json]->[data/similarity_weights_results]
+[data/similarity_weights_results]+[similarity/visualize_atlas_performance.py]+[cache/sweep_cache.json]->[data/imgs]
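
A reading note, not part of the committed file: each line is a data-flow rule of the form [inputs]+[script]->[output], where everything left of -> is consumed and the bracketed item on the right is the artifact produced. The first line, for example, says merge_result_metadata.py combines the scGPT-derived metadata with the get_result_web sweep results to produce data/cell_type_annotation_atlas.xlsx; the two rules under #run_similarity_optimization.sh are the steps that script automates.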
New file (+23):

@@ -0,0 +1,23 @@
+#!/bin/bash
+
+# Define the tissue array
+array=("blood" "brain" "heart" "intestine" "kidney" "lung" "pancreas")
+# Loop over the array and run the Python scripts in the background
+for tissue in "${array[@]}"
+do
+    python similarity/example_usage_anndata.py --tissue "$tissue"
+    # python similarity/optimize_similarity_weights.py --tissue "$tissue"
+    # python visualization/visualize_atlas_performance.py --tissue "$tissue"
+    # python similarity/optimize_similarity_weights.py --tissue "$tissue" --in_query
+    # python visualization/visualize_atlas_performance.py --tissue "$tissue" --in_query
+    # python similarity/optimize_similarity_weights.py --tissue "$tissue" --reduce_error
+    # python visualization/visualize_atlas_performance.py --tissue "$tissue" --reduce_error
+    # python similarity/optimize_similarity_weights.py --tissue "$tissue" --in_query --reduce_error
+    # python visualization/visualize_atlas_performance.py --tissue "$tissue" --in_query --reduce_error
+    echo "Started processing tissue: $tissue"
+done
+
+# Wait for all background processes to finish
+wait
+
+echo "All Python scripts have finished"

examples/atlas/sc_similarity_examples/sim_query_atlas.py → examples/atlas/sc_similarity_examples/similarity/analyze_atlas_accuracy.py (renamed, +20 −15)
@@ -7,13 +7,17 @@
 import numpy as np
 import pandas as pd
 import yaml
+from tqdm import tqdm
 
-sys.path.append("..")
+from dance.settings import DANCEDIR, SIMILARITYDIR
+
+sys.path.append(str(DANCEDIR))
 import ast
 
 from get_result_web import get_sweep_url, spilt_web
 
 from dance import logger
+from dance.settings import entity, project
 from dance.utils import try_import
 
 file_root = str(Path(__file__).resolve().parent.parent)
@@ -70,8 +74,6 @@ def is_match(config_str):
 
 
 wandb = try_import("wandb")
-entity = "xzy11632"
-project = "dance-dev"
 
 
 def is_matching_dict(yaml_str, target_dict):
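
The removed hard-coded wandb identifiers now come from dance.settings, alongside the path constants imported at the top of the file. The real module defines all of these; a plausible sketch of the pattern, with the directory layout assumed and the wandb values taken from the lines the diff removed:

# hypothetical sketch of the relevant part of dance/settings.py
from pathlib import Path

DANCEDIR = Path(__file__).resolve().parents[1]          # repository root (assumed)
ATLASDIR = DANCEDIR / "examples" / "atlas"              # assumed
SIMILARITYDIR = ATLASDIR / "sc_similarity_examples"     # assumed

entity = "xzy11632"     # wandb entity, moved out of analyze_atlas_accuracy.py
project = "dance-dev"   # wandb project, moved out of analyze_atlas_accuracy.py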
@@ -156,18 +158,20 @@ def get_ans_from_cache(query_dataset, method):
     # Get best method from step2 of atlas datasets
     # Search accuracy according to best method (all values should exist)
     ans = pd.DataFrame(index=[method], columns=[f"{atlas_dataset}_from_cache" for atlas_dataset in atlas_datasets])
-
-    sweep_url = re.search(r"step2:([^|]+)",
-                          conf_data[conf_data["dataset_id"] == query_dataset][method].iloc[0]).group(1)
+    step_str = conf_data[conf_data["dataset_id"] == query_dataset][method].iloc[0]
+    if pd.isna(step_str):
+        logger.warning(f"{query_dataset} is nan in {method}")
+        return ans
+    sweep_url = re.search(r"step2:([^|]+)", step_str).group(1)
     _, _, sweep_id = spilt_web(sweep_url)
     sweep = wandb.Api().sweep(f"{entity}/{project}/{sweep_id}")
-
-    for atlas_dataset in atlas_datasets:
-        best_yaml = conf_data[conf_data["dataset_id"] == atlas_dataset][f"{method}_best_yaml"].iloc[0]
+    runs = sweep.runs
+    for atlas_dataset in tqdm(atlas_datasets):
+        best_yaml = conf_data[conf_data["dataset_id"] == atlas_dataset][f"{method}_step2_best_yaml"].iloc[0]
         match_run = None
 
         # Find matching run configuration
-        for run in sweep.runs:
+        for run in tqdm(runs, leave=False):
             if isinstance(best_yaml, float) and np.isnan(best_yaml):
                 continue
             if is_matching_dict(best_yaml, run.config):
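
Two behavioral changes in this hunk are easy to miss: the pd.isna guard returns the all-NaN ans frame early when a query dataset has no recorded step2 sweep for the method (logging a warning instead of crashing in re.search on a NaN), and sweep.runs is bound once to runs so the atlas-dataset loop reuses a single fetched run list. A minimal standalone sketch of that reuse, with placeholder identifiers, assuming wandb credentials are configured:

import wandb

# placeholders standing in for values the script parses from the step2 URL
entity, project, sweep_id = "my-entity", "my-project", "abc123"

api = wandb.Api()
sweep = api.sweep(f"{entity}/{project}/{sweep_id}")
runs = list(sweep.runs)        # materialize once instead of per atlas dataset
for run in runs:
    print(run.id, run.config)  # run.config is what is_matching_dict compares to best_yaml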
@@ -188,7 +192,7 @@ def get_ans_from_cache(query_dataset, method):
 parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 parser.add_argument("--methods", default=["cta_actinn", "cta_celltypist", "cta_scdeepsort", "cta_singlecellnet"],
                     nargs="+")
-parser.add_argument("--tissue", type=str, default="blood")
+parser.add_argument("--tissue", type=str, default="pancreas")
 args = parser.parse_args()
 methods = args.methods
 tissue = args.tissue
@@ -208,7 +212,7 @@ def get_ans_from_cache(query_dataset, method):
 #     "738942eb-ac72-44ff-a64b-8943b5ecd8d9", "a5d95a42-0137-496f-8a60-101e17f263c8",
 #     "71be997d-ff75-41b9-8a9f-1288c865f921"
 # ]
-conf_data = pd.read_excel("Cell Type Annotation Atlas.xlsx", sheet_name=tissue)
+conf_data = pd.read_excel(SIMILARITYDIR / "data/Cell Type Annotation Atlas.xlsx", sheet_name=tissue)
 # conf_data = pd.read_csv(f"results/{tissue}_result.csv", index_col=0)
 atlas_datasets = list(conf_data[conf_data["queryed"] == False]["dataset_id"])
 query_datasets = list(conf_data[conf_data["queryed"] == True]["dataset_id"])
@@ -219,8 +223,9 @@ def get_ans_from_cache(query_dataset, method):
         ans.append(get_ans_from_cache(query_dataset, method))
     ans = pd.concat(ans)
     ans_all[query_dataset] = ans
-for k, v in ans_all.items():
-    file_path = f"in_atlas_datas/{tissue}/{str(methods)}_{k}_in_atlas.csv"
+    print(query_dataset)
+    # for k, v in ans_all.items():
+    file_path = SIMILARITYDIR / f"data/in_atlas_datas/{tissue}/{str(methods)}_{query_dataset}_in_atlas.csv"
     folder_path = Path(file_path).parent
     folder_path.mkdir(parents=True, exist_ok=True)
-    v.to_csv(file_path)
+    ans.to_csv(file_path)
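
With this change each query dataset's results are written as soon as they are assembled inside the loop, instead of being collected in ans_all and dumped at the end, so a failure partway through no longer discards finished queries. One side effect kept from before: str(methods) embeds the whole Python list literal in the filename. A hypothetical cleaner naming, not in the commit:

# hypothetical alternative, not part of the commit
method_tag = "-".join(methods)  # e.g. "cta_actinn-cta_celltypist"
file_path = SIMILARITYDIR / f"data/in_atlas_datas/{tissue}/{method_tag}_{query_dataset}_in_atlas.csv"
file_path.parent.mkdir(parents=True, exist_ok=True)
ans.to_csv(file_path)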
