Skip to content

Commit c86cfbb

Browse files
committed
update step3 imputation
1 parent 6d41415 commit c86cfbb

File tree

3 files changed

+99
-5
lines changed

3 files changed

+99
-5
lines changed

test_automl/step2_imputation_deepimpute.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def train(config):
5858
print('deepimpute')
5959
print(f'rmses: {rmses}')
6060
print(f'rmses: {np.mean(rmses)} +/- {np.std(rmses)}')
61-
return ({"rmses": np.mean(rmses)})
61+
return ({"scores": np.mean(rmses)})
6262

6363

6464
def startSweep(parameters_dict) -> Tuple[Dict[str, Any], Callable[..., Any]]:
@@ -111,7 +111,7 @@ def startSweep(parameters_dict) -> Tuple[Dict[str, Any], Callable[..., Any]]:
111111
})
112112
sweep_config = {'method': 'grid'}
113113
sweep_config['parameters'] = parameters_dict
114-
metric = {'name': 'rmses', 'goal': 'minimize'}
114+
metric = {'name': 'scores', 'goal': 'minimize'}
115115

116116
sweep_config['metric'] = metric
117117
return sweep_config, train #Return function configuration and training function

test_automl/step3_config.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,14 @@
33

44
import optuna
55
import scanpy as sc
6-
import wandb
76
from fun2code import fun2code_dict
87
from optuna.integration.wandb import WeightsAndBiasesCallback
98
from step2_config import pipline2fun_dict
109

10+
import wandb
1111
from dance.transforms.cell_feature import CellPCA, CellSVD, WeightedFeaturePCA
1212
from dance.transforms.filter import FilterGenesPercentile, FilterGenesRegression
13+
from dance.transforms.gene_holdout import GeneHoldout
1314
from dance.transforms.interface import AnnDataTransform
1415
from dance.transforms.mask import CellwiseMaskData, MaskData
1516
from dance.transforms.misc import Compose, SetConfig
@@ -144,6 +145,12 @@ def mask_data(method_name: str, trial: optuna.Trial):
144145
return MaskData(mask_rate=trial.suggest_float(method_name + "mask_rate", 0.01, 0.5))
145146

146147

148+
@set_method_name
def gene_hold_out(method_name: str, trial: optuna.Trial):
    """Build a ``GeneHoldout`` transform from trial-suggested hyperparameters.

    Two values are requested from ``trial``, each under a parameter name
    formed by prefixing ``method_name``:

    * ``n_top`` -- an integer in the range 1 to 10.
    * ``batch_size`` -- one of 256, 512 or 1024.

    Parameters
    ----------
    method_name
        Prefix for the Optuna parameter names (how it is supplied depends on
        the ``set_method_name`` decorator, which is defined elsewhere).
    trial
        The Optuna trial the hyperparameters are sampled from.

    Returns
    -------
    GeneHoldout
        The transform configured with the sampled values.

    """
    # Keep the suggestion order (n_top first, then batch_size) stable so the
    # sampler is queried in the same sequence on every call.
    top_gene_count = trial.suggest_int(method_name + "n_top", 1, 10)
    holdout_batch_size = trial.suggest_categorical(method_name + "batch_size", [256, 512, 1024])
    return GeneHoldout(n_top=top_gene_count, batch_size=holdout_batch_size)
152+
153+
147154
# # Get all functions defined in the current file
148155
# functions = [(name,obj) for name, obj in inspect.getmembers(
149156
# sys.modules[__name__]) if inspect.isfunction(obj)]
@@ -197,13 +204,13 @@ def wrapper(*args, **kwargs):
197204
return decorator
198205

199206

200-
def get_optimizer(project, objective, n_trials=2):
207+
def get_optimizer(project, objective, n_trials=2, direction="maximize"):
201208
"""Get optimizer."""
202209
wandb_kwargs = {"project": project}
203210
wandbc = WeightsAndBiasesCallback(wandb_kwargs=wandb_kwargs, as_multirun=True)
204211
decorator = log_in_wandb(wandbc)
205212
decorator_function = decorator(objective)
206-
study = optuna.create_study()
213+
study = optuna.create_study(direction=direction)
207214

208215
def wrapper():
209216
study.optimize(decorator_function, n_trials=n_trials, callbacks=[wandbc])
+87
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
import torch
2+
from step3_config import get_optimizer, get_transforms
3+
4+
from dance import logger
5+
from dance.datasets.singlemodality import ImputationDataset
6+
from dance.modules.single_modality.imputation.deepimpute import DeepImpute
7+
from dance.registry import DotDict
8+
from dance.transforms.misc import Compose, SetConfig
9+
from dance.utils import set_seed
10+
11+
# Names of the preprocessing steps handed to ``get_transforms`` for tuning.
fun_list = [
    "filter_gene_by_count",
    "filter_cell_by_count",
    "log1p",
    "gene_hold_out",
    "cell_wise_mask_data",
]

# Use the second CUDA device when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:1")
else:
    device = torch.device("cpu")
13+
import numpy as np
14+
15+
16+
def objective(trial):
    """Run one Optuna trial: build a tuned preprocessing pipeline and score DeepImpute.

    For every seed in ``[seed, seed + num_runs)`` the function loads the
    ``mouse_brain_data`` imputation dataset, asks ``get_transforms`` for a
    preprocessing pipeline sampled from ``trial``, trains a ``DeepImpute``
    model on the masked data and records the RMSE reported by ``model.score``.

    Parameters
    ----------
    trial
        The Optuna trial that ``get_transforms`` samples the pipeline from.

    Returns
    -------
    dict
        ``{"scores": <mean RMSE over all runs>}``. If ``get_transforms``
        returns ``None`` the trial is skipped and ``{"scores": inf}`` is
        returned, so the skipped trial can never be ranked as the best one by
        a study that minimises the score.

    """
    parameters_config = DotDict({
        'dropout': 0.1,
        'lr': 1e-5,
        'n_epochs': 5,
        'batch_size': 64,
        'sub_outputdim': 512,
        'hidden_dim': 256,
        'patience': 20,
        'min_cells': 0.05,
        "n_top": 5,
        "train_size": 0.9,
        "mask_rate": 0.1,
        "cache": False,
        # Avoids duplicating a step already present in the tuned pipeline;
        # normally this does not happen.
        "mask": True,
        "seed": 0,
        "num_runs": 1,
    })

    # Loop-invariant settings, hoisted out of the per-seed loop.
    dataset = "mouse_brain_data"
    data_dir = "./test_automl/data"

    rmses = []
    for seed in range(parameters_config.seed, parameters_config.seed + parameters_config.num_runs):
        set_seed(seed)
        dataloader = ImputationDataset(data_dir=data_dir, dataset=dataset, train_size=parameters_config.train_size)

        transforms = get_transforms(trial=trial, fun_list=fun_list, set_data_config=False, save_raw=True)
        if transforms is None:
            logger.warning("skip transforms")
            # The score is an RMSE that the study minimises, so a skipped
            # trial must report the worst possible value; 0 would make it
            # look like a perfect result.
            return {"scores": float("inf")}

        transforms.append(
            SetConfig({
                "feature_channel": [None, None, "targets", "predictors", "train_mask"],
                "feature_channel_type": ["X", "raw_X", "uns", "uns", "layers"],
                "label_channel": [None, None],
                "label_channel_type": ["X", "raw_X"],
            }))
        preprocessing_pipeline = Compose(*transforms, log_level="INFO")
        data = dataloader.load_data(transform=preprocessing_pipeline, cache=parameters_config.cache)

        if parameters_config.mask:
            X, X_raw, targets, predictors, mask = data.get_x(return_type="default")
        else:
            # NOTE(review): the SetConfig above always requests the
            # "train_mask" channel, so this 4-way unpacking presumably needs
            # a matching channel config before mask=False can work -- confirm.
            mask = None
            X, X_raw, targets, predictors = data.get_x(return_type="default")
        X = torch.tensor(X.toarray()).float()
        X_raw = torch.tensor(X_raw.toarray()).float()
        # Without a mask there is nothing to hide; multiplying by None would
        # raise a TypeError.
        X_train = X if mask is None else X * mask

        # The trailing positional 1 is passed through unchanged
        # (presumably the GPU index -- TODO confirm against DeepImpute).
        model = DeepImpute(predictors, targets, dataset, parameters_config.sub_outputdim,
                           parameters_config.hidden_dim, parameters_config.dropout, seed, 1)

        model.fit(X_train, X_train, mask, parameters_config.batch_size, parameters_config.lr,
                  parameters_config.n_epochs, parameters_config.patience)
        imputed_data = model.predict(X_train, mask)
        score = model.score(X, imputed_data, mask, metric='RMSE')
        print("RMSE: %.4f" % score)
        rmses.append(score)

    print('deepimpute')
    print(f'rmses: {rmses}')
    print(f'rmses: {np.mean(rmses)} +/- {np.std(rmses)}')
    return {"scores": np.mean(rmses)}
82+
83+
84+
if __name__ == "__main__":
    # Build the study runner for the DeepImpute step-3 search (10 trials,
    # minimising the returned score), then start it.
    run_study = get_optimizer(
        project="step3-imputation-deepimpute-project",
        objective=objective,
        n_trials=10,
        direction="minimize",
    )
    run_study()

0 commit comments

Comments
 (0)