import numpy as np
import torch
from step3_config import get_optimizer, get_transforms

from dance import logger
from dance.datasets.singlemodality import ImputationDataset
from dance.modules.single_modality.imputation.deepimpute import DeepImpute
from dance.registry import DotDict
from dance.transforms.misc import Compose, SetConfig
from dance.utils import set_seed

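# Preprocessing steps handed to get_transforms to build each trial's pipeline.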
fun_list = ["filter_gene_by_count", "filter_cell_by_count", "log1p", "gene_hold_out", "cell_wise_mask_data"]
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")


def objective(trial):
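    """Evaluate one hyperparameter trial and return its mean imputation RMSE."""
    # Model and training hyperparameters held fixed for every trial; the trial only controls the preprocessing transforms.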
    parameters_dict = {
        'dropout': 0.1,
        'lr': 1e-5,
        'n_epochs': 5,
        'batch_size': 64,
        'sub_outputdim': 512,
        'hidden_dim': 256,
        'patience': 20,
        'min_cells': 0.05,
        "n_top": 5,
        "train_size": 0.9,
        "mask_rate": 0.1,
        "cache": False,
        "mask": True,  # Avoid duplication with the hyperparameter-tuning pipeline; usually there is none.
        "seed": 0,
        "num_runs": 1
    }
    parameters_config = DotDict(parameters_dict)
    rmses = []
    for seed in range(parameters_config.seed, parameters_config.seed + parameters_config.num_runs):
        set_seed(seed)
        dataset = "mouse_brain_data"
        data_dir = "./test_automl/data"
        dataloader = ImputationDataset(data_dir=data_dir, dataset=dataset, train_size=parameters_config.train_size)
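        # DeepImpute's default preprocessing pipeline, kept for reference; the tuned transforms below are used instead.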
        # preprocessing_pipeline = DeepImpute.preprocessing_pipeline(min_cells=parameters_config.min_cells, n_top=parameters_config.n_top,
        #                                                            sub_outputdim=parameters_config.sub_outputdim, mask=parameters_config.mask,
        #                                                            seed=seed, mask_rate=parameters_config.mask_rate)
        transforms = get_transforms(trial=trial, fun_list=fun_list, set_data_config=False, save_raw=True)
        if transforms is None:
            logger.warning("skip transforms")
            return {"scores": 0}
        transforms.append(
            SetConfig({
                "feature_channel": [None, None, "targets", "predictors", "train_mask"],
                "feature_channel_type": ["X", "raw_X", "uns", "uns", "layers"],
                "label_channel": [None, None],
                "label_channel_type": ["X", "raw_X"],
            }))
        preprocessing_pipeline = Compose(*transforms, log_level="INFO")
        data = dataloader.load_data(transform=preprocessing_pipeline, cache=parameters_config.cache)

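        # Pull out the channels configured by SetConfig: features, raw counts, targets, predictors and the training mask.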
        if parameters_config.mask:
            X, X_raw, targets, predictors, mask = data.get_x(return_type="default")
        else:
            mask = None
            X, X_raw, targets, predictors = data.get_x(return_type="default")
        X = torch.tensor(X.toarray()).float()
        X_raw = torch.tensor(X_raw.toarray()).float()
        X_train = X * mask if mask is not None else X
        model = DeepImpute(predictors, targets, dataset, parameters_config.sub_outputdim, parameters_config.hidden_dim,
                           parameters_config.dropout, seed, 1)

        model.fit(X_train, X_train, mask, parameters_config.batch_size, parameters_config.lr,
                  parameters_config.n_epochs, parameters_config.patience)
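        # Impute the expression matrix and compute RMSE against the original values.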
        imputed_data = model.predict(X_train, mask)
        score = model.score(X, imputed_data, mask, metric='RMSE')
        print("RMSE: %.4f" % score)
        rmses.append(score)

    print('deepimpute')
    print(f'rmses: {rmses}')
    print(f'rmse: {np.mean(rmses)} +/- {np.std(rmses)}')
    return {"scores": np.mean(rmses)}


if __name__ == "__main__":
    start_optimizer = get_optimizer(project="step3-imputation-deepimpute-project", objective=objective, n_trials=10,
                                    direction="minimize")
    start_optimizer()