# dvc.yaml
# Source listing: 100 lines (95 loc), 2.95 KB.
# DVC pipeline definition.
#
# Four stages, chained through the files they read (deps) and write (outs):
#   data_cleaning -> feature_engineering -> model_training
#                                        \-> prediction
#
# `params` entries carry no file prefix, so DVC looks them up in its default
# parameters file (params.yaml unless configured otherwise).
stages:
  # Runs src/data_preprocessing.py on the two raw CSVs and writes the
  # interim train/test/validation CSVs.
  data_cleaning:
    cmd: python src/data_preprocessing.py
    deps:
      - src/data_preprocessing.py
      - data/raw/Dev_data_to_be_shared.csv
      - data/raw/validation_data_to_be_shared.csv
    params:
      - experiment.EXPERIMENT_NAME
      - data_cleaning.ENCODING_TECHNIQUE
      - data_cleaning.DEV_DATA_PATH
      - data_cleaning.VAL_DATA_PATH
      - data_cleaning.NULL_PERCENTAGE
      - data_cleaning.VARIANCE_THRESHOLD
      - data_cleaning.IMPUTATION_TECHNIQUE
      - data_cleaning.KNN_IMPUTER_N_NEIGHBORS
      - data_cleaning.TEST_SIZE
      - data_cleaning.RANDOM_STATE
    outs:
      - data/interim/train.csv
      - data/interim/test.csv
      - data/interim/validation.csv

  # Runs src/feature_engineering.py on the interim CSVs and writes the
  # processed train/test/val CSVs.
  feature_engineering:
    cmd: python src/feature_engineering.py
    deps:
      - src/feature_engineering.py
      - data/interim/train.csv
      - data/interim/test.csv
      - data/interim/validation.csv
    params:
      - experiment.EXPERIMENT_NAME
      - feature_engineering.FEATURE_SELECTION_TECHNIQUE
      - feature_engineering.ANOVA_K
      - feature_engineering.SFS_FORWARD
      - feature_engineering.N_ESTIMATORS
      - feature_engineering.MAX_DEPTH
      - feature_engineering.MIN_SAMPLES_SPLIT
      - feature_engineering.MIN_SAMPLES_LEAF
      - feature_engineering.MAX_FEATURES
      - feature_engineering.RANDOM_STATE
      - feature_engineering.NO_FEATURES
      - feature_engineering.FEATURE_IMPORTANCE_TECHNIQUE
    outs:
      - data/processed/train.csv
      - data/processed/test.csv
      - data/processed/val.csv

  # Runs src/Model_Building.py on the processed CSVs.
  # NOTE(review): this stage declares no `outs`, so whatever model artifacts
  # the script writes are not tracked by DVC. Presumably they land at the
  # paths named by prediction.XGB_MODEL_PATH / prediction.MLP_MODEL_PATH;
  # confirm and list them under `outs` here.
  model_training:
    cmd: python src/Model_Building.py
    deps:
      - src/Model_Building.py
      - data/processed/train.csv
      - data/processed/test.csv
      - data/processed/val.csv
    params:
      - experiment.EXPERIMENT_NAME
      - model_training.MODEL
      - model_training.MAX_DEPTH
      - model_training.OBJECTIVE
      - model_training.EVAL_METRIC
      - model_training.ETA
      - model_training.SUB_SAMPLE
      - model_training.COL_SAMPLE_BY_TREE
      - model_training.MIN_CHILD_WEIGHT
      - model_training.GAMMA_XGB
      - model_training.LAMBDA
      - model_training.ALPHA
      - model_training.SCALE_POS_WEIGHT
      - model_training.N_ESTIMATORS
      - model_training.TREE_METHOD
      - model_training.DEVICE
      - model_training.SEED
      - model_training.N_TRIALS
      - model_training.TRIALS
      - model_training.NO_LAYERS
      - model_training.HIDDEN_DIMS
      - model_training.DROPOUT_RATE
      - model_training.LEARNING_RATE
      - model_training.WEIGHT_DECAY
      - model_training.BATCH_SIZE
      - model_training.N_EPOCHS
      - model_training.OPTIMIZER

  # Runs src/prediction.py on the processed validation CSV and writes
  # prediction.csv.
  # NOTE(review): no dependency on model_training's artifacts is declared, so
  # retraining alone will not cause `dvc repro` to re-run this stage. Add the
  # model files to `deps` once their paths are confirmed.
  # NOTE(review): the tracked output is hard-coded as prediction.csv, while
  # prediction.SUBMISSION_FILE_PATH is also a parameter; verify they agree.
  prediction:
    cmd: python src/prediction.py
    deps:
      - src/prediction.py
      - data/processed/val.csv
    params:
      - experiment.EXPERIMENT_NAME
      - prediction.DATA
      - prediction.MODEL
      - prediction.SUBMISSION_FILE_PATH
      - prediction.XGB_MODEL_PATH
      - prediction.MLP_MODEL_PATH
    outs:
      - prediction.csv