We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
2 parents 068fc48 + 2ba4ae8 commit 06652c1 Copy full SHA for 06652c1
examples/openllama-3b/config.yml
@@ -26,17 +26,18 @@ wandb_watch:
26
wandb_run_id:
27
wandb_log_model:
28
output_dir: ./openllama-out
29
-batch_size: 16
30
-micro_batch_size: 4
+gradient_accumulation_steps: 1
+micro_batch_size: 1
31
num_epochs: 3
32
optimizer: adamw_bnb_8bit
33
torchdistx_path:
34
lr_scheduler: cosine
35
-learning_rate: 0.0002
+learning_rate: 0.00001
36
train_on_inputs: false
37
group_by_length: false
38
+float16: true
39
bf16: false
-fp16: true
40
+fp16: false
41
tf32: false
42
gradient_checkpointing: true
43
early_stopping_patience:
@@ -52,7 +53,7 @@ eval_steps: 50
52
53
save_steps:
54
debug:
55
deepspeed:
-weight_decay: 0.0
56
+weight_decay: 0.1
57
fsdp:
58
fsdp_config:
59
special_tokens:
0 commit comments