diff --git a/pretraining/roberta/pretraining.py b/pretraining/roberta/pretraining.py index a75b5184..6871f5c0 100644 --- a/pretraining/roberta/pretraining.py +++ b/pretraining/roberta/pretraining.py @@ -101,6 +101,7 @@ def mlm_acc(inputs): 'lr_schedule': lr_schedule, 'weight_decay_rate': weight_decay_rate, 'exclude_from_weight_decay': exclude_from_weight_decay, + 'bias_correction': False, } if grad_accum_steps > 1: OPT = extend_with_gradient_accumulation(OPT)