# Hyperparameters for the fine-tuning run. `TrainingArguments` and
# `BATCH_SIZE` are defined elsewhere in this file/notebook.
training_args = TrainingArguments(
    # Directory the trainer writes its outputs to.
    output_dir='t5-small-newsela-biendata-with-domain-adaptation',
    num_train_epochs=20,
    warmup_steps=250,
    per_device_train_batch_size=BATCH_SIZE,
    weight_decay=0.01,
    learning_rate=2e-4,
    # NOTE(review): the output_dir name suggests a T5 checkpoint; T5 models
    # are reported to be numerically unstable under fp16 mixed precision.
    # If the loss looks abnormal, try bf16 (hardware permitting) or full
    # precision -- confirm against the actual model being trained.
    fp16=True,
    optim="adafactor",
)
Step Training Loss | |
500 35.466600 | |
1000 25.795400 | |
1500 10.923200 | |
2000 4.515500 | |
TrainOutput(global_step=2320, training_loss=16.92537920721646, metrics={'train_runtime': 628.0033, 'train_samples_per_second': 472.418, 'train_steps_per_second': 3.694, 'total_flos': 0.0, 'train_loss': 16.92537920721646, 'epoch': 20.0}) |