# Training configuration for the fine-tuning run.
# In the run recorded below, 20 epochs came to 4640 global steps (232 per
# epoch), so the 250 warmup steps span slightly more than the first epoch.
training_args = TrainingArguments(
    output_dir='bart-base-newsela-biendata-with-domain-adaptation',
    num_train_epochs=20,
    warmup_steps=250,
    per_device_train_batch_size=BATCH_SIZE,  # BATCH_SIZE is defined outside this cell
    weight_decay=0.01,
    learning_rate=2e-4,
    # NOTE(review): fp16 presumably requires a GPU with half-precision
    # support -- confirm against the target hardware before reusing.
    fp16=True,
    optim="adafactor",
)
Step Training Loss
500 5.677000
1000 2.361900
1500 1.826000
2000 1.672900
2500 1.597900
3000 1.555700
3500 1.520600
4000 1.496300
4500 1.476800
TrainOutput(global_step=4640, training_loss=2.1116079396214977, metrics={'train_runtime': 1059.6025, 'train_samples_per_second': 279.992, 'train_steps_per_second': 4.379, 'total_flos': 0.0, 'train_loss': 2.1116079396214977, 'epoch': 20.0})