|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9957805907172996, |
|
"eval_steps": 500, |
|
"global_step": 118, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.08438818565400844, |
|
"grad_norm": 6.15625, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 1.2054, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.16877637130801687, |
|
"grad_norm": 2.546875, |
|
"learning_rate": 1.9720229140804108e-06, |
|
"loss": 1.1272, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.25316455696202533, |
|
"grad_norm": 1.484375, |
|
"learning_rate": 1.8610436117673553e-06, |
|
"loss": 1.0603, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.33755274261603374, |
|
"grad_norm": 1.109375, |
|
"learning_rate": 1.6749830015182104e-06, |
|
"loss": 1.0228, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.4219409282700422, |
|
"grad_norm": 0.96875, |
|
"learning_rate": 1.4300652022765205e-06, |
|
"loss": 1.0089, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.5063291139240507, |
|
"grad_norm": 0.9296875, |
|
"learning_rate": 1.1476465640024813e-06, |
|
"loss": 1.0109, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.5907172995780591, |
|
"grad_norm": 0.890625, |
|
"learning_rate": 8.523534359975189e-07, |
|
"loss": 1.0062, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.6751054852320675, |
|
"grad_norm": 0.8984375, |
|
"learning_rate": 5.699347977234798e-07, |
|
"loss": 0.9945, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.759493670886076, |
|
"grad_norm": 0.8671875, |
|
"learning_rate": 3.2501699848178964e-07, |
|
"loss": 0.9971, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.8438818565400844, |
|
"grad_norm": 0.875, |
|
"learning_rate": 1.3895638823264445e-07, |
|
"loss": 1.0152, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9282700421940928, |
|
"grad_norm": 0.8515625, |
|
"learning_rate": 2.797708591958925e-08, |
|
"loss": 0.9995, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.9957805907172996, |
|
"step": 118, |
|
"total_flos": 1.3120143844945428e+18, |
|
"train_loss": 1.0387564513642908, |
|
"train_runtime": 4852.3718, |
|
"train_samples_per_second": 3.125, |
|
"train_steps_per_second": 0.024 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 118, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.3120143844945428e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|