|
{ |
|
"best_metric": 1.147840142250061, |
|
"best_model_checkpoint": "./mistral/10-04-24-Weni-WeniGPT-Agents-Mistral-1.0.0-SFT_Experiment with SFT and a new tokenizer configuration for chat template of mistral-2_max_steps-312_batch_4_2024-04-10_ppid_9/checkpoint-100", |
|
"epoch": 2.8708133971291865, |
|
"eval_steps": 50, |
|
"global_step": 300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.6767070889472961, |
|
"learning_rate": 0.0001993503206718859, |
|
"loss": 1.421, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.7594590783119202, |
|
"learning_rate": 0.00019487887022684336, |
|
"loss": 1.1094, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.1684353351593018, |
|
"eval_runtime": 15.9813, |
|
"eval_samples_per_second": 2.878, |
|
"eval_steps_per_second": 1.439, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.8954619765281677, |
|
"learning_rate": 0.00018634217048966637, |
|
"loss": 1.0392, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.6521860361099243, |
|
"learning_rate": 0.0001741059911251997, |
|
"loss": 0.9187, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.869737446308136, |
|
"learning_rate": 0.0001586946124013354, |
|
"loss": 0.879, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 1.147840142250061, |
|
"eval_runtime": 15.9788, |
|
"eval_samples_per_second": 2.879, |
|
"eval_steps_per_second": 1.439, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.7937932014465332, |
|
"learning_rate": 0.00014076836149416887, |
|
"loss": 0.6586, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 1.0015738010406494, |
|
"learning_rate": 0.00012109531962807332, |
|
"loss": 0.6885, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 1.1584892272949219, |
|
"eval_runtime": 15.9777, |
|
"eval_samples_per_second": 2.879, |
|
"eval_steps_per_second": 1.44, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 0.8210965991020203, |
|
"learning_rate": 0.00010051841230721065, |
|
"loss": 0.6127, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 0.9020585417747498, |
|
"learning_rate": 7.991929271442817e-05, |
|
"loss": 0.4806, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 1.1187869310379028, |
|
"learning_rate": 6.018056575578075e-05, |
|
"loss": 0.5073, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_loss": 1.1682521104812622, |
|
"eval_runtime": 15.98, |
|
"eval_samples_per_second": 2.879, |
|
"eval_steps_per_second": 1.439, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 1.101958990097046, |
|
"learning_rate": 4.2147971326825966e-05, |
|
"loss": 0.4016, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 1.0914422273635864, |
|
"learning_rate": 2.659414712405398e-05, |
|
"loss": 0.3815, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_loss": 1.2108865976333618, |
|
"eval_runtime": 15.9816, |
|
"eval_samples_per_second": 2.878, |
|
"eval_steps_per_second": 1.439, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"grad_norm": 1.116877555847168, |
|
"learning_rate": 1.4185523646469822e-05, |
|
"loss": 0.3674, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"grad_norm": 1.0452585220336914, |
|
"learning_rate": 5.453769828241872e-06, |
|
"loss": 0.3678, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"grad_norm": 0.9877675771713257, |
|
"learning_rate": 7.730127636723539e-07, |
|
"loss": 0.2894, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_loss": 1.207001805305481, |
|
"eval_runtime": 15.9786, |
|
"eval_samples_per_second": 2.879, |
|
"eval_steps_per_second": 1.439, |
|
"step": 300 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 312, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 1.5078115136355533e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|