|
{ |
|
"best_metric": 1.1614402532577515, |
|
"best_model_checkpoint": "./mistral/22-04-24-Weni-WeniGPT-Agents-Mistral-1.0.12-SFT_Experiment with SFT and a new tokenizer configuration for chat template of mistral-2_max_steps-330_batch_8_2024-04-22_ppid_9/checkpoint-90", |
|
"epoch": 3.2142857142857144, |
|
"eval_steps": 30, |
|
"global_step": 180, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.17857142857142858, |
|
"grad_norm": 0.7089748978614807, |
|
"learning_rate": 0.00019999521087449523, |
|
"loss": 1.366, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.35714285714285715, |
|
"grad_norm": 0.47184231877326965, |
|
"learning_rate": 0.00019942107065112286, |
|
"loss": 1.0042, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.5357142857142857, |
|
"grad_norm": 0.4505631625652313, |
|
"learning_rate": 0.0001978954027238763, |
|
"loss": 0.8828, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.5357142857142857, |
|
"eval_loss": 1.1399036645889282, |
|
"eval_runtime": 38.6638, |
|
"eval_samples_per_second": 1.19, |
|
"eval_steps_per_second": 0.31, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 0.4148388206958771, |
|
"learning_rate": 0.00019543280877920072, |
|
"loss": 0.806, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8928571428571429, |
|
"grad_norm": 0.3701043725013733, |
|
"learning_rate": 0.00019205685752689177, |
|
"loss": 0.771, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.0714285714285714, |
|
"grad_norm": 0.39483487606048584, |
|
"learning_rate": 0.00018779985913140924, |
|
"loss": 0.6609, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.0714285714285714, |
|
"eval_loss": 1.1470385789871216, |
|
"eval_runtime": 38.654, |
|
"eval_samples_per_second": 1.19, |
|
"eval_steps_per_second": 0.31, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.40163591504096985, |
|
"learning_rate": 0.0001827025559814854, |
|
"loss": 0.4089, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 0.3985687792301178, |
|
"learning_rate": 0.0001768137327575751, |
|
"loss": 0.3764, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.6071428571428572, |
|
"grad_norm": 0.3154538869857788, |
|
"learning_rate": 0.00017018974952906884, |
|
"loss": 0.3918, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.6071428571428572, |
|
"eval_loss": 1.1614402532577515, |
|
"eval_runtime": 38.6493, |
|
"eval_samples_per_second": 1.19, |
|
"eval_steps_per_second": 0.31, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.7857142857142856, |
|
"grad_norm": 0.5162068009376526, |
|
"learning_rate": 0.0001628940023498477, |
|
"loss": 0.4058, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.9642857142857144, |
|
"grad_norm": 0.2627800703048706, |
|
"learning_rate": 0.00015499631651465085, |
|
"loss": 0.2883, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.142857142857143, |
|
"grad_norm": 0.42833977937698364, |
|
"learning_rate": 0.00014657227828320635, |
|
"loss": 0.1679, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.142857142857143, |
|
"eval_loss": 1.2334527969360352, |
|
"eval_runtime": 38.6496, |
|
"eval_samples_per_second": 1.19, |
|
"eval_steps_per_second": 0.31, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.3214285714285716, |
|
"grad_norm": 0.39060840010643005, |
|
"learning_rate": 0.000137702511467984, |
|
"loss": 0.1706, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 0.273832231760025, |
|
"learning_rate": 0.0001284719058091194, |
|
"loss": 0.1654, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.678571428571429, |
|
"grad_norm": 0.3711472451686859, |
|
"learning_rate": 0.00011896880452149077, |
|
"loss": 0.1361, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.678571428571429, |
|
"eval_loss": 1.2611552476882935, |
|
"eval_runtime": 38.655, |
|
"eval_samples_per_second": 1.19, |
|
"eval_steps_per_second": 0.31, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 0.3359326720237732, |
|
"learning_rate": 0.00010928415878967781, |
|
"loss": 0.1492, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 3.0357142857142856, |
|
"grad_norm": 0.3263035714626312, |
|
"learning_rate": 9.951065730286553e-05, |
|
"loss": 0.1239, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 3.2142857142857144, |
|
"grad_norm": 0.2739813029766083, |
|
"learning_rate": 8.974183916063968e-05, |
|
"loss": 0.0571, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.2142857142857144, |
|
"eval_loss": 1.3478918075561523, |
|
"eval_runtime": 38.6443, |
|
"eval_samples_per_second": 1.19, |
|
"eval_steps_per_second": 0.311, |
|
"step": 180 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 330, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 90, |
|
"total_flos": 1.8702894437184307e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|