|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 410, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04878048780487805, |
|
"grad_norm": 0.4036892056465149, |
|
"learning_rate": 0.00019999478113897612, |
|
"loss": 1.0282, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0975609756097561, |
|
"grad_norm": 0.3629762828350067, |
|
"learning_rate": 0.0001999791251006346, |
|
"loss": 0.7875, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.14634146341463414, |
|
"grad_norm": 0.4877622425556183, |
|
"learning_rate": 0.0001999530335191093, |
|
"loss": 0.5942, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1951219512195122, |
|
"grad_norm": 0.4466260075569153, |
|
"learning_rate": 0.00019991650911776695, |
|
"loss": 0.3866, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.24390243902439024, |
|
"grad_norm": 0.649118959903717, |
|
"learning_rate": 0.000199869555708923, |
|
"loss": 0.3928, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2926829268292683, |
|
"grad_norm": 0.8762800097465515, |
|
"learning_rate": 0.0001998121781934438, |
|
"loss": 0.3258, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.34146341463414637, |
|
"grad_norm": 0.9195622801780701, |
|
"learning_rate": 0.0001997443825602349, |
|
"loss": 0.2885, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.3902439024390244, |
|
"grad_norm": 0.5856262445449829, |
|
"learning_rate": 0.00019966617588561609, |
|
"loss": 0.2888, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.43902439024390244, |
|
"grad_norm": 0.5520443320274353, |
|
"learning_rate": 0.00019957756633258265, |
|
"loss": 0.2242, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.4878048780487805, |
|
"grad_norm": 0.9435800909996033, |
|
"learning_rate": 0.00019947856314995349, |
|
"loss": 0.1629, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5365853658536586, |
|
"grad_norm": 0.9416623115539551, |
|
"learning_rate": 0.00019936917667140555, |
|
"loss": 0.1555, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.5853658536585366, |
|
"grad_norm": 0.802065372467041, |
|
"learning_rate": 0.0001992494183143955, |
|
"loss": 0.1339, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.6341463414634146, |
|
"grad_norm": 0.7007794380187988, |
|
"learning_rate": 0.00019911930057896774, |
|
"loss": 0.1191, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.6829268292682927, |
|
"grad_norm": 0.6755990386009216, |
|
"learning_rate": 0.00019897883704644983, |
|
"loss": 0.1571, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.7317073170731707, |
|
"grad_norm": 1.6951078176498413, |
|
"learning_rate": 0.00019882804237803488, |
|
"loss": 0.1309, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.7804878048780488, |
|
"grad_norm": 0.567158579826355, |
|
"learning_rate": 0.0001986669323132512, |
|
"loss": 0.0766, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.8292682926829268, |
|
"grad_norm": 0.8820038437843323, |
|
"learning_rate": 0.0001984955236683196, |
|
"loss": 0.0839, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.8780487804878049, |
|
"grad_norm": 0.6520794034004211, |
|
"learning_rate": 0.00019831383433439797, |
|
"loss": 0.0863, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.926829268292683, |
|
"grad_norm": 0.45519864559173584, |
|
"learning_rate": 0.00019812188327571399, |
|
"loss": 0.0889, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.975609756097561, |
|
"grad_norm": 0.614235520362854, |
|
"learning_rate": 0.00019791969052758562, |
|
"loss": 0.0725, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.024390243902439, |
|
"grad_norm": 0.2764686644077301, |
|
"learning_rate": 0.00019770727719432994, |
|
"loss": 0.0407, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.0731707317073171, |
|
"grad_norm": 0.6082726716995239, |
|
"learning_rate": 0.00019748466544706022, |
|
"loss": 0.044, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.1219512195121952, |
|
"grad_norm": 0.9295619130134583, |
|
"learning_rate": 0.00019725187852137195, |
|
"loss": 0.0675, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.170731707317073, |
|
"grad_norm": 0.3758924603462219, |
|
"learning_rate": 0.00019700894071491732, |
|
"loss": 0.0439, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.2195121951219512, |
|
"grad_norm": 0.46514585614204407, |
|
"learning_rate": 0.00019675587738486936, |
|
"loss": 0.0398, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.2682926829268293, |
|
"grad_norm": 0.5870018005371094, |
|
"learning_rate": 0.0001964927149452751, |
|
"loss": 0.0406, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.3170731707317074, |
|
"grad_norm": 0.30292996764183044, |
|
"learning_rate": 0.00019621948086429844, |
|
"loss": 0.028, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.3658536585365852, |
|
"grad_norm": 0.47037121653556824, |
|
"learning_rate": 0.00019593620366135337, |
|
"loss": 0.0239, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.4146341463414633, |
|
"grad_norm": 0.4176475405693054, |
|
"learning_rate": 0.00019564291290412688, |
|
"loss": 0.0281, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.4634146341463414, |
|
"grad_norm": 0.3179157078266144, |
|
"learning_rate": 0.00019533963920549306, |
|
"loss": 0.0281, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.5121951219512195, |
|
"grad_norm": 0.5817562937736511, |
|
"learning_rate": 0.00019502641422031763, |
|
"loss": 0.0296, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.5609756097560976, |
|
"grad_norm": 0.7409655451774597, |
|
"learning_rate": 0.00019470327064215383, |
|
"loss": 0.029, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.6097560975609757, |
|
"grad_norm": 0.4418310225009918, |
|
"learning_rate": 0.00019437024219983028, |
|
"loss": 0.0583, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.6585365853658538, |
|
"grad_norm": 0.31637728214263916, |
|
"learning_rate": 0.0001940273636539301, |
|
"loss": 0.0354, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.7073170731707317, |
|
"grad_norm": 0.22175493836402893, |
|
"learning_rate": 0.00019367467079316279, |
|
"loss": 0.0514, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.7560975609756098, |
|
"grad_norm": 0.6636152267456055, |
|
"learning_rate": 0.00019331220043062894, |
|
"loss": 0.034, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.8048780487804879, |
|
"grad_norm": 0.8424332141876221, |
|
"learning_rate": 0.00019293999039997746, |
|
"loss": 0.0299, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.8536585365853657, |
|
"grad_norm": 0.6435155272483826, |
|
"learning_rate": 0.00019255807955145677, |
|
"loss": 0.0508, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.9024390243902438, |
|
"grad_norm": 0.7734220027923584, |
|
"learning_rate": 0.00019216650774785972, |
|
"loss": 0.035, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.951219512195122, |
|
"grad_norm": 0.2854250967502594, |
|
"learning_rate": 0.0001917653158603628, |
|
"loss": 0.0339, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.6165639758110046, |
|
"learning_rate": 0.0001913545457642601, |
|
"loss": 0.0323, |
|
"step": 410 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3075, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.64317874233344e+16, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|