|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 612, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.032679738562091505, |
|
"grad_norm": 5.336216449737549, |
|
"learning_rate": 3.2520325203252037e-06, |
|
"loss": 1.4653, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06535947712418301, |
|
"grad_norm": 4.05853796005249, |
|
"learning_rate": 6.504065040650407e-06, |
|
"loss": 1.2502, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09803921568627451, |
|
"grad_norm": 4.095743656158447, |
|
"learning_rate": 9.756097560975611e-06, |
|
"loss": 1.1818, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13071895424836602, |
|
"grad_norm": 4.049984931945801, |
|
"learning_rate": 1.3008130081300815e-05, |
|
"loss": 1.1616, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.16339869281045752, |
|
"grad_norm": 4.275440216064453, |
|
"learning_rate": 1.6260162601626018e-05, |
|
"loss": 1.1681, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.19607843137254902, |
|
"grad_norm": 3.7174243927001953, |
|
"learning_rate": 1.9512195121951222e-05, |
|
"loss": 1.1263, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.22875816993464052, |
|
"grad_norm": 3.68149471282959, |
|
"learning_rate": 1.9940417581113062e-05, |
|
"loss": 1.1764, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.26143790849673204, |
|
"grad_norm": 3.664177656173706, |
|
"learning_rate": 1.9718803741191918e-05, |
|
"loss": 1.1296, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.29411764705882354, |
|
"grad_norm": 3.219787120819092, |
|
"learning_rate": 1.9336954955188042e-05, |
|
"loss": 1.1026, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.32679738562091504, |
|
"grad_norm": 3.5025317668914795, |
|
"learning_rate": 1.880116680445757e-05, |
|
"loss": 1.1542, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.35947712418300654, |
|
"grad_norm": 3.2319493293762207, |
|
"learning_rate": 1.812027288495843e-05, |
|
"loss": 1.1492, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.39215686274509803, |
|
"grad_norm": 2.933061122894287, |
|
"learning_rate": 1.730549916681868e-05, |
|
"loss": 1.1107, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.42483660130718953, |
|
"grad_norm": 3.0482289791107178, |
|
"learning_rate": 1.6370278910578644e-05, |
|
"loss": 1.1077, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.45751633986928103, |
|
"grad_norm": 3.3377017974853516, |
|
"learning_rate": 1.5330031191602395e-05, |
|
"loss": 1.1246, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.49019607843137253, |
|
"grad_norm": 3.618112564086914, |
|
"learning_rate": 1.420190668415002e-05, |
|
"loss": 1.101, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5228758169934641, |
|
"grad_norm": 3.0649290084838867, |
|
"learning_rate": 1.3004504896395564e-05, |
|
"loss": 1.0556, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"grad_norm": 3.0544955730438232, |
|
"learning_rate": 1.1757567518366883e-05, |
|
"loss": 1.0287, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.5882352941176471, |
|
"grad_norm": 2.709897994995117, |
|
"learning_rate": 1.0481652938612374e-05, |
|
"loss": 1.0145, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.6209150326797386, |
|
"grad_norm": 2.6121253967285156, |
|
"learning_rate": 9.197797295872709e-06, |
|
"loss": 1.0179, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.6535947712418301, |
|
"grad_norm": 3.049081563949585, |
|
"learning_rate": 7.927167654034622e-06, |
|
"loss": 0.9818, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6862745098039216, |
|
"grad_norm": 3.008591413497925, |
|
"learning_rate": 6.690713018507917e-06, |
|
"loss": 0.9431, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.7189542483660131, |
|
"grad_norm": 3.0657355785369873, |
|
"learning_rate": 5.508818947755687e-06, |
|
"loss": 0.9978, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.7516339869281046, |
|
"grad_norm": 2.533656597137451, |
|
"learning_rate": 4.4009714544339755e-06, |
|
"loss": 0.9494, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.7843137254901961, |
|
"grad_norm": 2.7470057010650635, |
|
"learning_rate": 3.3854357374383905e-06, |
|
"loss": 0.962, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.8169934640522876, |
|
"grad_norm": 2.5096466541290283, |
|
"learning_rate": 2.478955041636435e-06, |
|
"loss": 0.9592, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8496732026143791, |
|
"grad_norm": 2.438676357269287, |
|
"learning_rate": 1.6964746102169582e-06, |
|
"loss": 0.9327, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.8823529411764706, |
|
"grad_norm": 2.47017240524292, |
|
"learning_rate": 1.0508952808836682e-06, |
|
"loss": 0.9249, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.9150326797385621, |
|
"grad_norm": 2.9075846672058105, |
|
"learning_rate": 5.528607883782599e-07, |
|
"loss": 0.9466, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.9477124183006536, |
|
"grad_norm": 2.7440707683563232, |
|
"learning_rate": 2.1058228009902094e-07, |
|
"loss": 0.9481, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.9803921568627451, |
|
"grad_norm": 2.9985830783843994, |
|
"learning_rate": 2.9702938044468e-08, |
|
"loss": 0.951, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 612, |
|
"total_flos": 16017543659520.0, |
|
"train_loss": 1.0669627922033174, |
|
"train_runtime": 18486.1158, |
|
"train_samples_per_second": 0.132, |
|
"train_steps_per_second": 0.033 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 612, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 16017543659520.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|