|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.996510067114094, |
|
"eval_steps": 400, |
|
"global_step": 116, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.008590604026845637, |
|
"grad_norm": 86.01569524610598, |
|
"learning_rate": 4.166666666666666e-08, |
|
"logits/chosen": -2.431039571762085, |
|
"logits/rejected": -2.618009090423584, |
|
"logps/chosen": -1197.8489990234375, |
|
"logps/rejected": -7907.7099609375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.042953020134228186, |
|
"grad_norm": 81.41508200934528, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.8407392501831055, |
|
"logits/rejected": -3.0651891231536865, |
|
"logps/chosen": -1897.8328857421875, |
|
"logps/rejected": -9360.8955078125, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.5078125, |
|
"rewards/chosen": -0.00021657101751770824, |
|
"rewards/margins": 0.003072525840252638, |
|
"rewards/rejected": -0.003289096988737583, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.08590604026845637, |
|
"grad_norm": 77.79677990779399, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.6457934379577637, |
|
"logits/rejected": -2.990572690963745, |
|
"logps/chosen": -1608.490478515625, |
|
"logps/rejected": -9246.4970703125, |
|
"loss": 0.6499, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.021037336438894272, |
|
"rewards/margins": 0.08746902644634247, |
|
"rewards/rejected": -0.10850635915994644, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12885906040268458, |
|
"grad_norm": 34.39608246130055, |
|
"learning_rate": 4.989741394042727e-07, |
|
"logits/chosen": -2.5478570461273193, |
|
"logits/rejected": -2.9816832542419434, |
|
"logps/chosen": -1612.6597900390625, |
|
"logps/rejected": -9213.7060546875, |
|
"loss": 0.4677, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.11297205835580826, |
|
"rewards/margins": 0.6206797361373901, |
|
"rewards/rejected": -0.7336517572402954, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.17181208053691274, |
|
"grad_norm": 5.898677172155064, |
|
"learning_rate": 4.92735454356513e-07, |
|
"logits/chosen": -2.497690200805664, |
|
"logits/rejected": -3.0749311447143555, |
|
"logps/chosen": -1422.3905029296875, |
|
"logps/rejected": -9622.453125, |
|
"loss": 0.2772, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.46565741300582886, |
|
"rewards/margins": 2.972280979156494, |
|
"rewards/rejected": -3.437938690185547, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.21476510067114093, |
|
"grad_norm": 4.142364189438871, |
|
"learning_rate": 4.809698831278217e-07, |
|
"logits/chosen": -2.579451322555542, |
|
"logits/rejected": -3.227189540863037, |
|
"logps/chosen": -1883.857421875, |
|
"logps/rejected": -10082.71875, |
|
"loss": 0.255, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.9519465565681458, |
|
"rewards/margins": 5.0396575927734375, |
|
"rewards/rejected": -5.991604328155518, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.25771812080536916, |
|
"grad_norm": 7.398342740617321, |
|
"learning_rate": 4.639453180753619e-07, |
|
"logits/chosen": -2.527676820755005, |
|
"logits/rejected": -3.185889959335327, |
|
"logps/chosen": -2057.02001953125, |
|
"logps/rejected": -9853.166015625, |
|
"loss": 0.2471, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -1.8792552947998047, |
|
"rewards/margins": 8.077213287353516, |
|
"rewards/rejected": -9.956467628479004, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.3006711409395973, |
|
"grad_norm": 12.059892215323622, |
|
"learning_rate": 4.420493945100701e-07, |
|
"logits/chosen": -2.484814167022705, |
|
"logits/rejected": -3.121709108352661, |
|
"logps/chosen": -2086.67919921875, |
|
"logps/rejected": -9674.890625, |
|
"loss": 0.274, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.0851800441741943, |
|
"rewards/margins": 7.848902225494385, |
|
"rewards/rejected": -9.934083938598633, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.3436241610738255, |
|
"grad_norm": 28.99159189374227, |
|
"learning_rate": 4.157806645601988e-07, |
|
"logits/chosen": -2.427899122238159, |
|
"logits/rejected": -2.911158800125122, |
|
"logps/chosen": -1157.116455078125, |
|
"logps/rejected": -10012.34765625, |
|
"loss": 0.1935, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -1.1424468755722046, |
|
"rewards/margins": 10.81905460357666, |
|
"rewards/rejected": -11.961501121520996, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.3865771812080537, |
|
"grad_norm": 27.796451654387887, |
|
"learning_rate": 3.857372455503697e-07, |
|
"logits/chosen": -2.5545668601989746, |
|
"logits/rejected": -2.8794655799865723, |
|
"logps/chosen": -1950.16796875, |
|
"logps/rejected": -10788.267578125, |
|
"loss": 0.1834, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -2.45682954788208, |
|
"rewards/margins": 11.35061264038086, |
|
"rewards/rejected": -13.807443618774414, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.42953020134228187, |
|
"grad_norm": 26.159018172068677, |
|
"learning_rate": 3.5260320136318924e-07, |
|
"logits/chosen": -2.4899744987487793, |
|
"logits/rejected": -2.9161746501922607, |
|
"logps/chosen": -1632.9305419921875, |
|
"logps/rejected": -10670.7177734375, |
|
"loss": 0.1654, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.4171953201293945, |
|
"rewards/margins": 11.587867736816406, |
|
"rewards/rejected": -14.0050630569458, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.47248322147651006, |
|
"grad_norm": 27.198586027054176, |
|
"learning_rate": 3.171329668685942e-07, |
|
"logits/chosen": -2.460887908935547, |
|
"logits/rejected": -2.9514319896698, |
|
"logps/chosen": -1985.7174072265625, |
|
"logps/rejected": -10099.3125, |
|
"loss": 0.1791, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.890523672103882, |
|
"rewards/margins": 9.628759384155273, |
|
"rewards/rejected": -12.519282341003418, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.5154362416107383, |
|
"grad_norm": 39.49613447619216, |
|
"learning_rate": 2.801341700638307e-07, |
|
"logits/chosen": -2.5868403911590576, |
|
"logits/rejected": -3.0467333793640137, |
|
"logps/chosen": -1847.5205078125, |
|
"logps/rejected": -10758.123046875, |
|
"loss": 0.1843, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -2.3636813163757324, |
|
"rewards/margins": 10.008157730102539, |
|
"rewards/rejected": -12.37183952331543, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.5583892617449664, |
|
"grad_norm": 12.8001844827942, |
|
"learning_rate": 2.424492430497778e-07, |
|
"logits/chosen": -2.506343126296997, |
|
"logits/rejected": -2.965503215789795, |
|
"logps/chosen": -2238.29443359375, |
|
"logps/rejected": -10792.2021484375, |
|
"loss": 0.2211, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.6938767433166504, |
|
"rewards/margins": 9.952713012695312, |
|
"rewards/rejected": -12.646589279174805, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.6013422818791946, |
|
"grad_norm": 15.245673155295346, |
|
"learning_rate": 2.0493624054652355e-07, |
|
"logits/chosen": -2.58244252204895, |
|
"logits/rejected": -2.951399326324463, |
|
"logps/chosen": -2069.97998046875, |
|
"logps/rejected": -10962.5087890625, |
|
"loss": 0.246, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.3197269439697266, |
|
"rewards/margins": 10.182031631469727, |
|
"rewards/rejected": -12.50175952911377, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.6442953020134228, |
|
"grad_norm": 24.590124308811014, |
|
"learning_rate": 1.6844930269478273e-07, |
|
"logits/chosen": -2.5273938179016113, |
|
"logits/rejected": -2.789759397506714, |
|
"logps/chosen": -2302.49169921875, |
|
"logps/rejected": -10204.7763671875, |
|
"loss": 0.2857, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -2.474081039428711, |
|
"rewards/margins": 8.876008987426758, |
|
"rewards/rejected": -11.350090980529785, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.687248322147651, |
|
"grad_norm": 5.243275488519254, |
|
"learning_rate": 1.3381920698905784e-07, |
|
"logits/chosen": -2.599067211151123, |
|
"logits/rejected": -2.9476146697998047, |
|
"logps/chosen": -2229.91162109375, |
|
"logps/rejected": -10514.13671875, |
|
"loss": 0.1797, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.218345880508423, |
|
"rewards/margins": 8.61392593383789, |
|
"rewards/rejected": -10.832271575927734, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.7302013422818792, |
|
"grad_norm": 8.334219171923868, |
|
"learning_rate": 1.0183445215899584e-07, |
|
"logits/chosen": -2.6111998558044434, |
|
"logits/rejected": -2.9625191688537598, |
|
"logps/chosen": -1786.7320556640625, |
|
"logps/rejected": -10765.2060546875, |
|
"loss": 0.1725, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.8339601755142212, |
|
"rewards/margins": 8.938019752502441, |
|
"rewards/rejected": -10.771979331970215, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.7731543624161074, |
|
"grad_norm": 14.219523417845217, |
|
"learning_rate": 7.322330470336313e-08, |
|
"logits/chosen": -2.2908596992492676, |
|
"logits/rejected": -2.7106270790100098, |
|
"logps/chosen": -1873.132568359375, |
|
"logps/rejected": -9457.634765625, |
|
"loss": 0.1766, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.8528430461883545, |
|
"rewards/margins": 7.991453647613525, |
|
"rewards/rejected": -9.844297409057617, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.8161073825503355, |
|
"grad_norm": 15.487241466447763, |
|
"learning_rate": 4.863721686226349e-08, |
|
"logits/chosen": -2.6290388107299805, |
|
"logits/rejected": -2.9791619777679443, |
|
"logps/chosen": -1920.321044921875, |
|
"logps/rejected": -10810.255859375, |
|
"loss": 0.203, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.1639926433563232, |
|
"rewards/margins": 9.248581886291504, |
|
"rewards/rejected": -11.412572860717773, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.8590604026845637, |
|
"grad_norm": 10.617279968480949, |
|
"learning_rate": 2.863599358669755e-08, |
|
"logits/chosen": -2.513326644897461, |
|
"logits/rejected": -2.773226499557495, |
|
"logps/chosen": -2062.977294921875, |
|
"logps/rejected": -10156.541015625, |
|
"loss": 0.158, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -2.4399707317352295, |
|
"rewards/margins": 8.490180969238281, |
|
"rewards/rejected": -10.930150985717773, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9020134228187919, |
|
"grad_norm": 15.766621732547646, |
|
"learning_rate": 1.3675046241339916e-08, |
|
"logits/chosen": -2.458155870437622, |
|
"logits/rejected": -2.8766350746154785, |
|
"logps/chosen": -1824.9993896484375, |
|
"logps/rejected": -10561.1455078125, |
|
"loss": 0.2026, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.1738946437835693, |
|
"rewards/margins": 8.930900573730469, |
|
"rewards/rejected": -11.104796409606934, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.9449664429530201, |
|
"grad_norm": 5.046777954270063, |
|
"learning_rate": 4.0950232632141205e-09, |
|
"logits/chosen": -2.5404601097106934, |
|
"logits/rejected": -2.9703125953674316, |
|
"logps/chosen": -1576.762939453125, |
|
"logps/rejected": -11024.712890625, |
|
"loss": 0.2015, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.8676944971084595, |
|
"rewards/margins": 9.766222953796387, |
|
"rewards/rejected": -11.633917808532715, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.9879194630872483, |
|
"grad_norm": 13.782488804833061, |
|
"learning_rate": 1.1405387761664887e-10, |
|
"logits/chosen": -2.4785690307617188, |
|
"logits/rejected": -2.7061634063720703, |
|
"logps/chosen": -2462.81689453125, |
|
"logps/rejected": -9758.763671875, |
|
"loss": 0.2002, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -3.0159003734588623, |
|
"rewards/margins": 7.688973426818848, |
|
"rewards/rejected": -10.704873085021973, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.996510067114094, |
|
"step": 116, |
|
"total_flos": 0.0, |
|
"train_loss": 0.2600768296497649, |
|
"train_runtime": 7822.1359, |
|
"train_samples_per_second": 1.904, |
|
"train_steps_per_second": 0.015 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 116, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|