{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9992429977289932,
  "eval_steps": 500,
  "global_step": 165,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "eta": 0.0010000000474974513,
      "grad_norm": 19.93140330088016,
      "learning_rate": 2.941176470588235e-08,
      "logits/chosen": -2.397038221359253,
      "logits/rejected": -2.213353395462036,
      "logps/chosen": -180.87660217285156,
      "logps/pi_response": -160.3468780517578,
      "logps/ref_response": -160.3468780517578,
      "logps/rejected": -188.15975952148438,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.06,
      "eta": 0.0010000000474974513,
      "grad_norm": 19.32986244730549,
      "learning_rate": 2.941176470588235e-07,
      "logits/chosen": -2.334710121154785,
      "logits/rejected": -2.268401861190796,
      "logps/chosen": -204.74749755859375,
      "logps/pi_response": -172.67669677734375,
      "logps/ref_response": -171.70980834960938,
      "logps/rejected": -210.5995330810547,
      "loss": 0.6931,
      "rewards/accuracies": 0.4930555522441864,
      "rewards/chosen": -0.009038920514285564,
      "rewards/margins": 0.0021236613392829895,
      "rewards/rejected": -0.011162581853568554,
      "step": 10
    },
    {
      "epoch": 0.12,
      "eta": 0.0010000000474974513,
      "grad_norm": 38.14550506704795,
      "learning_rate": 4.994932636402031e-07,
      "logits/chosen": -2.1406025886535645,
      "logits/rejected": -2.2008185386657715,
      "logps/chosen": -219.163330078125,
      "logps/pi_response": -189.20216369628906,
      "logps/ref_response": -168.5287628173828,
      "logps/rejected": -222.94271850585938,
      "loss": 0.6888,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -0.19716469943523407,
      "rewards/margins": 0.0258896853774786,
      "rewards/rejected": -0.22305437922477722,
      "step": 20
    },
    {
      "epoch": 0.18,
      "eta": 0.0010000000474974513,
      "grad_norm": 17.675340945525093,
      "learning_rate": 4.905416503522123e-07,
      "logits/chosen": -2.074061870574951,
      "logits/rejected": -2.080552101135254,
      "logps/chosen": -198.05816650390625,
      "logps/pi_response": -164.6136932373047,
      "logps/ref_response": -160.591552734375,
      "logps/rejected": -201.87464904785156,
      "loss": 0.6934,
      "rewards/accuracies": 0.515625,
      "rewards/chosen": -0.06703463941812515,
      "rewards/margins": 0.023267237469553947,
      "rewards/rejected": -0.09030187875032425,
      "step": 30
    },
    {
      "epoch": 0.24,
      "eta": 0.0010000000474974513,
      "grad_norm": 20.77937618350303,
      "learning_rate": 4.707922373336523e-07,
      "logits/chosen": -2.1749043464660645,
      "logits/rejected": -2.1247994899749756,
      "logps/chosen": -209.81790161132812,
      "logps/pi_response": -183.0597381591797,
      "logps/ref_response": -177.06118774414062,
      "logps/rejected": -218.8212890625,
      "loss": 0.6946,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": -0.03284044936299324,
      "rewards/margins": 0.03580809757113457,
      "rewards/rejected": -0.06864854693412781,
      "step": 40
    },
    {
      "epoch": 0.3,
      "eta": 0.0010000000474974513,
      "grad_norm": 21.485794770779655,
      "learning_rate": 4.4113156629677313e-07,
      "logits/chosen": -2.161895990371704,
      "logits/rejected": -2.047234058380127,
      "logps/chosen": -253.85107421875,
      "logps/pi_response": -220.5885467529297,
      "logps/ref_response": -183.68719482421875,
      "logps/rejected": -255.03494262695312,
      "loss": 0.6961,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -0.3808112144470215,
      "rewards/margins": 0.016062479466199875,
      "rewards/rejected": -0.3968736529350281,
      "step": 50
    },
    {
      "epoch": 0.36,
      "eta": 0.0010000000474974513,
      "grad_norm": 20.861550744842482,
      "learning_rate": 4.0289109058972283e-07,
      "logits/chosen": -2.0308499336242676,
      "logits/rejected": -2.0642848014831543,
      "logps/chosen": -249.21365356445312,
      "logps/pi_response": -218.06405639648438,
      "logps/ref_response": -171.6521453857422,
      "logps/rejected": -256.102294921875,
      "loss": 0.6918,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -0.45084279775619507,
      "rewards/margins": 0.007402978837490082,
      "rewards/rejected": -0.45824581384658813,
      "step": 60
    },
    {
      "epoch": 0.42,
      "eta": 0.0010000000474974513,
      "grad_norm": 28.00866705635057,
      "learning_rate": 3.577874068920446e-07,
      "logits/chosen": -1.9535369873046875,
      "logits/rejected": -1.9607006311416626,
      "logps/chosen": -277.5329284667969,
      "logps/pi_response": -250.90200805664062,
      "logps/ref_response": -169.15725708007812,
      "logps/rejected": -291.10015869140625,
      "loss": 0.6856,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -0.7873853445053101,
      "rewards/margins": 0.044238921254873276,
      "rewards/rejected": -0.8316243290901184,
      "step": 70
    },
    {
      "epoch": 0.48,
      "eta": 0.0010000000474974513,
      "grad_norm": 30.93214776345345,
      "learning_rate": 3.078451980100854e-07,
      "logits/chosen": -1.9619076251983643,
      "logits/rejected": -1.8502511978149414,
      "logps/chosen": -281.6351318359375,
      "logps/pi_response": -248.10684204101562,
      "logps/ref_response": -168.71682739257812,
      "logps/rejected": -294.75604248046875,
      "loss": 0.6782,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": -0.7786952257156372,
      "rewards/margins": 0.08077356964349747,
      "rewards/rejected": -0.8594688177108765,
      "step": 80
    },
    {
      "epoch": 0.55,
      "eta": 0.0010000000474974513,
      "grad_norm": 25.352348701139025,
      "learning_rate": 2.553063458334059e-07,
      "logits/chosen": -1.952636957168579,
      "logits/rejected": -2.0428998470306396,
      "logps/chosen": -253.1057586669922,
      "logps/pi_response": -229.44277954101562,
      "logps/ref_response": -163.27528381347656,
      "logps/rejected": -273.68780517578125,
      "loss": 0.6755,
      "rewards/accuracies": 0.546875,
      "rewards/chosen": -0.6530182361602783,
      "rewards/margins": 0.06835106015205383,
      "rewards/rejected": -0.7213693261146545,
      "step": 90
    },
    {
      "epoch": 0.61,
      "eta": 0.0010000000474974513,
      "grad_norm": 30.010292143430778,
      "learning_rate": 2.0252929432814287e-07,
      "logits/chosen": -2.0059685707092285,
      "logits/rejected": -1.9757779836654663,
      "logps/chosen": -277.2453308105469,
      "logps/pi_response": -250.5970458984375,
      "logps/ref_response": -177.19424438476562,
      "logps/rejected": -285.8126525878906,
      "loss": 0.6861,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": -0.7217534780502319,
      "rewards/margins": 0.028179144486784935,
      "rewards/rejected": -0.7499326467514038,
      "step": 100
    },
    {
      "epoch": 0.67,
      "eta": 0.0010000000474974513,
      "grad_norm": 24.160814982479724,
      "learning_rate": 1.5188318011445906e-07,
      "logits/chosen": -1.880448579788208,
      "logits/rejected": -1.9354912042617798,
      "logps/chosen": -272.9020080566406,
      "logps/pi_response": -249.7644805908203,
      "logps/ref_response": -170.73162841796875,
      "logps/rejected": -296.87628173828125,
      "loss": 0.6738,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -0.7538261413574219,
      "rewards/margins": 0.10054464638233185,
      "rewards/rejected": -0.8543707132339478,
      "step": 110
    },
    {
      "epoch": 0.73,
      "eta": 0.0010000000474974513,
      "grad_norm": 21.79427862950612,
      "learning_rate": 1.0564148305586295e-07,
      "logits/chosen": -1.8592274188995361,
      "logits/rejected": -1.8245065212249756,
      "logps/chosen": -286.49853515625,
      "logps/pi_response": -259.8140869140625,
      "logps/ref_response": -175.83364868164062,
      "logps/rejected": -298.8387756347656,
      "loss": 0.6773,
      "rewards/accuracies": 0.546875,
      "rewards/chosen": -0.8258917927742004,
      "rewards/margins": 0.05054790526628494,
      "rewards/rejected": -0.8764396905899048,
      "step": 120
    },
    {
      "epoch": 0.79,
      "eta": 0.0010000000474974513,
      "grad_norm": 19.644675719528685,
      "learning_rate": 6.587997083462196e-08,
      "logits/chosen": -2.0082201957702637,
      "logits/rejected": -1.852614164352417,
      "logps/chosen": -262.72113037109375,
      "logps/pi_response": -234.92135620117188,
      "logps/ref_response": -170.3947296142578,
      "logps/rejected": -271.8548278808594,
      "loss": 0.6733,
      "rewards/accuracies": 0.546875,
      "rewards/chosen": -0.6122658848762512,
      "rewards/margins": 0.0654686689376831,
      "rewards/rejected": -0.6777344942092896,
      "step": 130
    },
    {
      "epoch": 0.85,
      "eta": 0.0010000000474974513,
      "grad_norm": 23.231586788018557,
      "learning_rate": 3.438351873250492e-08,
      "logits/chosen": -2.007563829421997,
      "logits/rejected": -1.9652036428451538,
      "logps/chosen": -268.14642333984375,
      "logps/pi_response": -241.33349609375,
      "logps/ref_response": -177.7176971435547,
      "logps/rejected": -281.2812805175781,
      "loss": 0.6679,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": -0.6138588190078735,
      "rewards/margins": 0.07413015514612198,
      "rewards/rejected": -0.6879889369010925,
      "step": 140
    },
    {
      "epoch": 0.91,
      "eta": 0.0010000000474974513,
      "grad_norm": 22.182591656592486,
      "learning_rate": 1.256598743236703e-08,
      "logits/chosen": -1.9013135433197021,
      "logits/rejected": -1.9633811712265015,
      "logps/chosen": -258.21563720703125,
      "logps/pi_response": -229.17837524414062,
      "logps/ref_response": -161.02003479003906,
      "logps/rejected": -270.0240478515625,
      "loss": 0.6754,
      "rewards/accuracies": 0.5718749761581421,
      "rewards/chosen": -0.6737670302391052,
      "rewards/margins": 0.05831047147512436,
      "rewards/rejected": -0.7320775985717773,
      "step": 150
    },
    {
      "epoch": 0.97,
      "eta": 0.0010000000474974513,
      "grad_norm": 28.927536436184628,
      "learning_rate": 1.406755487774386e-09,
      "logits/chosen": -1.9876502752304077,
      "logits/rejected": -1.8038742542266846,
      "logps/chosen": -269.11968994140625,
      "logps/pi_response": -239.73095703125,
      "logps/ref_response": -171.18202209472656,
      "logps/rejected": -278.70245361328125,
      "loss": 0.6711,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": -0.6629125475883484,
      "rewards/margins": 0.0806988924741745,
      "rewards/rejected": -0.7436113953590393,
      "step": 160
    },
    {
      "epoch": 1.0,
      "step": 165,
      "total_flos": 0.0,
      "train_loss": 0.6822745196747058,
      "train_runtime": 33626.4283,
      "train_samples_per_second": 0.629,
      "train_steps_per_second": 0.005
    }
  ],
  "logging_steps": 10,
  "max_steps": 165,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 200,
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}