|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9984301412872841,
  "eval_steps": 100,
  "global_step": 477,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "grad_norm": 0.400390625,
      "learning_rate": 1.0416666666666667e-07,
      "logits/chosen": -2.2547454833984375,
      "logits/rejected": -2.401865005493164,
      "logps/chosen": -53.759212493896484,
      "logps/rejected": -48.83185958862305,
      "loss": 0.6931,
      "pred_label": 0.0,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1,
      "use_label": 0.0
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.4609375,
      "learning_rate": 1.0416666666666667e-06,
      "logits/chosen": -2.2421462535858154,
      "logits/rejected": -2.2770614624023438,
      "logps/chosen": -51.98179626464844,
      "logps/rejected": -64.9604263305664,
      "loss": 0.6929,
      "pred_label": 0.0,
      "rewards/accuracies": 0.2222222238779068,
      "rewards/chosen": 0.001975727966055274,
      "rewards/margins": 0.00047667179023846984,
      "rewards/rejected": 0.001499056350439787,
      "step": 10,
      "use_label": 0.0
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.39453125,
      "learning_rate": 2.0833333333333334e-06,
      "logits/chosen": -2.2520272731781006,
      "logits/rejected": -2.255510091781616,
      "logps/chosen": -62.492515563964844,
      "logps/rejected": -72.63607788085938,
      "loss": 0.6919,
      "pred_label": 0.0,
      "rewards/accuracies": 0.2874999940395355,
      "rewards/chosen": 0.01601376011967659,
      "rewards/margins": 0.0011284304782748222,
      "rewards/rejected": 0.014885328710079193,
      "step": 20,
      "use_label": 0.0
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.5078125,
      "learning_rate": 3.125e-06,
      "logits/chosen": -2.3422012329101562,
      "logits/rejected": -2.3548905849456787,
      "logps/chosen": -79.14694213867188,
      "logps/rejected": -98.82722473144531,
      "loss": 0.6898,
      "pred_label": 0.0,
      "rewards/accuracies": 0.2874999940395355,
      "rewards/chosen": 0.030949687585234642,
      "rewards/margins": 0.0029636542312800884,
      "rewards/rejected": 0.027986034750938416,
      "step": 30,
      "use_label": 0.0
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.515625,
      "learning_rate": 4.166666666666667e-06,
      "logits/chosen": -2.322833776473999,
      "logits/rejected": -2.3010501861572266,
      "logps/chosen": -82.85880279541016,
      "logps/rejected": -82.40392303466797,
      "loss": 0.6866,
      "pred_label": 0.0,
      "rewards/accuracies": 0.2874999940395355,
      "rewards/chosen": 0.033333443105220795,
      "rewards/margins": 0.011918319389224052,
      "rewards/rejected": 0.021415119990706444,
      "step": 40,
      "use_label": 0.0
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.67578125,
      "learning_rate": 4.999731868769027e-06,
      "logits/chosen": -2.241189956665039,
      "logits/rejected": -2.263849973678589,
      "logps/chosen": -67.93062591552734,
      "logps/rejected": -81.85546875,
      "loss": 0.6805,
      "pred_label": 0.0,
      "rewards/accuracies": 0.32499998807907104,
      "rewards/chosen": 0.009002490900456905,
      "rewards/margins": 0.03016103245317936,
      "rewards/rejected": -0.02115854248404503,
      "step": 50,
      "use_label": 0.0
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.09375,
      "learning_rate": 4.9903533134293035e-06,
      "logits/chosen": -2.218756914138794,
      "logits/rejected": -2.1594481468200684,
      "logps/chosen": -62.0407600402832,
      "logps/rejected": -71.9369888305664,
      "loss": 0.6748,
      "pred_label": 0.0,
      "rewards/accuracies": 0.3062500059604645,
      "rewards/chosen": -0.0231451578438282,
      "rewards/margins": 0.04653460532426834,
      "rewards/rejected": -0.06967976689338684,
      "step": 60,
      "use_label": 0.0
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.8984375,
      "learning_rate": 4.967625656594782e-06,
      "logits/chosen": -2.08909273147583,
      "logits/rejected": -2.088801383972168,
      "logps/chosen": -68.09326171875,
      "logps/rejected": -81.9454116821289,
      "loss": 0.6684,
      "pred_label": 0.0,
      "rewards/accuracies": 0.25,
      "rewards/chosen": -0.12382155656814575,
      "rewards/margins": 0.03761869668960571,
      "rewards/rejected": -0.16144026815891266,
      "step": 70,
      "use_label": 0.0
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.15625,
      "learning_rate": 4.93167072587771e-06,
      "logits/chosen": -2.20400071144104,
      "logits/rejected": -2.1452622413635254,
      "logps/chosen": -55.867881774902344,
      "logps/rejected": -70.91771697998047,
      "loss": 0.6588,
      "pred_label": 0.0,
      "rewards/accuracies": 0.26875001192092896,
      "rewards/chosen": -0.0733698159456253,
      "rewards/margins": 0.10403277724981308,
      "rewards/rejected": -0.17740261554718018,
      "step": 80,
      "use_label": 0.0
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.0546875,
      "learning_rate": 4.882681251368549e-06,
      "logits/chosen": -1.991231918334961,
      "logits/rejected": -1.9964717626571655,
      "logps/chosen": -72.28443908691406,
      "logps/rejected": -90.79218292236328,
      "loss": 0.6587,
      "pred_label": 0.0,
      "rewards/accuracies": 0.30000001192092896,
      "rewards/chosen": -0.13902384042739868,
      "rewards/margins": 0.08125626295804977,
      "rewards/rejected": -0.22028008103370667,
      "step": 90,
      "use_label": 0.0
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.359375,
      "learning_rate": 4.8209198325401815e-06,
      "logits/chosen": -1.9231764078140259,
      "logits/rejected": -1.9043807983398438,
      "logps/chosen": -103.5636978149414,
      "logps/rejected": -96.08602142333984,
      "loss": 0.6551,
      "pred_label": 0.0,
      "rewards/accuracies": 0.35624998807907104,
      "rewards/chosen": -0.2353379726409912,
      "rewards/margins": 0.08685441315174103,
      "rewards/rejected": -0.32219237089157104,
      "step": 100,
      "use_label": 0.0
    },
    {
      "epoch": 0.21,
      "eval_logits/chosen": -1.762041687965393,
      "eval_logits/rejected": -1.7460479736328125,
      "eval_logps/chosen": -87.55253601074219,
      "eval_logps/rejected": -114.47212219238281,
      "eval_loss": 0.652633547782898,
      "eval_pred_label": 0.0,
      "eval_rewards/accuracies": 0.3359375,
      "eval_rewards/chosen": -0.23640292882919312,
      "eval_rewards/margins": 0.136388897895813,
      "eval_rewards/rejected": -0.3727918267250061,
      "eval_runtime": 125.4491,
      "eval_samples_per_second": 15.943,
      "eval_steps_per_second": 0.255,
      "eval_use_label": 0.0,
      "step": 100
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.59375,
      "learning_rate": 4.746717530629565e-06,
      "logits/chosen": -1.7847106456756592,
      "logits/rejected": -1.7590484619140625,
      "logps/chosen": -85.73925018310547,
      "logps/rejected": -106.20509338378906,
      "loss": 0.6557,
      "pred_label": 0.0,
      "rewards/accuracies": 0.3499999940395355,
      "rewards/chosen": -0.14638465642929077,
      "rewards/margins": 0.12975916266441345,
      "rewards/rejected": -0.2761438190937042,
      "step": 110,
      "use_label": 0.0
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.828125,
      "learning_rate": 4.660472094042121e-06,
      "logits/chosen": -1.1902318000793457,
      "logits/rejected": -1.0542975664138794,
      "logps/chosen": -108.4779052734375,
      "logps/rejected": -127.95109558105469,
      "loss": 0.6493,
      "pred_label": 0.0,
      "rewards/accuracies": 0.36250001192092896,
      "rewards/chosen": -0.38532325625419617,
      "rewards/margins": 0.1649974286556244,
      "rewards/rejected": -0.5503206849098206,
      "step": 120,
      "use_label": 0.0
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9375,
      "learning_rate": 4.5626458262912745e-06,
      "logits/chosen": -0.818010687828064,
      "logits/rejected": -0.7847374081611633,
      "logps/chosen": -109.61775207519531,
      "logps/rejected": -133.42086791992188,
      "loss": 0.6524,
      "pred_label": 0.0,
      "rewards/accuracies": 0.33125001192092896,
      "rewards/chosen": -0.43839359283447266,
      "rewards/margins": 0.16735044121742249,
      "rewards/rejected": -0.6057440638542175,
      "step": 130,
      "use_label": 0.0
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.71875,
      "learning_rate": 4.453763107901676e-06,
      "logits/chosen": -0.7395650148391724,
      "logits/rejected": -0.8444339036941528,
      "logps/chosen": -116.97528076171875,
      "logps/rejected": -130.2399139404297,
      "loss": 0.6381,
      "pred_label": 0.0,
      "rewards/accuracies": 0.33125001192092896,
      "rewards/chosen": -0.3622770607471466,
      "rewards/margins": 0.1490650475025177,
      "rewards/rejected": -0.5113420486450195,
      "step": 140,
      "use_label": 0.0
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.125,
      "learning_rate": 4.33440758555951e-06,
      "logits/chosen": -0.6497868299484253,
      "logits/rejected": -0.6378159523010254,
      "logps/chosen": -89.60552978515625,
      "logps/rejected": -115.42192077636719,
      "loss": 0.6379,
      "pred_label": 0.0,
      "rewards/accuracies": 0.3187499940395355,
      "rewards/chosen": -0.2445882111787796,
      "rewards/margins": 0.23124215006828308,
      "rewards/rejected": -0.4758303761482239,
      "step": 150,
      "use_label": 0.0
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.15625,
      "learning_rate": 4.205219043576955e-06,
      "logits/chosen": -0.3159053921699524,
      "logits/rejected": -0.33064812421798706,
      "logps/chosen": -99.68696594238281,
      "logps/rejected": -129.45729064941406,
      "loss": 0.6317,
      "pred_label": 0.0,
      "rewards/accuracies": 0.2874999940395355,
      "rewards/chosen": -0.35356926918029785,
      "rewards/margins": 0.16687795519828796,
      "rewards/rejected": -0.5204472541809082,
      "step": 160,
      "use_label": 0.0
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.4375,
      "learning_rate": 4.066889974440757e-06,
      "logits/chosen": 0.14531800150871277,
      "logits/rejected": 0.18166163563728333,
      "logps/chosen": -95.45491027832031,
      "logps/rejected": -125.1463623046875,
      "loss": 0.6291,
      "pred_label": 0.0,
      "rewards/accuracies": 0.29374998807907104,
      "rewards/chosen": -0.39946848154067993,
      "rewards/margins": 0.20978550612926483,
      "rewards/rejected": -0.609254002571106,
      "step": 170,
      "use_label": 0.0
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.453125,
      "learning_rate": 3.92016186682789e-06,
      "logits/chosen": -0.3282355070114136,
      "logits/rejected": -0.21966704726219177,
      "logps/chosen": -108.00712585449219,
      "logps/rejected": -128.67587280273438,
      "loss": 0.649,
      "pred_label": 0.0,
      "rewards/accuracies": 0.35624998807907104,
      "rewards/chosen": -0.4521949887275696,
      "rewards/margins": 0.27172034978866577,
      "rewards/rejected": -0.7239152789115906,
      "step": 180,
      "use_label": 0.0
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.84375,
      "learning_rate": 3.7658212309857576e-06,
      "logits/chosen": -0.889633297920227,
      "logits/rejected": -0.6851574778556824,
      "logps/chosen": -91.25111389160156,
      "logps/rejected": -118.9649887084961,
      "loss": 0.6461,
      "pred_label": 0.0,
      "rewards/accuracies": 0.33125001192092896,
      "rewards/chosen": -0.32139474153518677,
      "rewards/margins": 0.22424864768981934,
      "rewards/rejected": -0.5456433892250061,
      "step": 190,
      "use_label": 0.0
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9453125,
      "learning_rate": 3.604695382782159e-06,
      "logits/chosen": -0.8204952478408813,
      "logits/rejected": -0.7186430096626282,
      "logps/chosen": -112.41142272949219,
      "logps/rejected": -120.7835693359375,
      "loss": 0.6376,
      "pred_label": 0.0,
      "rewards/accuracies": 0.3125,
      "rewards/chosen": -0.30735117197036743,
      "rewards/margins": 0.169038325548172,
      "rewards/rejected": -0.47638946771621704,
      "step": 200,
      "use_label": 0.0
    },
    {
      "epoch": 0.42,
      "eval_logits/chosen": -0.023804781958460808,
      "eval_logits/rejected": 0.04317883029580116,
      "eval_logps/chosen": -97.96138000488281,
      "eval_logps/rejected": -137.9141845703125,
      "eval_loss": 0.6288520693778992,
      "eval_pred_label": 0.0,
      "eval_rewards/accuracies": 0.3671875,
      "eval_rewards/chosen": -0.34049129486083984,
      "eval_rewards/margins": 0.26672109961509705,
      "eval_rewards/rejected": -0.6072123646736145,
      "eval_runtime": 125.433,
      "eval_samples_per_second": 15.945,
      "eval_steps_per_second": 0.255,
      "eval_use_label": 0.0,
      "step": 200
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.265625,
      "learning_rate": 3.437648009023905e-06,
      "logits/chosen": -0.05805685371160507,
      "logits/rejected": -0.06056814268231392,
      "logps/chosen": -88.78871154785156,
      "logps/rejected": -124.3318862915039,
      "loss": 0.6218,
      "pred_label": 0.0,
      "rewards/accuracies": 0.375,
      "rewards/chosen": -0.3281395435333252,
      "rewards/margins": 0.28538644313812256,
      "rewards/rejected": -0.613525927066803,
      "step": 210,
      "use_label": 0.0
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.21875,
      "learning_rate": 3.265574537815398e-06,
      "logits/chosen": -0.1400775909423828,
      "logits/rejected": -0.005620801355689764,
      "logps/chosen": -133.7158660888672,
      "logps/rejected": -136.84619140625,
      "loss": 0.627,
      "pred_label": 0.0,
      "rewards/accuracies": 0.30000001192092896,
      "rewards/chosen": -0.5408719778060913,
      "rewards/margins": 0.16390959918498993,
      "rewards/rejected": -0.7047815918922424,
      "step": 220,
      "use_label": 0.0
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8515625,
      "learning_rate": 3.089397338773569e-06,
      "logits/chosen": 0.16266627609729767,
      "logits/rejected": 0.2626825273036957,
      "logps/chosen": -93.3644027709961,
      "logps/rejected": -119.67996978759766,
      "loss": 0.6261,
      "pred_label": 0.0,
      "rewards/accuracies": 0.3187499940395355,
      "rewards/chosen": -0.28929832577705383,
      "rewards/margins": 0.27991363406181335,
      "rewards/rejected": -0.5692119598388672,
      "step": 230,
      "use_label": 0.0
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8984375,
      "learning_rate": 2.9100607788275547e-06,
      "logits/chosen": 0.854693591594696,
      "logits/rejected": 0.7261193990707397,
      "logps/chosen": -99.00528717041016,
      "logps/rejected": -135.73580932617188,
      "loss": 0.6295,
      "pred_label": 0.0,
      "rewards/accuracies": 0.3687500059604645,
      "rewards/chosen": -0.2997274696826935,
      "rewards/margins": 0.3153937757015228,
      "rewards/rejected": -0.6151211857795715,
      "step": 240,
      "use_label": 0.0
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.03125,
      "learning_rate": 2.72852616010567e-06,
      "logits/chosen": 0.6816203594207764,
      "logits/rejected": 0.7033491134643555,
      "logps/chosen": -119.7255859375,
      "logps/rejected": -144.8857421875,
      "loss": 0.6376,
      "pred_label": 0.0,
      "rewards/accuracies": 0.3812499940395355,
      "rewards/chosen": -0.4632648825645447,
      "rewards/margins": 0.2932681143283844,
      "rewards/rejected": -0.7565330266952515,
      "step": 250,
      "use_label": 0.0
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8984375,
      "learning_rate": 2.5457665670441937e-06,
      "logits/chosen": 0.5938165187835693,
      "logits/rejected": 0.5592354536056519,
      "logps/chosen": -110.32804870605469,
      "logps/rejected": -146.76275634765625,
      "loss": 0.6162,
      "pred_label": 0.0,
      "rewards/accuracies": 0.34375,
      "rewards/chosen": -0.44222426414489746,
      "rewards/margins": 0.2809238135814667,
      "rewards/rejected": -0.7231480479240417,
      "step": 260,
      "use_label": 0.0
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.90625,
      "learning_rate": 2.3627616503391813e-06,
      "logits/chosen": 0.6390979290008545,
      "logits/rejected": 0.5789315700531006,
      "logps/chosen": -123.83528137207031,
      "logps/rejected": -144.61489868164062,
      "loss": 0.6162,
      "pred_label": 0.0,
      "rewards/accuracies": 0.36250001192092896,
      "rewards/chosen": -0.5091949701309204,
      "rewards/margins": 0.24320097267627716,
      "rewards/rejected": -0.7523959279060364,
      "step": 270,
      "use_label": 0.0
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.34375,
      "learning_rate": 2.1804923757009885e-06,
      "logits/chosen": 0.8771865963935852,
      "logits/rejected": 1.0158352851867676,
      "logps/chosen": -118.5296859741211,
      "logps/rejected": -138.31729125976562,
      "loss": 0.6357,
      "pred_label": 0.0,
      "rewards/accuracies": 0.30000001192092896,
      "rewards/chosen": -0.5302416086196899,
      "rewards/margins": 0.2237352430820465,
      "rewards/rejected": -0.7539768218994141,
      "step": 280,
      "use_label": 0.0
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.59375,
      "learning_rate": 1.9999357655598894e-06,
      "logits/chosen": 0.44083184003829956,
      "logits/rejected": 0.41123947501182556,
      "logps/chosen": -112.27372741699219,
      "logps/rejected": -146.95498657226562,
      "loss": 0.6228,
      "pred_label": 0.0,
      "rewards/accuracies": 0.30000001192092896,
      "rewards/chosen": -0.4572528004646301,
      "rewards/margins": 0.24868395924568176,
      "rewards/rejected": -0.7059367299079895,
      "step": 290,
      "use_label": 0.0
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.34375,
      "learning_rate": 1.8220596619089576e-06,
      "logits/chosen": 0.6273639798164368,
      "logits/rejected": 0.5140804052352905,
      "logps/chosen": -123.02046966552734,
      "logps/rejected": -168.80987548828125,
      "loss": 0.6196,
      "pred_label": 0.0,
      "rewards/accuracies": 0.40625,
      "rewards/chosen": -0.4542613625526428,
      "rewards/margins": 0.2926333546638489,
      "rewards/rejected": -0.7468947172164917,
      "step": 300,
      "use_label": 0.0
    },
    {
      "epoch": 0.63,
      "eval_logits/chosen": 1.0944873094558716,
      "eval_logits/rejected": 1.1831356287002563,
      "eval_logps/chosen": -102.62176513671875,
      "eval_logps/rejected": -150.12503051757812,
      "eval_loss": 0.618873655796051,
      "eval_pred_label": 0.0,
      "eval_rewards/accuracies": 0.375,
      "eval_rewards/chosen": -0.3870951533317566,
      "eval_rewards/margins": 0.34222573041915894,
      "eval_rewards/rejected": -0.7293209433555603,
      "eval_runtime": 125.4362,
      "eval_samples_per_second": 15.944,
      "eval_steps_per_second": 0.255,
      "eval_use_label": 0.0,
      "step": 300
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8515625,
      "learning_rate": 1.647817538357072e-06,
      "logits/chosen": 0.8131985664367676,
      "logits/rejected": 0.8752232789993286,
      "logps/chosen": -91.52378845214844,
      "logps/rejected": -139.95840454101562,
      "loss": 0.5999,
      "pred_label": 0.0,
      "rewards/accuracies": 0.3687500059604645,
      "rewards/chosen": -0.3592718541622162,
      "rewards/margins": 0.3578081727027893,
      "rewards/rejected": -0.7170799970626831,
      "step": 310,
      "use_label": 0.0
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.40625,
      "learning_rate": 1.4781433892011132e-06,
      "logits/chosen": 0.9751952886581421,
      "logits/rejected": 1.1630818843841553,
      "logps/chosen": -135.82566833496094,
      "logps/rejected": -168.11805725097656,
      "loss": 0.6109,
      "pred_label": 0.0,
      "rewards/accuracies": 0.3687500059604645,
      "rewards/chosen": -0.6275521516799927,
      "rewards/margins": 0.3816707730293274,
      "rewards/rejected": -1.0092228651046753,
      "step": 320,
      "use_label": 0.0
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.984375,
      "learning_rate": 1.3139467229135999e-06,
      "logits/chosen": 1.3293979167938232,
      "logits/rejected": 1.3260401487350464,
      "logps/chosen": -135.96664428710938,
      "logps/rejected": -166.52359008789062,
      "loss": 0.6295,
      "pred_label": 0.0,
      "rewards/accuracies": 0.33125001192092896,
      "rewards/chosen": -0.6585850715637207,
      "rewards/margins": 0.3205706775188446,
      "rewards/rejected": -0.9791557192802429,
      "step": 330,
      "use_label": 0.0
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.09375,
      "learning_rate": 1.1561076868822756e-06,
      "logits/chosen": 0.7383319139480591,
      "logits/rejected": 0.6407849192619324,
      "logps/chosen": -150.60504150390625,
      "logps/rejected": -166.74940490722656,
      "loss": 0.6247,
      "pred_label": 0.0,
      "rewards/accuracies": 0.3499999940395355,
      "rewards/chosen": -0.658658504486084,
      "rewards/margins": 0.24373307824134827,
      "rewards/rejected": -0.9023915529251099,
      "step": 340,
      "use_label": 0.0
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.21875,
      "learning_rate": 1.0054723495346484e-06,
      "logits/chosen": 0.6359546184539795,
      "logits/rejected": 0.7167641520500183,
      "logps/chosen": -163.8385772705078,
      "logps/rejected": -195.6297607421875,
      "loss": 0.6138,
      "pred_label": 0.0,
      "rewards/accuracies": 0.36250001192092896,
      "rewards/chosen": -0.7442194819450378,
      "rewards/margins": 0.3593973219394684,
      "rewards/rejected": -1.103616714477539,
      "step": 350,
      "use_label": 0.0
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.859375,
      "learning_rate": 8.628481651367876e-07,
      "logits/chosen": 0.7298086881637573,
      "logits/rejected": 0.8517257571220398,
      "logps/chosen": -119.41548156738281,
      "logps/rejected": -165.3460235595703,
      "loss": 0.6137,
      "pred_label": 0.0,
      "rewards/accuracies": 0.3812499940395355,
      "rewards/chosen": -0.5577787160873413,
      "rewards/margins": 0.37339919805526733,
      "rewards/rejected": -0.9311779141426086,
      "step": 360,
      "use_label": 0.0
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.421875,
      "learning_rate": 7.289996455765749e-07,
      "logits/chosen": 0.8383787274360657,
      "logits/rejected": 0.9305205345153809,
      "logps/chosen": -111.84449768066406,
      "logps/rejected": -153.93136596679688,
      "loss": 0.6125,
      "pred_label": 0.0,
      "rewards/accuracies": 0.3499999940395355,
      "rewards/chosen": -0.46409696340560913,
      "rewards/margins": 0.39606258273124695,
      "rewards/rejected": -0.8601595759391785,
      "step": 370,
      "use_label": 0.0
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8984375,
      "learning_rate": 6.046442623320145e-07,
      "logits/chosen": 0.5329448580741882,
      "logits/rejected": 0.513522744178772,
      "logps/chosen": -116.62841796875,
      "logps/rejected": -165.17893981933594,
      "loss": 0.6191,
      "pred_label": 0.0,
      "rewards/accuracies": 0.3187499940395355,
      "rewards/chosen": -0.5079302787780762,
      "rewards/margins": 0.2802185118198395,
      "rewards/rejected": -0.7881487607955933,
      "step": 380,
      "use_label": 0.0
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.4375,
      "learning_rate": 4.904486005914027e-07,
      "logits/chosen": 0.8266662359237671,
      "logits/rejected": 0.5234752893447876,
      "logps/chosen": -159.83407592773438,
      "logps/rejected": -186.96768188476562,
      "loss": 0.6085,
      "pred_label": 0.0,
      "rewards/accuracies": 0.38749998807907104,
      "rewards/chosen": -0.6701575517654419,
      "rewards/margins": 0.36982032656669617,
      "rewards/rejected": -1.039977788925171,
      "step": 390,
      "use_label": 0.0
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.46875,
      "learning_rate": 3.8702478614051353e-07,
      "logits/chosen": 0.511390745639801,
      "logits/rejected": 0.6720080971717834,
      "logps/chosen": -116.7987060546875,
      "logps/rejected": -141.3931884765625,
      "loss": 0.6139,
      "pred_label": 0.0,
      "rewards/accuracies": 0.3812499940395355,
      "rewards/chosen": -0.4430771768093109,
      "rewards/margins": 0.3362268805503845,
      "rewards/rejected": -0.779304027557373,
      "step": 400,
      "use_label": 0.0
    },
    {
      "epoch": 0.84,
      "eval_logits/chosen": 1.4532994031906128,
      "eval_logits/rejected": 1.5453113317489624,
      "eval_logps/chosen": -112.56050109863281,
      "eval_logps/rejected": -162.19764709472656,
      "eval_loss": 0.6157013177871704,
      "eval_pred_label": 0.0,
      "eval_rewards/accuracies": 0.37109375,
      "eval_rewards/chosen": -0.4864824414253235,
      "eval_rewards/margins": 0.36356455087661743,
      "eval_rewards/rejected": -0.8500469923019409,
      "eval_runtime": 125.4203,
      "eval_samples_per_second": 15.946,
      "eval_steps_per_second": 0.255,
      "eval_use_label": 0.0,
      "step": 400
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.203125,
      "learning_rate": 2.9492720416985004e-07,
      "logits/chosen": 0.8359997868537903,
      "logits/rejected": 0.8144146800041199,
      "logps/chosen": -110.30177307128906,
      "logps/rejected": -143.6800079345703,
      "loss": 0.6222,
      "pred_label": 0.0,
      "rewards/accuracies": 0.39375001192092896,
      "rewards/chosen": -0.4887877404689789,
      "rewards/margins": 0.3508199453353882,
      "rewards/rejected": -0.8396075963973999,
      "step": 410,
      "use_label": 0.0
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.984375,
      "learning_rate": 2.1464952759020857e-07,
      "logits/chosen": 1.027252435684204,
      "logits/rejected": 0.9827619791030884,
      "logps/chosen": -106.49784851074219,
      "logps/rejected": -116.97566223144531,
      "loss": 0.6216,
      "pred_label": 0.0,
      "rewards/accuracies": 0.2750000059604645,
      "rewards/chosen": -0.4555872976779938,
      "rewards/margins": 0.20033884048461914,
      "rewards/rejected": -0.6559261083602905,
      "step": 420,
      "use_label": 0.0
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.96875,
      "learning_rate": 1.4662207078575685e-07,
      "logits/chosen": 0.9206047058105469,
      "logits/rejected": 0.8673297166824341,
      "logps/chosen": -151.376220703125,
      "logps/rejected": -178.04725646972656,
      "loss": 0.5986,
      "pred_label": 0.0,
      "rewards/accuracies": 0.45625001192092896,
      "rewards/chosen": -0.5210937261581421,
      "rewards/margins": 0.46580758690834045,
      "rewards/rejected": -0.9869012832641602,
      "step": 430,
      "use_label": 0.0
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.125,
      "learning_rate": 9.120948298936422e-08,
      "logits/chosen": 0.9004503488540649,
      "logits/rejected": 1.0573413372039795,
      "logps/chosen": -119.21500396728516,
      "logps/rejected": -165.19241333007812,
      "loss": 0.6064,
      "pred_label": 0.0,
      "rewards/accuracies": 0.35624998807907104,
      "rewards/chosen": -0.5231102705001831,
      "rewards/margins": 0.37818416953086853,
      "rewards/rejected": -0.9012944102287292,
      "step": 440,
      "use_label": 0.0
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.46875,
      "learning_rate": 4.870879364444109e-08,
      "logits/chosen": 1.300728440284729,
      "logits/rejected": 1.0580918788909912,
      "logps/chosen": -129.29281616210938,
      "logps/rejected": -178.3690948486328,
      "loss": 0.6111,
      "pred_label": 0.0,
      "rewards/accuracies": 0.3499999940395355,
      "rewards/chosen": -0.570349931716919,
      "rewards/margins": 0.3304445147514343,
      "rewards/rejected": -0.9007943868637085,
      "step": 450,
      "use_label": 0.0
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8359375,
      "learning_rate": 1.93478202307823e-08,
      "logits/chosen": 1.1906068325042725,
      "logits/rejected": 1.2149587869644165,
      "logps/chosen": -83.74864196777344,
      "logps/rejected": -130.91348266601562,
      "loss": 0.6154,
      "pred_label": 0.0,
      "rewards/accuracies": 0.3375000059604645,
      "rewards/chosen": -0.3762877583503723,
      "rewards/margins": 0.2993956208229065,
      "rewards/rejected": -0.6756833791732788,
      "step": 460,
      "use_label": 0.0
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.375,
      "learning_rate": 3.283947088983663e-09,
      "logits/chosen": 1.1844379901885986,
      "logits/rejected": 0.9474547505378723,
      "logps/chosen": -113.1079330444336,
      "logps/rejected": -141.49147033691406,
      "loss": 0.6213,
      "pred_label": 0.0,
      "rewards/accuracies": 0.3125,
      "rewards/chosen": -0.4577876627445221,
      "rewards/margins": 0.26655709743499756,
      "rewards/rejected": -0.7243447303771973,
      "step": 470,
      "use_label": 0.0
    },
    {
      "epoch": 1.0,
      "step": 477,
      "total_flos": 0.0,
      "train_loss": 0.6357159084743924,
      "train_runtime": 9601.7268,
      "train_samples_per_second": 6.367,
      "train_steps_per_second": 0.05
    }
  ],
  "logging_steps": 10,
  "max_steps": 477,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}