zephyr-7b / trainer_state.json
jikaixuan's picture
Model save
6b1b603 verified
raw
history blame
30 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984301412872841,
"eval_steps": 100,
"global_step": 477,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 0.400390625,
"learning_rate": 1.0416666666666667e-07,
"logits/chosen": -2.2547454833984375,
"logits/rejected": -2.401865005493164,
"logps/chosen": -53.759212493896484,
"logps/rejected": -48.83185958862305,
"loss": 0.6931,
"pred_label": 0.0,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1,
"use_label": 0.0
},
{
"epoch": 0.02,
"grad_norm": 0.4609375,
"learning_rate": 1.0416666666666667e-06,
"logits/chosen": -2.2421462535858154,
"logits/rejected": -2.2770614624023438,
"logps/chosen": -51.98179626464844,
"logps/rejected": -64.9604263305664,
"loss": 0.6929,
"pred_label": 0.0,
"rewards/accuracies": 0.2222222238779068,
"rewards/chosen": 0.001975727966055274,
"rewards/margins": 0.00047667179023846984,
"rewards/rejected": 0.001499056350439787,
"step": 10,
"use_label": 0.0
},
{
"epoch": 0.04,
"grad_norm": 0.39453125,
"learning_rate": 2.0833333333333334e-06,
"logits/chosen": -2.2520272731781006,
"logits/rejected": -2.255510091781616,
"logps/chosen": -62.492515563964844,
"logps/rejected": -72.63607788085938,
"loss": 0.6919,
"pred_label": 0.0,
"rewards/accuracies": 0.2874999940395355,
"rewards/chosen": 0.01601376011967659,
"rewards/margins": 0.0011284304782748222,
"rewards/rejected": 0.014885328710079193,
"step": 20,
"use_label": 0.0
},
{
"epoch": 0.06,
"grad_norm": 0.5078125,
"learning_rate": 3.125e-06,
"logits/chosen": -2.3422012329101562,
"logits/rejected": -2.3548905849456787,
"logps/chosen": -79.14694213867188,
"logps/rejected": -98.82722473144531,
"loss": 0.6898,
"pred_label": 0.0,
"rewards/accuracies": 0.2874999940395355,
"rewards/chosen": 0.030949687585234642,
"rewards/margins": 0.0029636542312800884,
"rewards/rejected": 0.027986034750938416,
"step": 30,
"use_label": 0.0
},
{
"epoch": 0.08,
"grad_norm": 0.515625,
"learning_rate": 4.166666666666667e-06,
"logits/chosen": -2.322833776473999,
"logits/rejected": -2.3010501861572266,
"logps/chosen": -82.85880279541016,
"logps/rejected": -82.40392303466797,
"loss": 0.6866,
"pred_label": 0.0,
"rewards/accuracies": 0.2874999940395355,
"rewards/chosen": 0.033333443105220795,
"rewards/margins": 0.011918319389224052,
"rewards/rejected": 0.021415119990706444,
"step": 40,
"use_label": 0.0
},
{
"epoch": 0.1,
"grad_norm": 0.67578125,
"learning_rate": 4.999731868769027e-06,
"logits/chosen": -2.241189956665039,
"logits/rejected": -2.263849973678589,
"logps/chosen": -67.93062591552734,
"logps/rejected": -81.85546875,
"loss": 0.6805,
"pred_label": 0.0,
"rewards/accuracies": 0.32499998807907104,
"rewards/chosen": 0.009002490900456905,
"rewards/margins": 0.03016103245317936,
"rewards/rejected": -0.02115854248404503,
"step": 50,
"use_label": 0.0
},
{
"epoch": 0.13,
"grad_norm": 1.09375,
"learning_rate": 4.9903533134293035e-06,
"logits/chosen": -2.218756914138794,
"logits/rejected": -2.1594481468200684,
"logps/chosen": -62.0407600402832,
"logps/rejected": -71.9369888305664,
"loss": 0.6748,
"pred_label": 0.0,
"rewards/accuracies": 0.3062500059604645,
"rewards/chosen": -0.0231451578438282,
"rewards/margins": 0.04653460532426834,
"rewards/rejected": -0.06967976689338684,
"step": 60,
"use_label": 0.0
},
{
"epoch": 0.15,
"grad_norm": 0.8984375,
"learning_rate": 4.967625656594782e-06,
"logits/chosen": -2.08909273147583,
"logits/rejected": -2.088801383972168,
"logps/chosen": -68.09326171875,
"logps/rejected": -81.9454116821289,
"loss": 0.6684,
"pred_label": 0.0,
"rewards/accuracies": 0.25,
"rewards/chosen": -0.12382155656814575,
"rewards/margins": 0.03761869668960571,
"rewards/rejected": -0.16144026815891266,
"step": 70,
"use_label": 0.0
},
{
"epoch": 0.17,
"grad_norm": 1.15625,
"learning_rate": 4.93167072587771e-06,
"logits/chosen": -2.20400071144104,
"logits/rejected": -2.1452622413635254,
"logps/chosen": -55.867881774902344,
"logps/rejected": -70.91771697998047,
"loss": 0.6588,
"pred_label": 0.0,
"rewards/accuracies": 0.26875001192092896,
"rewards/chosen": -0.0733698159456253,
"rewards/margins": 0.10403277724981308,
"rewards/rejected": -0.17740261554718018,
"step": 80,
"use_label": 0.0
},
{
"epoch": 0.19,
"grad_norm": 1.0546875,
"learning_rate": 4.882681251368549e-06,
"logits/chosen": -1.991231918334961,
"logits/rejected": -1.9964717626571655,
"logps/chosen": -72.28443908691406,
"logps/rejected": -90.79218292236328,
"loss": 0.6587,
"pred_label": 0.0,
"rewards/accuracies": 0.30000001192092896,
"rewards/chosen": -0.13902384042739868,
"rewards/margins": 0.08125626295804977,
"rewards/rejected": -0.22028008103370667,
"step": 90,
"use_label": 0.0
},
{
"epoch": 0.21,
"grad_norm": 2.359375,
"learning_rate": 4.8209198325401815e-06,
"logits/chosen": -1.9231764078140259,
"logits/rejected": -1.9043807983398438,
"logps/chosen": -103.5636978149414,
"logps/rejected": -96.08602142333984,
"loss": 0.6551,
"pred_label": 0.0,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": -0.2353379726409912,
"rewards/margins": 0.08685441315174103,
"rewards/rejected": -0.32219237089157104,
"step": 100,
"use_label": 0.0
},
{
"epoch": 0.21,
"eval_logits/chosen": -1.762041687965393,
"eval_logits/rejected": -1.7460479736328125,
"eval_logps/chosen": -87.55253601074219,
"eval_logps/rejected": -114.47212219238281,
"eval_loss": 0.652633547782898,
"eval_pred_label": 0.0,
"eval_rewards/accuracies": 0.3359375,
"eval_rewards/chosen": -0.23640292882919312,
"eval_rewards/margins": 0.136388897895813,
"eval_rewards/rejected": -0.3727918267250061,
"eval_runtime": 125.4491,
"eval_samples_per_second": 15.943,
"eval_steps_per_second": 0.255,
"eval_use_label": 0.0,
"step": 100
},
{
"epoch": 0.23,
"grad_norm": 1.59375,
"learning_rate": 4.746717530629565e-06,
"logits/chosen": -1.7847106456756592,
"logits/rejected": -1.7590484619140625,
"logps/chosen": -85.73925018310547,
"logps/rejected": -106.20509338378906,
"loss": 0.6557,
"pred_label": 0.0,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": -0.14638465642929077,
"rewards/margins": 0.12975916266441345,
"rewards/rejected": -0.2761438190937042,
"step": 110,
"use_label": 0.0
},
{
"epoch": 0.25,
"grad_norm": 1.828125,
"learning_rate": 4.660472094042121e-06,
"logits/chosen": -1.1902318000793457,
"logits/rejected": -1.0542975664138794,
"logps/chosen": -108.4779052734375,
"logps/rejected": -127.95109558105469,
"loss": 0.6493,
"pred_label": 0.0,
"rewards/accuracies": 0.36250001192092896,
"rewards/chosen": -0.38532325625419617,
"rewards/margins": 0.1649974286556244,
"rewards/rejected": -0.5503206849098206,
"step": 120,
"use_label": 0.0
},
{
"epoch": 0.27,
"grad_norm": 1.9375,
"learning_rate": 4.5626458262912745e-06,
"logits/chosen": -0.818010687828064,
"logits/rejected": -0.7847374081611633,
"logps/chosen": -109.61775207519531,
"logps/rejected": -133.42086791992188,
"loss": 0.6524,
"pred_label": 0.0,
"rewards/accuracies": 0.33125001192092896,
"rewards/chosen": -0.43839359283447266,
"rewards/margins": 0.16735044121742249,
"rewards/rejected": -0.6057440638542175,
"step": 130,
"use_label": 0.0
},
{
"epoch": 0.29,
"grad_norm": 1.71875,
"learning_rate": 4.453763107901676e-06,
"logits/chosen": -0.7395650148391724,
"logits/rejected": -0.8444339036941528,
"logps/chosen": -116.97528076171875,
"logps/rejected": -130.2399139404297,
"loss": 0.6381,
"pred_label": 0.0,
"rewards/accuracies": 0.33125001192092896,
"rewards/chosen": -0.3622770607471466,
"rewards/margins": 0.1490650475025177,
"rewards/rejected": -0.5113420486450195,
"step": 140,
"use_label": 0.0
},
{
"epoch": 0.31,
"grad_norm": 2.125,
"learning_rate": 4.33440758555951e-06,
"logits/chosen": -0.6497868299484253,
"logits/rejected": -0.6378159523010254,
"logps/chosen": -89.60552978515625,
"logps/rejected": -115.42192077636719,
"loss": 0.6379,
"pred_label": 0.0,
"rewards/accuracies": 0.3187499940395355,
"rewards/chosen": -0.2445882111787796,
"rewards/margins": 0.23124215006828308,
"rewards/rejected": -0.4758303761482239,
"step": 150,
"use_label": 0.0
},
{
"epoch": 0.33,
"grad_norm": 2.15625,
"learning_rate": 4.205219043576955e-06,
"logits/chosen": -0.3159053921699524,
"logits/rejected": -0.33064812421798706,
"logps/chosen": -99.68696594238281,
"logps/rejected": -129.45729064941406,
"loss": 0.6317,
"pred_label": 0.0,
"rewards/accuracies": 0.2874999940395355,
"rewards/chosen": -0.35356926918029785,
"rewards/margins": 0.16687795519828796,
"rewards/rejected": -0.5204472541809082,
"step": 160,
"use_label": 0.0
},
{
"epoch": 0.36,
"grad_norm": 2.4375,
"learning_rate": 4.066889974440757e-06,
"logits/chosen": 0.14531800150871277,
"logits/rejected": 0.18166163563728333,
"logps/chosen": -95.45491027832031,
"logps/rejected": -125.1463623046875,
"loss": 0.6291,
"pred_label": 0.0,
"rewards/accuracies": 0.29374998807907104,
"rewards/chosen": -0.39946848154067993,
"rewards/margins": 0.20978550612926483,
"rewards/rejected": -0.609254002571106,
"step": 170,
"use_label": 0.0
},
{
"epoch": 0.38,
"grad_norm": 2.453125,
"learning_rate": 3.92016186682789e-06,
"logits/chosen": -0.3282355070114136,
"logits/rejected": -0.21966704726219177,
"logps/chosen": -108.00712585449219,
"logps/rejected": -128.67587280273438,
"loss": 0.649,
"pred_label": 0.0,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": -0.4521949887275696,
"rewards/margins": 0.27172034978866577,
"rewards/rejected": -0.7239152789115906,
"step": 180,
"use_label": 0.0
},
{
"epoch": 0.4,
"grad_norm": 1.84375,
"learning_rate": 3.7658212309857576e-06,
"logits/chosen": -0.889633297920227,
"logits/rejected": -0.6851574778556824,
"logps/chosen": -91.25111389160156,
"logps/rejected": -118.9649887084961,
"loss": 0.6461,
"pred_label": 0.0,
"rewards/accuracies": 0.33125001192092896,
"rewards/chosen": -0.32139474153518677,
"rewards/margins": 0.22424864768981934,
"rewards/rejected": -0.5456433892250061,
"step": 190,
"use_label": 0.0
},
{
"epoch": 0.42,
"grad_norm": 1.9453125,
"learning_rate": 3.604695382782159e-06,
"logits/chosen": -0.8204952478408813,
"logits/rejected": -0.7186430096626282,
"logps/chosen": -112.41142272949219,
"logps/rejected": -120.7835693359375,
"loss": 0.6376,
"pred_label": 0.0,
"rewards/accuracies": 0.3125,
"rewards/chosen": -0.30735117197036743,
"rewards/margins": 0.169038325548172,
"rewards/rejected": -0.47638946771621704,
"step": 200,
"use_label": 0.0
},
{
"epoch": 0.42,
"eval_logits/chosen": -0.023804781958460808,
"eval_logits/rejected": 0.04317883029580116,
"eval_logps/chosen": -97.96138000488281,
"eval_logps/rejected": -137.9141845703125,
"eval_loss": 0.6288520693778992,
"eval_pred_label": 0.0,
"eval_rewards/accuracies": 0.3671875,
"eval_rewards/chosen": -0.34049129486083984,
"eval_rewards/margins": 0.26672109961509705,
"eval_rewards/rejected": -0.6072123646736145,
"eval_runtime": 125.433,
"eval_samples_per_second": 15.945,
"eval_steps_per_second": 0.255,
"eval_use_label": 0.0,
"step": 200
},
{
"epoch": 0.44,
"grad_norm": 2.265625,
"learning_rate": 3.437648009023905e-06,
"logits/chosen": -0.05805685371160507,
"logits/rejected": -0.06056814268231392,
"logps/chosen": -88.78871154785156,
"logps/rejected": -124.3318862915039,
"loss": 0.6218,
"pred_label": 0.0,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.3281395435333252,
"rewards/margins": 0.28538644313812256,
"rewards/rejected": -0.613525927066803,
"step": 210,
"use_label": 0.0
},
{
"epoch": 0.46,
"grad_norm": 2.21875,
"learning_rate": 3.265574537815398e-06,
"logits/chosen": -0.1400775909423828,
"logits/rejected": -0.005620801355689764,
"logps/chosen": -133.7158660888672,
"logps/rejected": -136.84619140625,
"loss": 0.627,
"pred_label": 0.0,
"rewards/accuracies": 0.30000001192092896,
"rewards/chosen": -0.5408719778060913,
"rewards/margins": 0.16390959918498993,
"rewards/rejected": -0.7047815918922424,
"step": 220,
"use_label": 0.0
},
{
"epoch": 0.48,
"grad_norm": 1.8515625,
"learning_rate": 3.089397338773569e-06,
"logits/chosen": 0.16266627609729767,
"logits/rejected": 0.2626825273036957,
"logps/chosen": -93.3644027709961,
"logps/rejected": -119.67996978759766,
"loss": 0.6261,
"pred_label": 0.0,
"rewards/accuracies": 0.3187499940395355,
"rewards/chosen": -0.28929832577705383,
"rewards/margins": 0.27991363406181335,
"rewards/rejected": -0.5692119598388672,
"step": 230,
"use_label": 0.0
},
{
"epoch": 0.5,
"grad_norm": 1.8984375,
"learning_rate": 2.9100607788275547e-06,
"logits/chosen": 0.854693591594696,
"logits/rejected": 0.7261193990707397,
"logps/chosen": -99.00528717041016,
"logps/rejected": -135.73580932617188,
"loss": 0.6295,
"pred_label": 0.0,
"rewards/accuracies": 0.3687500059604645,
"rewards/chosen": -0.2997274696826935,
"rewards/margins": 0.3153937757015228,
"rewards/rejected": -0.6151211857795715,
"step": 240,
"use_label": 0.0
},
{
"epoch": 0.52,
"grad_norm": 2.03125,
"learning_rate": 2.72852616010567e-06,
"logits/chosen": 0.6816203594207764,
"logits/rejected": 0.7033491134643555,
"logps/chosen": -119.7255859375,
"logps/rejected": -144.8857421875,
"loss": 0.6376,
"pred_label": 0.0,
"rewards/accuracies": 0.3812499940395355,
"rewards/chosen": -0.4632648825645447,
"rewards/margins": 0.2932681143283844,
"rewards/rejected": -0.7565330266952515,
"step": 250,
"use_label": 0.0
},
{
"epoch": 0.54,
"grad_norm": 1.8984375,
"learning_rate": 2.5457665670441937e-06,
"logits/chosen": 0.5938165187835693,
"logits/rejected": 0.5592354536056519,
"logps/chosen": -110.32804870605469,
"logps/rejected": -146.76275634765625,
"loss": 0.6162,
"pred_label": 0.0,
"rewards/accuracies": 0.34375,
"rewards/chosen": -0.44222426414489746,
"rewards/margins": 0.2809238135814667,
"rewards/rejected": -0.7231480479240417,
"step": 260,
"use_label": 0.0
},
{
"epoch": 0.57,
"grad_norm": 2.90625,
"learning_rate": 2.3627616503391813e-06,
"logits/chosen": 0.6390979290008545,
"logits/rejected": 0.5789315700531006,
"logps/chosen": -123.83528137207031,
"logps/rejected": -144.61489868164062,
"loss": 0.6162,
"pred_label": 0.0,
"rewards/accuracies": 0.36250001192092896,
"rewards/chosen": -0.5091949701309204,
"rewards/margins": 0.24320097267627716,
"rewards/rejected": -0.7523959279060364,
"step": 270,
"use_label": 0.0
},
{
"epoch": 0.59,
"grad_norm": 2.34375,
"learning_rate": 2.1804923757009885e-06,
"logits/chosen": 0.8771865963935852,
"logits/rejected": 1.0158352851867676,
"logps/chosen": -118.5296859741211,
"logps/rejected": -138.31729125976562,
"loss": 0.6357,
"pred_label": 0.0,
"rewards/accuracies": 0.30000001192092896,
"rewards/chosen": -0.5302416086196899,
"rewards/margins": 0.2237352430820465,
"rewards/rejected": -0.7539768218994141,
"step": 280,
"use_label": 0.0
},
{
"epoch": 0.61,
"grad_norm": 2.59375,
"learning_rate": 1.9999357655598894e-06,
"logits/chosen": 0.44083184003829956,
"logits/rejected": 0.41123947501182556,
"logps/chosen": -112.27372741699219,
"logps/rejected": -146.95498657226562,
"loss": 0.6228,
"pred_label": 0.0,
"rewards/accuracies": 0.30000001192092896,
"rewards/chosen": -0.4572528004646301,
"rewards/margins": 0.24868395924568176,
"rewards/rejected": -0.7059367299079895,
"step": 290,
"use_label": 0.0
},
{
"epoch": 0.63,
"grad_norm": 2.34375,
"learning_rate": 1.8220596619089576e-06,
"logits/chosen": 0.6273639798164368,
"logits/rejected": 0.5140804052352905,
"logps/chosen": -123.02046966552734,
"logps/rejected": -168.80987548828125,
"loss": 0.6196,
"pred_label": 0.0,
"rewards/accuracies": 0.40625,
"rewards/chosen": -0.4542613625526428,
"rewards/margins": 0.2926333546638489,
"rewards/rejected": -0.7468947172164917,
"step": 300,
"use_label": 0.0
},
{
"epoch": 0.63,
"eval_logits/chosen": 1.0944873094558716,
"eval_logits/rejected": 1.1831356287002563,
"eval_logps/chosen": -102.62176513671875,
"eval_logps/rejected": -150.12503051757812,
"eval_loss": 0.618873655796051,
"eval_pred_label": 0.0,
"eval_rewards/accuracies": 0.375,
"eval_rewards/chosen": -0.3870951533317566,
"eval_rewards/margins": 0.34222573041915894,
"eval_rewards/rejected": -0.7293209433555603,
"eval_runtime": 125.4362,
"eval_samples_per_second": 15.944,
"eval_steps_per_second": 0.255,
"eval_use_label": 0.0,
"step": 300
},
{
"epoch": 0.65,
"grad_norm": 1.8515625,
"learning_rate": 1.647817538357072e-06,
"logits/chosen": 0.8131985664367676,
"logits/rejected": 0.8752232789993286,
"logps/chosen": -91.52378845214844,
"logps/rejected": -139.95840454101562,
"loss": 0.5999,
"pred_label": 0.0,
"rewards/accuracies": 0.3687500059604645,
"rewards/chosen": -0.3592718541622162,
"rewards/margins": 0.3578081727027893,
"rewards/rejected": -0.7170799970626831,
"step": 310,
"use_label": 0.0
},
{
"epoch": 0.67,
"grad_norm": 2.40625,
"learning_rate": 1.4781433892011132e-06,
"logits/chosen": 0.9751952886581421,
"logits/rejected": 1.1630818843841553,
"logps/chosen": -135.82566833496094,
"logps/rejected": -168.11805725097656,
"loss": 0.6109,
"pred_label": 0.0,
"rewards/accuracies": 0.3687500059604645,
"rewards/chosen": -0.6275521516799927,
"rewards/margins": 0.3816707730293274,
"rewards/rejected": -1.0092228651046753,
"step": 320,
"use_label": 0.0
},
{
"epoch": 0.69,
"grad_norm": 1.984375,
"learning_rate": 1.3139467229135999e-06,
"logits/chosen": 1.3293979167938232,
"logits/rejected": 1.3260401487350464,
"logps/chosen": -135.96664428710938,
"logps/rejected": -166.52359008789062,
"loss": 0.6295,
"pred_label": 0.0,
"rewards/accuracies": 0.33125001192092896,
"rewards/chosen": -0.6585850715637207,
"rewards/margins": 0.3205706775188446,
"rewards/rejected": -0.9791557192802429,
"step": 330,
"use_label": 0.0
},
{
"epoch": 0.71,
"grad_norm": 2.09375,
"learning_rate": 1.1561076868822756e-06,
"logits/chosen": 0.7383319139480591,
"logits/rejected": 0.6407849192619324,
"logps/chosen": -150.60504150390625,
"logps/rejected": -166.74940490722656,
"loss": 0.6247,
"pred_label": 0.0,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": -0.658658504486084,
"rewards/margins": 0.24373307824134827,
"rewards/rejected": -0.9023915529251099,
"step": 340,
"use_label": 0.0
},
{
"epoch": 0.73,
"grad_norm": 2.21875,
"learning_rate": 1.0054723495346484e-06,
"logits/chosen": 0.6359546184539795,
"logits/rejected": 0.7167641520500183,
"logps/chosen": -163.8385772705078,
"logps/rejected": -195.6297607421875,
"loss": 0.6138,
"pred_label": 0.0,
"rewards/accuracies": 0.36250001192092896,
"rewards/chosen": -0.7442194819450378,
"rewards/margins": 0.3593973219394684,
"rewards/rejected": -1.103616714477539,
"step": 350,
"use_label": 0.0
},
{
"epoch": 0.75,
"grad_norm": 1.859375,
"learning_rate": 8.628481651367876e-07,
"logits/chosen": 0.7298086881637573,
"logits/rejected": 0.8517257571220398,
"logps/chosen": -119.41548156738281,
"logps/rejected": -165.3460235595703,
"loss": 0.6137,
"pred_label": 0.0,
"rewards/accuracies": 0.3812499940395355,
"rewards/chosen": -0.5577787160873413,
"rewards/margins": 0.37339919805526733,
"rewards/rejected": -0.9311779141426086,
"step": 360,
"use_label": 0.0
},
{
"epoch": 0.77,
"grad_norm": 2.421875,
"learning_rate": 7.289996455765749e-07,
"logits/chosen": 0.8383787274360657,
"logits/rejected": 0.9305205345153809,
"logps/chosen": -111.84449768066406,
"logps/rejected": -153.93136596679688,
"loss": 0.6125,
"pred_label": 0.0,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": -0.46409696340560913,
"rewards/margins": 0.39606258273124695,
"rewards/rejected": -0.8601595759391785,
"step": 370,
"use_label": 0.0
},
{
"epoch": 0.8,
"grad_norm": 1.8984375,
"learning_rate": 6.046442623320145e-07,
"logits/chosen": 0.5329448580741882,
"logits/rejected": 0.513522744178772,
"logps/chosen": -116.62841796875,
"logps/rejected": -165.17893981933594,
"loss": 0.6191,
"pred_label": 0.0,
"rewards/accuracies": 0.3187499940395355,
"rewards/chosen": -0.5079302787780762,
"rewards/margins": 0.2802185118198395,
"rewards/rejected": -0.7881487607955933,
"step": 380,
"use_label": 0.0
},
{
"epoch": 0.82,
"grad_norm": 2.4375,
"learning_rate": 4.904486005914027e-07,
"logits/chosen": 0.8266662359237671,
"logits/rejected": 0.5234752893447876,
"logps/chosen": -159.83407592773438,
"logps/rejected": -186.96768188476562,
"loss": 0.6085,
"pred_label": 0.0,
"rewards/accuracies": 0.38749998807907104,
"rewards/chosen": -0.6701575517654419,
"rewards/margins": 0.36982032656669617,
"rewards/rejected": -1.039977788925171,
"step": 390,
"use_label": 0.0
},
{
"epoch": 0.84,
"grad_norm": 2.46875,
"learning_rate": 3.8702478614051353e-07,
"logits/chosen": 0.511390745639801,
"logits/rejected": 0.6720080971717834,
"logps/chosen": -116.7987060546875,
"logps/rejected": -141.3931884765625,
"loss": 0.6139,
"pred_label": 0.0,
"rewards/accuracies": 0.3812499940395355,
"rewards/chosen": -0.4430771768093109,
"rewards/margins": 0.3362268805503845,
"rewards/rejected": -0.779304027557373,
"step": 400,
"use_label": 0.0
},
{
"epoch": 0.84,
"eval_logits/chosen": 1.4532994031906128,
"eval_logits/rejected": 1.5453113317489624,
"eval_logps/chosen": -112.56050109863281,
"eval_logps/rejected": -162.19764709472656,
"eval_loss": 0.6157013177871704,
"eval_pred_label": 0.0,
"eval_rewards/accuracies": 0.37109375,
"eval_rewards/chosen": -0.4864824414253235,
"eval_rewards/margins": 0.36356455087661743,
"eval_rewards/rejected": -0.8500469923019409,
"eval_runtime": 125.4203,
"eval_samples_per_second": 15.946,
"eval_steps_per_second": 0.255,
"eval_use_label": 0.0,
"step": 400
},
{
"epoch": 0.86,
"grad_norm": 2.203125,
"learning_rate": 2.9492720416985004e-07,
"logits/chosen": 0.8359997868537903,
"logits/rejected": 0.8144146800041199,
"logps/chosen": -110.30177307128906,
"logps/rejected": -143.6800079345703,
"loss": 0.6222,
"pred_label": 0.0,
"rewards/accuracies": 0.39375001192092896,
"rewards/chosen": -0.4887877404689789,
"rewards/margins": 0.3508199453353882,
"rewards/rejected": -0.8396075963973999,
"step": 410,
"use_label": 0.0
},
{
"epoch": 0.88,
"grad_norm": 1.984375,
"learning_rate": 2.1464952759020857e-07,
"logits/chosen": 1.027252435684204,
"logits/rejected": 0.9827619791030884,
"logps/chosen": -106.49784851074219,
"logps/rejected": -116.97566223144531,
"loss": 0.6216,
"pred_label": 0.0,
"rewards/accuracies": 0.2750000059604645,
"rewards/chosen": -0.4555872976779938,
"rewards/margins": 0.20033884048461914,
"rewards/rejected": -0.6559261083602905,
"step": 420,
"use_label": 0.0
},
{
"epoch": 0.9,
"grad_norm": 1.96875,
"learning_rate": 1.4662207078575685e-07,
"logits/chosen": 0.9206047058105469,
"logits/rejected": 0.8673297166824341,
"logps/chosen": -151.376220703125,
"logps/rejected": -178.04725646972656,
"loss": 0.5986,
"pred_label": 0.0,
"rewards/accuracies": 0.45625001192092896,
"rewards/chosen": -0.5210937261581421,
"rewards/margins": 0.46580758690834045,
"rewards/rejected": -0.9869012832641602,
"step": 430,
"use_label": 0.0
},
{
"epoch": 0.92,
"grad_norm": 2.125,
"learning_rate": 9.120948298936422e-08,
"logits/chosen": 0.9004503488540649,
"logits/rejected": 1.0573413372039795,
"logps/chosen": -119.21500396728516,
"logps/rejected": -165.19241333007812,
"loss": 0.6064,
"pred_label": 0.0,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": -0.5231102705001831,
"rewards/margins": 0.37818416953086853,
"rewards/rejected": -0.9012944102287292,
"step": 440,
"use_label": 0.0
},
{
"epoch": 0.94,
"grad_norm": 2.46875,
"learning_rate": 4.870879364444109e-08,
"logits/chosen": 1.300728440284729,
"logits/rejected": 1.0580918788909912,
"logps/chosen": -129.29281616210938,
"logps/rejected": -178.3690948486328,
"loss": 0.6111,
"pred_label": 0.0,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": -0.570349931716919,
"rewards/margins": 0.3304445147514343,
"rewards/rejected": -0.9007943868637085,
"step": 450,
"use_label": 0.0
},
{
"epoch": 0.96,
"grad_norm": 1.8359375,
"learning_rate": 1.93478202307823e-08,
"logits/chosen": 1.1906068325042725,
"logits/rejected": 1.2149587869644165,
"logps/chosen": -83.74864196777344,
"logps/rejected": -130.91348266601562,
"loss": 0.6154,
"pred_label": 0.0,
"rewards/accuracies": 0.3375000059604645,
"rewards/chosen": -0.3762877583503723,
"rewards/margins": 0.2993956208229065,
"rewards/rejected": -0.6756833791732788,
"step": 460,
"use_label": 0.0
},
{
"epoch": 0.98,
"grad_norm": 2.375,
"learning_rate": 3.283947088983663e-09,
"logits/chosen": 1.1844379901885986,
"logits/rejected": 0.9474547505378723,
"logps/chosen": -113.1079330444336,
"logps/rejected": -141.49147033691406,
"loss": 0.6213,
"pred_label": 0.0,
"rewards/accuracies": 0.3125,
"rewards/chosen": -0.4577876627445221,
"rewards/margins": 0.26655709743499756,
"rewards/rejected": -0.7243447303771973,
"step": 470,
"use_label": 0.0
},
{
"epoch": 1.0,
"step": 477,
"total_flos": 0.0,
"train_loss": 0.6357159084743924,
"train_runtime": 9601.7268,
"train_samples_per_second": 6.367,
"train_steps_per_second": 0.05
}
],
"logging_steps": 10,
"max_steps": 477,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}