|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.99562408835174, |
|
"eval_steps": 200, |
|
"global_step": 1797, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 22.077117919921875, |
|
"learning_rate": 9.999251052313705e-06, |
|
"logits/chosen": 0.9130447506904602, |
|
"logits/rejected": 0.9451152682304382, |
|
"logps/chosen": -119.4351806640625, |
|
"logps/rejected": -146.38978576660156, |
|
"loss": 2.3628, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 23.45577621459961, |
|
"rewards/margins": 1.552248477935791, |
|
"rewards/rejected": 21.903528213500977, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 22.769922256469727, |
|
"learning_rate": 9.996974102027961e-06, |
|
"logits/chosen": 0.5303283929824829, |
|
"logits/rejected": 0.5954318642616272, |
|
"logps/chosen": -131.7035369873047, |
|
"logps/rejected": -162.93435668945312, |
|
"loss": 2.0403, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 23.408517837524414, |
|
"rewards/margins": 3.0596365928649902, |
|
"rewards/rejected": 20.3488826751709, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 34.49003601074219, |
|
"learning_rate": 9.993618904787861e-06, |
|
"logits/chosen": 0.23024284839630127, |
|
"logits/rejected": 0.2893335521221161, |
|
"logps/chosen": -135.048828125, |
|
"logps/rejected": -175.6779022216797, |
|
"loss": 1.5462, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 22.8612003326416, |
|
"rewards/margins": 3.707317352294922, |
|
"rewards/rejected": 19.153881072998047, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 26.99772071838379, |
|
"learning_rate": 9.98844090765316e-06, |
|
"logits/chosen": -0.12471504509449005, |
|
"logits/rejected": -0.05195974186062813, |
|
"logps/chosen": -137.30029296875, |
|
"logps/rejected": -179.8270721435547, |
|
"loss": 1.6569, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 21.461271286010742, |
|
"rewards/margins": 3.4389548301696777, |
|
"rewards/rejected": 18.022315979003906, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 39.197853088378906, |
|
"learning_rate": 9.98173813574765e-06, |
|
"logits/chosen": -0.5969198346138, |
|
"logits/rejected": -0.5180742144584656, |
|
"logps/chosen": -145.24537658691406, |
|
"logps/rejected": -207.34591674804688, |
|
"loss": 1.5324, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 21.160579681396484, |
|
"rewards/margins": 5.194663047790527, |
|
"rewards/rejected": 15.965914726257324, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 28.039043426513672, |
|
"learning_rate": 9.973512637851239e-06, |
|
"logits/chosen": -0.972032904624939, |
|
"logits/rejected": -0.9215275049209595, |
|
"logps/chosen": -159.76718139648438, |
|
"logps/rejected": -216.4589080810547, |
|
"loss": 1.515, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 19.871841430664062, |
|
"rewards/margins": 5.058460235595703, |
|
"rewards/rejected": 14.813380241394043, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 11.507994651794434, |
|
"learning_rate": 9.963766928182676e-06, |
|
"logits/chosen": -1.2355167865753174, |
|
"logits/rejected": -1.1757726669311523, |
|
"logps/chosen": -153.20492553710938, |
|
"logps/rejected": -238.68008422851562, |
|
"loss": 1.1533, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 18.97603988647461, |
|
"rewards/margins": 6.053138732910156, |
|
"rewards/rejected": 12.922900199890137, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 8.204204559326172, |
|
"learning_rate": 9.952503985631063e-06, |
|
"logits/chosen": -1.262416958808899, |
|
"logits/rejected": -1.1802202463150024, |
|
"logps/chosen": -134.965576171875, |
|
"logps/rejected": -221.21835327148438, |
|
"loss": 1.0608, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 20.67170524597168, |
|
"rewards/margins": 6.517201900482178, |
|
"rewards/rejected": 14.154504776000977, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 27.41866111755371, |
|
"learning_rate": 9.939727252845304e-06, |
|
"logits/chosen": -0.9036836624145508, |
|
"logits/rejected": -0.8652833104133606, |
|
"logps/chosen": -141.32794189453125, |
|
"logps/rejected": -192.92916870117188, |
|
"loss": 1.2558, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 21.43319320678711, |
|
"rewards/margins": 5.654860496520996, |
|
"rewards/rejected": 15.77833080291748, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 31.713287353515625, |
|
"learning_rate": 9.925440635181834e-06, |
|
"logits/chosen": -0.7909407615661621, |
|
"logits/rejected": -0.7640722393989563, |
|
"logps/chosen": -128.05874633789062, |
|
"logps/rejected": -197.05027770996094, |
|
"loss": 1.3031, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 21.722820281982422, |
|
"rewards/margins": 5.5480637550354, |
|
"rewards/rejected": 16.174755096435547, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 5.262263298034668, |
|
"learning_rate": 9.909648499510903e-06, |
|
"logits/chosen": -0.7350383400917053, |
|
"logits/rejected": -0.7220867872238159, |
|
"logps/chosen": -141.88296508789062, |
|
"logps/rejected": -192.28868103027344, |
|
"loss": 1.2729, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 20.970355987548828, |
|
"rewards/margins": 6.049419403076172, |
|
"rewards/rejected": 14.920933723449707, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 20.070709228515625, |
|
"learning_rate": 9.892355672881781e-06, |
|
"logits/chosen": -0.8407080769538879, |
|
"logits/rejected": -0.8053030967712402, |
|
"logps/chosen": -129.39187622070312, |
|
"logps/rejected": -197.65792846679688, |
|
"loss": 1.4401, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 22.391189575195312, |
|
"rewards/margins": 6.017077445983887, |
|
"rewards/rejected": 16.374113082885742, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 15.006941795349121, |
|
"learning_rate": 9.873567441047321e-06, |
|
"logits/chosen": -0.965476393699646, |
|
"logits/rejected": -0.8994636535644531, |
|
"logps/chosen": -115.93741607666016, |
|
"logps/rejected": -211.8863983154297, |
|
"loss": 0.9361, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 23.738582611083984, |
|
"rewards/margins": 7.023127555847168, |
|
"rewards/rejected": 16.715452194213867, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 21.54697608947754, |
|
"learning_rate": 9.853289546848304e-06, |
|
"logits/chosen": -1.1242530345916748, |
|
"logits/rejected": -1.0690996646881104, |
|
"logps/chosen": -136.72805786132812, |
|
"logps/rejected": -215.2025146484375, |
|
"loss": 0.994, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 21.464555740356445, |
|
"rewards/margins": 6.428616523742676, |
|
"rewards/rejected": 15.035940170288086, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 36.22259521484375, |
|
"learning_rate": 9.83152818845808e-06, |
|
"logits/chosen": -1.238879680633545, |
|
"logits/rejected": -1.212425947189331, |
|
"logps/chosen": -135.1555938720703, |
|
"logps/rejected": -223.7063751220703, |
|
"loss": 1.5207, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 21.200214385986328, |
|
"rewards/margins": 6.609138488769531, |
|
"rewards/rejected": 14.59107780456543, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 5.652963161468506, |
|
"learning_rate": 9.808290017488018e-06, |
|
"logits/chosen": -1.2760050296783447, |
|
"logits/rejected": -1.229891061782837, |
|
"logps/chosen": -124.10953521728516, |
|
"logps/rejected": -205.0720977783203, |
|
"loss": 1.1957, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 21.773719787597656, |
|
"rewards/margins": 6.910721778869629, |
|
"rewards/rejected": 14.862997055053711, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 7.20241117477417, |
|
"learning_rate": 9.783582136954363e-06, |
|
"logits/chosen": -1.022456169128418, |
|
"logits/rejected": -1.0098379850387573, |
|
"logps/chosen": -138.91571044921875, |
|
"logps/rejected": -193.36270141601562, |
|
"loss": 1.1055, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 21.772497177124023, |
|
"rewards/margins": 7.529877662658691, |
|
"rewards/rejected": 14.242624282836914, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 23.037525177001953, |
|
"learning_rate": 9.757412099107122e-06, |
|
"logits/chosen": -1.0896828174591064, |
|
"logits/rejected": -1.049005150794983, |
|
"logps/chosen": -126.07826232910156, |
|
"logps/rejected": -211.6298370361328, |
|
"loss": 1.1861, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 22.460693359375, |
|
"rewards/margins": 7.169064998626709, |
|
"rewards/rejected": 15.29162883758545, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 29.62506866455078, |
|
"learning_rate": 9.72978790312163e-06, |
|
"logits/chosen": -0.8976786732673645, |
|
"logits/rejected": -0.8687394857406616, |
|
"logps/chosen": -119.01469421386719, |
|
"logps/rejected": -198.20928955078125, |
|
"loss": 0.9781, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 23.478139877319336, |
|
"rewards/margins": 6.4206695556640625, |
|
"rewards/rejected": 17.057470321655273, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 36.63336181640625, |
|
"learning_rate": 9.700717992653505e-06, |
|
"logits/chosen": -1.0064117908477783, |
|
"logits/rejected": -0.9567538499832153, |
|
"logps/chosen": -119.5957260131836, |
|
"logps/rejected": -198.21707153320312, |
|
"loss": 1.1726, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 22.104581832885742, |
|
"rewards/margins": 6.333141803741455, |
|
"rewards/rejected": 15.771438598632812, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_logits/chosen": -0.7826857566833496, |
|
"eval_logits/rejected": -0.7607142925262451, |
|
"eval_logps/chosen": -162.48268127441406, |
|
"eval_logps/rejected": -199.3195037841797, |
|
"eval_loss": 2.607863664627075, |
|
"eval_rewards/accuracies": 0.5929077863693237, |
|
"eval_rewards/chosen": 14.876009941101074, |
|
"eval_rewards/margins": 1.181178092956543, |
|
"eval_rewards/rejected": 13.694831848144531, |
|
"eval_runtime": 941.4569, |
|
"eval_samples_per_second": 0.749, |
|
"eval_steps_per_second": 0.749, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 5.637684345245361, |
|
"learning_rate": 9.670211253257753e-06, |
|
"logits/chosen": -1.359417200088501, |
|
"logits/rejected": -1.3465789556503296, |
|
"logps/chosen": -123.56754302978516, |
|
"logps/rejected": -221.25131225585938, |
|
"loss": 0.8259, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 22.563716888427734, |
|
"rewards/margins": 8.354880332946777, |
|
"rewards/rejected": 14.208834648132324, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 18.743879318237305, |
|
"learning_rate": 9.638277009672787e-06, |
|
"logits/chosen": -1.294345736503601, |
|
"logits/rejected": -1.2438859939575195, |
|
"logps/chosen": -120.88092041015625, |
|
"logps/rejected": -219.56845092773438, |
|
"loss": 0.9568, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 22.052473068237305, |
|
"rewards/margins": 7.690165042877197, |
|
"rewards/rejected": 14.36230754852295, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 4.425251483917236, |
|
"learning_rate": 9.604925022970226e-06, |
|
"logits/chosen": -1.4860092401504517, |
|
"logits/rejected": -1.4260871410369873, |
|
"logps/chosen": -116.0375747680664, |
|
"logps/rejected": -233.6222381591797, |
|
"loss": 0.8266, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 22.423358917236328, |
|
"rewards/margins": 7.869799613952637, |
|
"rewards/rejected": 14.553558349609375, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 15.595026969909668, |
|
"learning_rate": 9.570165487571295e-06, |
|
"logits/chosen": -1.29686439037323, |
|
"logits/rejected": -1.2336632013320923, |
|
"logps/chosen": -114.66429138183594, |
|
"logps/rejected": -219.73941040039062, |
|
"loss": 0.9711, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 23.410022735595703, |
|
"rewards/margins": 8.979305267333984, |
|
"rewards/rejected": 14.430717468261719, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 8.037131309509277, |
|
"learning_rate": 9.534009028130791e-06, |
|
"logits/chosen": -1.3593500852584839, |
|
"logits/rejected": -1.3431365489959717, |
|
"logps/chosen": -116.7984619140625, |
|
"logps/rejected": -203.38714599609375, |
|
"loss": 1.0554, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 22.437034606933594, |
|
"rewards/margins": 7.695396423339844, |
|
"rewards/rejected": 14.74163818359375, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 24.600027084350586, |
|
"learning_rate": 9.496466696289533e-06, |
|
"logits/chosen": -1.296525478363037, |
|
"logits/rejected": -1.2403943538665771, |
|
"logps/chosen": -125.29974365234375, |
|
"logps/rejected": -220.273193359375, |
|
"loss": 1.0706, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 22.99033546447754, |
|
"rewards/margins": 7.625124454498291, |
|
"rewards/rejected": 15.365211486816406, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 12.19877815246582, |
|
"learning_rate": 9.4575499672963e-06, |
|
"logits/chosen": -1.330751657485962, |
|
"logits/rejected": -1.2963988780975342, |
|
"logps/chosen": -121.9324951171875, |
|
"logps/rejected": -204.8725128173828, |
|
"loss": 1.0879, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 22.846813201904297, |
|
"rewards/margins": 6.9288506507873535, |
|
"rewards/rejected": 15.917961120605469, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 61.58235168457031, |
|
"learning_rate": 9.417270736500284e-06, |
|
"logits/chosen": -1.2155901193618774, |
|
"logits/rejected": -1.1836094856262207, |
|
"logps/chosen": -120.52559661865234, |
|
"logps/rejected": -213.6286163330078, |
|
"loss": 1.044, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 22.38599967956543, |
|
"rewards/margins": 7.965991973876953, |
|
"rewards/rejected": 14.420007705688477, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 27.1291446685791, |
|
"learning_rate": 9.375641315715147e-06, |
|
"logits/chosen": -1.4014637470245361, |
|
"logits/rejected": -1.3697010278701782, |
|
"logps/chosen": -120.2726821899414, |
|
"logps/rejected": -248.0132293701172, |
|
"loss": 1.1305, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 23.45905113220215, |
|
"rewards/margins": 9.868653297424316, |
|
"rewards/rejected": 13.5903959274292, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 4.111963748931885, |
|
"learning_rate": 9.332674429455762e-06, |
|
"logits/chosen": -1.3501853942871094, |
|
"logits/rejected": -1.3238388299942017, |
|
"logps/chosen": -120.29520416259766, |
|
"logps/rejected": -219.8441619873047, |
|
"loss": 0.9532, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 23.571392059326172, |
|
"rewards/margins": 10.177915573120117, |
|
"rewards/rejected": 13.393475532531738, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 19.904033660888672, |
|
"learning_rate": 9.288383211048827e-06, |
|
"logits/chosen": -1.3211156129837036, |
|
"logits/rejected": -1.2947032451629639, |
|
"logps/chosen": -108.23663330078125, |
|
"logps/rejected": -220.5325164794922, |
|
"loss": 0.8943, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 23.694904327392578, |
|
"rewards/margins": 8.900300025939941, |
|
"rewards/rejected": 14.794604301452637, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 5.557188987731934, |
|
"learning_rate": 9.242781198618508e-06, |
|
"logits/chosen": -1.4145915508270264, |
|
"logits/rejected": -1.4117708206176758, |
|
"logps/chosen": -116.71977233886719, |
|
"logps/rejected": -205.55245971679688, |
|
"loss": 0.8282, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 24.738483428955078, |
|
"rewards/margins": 9.999399185180664, |
|
"rewards/rejected": 14.739084243774414, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 38.25541687011719, |
|
"learning_rate": 9.195882330948351e-06, |
|
"logits/chosen": -1.4355194568634033, |
|
"logits/rejected": -1.430084228515625, |
|
"logps/chosen": -116.34031677246094, |
|
"logps/rejected": -221.4161376953125, |
|
"loss": 0.7625, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 24.25949478149414, |
|
"rewards/margins": 10.02813720703125, |
|
"rewards/rejected": 14.231356620788574, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 40.876747131347656, |
|
"learning_rate": 9.147700943220737e-06, |
|
"logits/chosen": -1.4224138259887695, |
|
"logits/rejected": -1.3659865856170654, |
|
"logps/chosen": -110.4036865234375, |
|
"logps/rejected": -227.19296264648438, |
|
"loss": 0.792, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 24.380083084106445, |
|
"rewards/margins": 9.536338806152344, |
|
"rewards/rejected": 14.843744277954102, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 31.962615966796875, |
|
"learning_rate": 9.098251762635162e-06, |
|
"logits/chosen": -1.1947047710418701, |
|
"logits/rejected": -1.1642345190048218, |
|
"logps/chosen": -112.7500991821289, |
|
"logps/rejected": -222.45657348632812, |
|
"loss": 1.1409, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 23.021337509155273, |
|
"rewards/margins": 8.003363609313965, |
|
"rewards/rejected": 15.017971992492676, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 22.922439575195312, |
|
"learning_rate": 9.047549903906704e-06, |
|
"logits/chosen": -1.4528108835220337, |
|
"logits/rejected": -1.4263083934783936, |
|
"logps/chosen": -112.0550537109375, |
|
"logps/rejected": -224.366943359375, |
|
"loss": 0.7604, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 24.369909286499023, |
|
"rewards/margins": 9.932126998901367, |
|
"rewards/rejected": 14.437784194946289, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 15.393943786621094, |
|
"learning_rate": 8.99561086464603e-06, |
|
"logits/chosen": -1.4283573627471924, |
|
"logits/rejected": -1.4220378398895264, |
|
"logps/chosen": -102.64058685302734, |
|
"logps/rejected": -220.1312713623047, |
|
"loss": 0.6298, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 24.416704177856445, |
|
"rewards/margins": 9.940857887268066, |
|
"rewards/rejected": 14.475845336914062, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 31.526750564575195, |
|
"learning_rate": 8.942450520622371e-06, |
|
"logits/chosen": -1.4221882820129395, |
|
"logits/rejected": -1.4053022861480713, |
|
"logps/chosen": -96.04936218261719, |
|
"logps/rejected": -219.9590301513672, |
|
"loss": 0.8734, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 25.633686065673828, |
|
"rewards/margins": 10.437232971191406, |
|
"rewards/rejected": 15.196454048156738, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 21.012407302856445, |
|
"learning_rate": 8.888085120910917e-06, |
|
"logits/chosen": -1.5149024724960327, |
|
"logits/rejected": -1.5197780132293701, |
|
"logps/chosen": -121.2115249633789, |
|
"logps/rejected": -217.0883026123047, |
|
"loss": 0.9286, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 23.104969024658203, |
|
"rewards/margins": 8.534513473510742, |
|
"rewards/rejected": 14.570454597473145, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 31.061025619506836, |
|
"learning_rate": 8.83253128292609e-06, |
|
"logits/chosen": -1.3974319696426392, |
|
"logits/rejected": -1.365886926651001, |
|
"logps/chosen": -106.92268371582031, |
|
"logps/rejected": -213.5463409423828, |
|
"loss": 1.0028, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 23.568645477294922, |
|
"rewards/margins": 9.186344146728516, |
|
"rewards/rejected": 14.382299423217773, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_logits/chosen": -0.8742575645446777, |
|
"eval_logits/rejected": -0.8668540120124817, |
|
"eval_logps/chosen": -161.5126495361328, |
|
"eval_logps/rejected": -197.9254608154297, |
|
"eval_loss": 2.674311876296997, |
|
"eval_rewards/accuracies": 0.5843971371650696, |
|
"eval_rewards/chosen": 14.97301197052002, |
|
"eval_rewards/margins": 1.1387754678726196, |
|
"eval_rewards/rejected": 13.834238052368164, |
|
"eval_runtime": 926.938, |
|
"eval_samples_per_second": 0.761, |
|
"eval_steps_per_second": 0.761, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 2.5695064067840576, |
|
"learning_rate": 8.77580598734224e-06, |
|
"logits/chosen": -1.3901078701019287, |
|
"logits/rejected": -1.361106514930725, |
|
"logps/chosen": -100.05810546875, |
|
"logps/rejected": -217.40853881835938, |
|
"loss": 0.8265, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 24.2711181640625, |
|
"rewards/margins": 9.111645698547363, |
|
"rewards/rejected": 15.159472465515137, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 37.29205322265625, |
|
"learning_rate": 8.717926572903315e-06, |
|
"logits/chosen": -1.4033088684082031, |
|
"logits/rejected": -1.3668584823608398, |
|
"logps/chosen": -95.80254364013672, |
|
"logps/rejected": -216.8607635498047, |
|
"loss": 0.8961, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 25.137277603149414, |
|
"rewards/margins": 9.997251510620117, |
|
"rewards/rejected": 15.140027046203613, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 13.271622657775879, |
|
"learning_rate": 8.658910731123056e-06, |
|
"logits/chosen": -1.2801754474639893, |
|
"logits/rejected": -1.2473911046981812, |
|
"logps/chosen": -97.76548767089844, |
|
"logps/rejected": -216.43551635742188, |
|
"loss": 0.8534, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 24.32241439819336, |
|
"rewards/margins": 9.534911155700684, |
|
"rewards/rejected": 14.787504196166992, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 38.644073486328125, |
|
"learning_rate": 8.598776500877398e-06, |
|
"logits/chosen": -1.557314157485962, |
|
"logits/rejected": -1.5338244438171387, |
|
"logps/chosen": -101.72956085205078, |
|
"logps/rejected": -228.32504272460938, |
|
"loss": 0.7487, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 24.7454891204834, |
|
"rewards/margins": 11.237323760986328, |
|
"rewards/rejected": 13.50816535949707, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 3.0210349559783936, |
|
"learning_rate": 8.537542262890664e-06, |
|
"logits/chosen": -1.6236152648925781, |
|
"logits/rejected": -1.5971102714538574, |
|
"logps/chosen": -95.90837097167969, |
|
"logps/rejected": -224.8739471435547, |
|
"loss": 0.6576, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 26.812732696533203, |
|
"rewards/margins": 12.709887504577637, |
|
"rewards/rejected": 14.102846145629883, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 31.46013641357422, |
|
"learning_rate": 8.475226734117293e-06, |
|
"logits/chosen": -1.275268793106079, |
|
"logits/rejected": -1.2527453899383545, |
|
"logps/chosen": -90.21646881103516, |
|
"logps/rejected": -206.49935913085938, |
|
"loss": 0.6837, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 25.937389373779297, |
|
"rewards/margins": 10.999462127685547, |
|
"rewards/rejected": 14.93792724609375, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 8.342381477355957, |
|
"learning_rate": 8.411848962020786e-06, |
|
"logits/chosen": -1.283499002456665, |
|
"logits/rejected": -1.2215421199798584, |
|
"logps/chosen": -92.12187194824219, |
|
"logps/rejected": -205.30447387695312, |
|
"loss": 0.7592, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 25.408042907714844, |
|
"rewards/margins": 9.172635078430176, |
|
"rewards/rejected": 16.235408782958984, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 21.772287368774414, |
|
"learning_rate": 8.347428318751623e-06, |
|
"logits/chosen": -1.44618821144104, |
|
"logits/rejected": -1.3733718395233154, |
|
"logps/chosen": -84.4385986328125, |
|
"logps/rejected": -211.10861206054688, |
|
"loss": 0.6625, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 26.974166870117188, |
|
"rewards/margins": 11.247968673706055, |
|
"rewards/rejected": 15.726198196411133, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 2.757871389389038, |
|
"learning_rate": 8.281984495225938e-06, |
|
"logits/chosen": -1.810275673866272, |
|
"logits/rejected": -1.757764220237732, |
|
"logps/chosen": -85.5127182006836, |
|
"logps/rejected": -223.26803588867188, |
|
"loss": 0.8267, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 26.434656143188477, |
|
"rewards/margins": 11.342155456542969, |
|
"rewards/rejected": 15.092500686645508, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 23.952486038208008, |
|
"learning_rate": 8.215537495106781e-06, |
|
"logits/chosen": -2.0263965129852295, |
|
"logits/rejected": -1.9776378870010376, |
|
"logps/chosen": -78.3379898071289, |
|
"logps/rejected": -242.8697967529297, |
|
"loss": 0.655, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 28.698970794677734, |
|
"rewards/margins": 14.983367919921875, |
|
"rewards/rejected": 13.715606689453125, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 27.391632080078125, |
|
"learning_rate": 8.148107628689736e-06, |
|
"logits/chosen": -1.8597463369369507, |
|
"logits/rejected": -1.8375170230865479, |
|
"logps/chosen": -97.95671844482422, |
|
"logps/rejected": -241.65005493164062, |
|
"loss": 0.8206, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 25.743389129638672, |
|
"rewards/margins": 13.008593559265137, |
|
"rewards/rejected": 12.734795570373535, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 32.186092376708984, |
|
"learning_rate": 8.07971550669487e-06, |
|
"logits/chosen": -1.7367337942123413, |
|
"logits/rejected": -1.6837384700775146, |
|
"logps/chosen": -97.98736572265625, |
|
"logps/rejected": -236.2861785888672, |
|
"loss": 0.7388, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 25.235164642333984, |
|
"rewards/margins": 12.560583114624023, |
|
"rewards/rejected": 12.674580574035645, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 2.0279977321624756, |
|
"learning_rate": 8.01038203396682e-06, |
|
"logits/chosen": -1.7110230922698975, |
|
"logits/rejected": -1.6617202758789062, |
|
"logps/chosen": -86.60490417480469, |
|
"logps/rejected": -218.7451171875, |
|
"loss": 0.7764, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 27.18789291381836, |
|
"rewards/margins": 12.605551719665527, |
|
"rewards/rejected": 14.582344055175781, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 4.693509101867676, |
|
"learning_rate": 7.940128403084979e-06, |
|
"logits/chosen": -1.803138017654419, |
|
"logits/rejected": -1.7637073993682861, |
|
"logps/chosen": -82.79511260986328, |
|
"logps/rejected": -221.371826171875, |
|
"loss": 0.569, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 28.16043472290039, |
|
"rewards/margins": 13.802328109741211, |
|
"rewards/rejected": 14.358105659484863, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 27.00885772705078, |
|
"learning_rate": 7.868976087885741e-06, |
|
"logits/chosen": -1.769118309020996, |
|
"logits/rejected": -1.7129123210906982, |
|
"logps/chosen": -83.30874633789062, |
|
"logps/rejected": -247.39993286132812, |
|
"loss": 0.8273, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 25.751117706298828, |
|
"rewards/margins": 12.938650131225586, |
|
"rewards/rejected": 12.812464714050293, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 36.580535888671875, |
|
"learning_rate": 7.796946836898781e-06, |
|
"logits/chosen": -1.8945682048797607, |
|
"logits/rejected": -1.8643999099731445, |
|
"logps/chosen": -83.80934143066406, |
|
"logps/rejected": -228.9951629638672, |
|
"loss": 0.9133, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 25.17868995666504, |
|
"rewards/margins": 11.825202941894531, |
|
"rewards/rejected": 13.353485107421875, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 53.96921920776367, |
|
"learning_rate": 7.724062666699359e-06, |
|
"logits/chosen": -1.8547554016113281, |
|
"logits/rejected": -1.8166589736938477, |
|
"logps/chosen": -96.59284973144531, |
|
"logps/rejected": -250.556640625, |
|
"loss": 0.6751, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 25.961719512939453, |
|
"rewards/margins": 14.7571382522583, |
|
"rewards/rejected": 11.204582214355469, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 21.306289672851562, |
|
"learning_rate": 7.650345855178695e-06, |
|
"logits/chosen": -1.9091202020645142, |
|
"logits/rejected": -1.8824758529663086, |
|
"logps/chosen": -85.81441497802734, |
|
"logps/rejected": -241.8865509033203, |
|
"loss": 0.6111, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 25.982864379882812, |
|
"rewards/margins": 13.588445663452148, |
|
"rewards/rejected": 12.394417762756348, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 8.490675926208496, |
|
"learning_rate": 7.575818934734482e-06, |
|
"logits/chosen": -1.7473344802856445, |
|
"logits/rejected": -1.7193949222564697, |
|
"logps/chosen": -83.74988555908203, |
|
"logps/rejected": -207.8963165283203, |
|
"loss": 0.5498, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 26.92534828186035, |
|
"rewards/margins": 12.920440673828125, |
|
"rewards/rejected": 14.004905700683594, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 3.250255584716797, |
|
"learning_rate": 7.500504685383589e-06, |
|
"logits/chosen": -1.8129802942276, |
|
"logits/rejected": -1.748167634010315, |
|
"logps/chosen": -73.27245330810547, |
|
"logps/rejected": -218.0284423828125, |
|
"loss": 0.5127, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 27.97617530822754, |
|
"rewards/margins": 13.350275993347168, |
|
"rewards/rejected": 14.625898361206055, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -0.8561357855796814, |
|
"eval_logits/rejected": -0.8501406908035278, |
|
"eval_logps/chosen": -157.18006896972656, |
|
"eval_logps/rejected": -196.33729553222656, |
|
"eval_loss": 2.5238630771636963, |
|
"eval_rewards/accuracies": 0.6000000238418579, |
|
"eval_rewards/chosen": 15.406268119812012, |
|
"eval_rewards/margins": 1.4132167100906372, |
|
"eval_rewards/rejected": 13.993051528930664, |
|
"eval_runtime": 959.2657, |
|
"eval_samples_per_second": 0.735, |
|
"eval_steps_per_second": 0.735, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 2.0936248302459717, |
|
"learning_rate": 7.4244261277990935e-06, |
|
"logits/chosen": -1.645000696182251, |
|
"logits/rejected": -1.5917456150054932, |
|
"logps/chosen": -73.57734680175781, |
|
"logps/rejected": -235.9907684326172, |
|
"loss": 0.4526, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 28.39139747619629, |
|
"rewards/margins": 15.543986320495605, |
|
"rewards/rejected": 12.847410202026367, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 3.0842278003692627, |
|
"learning_rate": 7.347606516273741e-06, |
|
"logits/chosen": -1.7528049945831299, |
|
"logits/rejected": -1.7014877796173096, |
|
"logps/chosen": -78.36769104003906, |
|
"logps/rejected": -245.0220947265625, |
|
"loss": 0.463, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 28.718791961669922, |
|
"rewards/margins": 15.73725700378418, |
|
"rewards/rejected": 12.981534004211426, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 3.525000810623169, |
|
"learning_rate": 7.270069331612e-06, |
|
"logits/chosen": -1.6429353952407837, |
|
"logits/rejected": -1.5838866233825684, |
|
"logps/chosen": -79.90181732177734, |
|
"logps/rejected": -221.4831085205078, |
|
"loss": 0.4782, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 26.645065307617188, |
|
"rewards/margins": 13.249574661254883, |
|
"rewards/rejected": 13.395492553710938, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 24.071266174316406, |
|
"learning_rate": 7.1918382739528804e-06, |
|
"logits/chosen": -1.838573694229126, |
|
"logits/rejected": -1.77816903591156, |
|
"logps/chosen": -75.74369812011719, |
|
"logps/rejected": -250.9163055419922, |
|
"loss": 0.4283, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 27.663928985595703, |
|
"rewards/margins": 14.643702507019043, |
|
"rewards/rejected": 13.020225524902344, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 2.601551055908203, |
|
"learning_rate": 7.112937255525722e-06, |
|
"logits/chosen": -1.651479721069336, |
|
"logits/rejected": -1.5933506488800049, |
|
"logps/chosen": -69.61723327636719, |
|
"logps/rejected": -234.6320037841797, |
|
"loss": 0.4144, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 28.57497215270996, |
|
"rewards/margins": 15.81470775604248, |
|
"rewards/rejected": 12.760263442993164, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 2.3393607139587402, |
|
"learning_rate": 7.033390393341133e-06, |
|
"logits/chosen": -1.9843261241912842, |
|
"logits/rejected": -1.8986890316009521, |
|
"logps/chosen": -56.452239990234375, |
|
"logps/rejected": -254.9853057861328, |
|
"loss": 0.338, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 28.793537139892578, |
|
"rewards/margins": 16.600866317749023, |
|
"rewards/rejected": 12.192671775817871, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 23.764225006103516, |
|
"learning_rate": 6.953222001819347e-06, |
|
"logits/chosen": -1.808882474899292, |
|
"logits/rejected": -1.7674134969711304, |
|
"logps/chosen": -70.89783477783203, |
|
"logps/rejected": -221.87173461914062, |
|
"loss": 0.4405, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 29.50577163696289, |
|
"rewards/margins": 16.218791961669922, |
|
"rewards/rejected": 13.286974906921387, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 5.011995315551758, |
|
"learning_rate": 6.87245658535825e-06, |
|
"logits/chosen": -1.7610832452774048, |
|
"logits/rejected": -1.7023169994354248, |
|
"logps/chosen": -64.40223693847656, |
|
"logps/rejected": -239.07498168945312, |
|
"loss": 0.3924, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 28.532089233398438, |
|
"rewards/margins": 15.520894050598145, |
|
"rewards/rejected": 13.011195182800293, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 3.537167549133301, |
|
"learning_rate": 6.791118830843311e-06, |
|
"logits/chosen": -1.6986154317855835, |
|
"logits/rejected": -1.6274213790893555, |
|
"logps/chosen": -73.15770721435547, |
|
"logps/rejected": -220.58633422851562, |
|
"loss": 0.5515, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 27.489849090576172, |
|
"rewards/margins": 13.804224967956543, |
|
"rewards/rejected": 13.68562126159668, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 7.608737945556641, |
|
"learning_rate": 6.709233600101761e-06, |
|
"logits/chosen": -1.7815144062042236, |
|
"logits/rejected": -1.7079941034317017, |
|
"logps/chosen": -64.2397689819336, |
|
"logps/rejected": -222.42898559570312, |
|
"loss": 0.374, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 29.80181884765625, |
|
"rewards/margins": 15.93518352508545, |
|
"rewards/rejected": 13.8666353225708, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 7.344120025634766, |
|
"learning_rate": 6.626825922303287e-06, |
|
"logits/chosen": -1.482596755027771, |
|
"logits/rejected": -1.398471713066101, |
|
"logps/chosen": -69.20109558105469, |
|
"logps/rejected": -227.3030548095703, |
|
"loss": 0.4768, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 26.281591415405273, |
|
"rewards/margins": 13.225834846496582, |
|
"rewards/rejected": 13.055760383605957, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 2.493931293487549, |
|
"learning_rate": 6.5439209863095675e-06, |
|
"logits/chosen": -1.86245596408844, |
|
"logits/rejected": -1.805001974105835, |
|
"logps/chosen": -58.1787223815918, |
|
"logps/rejected": -235.18923950195312, |
|
"loss": 0.3453, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 28.87502098083496, |
|
"rewards/margins": 16.159029006958008, |
|
"rewards/rejected": 12.715993881225586, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 5.012514114379883, |
|
"learning_rate": 6.460544132975014e-06, |
|
"logits/chosen": -1.6647535562515259, |
|
"logits/rejected": -1.6346346139907837, |
|
"logps/chosen": -67.99676513671875, |
|
"logps/rejected": -234.99331665039062, |
|
"loss": 0.4353, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 27.812246322631836, |
|
"rewards/margins": 16.04288673400879, |
|
"rewards/rejected": 11.76936149597168, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 25.745906829833984, |
|
"learning_rate": 6.376720847401042e-06, |
|
"logits/chosen": -1.816080093383789, |
|
"logits/rejected": -1.754172921180725, |
|
"logps/chosen": -61.254425048828125, |
|
"logps/rejected": -244.97998046875, |
|
"loss": 0.3916, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 28.01883888244629, |
|
"rewards/margins": 16.0445499420166, |
|
"rewards/rejected": 11.974291801452637, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 3.14039945602417, |
|
"learning_rate": 6.292476751146255e-06, |
|
"logits/chosen": -1.8254092931747437, |
|
"logits/rejected": -1.7369928359985352, |
|
"logps/chosen": -59.7559814453125, |
|
"logps/rejected": -227.97384643554688, |
|
"loss": 0.41, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 29.0753173828125, |
|
"rewards/margins": 16.199630737304688, |
|
"rewards/rejected": 12.875683784484863, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 4.181332588195801, |
|
"learning_rate": 6.207837594394913e-06, |
|
"logits/chosen": -1.819637656211853, |
|
"logits/rejected": -1.7874256372451782, |
|
"logps/chosen": -60.003814697265625, |
|
"logps/rejected": -217.7976531982422, |
|
"loss": 0.3721, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 28.516149520874023, |
|
"rewards/margins": 15.223426818847656, |
|
"rewards/rejected": 13.292726516723633, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 11.886590003967285, |
|
"learning_rate": 6.1228292480861e-06, |
|
"logits/chosen": -1.7983691692352295, |
|
"logits/rejected": -1.707035779953003, |
|
"logps/chosen": -70.19294738769531, |
|
"logps/rejected": -250.7736358642578, |
|
"loss": 0.5082, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 28.486637115478516, |
|
"rewards/margins": 16.149328231811523, |
|
"rewards/rejected": 12.337307929992676, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 2.3090531826019287, |
|
"learning_rate": 6.037477696005966e-06, |
|
"logits/chosen": -1.7597004175186157, |
|
"logits/rejected": -1.6807353496551514, |
|
"logps/chosen": -61.509925842285156, |
|
"logps/rejected": -227.34652709960938, |
|
"loss": 0.4032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 29.05527687072754, |
|
"rewards/margins": 16.505048751831055, |
|
"rewards/rejected": 12.550226211547852, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 2.6496028900146484, |
|
"learning_rate": 5.95180902684548e-06, |
|
"logits/chosen": -1.6354789733886719, |
|
"logits/rejected": -1.545203447341919, |
|
"logps/chosen": -61.32403564453125, |
|
"logps/rejected": -239.3839874267578, |
|
"loss": 0.396, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 28.665252685546875, |
|
"rewards/margins": 16.576025009155273, |
|
"rewards/rejected": 12.08923053741455, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 2.990304946899414, |
|
"learning_rate": 5.8658494262261215e-06, |
|
"logits/chosen": -1.8462188243865967, |
|
"logits/rejected": -1.7873852252960205, |
|
"logps/chosen": -64.35298919677734, |
|
"logps/rejected": -239.20401000976562, |
|
"loss": 0.3787, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 28.686588287353516, |
|
"rewards/margins": 15.4246826171875, |
|
"rewards/rejected": 13.2619047164917, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_logits/chosen": -0.8357726335525513, |
|
"eval_logits/rejected": -0.8384636044502258, |
|
"eval_logps/chosen": -158.54798889160156, |
|
"eval_logps/rejected": -197.1554718017578, |
|
"eval_loss": 2.5951449871063232, |
|
"eval_rewards/accuracies": 0.6141843795776367, |
|
"eval_rewards/chosen": 15.269478797912598, |
|
"eval_rewards/margins": 1.3582426309585571, |
|
"eval_rewards/rejected": 13.911234855651855, |
|
"eval_runtime": 933.4217, |
|
"eval_samples_per_second": 0.755, |
|
"eval_steps_per_second": 0.755, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 29.393373489379883, |
|
"learning_rate": 5.779625168695943e-06, |
|
"logits/chosen": -1.8400392532348633, |
|
"logits/rejected": -1.7939599752426147, |
|
"logps/chosen": -53.9841423034668, |
|
"logps/rejected": -231.19735717773438, |
|
"loss": 0.3973, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 29.610637664794922, |
|
"rewards/margins": 16.809568405151367, |
|
"rewards/rejected": 12.801069259643555, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 3.947079658508301, |
|
"learning_rate": 5.6931626096984475e-06, |
|
"logits/chosen": -1.7477422952651978, |
|
"logits/rejected": -1.700312614440918, |
|
"logps/chosen": -60.37701416015625, |
|
"logps/rejected": -254.6928253173828, |
|
"loss": 0.37, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 27.82967185974121, |
|
"rewards/margins": 15.517053604125977, |
|
"rewards/rejected": 12.31261920928955, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 19.618087768554688, |
|
"learning_rate": 5.6064881775167445e-06, |
|
"logits/chosen": -1.7533241510391235, |
|
"logits/rejected": -1.6893295049667358, |
|
"logps/chosen": -61.474517822265625, |
|
"logps/rejected": -223.3421173095703, |
|
"loss": 0.4478, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 27.814483642578125, |
|
"rewards/margins": 14.598594665527344, |
|
"rewards/rejected": 13.215890884399414, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 3.5263216495513916, |
|
"learning_rate": 5.5196283651954375e-06, |
|
"logits/chosen": -1.5104930400848389, |
|
"logits/rejected": -1.413916826248169, |
|
"logps/chosen": -70.25590515136719, |
|
"logps/rejected": -236.75833129882812, |
|
"loss": 0.4924, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 28.405406951904297, |
|
"rewards/margins": 15.316106796264648, |
|
"rewards/rejected": 13.089299201965332, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 3.5469884872436523, |
|
"learning_rate": 5.432609722442715e-06, |
|
"logits/chosen": -1.7516591548919678, |
|
"logits/rejected": -1.6808538436889648, |
|
"logps/chosen": -69.64612579345703, |
|
"logps/rejected": -237.8245086669922, |
|
"loss": 0.4107, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 28.481128692626953, |
|
"rewards/margins": 16.653011322021484, |
|
"rewards/rejected": 11.828117370605469, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 4.012500286102295, |
|
"learning_rate": 5.345458847515133e-06, |
|
"logits/chosen": -1.3920238018035889, |
|
"logits/rejected": -1.284397840499878, |
|
"logps/chosen": -67.01615905761719, |
|
"logps/rejected": -207.93798828125, |
|
"loss": 0.4615, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 27.689502716064453, |
|
"rewards/margins": 13.398959159851074, |
|
"rewards/rejected": 14.290542602539062, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 2.9533350467681885, |
|
"learning_rate": 5.258202379087537e-06, |
|
"logits/chosen": -1.7657880783081055, |
|
"logits/rejected": -1.693655014038086, |
|
"logps/chosen": -64.3235092163086, |
|
"logps/rejected": -251.1502227783203, |
|
"loss": 0.4456, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 28.66314125061035, |
|
"rewards/margins": 16.5991268157959, |
|
"rewards/rejected": 12.06401252746582, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 3.6378014087677, |
|
"learning_rate": 5.170866988110656e-06, |
|
"logits/chosen": -1.7154031991958618, |
|
"logits/rejected": -1.657488226890564, |
|
"logps/chosen": -66.95178985595703, |
|
"logps/rejected": -236.646240234375, |
|
"loss": 0.4076, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 28.9368839263916, |
|
"rewards/margins": 16.125226974487305, |
|
"rewards/rejected": 12.811657905578613, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 2.8945438861846924, |
|
"learning_rate": 5.083479369658807e-06, |
|
"logits/chosen": -1.696937918663025, |
|
"logits/rejected": -1.6348483562469482, |
|
"logps/chosen": -58.26952362060547, |
|
"logps/rejected": -230.33377075195312, |
|
"loss": 0.3785, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 28.738727569580078, |
|
"rewards/margins": 15.88463306427002, |
|
"rewards/rejected": 12.854090690612793, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 2.305314302444458, |
|
"learning_rate": 4.9960662347702405e-06, |
|
"logits/chosen": -1.5210245847702026, |
|
"logits/rejected": -1.4456034898757935, |
|
"logps/chosen": -54.75811004638672, |
|
"logps/rejected": -232.2668914794922, |
|
"loss": 0.3659, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 29.211429595947266, |
|
"rewards/margins": 16.932321548461914, |
|
"rewards/rejected": 12.279109954833984, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 30.96356964111328, |
|
"learning_rate": 4.908654302282602e-06, |
|
"logits/chosen": -1.9021522998809814, |
|
"logits/rejected": -1.8719444274902344, |
|
"logps/chosen": -55.43334197998047, |
|
"logps/rejected": -241.4973602294922, |
|
"loss": 0.3925, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 29.54325294494629, |
|
"rewards/margins": 18.098033905029297, |
|
"rewards/rejected": 11.44521713256836, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 2.172999858856201, |
|
"learning_rate": 4.821270290666007e-06, |
|
"logits/chosen": -1.9638334512710571, |
|
"logits/rejected": -1.933005928993225, |
|
"logps/chosen": -50.753273010253906, |
|
"logps/rejected": -264.568603515625, |
|
"loss": 0.3296, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 29.600269317626953, |
|
"rewards/margins": 18.301733016967773, |
|
"rewards/rejected": 11.29853630065918, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 3.1367807388305664, |
|
"learning_rate": 4.733940909856239e-06, |
|
"logits/chosen": -1.8073303699493408, |
|
"logits/rejected": -1.769351601600647, |
|
"logps/chosen": -56.24708938598633, |
|
"logps/rejected": -268.60906982421875, |
|
"loss": 0.349, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 29.238876342773438, |
|
"rewards/margins": 17.679536819458008, |
|
"rewards/rejected": 11.559335708618164, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 2.4563372135162354, |
|
"learning_rate": 4.646692853090539e-06, |
|
"logits/chosen": -1.8005622625350952, |
|
"logits/rejected": -1.783666968345642, |
|
"logps/chosen": -58.6412467956543, |
|
"logps/rejected": -242.1209716796875, |
|
"loss": 0.3657, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 29.608911514282227, |
|
"rewards/margins": 18.011024475097656, |
|
"rewards/rejected": 11.597890853881836, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 2.5796990394592285, |
|
"learning_rate": 4.559552788748507e-06, |
|
"logits/chosen": -2.053856134414673, |
|
"logits/rejected": -2.0160202980041504, |
|
"logps/chosen": -49.09900665283203, |
|
"logps/rejected": -269.3837890625, |
|
"loss": 0.2841, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 30.68400001525879, |
|
"rewards/margins": 19.868282318115234, |
|
"rewards/rejected": 10.815717697143555, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 2.9965362548828125, |
|
"learning_rate": 4.472547352200615e-06, |
|
"logits/chosen": -1.8396308422088623, |
|
"logits/rejected": -1.8210567235946655, |
|
"logps/chosen": -52.045127868652344, |
|
"logps/rejected": -257.56011962890625, |
|
"loss": 0.3359, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 29.730632781982422, |
|
"rewards/margins": 18.14250373840332, |
|
"rewards/rejected": 11.58813190460205, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 3.3091185092926025, |
|
"learning_rate": 4.385703137666784e-06, |
|
"logits/chosen": -1.7598381042480469, |
|
"logits/rejected": -1.7196592092514038, |
|
"logps/chosen": -55.36179733276367, |
|
"logps/rejected": -260.2227478027344, |
|
"loss": 0.3407, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 30.468708038330078, |
|
"rewards/margins": 19.425325393676758, |
|
"rewards/rejected": 11.043380737304688, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 1.9838672876358032, |
|
"learning_rate": 4.2990466900875625e-06, |
|
"logits/chosen": -1.8335222005844116, |
|
"logits/rejected": -1.776532769203186, |
|
"logps/chosen": -51.03464889526367, |
|
"logps/rejected": -237.20083618164062, |
|
"loss": 0.3608, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 29.984943389892578, |
|
"rewards/margins": 17.165090560913086, |
|
"rewards/rejected": 12.819851875305176, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 3.0028090476989746, |
|
"learning_rate": 4.212604497010346e-06, |
|
"logits/chosen": -1.7095162868499756, |
|
"logits/rejected": -1.6648433208465576, |
|
"logps/chosen": -48.533897399902344, |
|
"logps/rejected": -234.913330078125, |
|
"loss": 0.3397, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 30.055179595947266, |
|
"rewards/margins": 17.712875366210938, |
|
"rewards/rejected": 12.342303276062012, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 11.127403259277344, |
|
"learning_rate": 4.126402980493171e-06, |
|
"logits/chosen": -1.7350540161132812, |
|
"logits/rejected": -1.6723353862762451, |
|
"logps/chosen": -60.74663543701172, |
|
"logps/rejected": -234.6821746826172, |
|
"loss": 0.381, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 29.135547637939453, |
|
"rewards/margins": 16.059070587158203, |
|
"rewards/rejected": 13.07647705078125, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_logits/chosen": -0.7807645797729492, |
|
"eval_logits/rejected": -0.7846214771270752, |
|
"eval_logps/chosen": -161.0571746826172, |
|
"eval_logps/rejected": -201.4547576904297, |
|
"eval_loss": 2.581395387649536, |
|
"eval_rewards/accuracies": 0.6212766170501709, |
|
"eval_rewards/chosen": 15.018560409545898, |
|
"eval_rewards/margins": 1.537255883216858, |
|
"eval_rewards/rejected": 13.481305122375488, |
|
"eval_runtime": 933.6733, |
|
"eval_samples_per_second": 0.755, |
|
"eval_steps_per_second": 0.755, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 2.986391305923462, |
|
"learning_rate": 4.0404684890284815e-06, |
|
"logits/chosen": -1.6968259811401367, |
|
"logits/rejected": -1.62332022190094, |
|
"logps/chosen": -54.682281494140625, |
|
"logps/rejected": -259.25335693359375, |
|
"loss": 0.3598, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 30.244094848632812, |
|
"rewards/margins": 18.490741729736328, |
|
"rewards/rejected": 11.753351211547852, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 3.1635079383850098, |
|
"learning_rate": 3.954827289489429e-06, |
|
"logits/chosen": -1.8833897113800049, |
|
"logits/rejected": -1.7493565082550049, |
|
"logps/chosen": -53.182655334472656, |
|
"logps/rejected": -240.314697265625, |
|
"loss": 0.3551, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 30.2075252532959, |
|
"rewards/margins": 17.21380615234375, |
|
"rewards/rejected": 12.993721008300781, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 2.4462807178497314, |
|
"learning_rate": 3.86950555910108e-06, |
|
"logits/chosen": -1.6473881006240845, |
|
"logits/rejected": -1.5638983249664307, |
|
"logps/chosen": -54.882896423339844, |
|
"logps/rejected": -230.82162475585938, |
|
"loss": 0.3772, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 29.62346839904785, |
|
"rewards/margins": 15.928651809692383, |
|
"rewards/rejected": 13.694814682006836, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 4.168172836303711, |
|
"learning_rate": 3.784529377439067e-06, |
|
"logits/chosen": -1.6475791931152344, |
|
"logits/rejected": -1.5899393558502197, |
|
"logps/chosen": -59.67030715942383, |
|
"logps/rejected": -221.40066528320312, |
|
"loss": 0.3826, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 29.023029327392578, |
|
"rewards/margins": 16.1686954498291, |
|
"rewards/rejected": 12.854331970214844, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 17.74908447265625, |
|
"learning_rate": 3.699924718458036e-06, |
|
"logits/chosen": -1.7890431880950928, |
|
"logits/rejected": -1.7369229793548584, |
|
"logps/chosen": -48.85768127441406, |
|
"logps/rejected": -251.3222198486328, |
|
"loss": 0.3095, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 30.195240020751953, |
|
"rewards/margins": 17.660083770751953, |
|
"rewards/rejected": 12.53515338897705, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 6.809030532836914, |
|
"learning_rate": 3.615717442552429e-06, |
|
"logits/chosen": -1.7820018529891968, |
|
"logits/rejected": -1.7489522695541382, |
|
"logps/chosen": -54.51831817626953, |
|
"logps/rejected": -246.6309356689453, |
|
"loss": 0.337, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 29.5898380279541, |
|
"rewards/margins": 18.221317291259766, |
|
"rewards/rejected": 11.368522644042969, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 7.795753002166748, |
|
"learning_rate": 3.5319332886519393e-06, |
|
"logits/chosen": -1.6910899877548218, |
|
"logits/rejected": -1.6332054138183594, |
|
"logps/chosen": -46.748443603515625, |
|
"logps/rejected": -252.2515106201172, |
|
"loss": 0.3168, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 29.91961669921875, |
|
"rewards/margins": 18.060075759887695, |
|
"rewards/rejected": 11.85954475402832, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 4.77955436706543, |
|
"learning_rate": 3.4485978663541233e-06, |
|
"logits/chosen": -1.4919878244400024, |
|
"logits/rejected": -1.4713367223739624, |
|
"logps/chosen": -66.22251892089844, |
|
"logps/rejected": -239.391357421875, |
|
"loss": 0.4289, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 28.419330596923828, |
|
"rewards/margins": 17.399511337280273, |
|
"rewards/rejected": 11.019818305969238, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 2.38405179977417, |
|
"learning_rate": 3.3657366480965158e-06, |
|
"logits/chosen": -1.8986093997955322, |
|
"logits/rejected": -1.8109142780303955, |
|
"logps/chosen": -47.243350982666016, |
|
"logps/rejected": -263.37347412109375, |
|
"loss": 0.2852, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 30.1839656829834, |
|
"rewards/margins": 18.492839813232422, |
|
"rewards/rejected": 11.691123962402344, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 5.4925994873046875, |
|
"learning_rate": 3.2833749613706988e-06, |
|
"logits/chosen": -1.583234429359436, |
|
"logits/rejected": -1.497194766998291, |
|
"logps/chosen": -58.58795166015625, |
|
"logps/rejected": -240.7587432861328, |
|
"loss": 0.4343, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 27.720067977905273, |
|
"rewards/margins": 14.912628173828125, |
|
"rewards/rejected": 12.807439804077148, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 3.2018399238586426, |
|
"learning_rate": 3.201537980980646e-06, |
|
"logits/chosen": -1.3224772214889526, |
|
"logits/rejected": -1.2666256427764893, |
|
"logps/chosen": -54.696876525878906, |
|
"logps/rejected": -243.18539428710938, |
|
"loss": 0.3813, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 28.097915649414062, |
|
"rewards/margins": 16.58795738220215, |
|
"rewards/rejected": 11.50995922088623, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 4.00109338760376, |
|
"learning_rate": 3.1202507213477658e-06, |
|
"logits/chosen": -1.6143379211425781, |
|
"logits/rejected": -1.5576748847961426, |
|
"logps/chosen": -56.855003356933594, |
|
"logps/rejected": -233.1626739501953, |
|
"loss": 0.3651, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 30.580801010131836, |
|
"rewards/margins": 18.555524826049805, |
|
"rewards/rejected": 12.025275230407715, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 5.0345635414123535, |
|
"learning_rate": 3.039538028864939e-06, |
|
"logits/chosen": -1.6249618530273438, |
|
"logits/rejected": -1.6053358316421509, |
|
"logps/chosen": -57.446815490722656, |
|
"logps/rejected": -246.0026092529297, |
|
"loss": 0.3495, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 29.101943969726562, |
|
"rewards/margins": 17.34486198425293, |
|
"rewards/rejected": 11.757081985473633, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 2.065876007080078, |
|
"learning_rate": 2.9594245743019477e-06, |
|
"logits/chosen": -1.8201977014541626, |
|
"logits/rejected": -1.8249000310897827, |
|
"logps/chosen": -52.830360412597656, |
|
"logps/rejected": -243.8861083984375, |
|
"loss": 0.3389, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 31.09743881225586, |
|
"rewards/margins": 18.6008358001709, |
|
"rewards/rejected": 12.496602058410645, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 4.4573588371276855, |
|
"learning_rate": 2.8799348452645515e-06, |
|
"logits/chosen": -1.7230621576309204, |
|
"logits/rejected": -1.6711606979370117, |
|
"logps/chosen": -52.5847282409668, |
|
"logps/rejected": -234.39968872070312, |
|
"loss": 0.3286, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 30.327728271484375, |
|
"rewards/margins": 18.147693634033203, |
|
"rewards/rejected": 12.180032730102539, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 14.367749214172363, |
|
"learning_rate": 2.801093138709582e-06, |
|
"logits/chosen": -1.7738536596298218, |
|
"logits/rejected": -1.687819480895996, |
|
"logps/chosen": -49.588436126708984, |
|
"logps/rejected": -238.64501953125, |
|
"loss": 0.3485, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 29.91376304626465, |
|
"rewards/margins": 17.238935470581055, |
|
"rewards/rejected": 12.674827575683594, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 9.441333770751953, |
|
"learning_rate": 2.722923553518285e-06, |
|
"logits/chosen": -1.5205357074737549, |
|
"logits/rejected": -1.480017900466919, |
|
"logps/chosen": -57.287017822265625, |
|
"logps/rejected": -243.42919921875, |
|
"loss": 0.3522, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 29.02022933959961, |
|
"rewards/margins": 17.072509765625, |
|
"rewards/rejected": 11.947721481323242, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 12.407715797424316, |
|
"learning_rate": 2.6454499831302223e-06, |
|
"logits/chosen": -1.6342418193817139, |
|
"logits/rejected": -1.5738509893417358, |
|
"logps/chosen": -60.34296798706055, |
|
"logps/rejected": -226.05990600585938, |
|
"loss": 0.5105, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 30.577117919921875, |
|
"rewards/margins": 17.6352481842041, |
|
"rewards/rejected": 12.941868782043457, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 3.367372512817383, |
|
"learning_rate": 2.5686961082399716e-06, |
|
"logits/chosen": -1.8151267766952515, |
|
"logits/rejected": -1.7858302593231201, |
|
"logps/chosen": -57.720130920410156, |
|
"logps/rejected": -260.25494384765625, |
|
"loss": 0.3546, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 29.963693618774414, |
|
"rewards/margins": 18.514841079711914, |
|
"rewards/rejected": 11.44885540008545, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.254979133605957, |
|
"learning_rate": 2.4926853895588343e-06, |
|
"logits/chosen": -1.7511732578277588, |
|
"logits/rejected": -1.6836206912994385, |
|
"logps/chosen": -41.321022033691406, |
|
"logps/rejected": -248.452880859375, |
|
"loss": 0.2993, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 31.503093719482422, |
|
"rewards/margins": 19.927473068237305, |
|
"rewards/rejected": 11.575621604919434, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_logits/chosen": -0.7554205060005188, |
|
"eval_logits/rejected": -0.758985161781311, |
|
"eval_logps/chosen": -160.93551635742188, |
|
"eval_logps/rejected": -202.3504638671875, |
|
"eval_loss": 2.5816383361816406, |
|
"eval_rewards/accuracies": 0.6382978558540344, |
|
"eval_rewards/chosen": 15.030726432800293, |
|
"eval_rewards/margins": 1.6389882564544678, |
|
"eval_rewards/rejected": 13.39173698425293, |
|
"eval_runtime": 930.5524, |
|
"eval_samples_per_second": 0.758, |
|
"eval_steps_per_second": 0.758, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 2.5797388553619385, |
|
"learning_rate": 2.417441060643809e-06, |
|
"logits/chosen": -1.783384084701538, |
|
"logits/rejected": -1.7092987298965454, |
|
"logps/chosen": -39.536407470703125, |
|
"logps/rejected": -250.8395538330078, |
|
"loss": 0.2602, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 31.661413192749023, |
|
"rewards/margins": 19.599422454833984, |
|
"rewards/rejected": 12.061990737915039, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 3.2678422927856445, |
|
"learning_rate": 2.342986120795978e-06, |
|
"logits/chosen": -1.6378501653671265, |
|
"logits/rejected": -1.6011245250701904, |
|
"logps/chosen": -50.96418762207031, |
|
"logps/rejected": -246.3638153076172, |
|
"loss": 0.3088, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 30.954126358032227, |
|
"rewards/margins": 19.514755249023438, |
|
"rewards/rejected": 11.439367294311523, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 2.6984269618988037, |
|
"learning_rate": 2.2693433280305127e-06, |
|
"logits/chosen": -1.7482858896255493, |
|
"logits/rejected": -1.7367308139801025, |
|
"logps/chosen": -43.29402160644531, |
|
"logps/rejected": -253.78939819335938, |
|
"loss": 0.2572, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 31.882701873779297, |
|
"rewards/margins": 21.47805404663086, |
|
"rewards/rejected": 10.40464973449707, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 3.53029727935791, |
|
"learning_rate": 2.19653519212041e-06, |
|
"logits/chosen": -1.7963911294937134, |
|
"logits/rejected": -1.7613227367401123, |
|
"logps/chosen": -44.7977294921875, |
|
"logps/rejected": -243.6311798095703, |
|
"loss": 0.2862, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 30.34331703186035, |
|
"rewards/margins": 19.52144432067871, |
|
"rewards/rejected": 10.821873664855957, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 1.513169288635254, |
|
"learning_rate": 2.124583967716136e-06, |
|
"logits/chosen": -2.0151898860931396, |
|
"logits/rejected": -1.9886600971221924, |
|
"logps/chosen": -38.372108459472656, |
|
"logps/rejected": -267.5242919921875, |
|
"loss": 0.2252, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 31.756805419921875, |
|
"rewards/margins": 20.560382843017578, |
|
"rewards/rejected": 11.196426391601562, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 2.537264347076416, |
|
"learning_rate": 2.053511647543218e-06, |
|
"logits/chosen": -1.4479506015777588, |
|
"logits/rejected": -1.408527135848999, |
|
"logps/chosen": -49.19340896606445, |
|
"logps/rejected": -229.5871124267578, |
|
"loss": 0.3208, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 29.4880428314209, |
|
"rewards/margins": 17.939146041870117, |
|
"rewards/rejected": 11.548896789550781, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 2.1548752784729004, |
|
"learning_rate": 1.98333995567992e-06, |
|
"logits/chosen": -1.5170295238494873, |
|
"logits/rejected": -1.4900450706481934, |
|
"logps/chosen": -50.18815231323242, |
|
"logps/rejected": -242.35986328125, |
|
"loss": 0.3042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 31.04024887084961, |
|
"rewards/margins": 19.42563247680664, |
|
"rewards/rejected": 11.614619255065918, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 2.6000428199768066, |
|
"learning_rate": 1.9140903409170276e-06, |
|
"logits/chosen": -1.59740149974823, |
|
"logits/rejected": -1.5533560514450073, |
|
"logps/chosen": -49.30552291870117, |
|
"logps/rejected": -255.7830810546875, |
|
"loss": 0.3064, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 30.910289764404297, |
|
"rewards/margins": 19.63786506652832, |
|
"rewards/rejected": 11.272429466247559, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 2.6881704330444336, |
|
"learning_rate": 1.8457839702017783e-06, |
|
"logits/chosen": -1.788030982017517, |
|
"logits/rejected": -1.7674751281738281, |
|
"logps/chosen": -38.47369384765625, |
|
"logps/rejected": -241.8249053955078, |
|
"loss": 0.2368, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 30.96920394897461, |
|
"rewards/margins": 19.61397933959961, |
|
"rewards/rejected": 11.355224609375, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 1.9455982446670532, |
|
"learning_rate": 1.7784417221679346e-06, |
|
"logits/chosen": -1.7658576965332031, |
|
"logits/rejected": -1.7436749935150146, |
|
"logps/chosen": -43.32396697998047, |
|
"logps/rejected": -260.0502624511719, |
|
"loss": 0.2658, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 30.932819366455078, |
|
"rewards/margins": 20.97707176208496, |
|
"rewards/rejected": 9.955748558044434, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"grad_norm": 2.7157328128814697, |
|
"learning_rate": 1.7120841807539867e-06, |
|
"logits/chosen": -1.5605119466781616, |
|
"logits/rejected": -1.533529281616211, |
|
"logps/chosen": -45.2177848815918, |
|
"logps/rejected": -252.9283905029297, |
|
"loss": 0.2779, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 31.456167221069336, |
|
"rewards/margins": 20.617300033569336, |
|
"rewards/rejected": 10.8388671875, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 4.16888952255249, |
|
"learning_rate": 1.6467316289114365e-06, |
|
"logits/chosen": -1.7607736587524414, |
|
"logits/rejected": -1.72702157497406, |
|
"logps/chosen": -52.18560028076172, |
|
"logps/rejected": -237.4579620361328, |
|
"loss": 0.3116, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 30.68082618713379, |
|
"rewards/margins": 19.22063446044922, |
|
"rewards/rejected": 11.460187911987305, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"grad_norm": 3.37455153465271, |
|
"learning_rate": 1.5824040424050763e-06, |
|
"logits/chosen": -1.8833210468292236, |
|
"logits/rejected": -1.844347357749939, |
|
"logps/chosen": -40.77017593383789, |
|
"logps/rejected": -248.3719940185547, |
|
"loss": 0.2508, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 32.0168342590332, |
|
"rewards/margins": 21.67666244506836, |
|
"rewards/rejected": 10.34017562866211, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 1.9589617252349854, |
|
"learning_rate": 1.5191210837071695e-06, |
|
"logits/chosen": -1.8483455181121826, |
|
"logits/rejected": -1.8569103479385376, |
|
"logps/chosen": -37.26815414428711, |
|
"logps/rejected": -267.3866882324219, |
|
"loss": 0.231, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 32.11620330810547, |
|
"rewards/margins": 21.5185604095459, |
|
"rewards/rejected": 10.597644805908203, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 3.257993221282959, |
|
"learning_rate": 1.4569020959873809e-06, |
|
"logits/chosen": -1.786929726600647, |
|
"logits/rejected": -1.7171766757965088, |
|
"logps/chosen": -38.82440948486328, |
|
"logps/rejected": -267.8984375, |
|
"loss": 0.2323, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 31.542959213256836, |
|
"rewards/margins": 20.44384002685547, |
|
"rewards/rejected": 11.099119186401367, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 2.4979026317596436, |
|
"learning_rate": 1.3957660972003167e-06, |
|
"logits/chosen": -1.6343905925750732, |
|
"logits/rejected": -1.6573156118392944, |
|
"logps/chosen": -45.56403732299805, |
|
"logps/rejected": -253.07199096679688, |
|
"loss": 0.2767, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 30.549020767211914, |
|
"rewards/margins": 20.293630599975586, |
|
"rewards/rejected": 10.255391120910645, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 1.8096556663513184, |
|
"learning_rate": 1.3357317742724658e-06, |
|
"logits/chosen": -1.5966780185699463, |
|
"logits/rejected": -1.570854902267456, |
|
"logps/chosen": -48.08317184448242, |
|
"logps/rejected": -251.5933380126953, |
|
"loss": 0.2885, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 30.559154510498047, |
|
"rewards/margins": 19.232830047607422, |
|
"rewards/rejected": 11.326326370239258, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 2.8283846378326416, |
|
"learning_rate": 1.2768174773903263e-06, |
|
"logits/chosen": -1.86501145362854, |
|
"logits/rejected": -1.822446584701538, |
|
"logps/chosen": -47.01121139526367, |
|
"logps/rejected": -263.478515625, |
|
"loss": 0.2764, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 32.028045654296875, |
|
"rewards/margins": 20.432361602783203, |
|
"rewards/rejected": 11.595685958862305, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 2.776090621948242, |
|
"learning_rate": 1.2190412143914536e-06, |
|
"logits/chosen": -1.513856291770935, |
|
"logits/rejected": -1.46199631690979, |
|
"logps/chosen": -42.10026931762695, |
|
"logps/rejected": -247.57339477539062, |
|
"loss": 0.2768, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 29.788198471069336, |
|
"rewards/margins": 19.206623077392578, |
|
"rewards/rejected": 10.58157730102539, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"grad_norm": 2.4359893798828125, |
|
"learning_rate": 1.1624206452601623e-06, |
|
"logits/chosen": -1.7050933837890625, |
|
"logits/rejected": -1.6352756023406982, |
|
"logps/chosen": -48.3925666809082, |
|
"logps/rejected": -255.81640625, |
|
"loss": 0.2917, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 30.2396240234375, |
|
"rewards/margins": 19.301578521728516, |
|
"rewards/rejected": 10.938047409057617, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_logits/chosen": -0.8292198181152344, |
|
"eval_logits/rejected": -0.8336656093597412, |
|
"eval_logps/chosen": -165.87318420410156, |
|
"eval_logps/rejected": -207.3828887939453, |
|
"eval_loss": 2.626953363418579, |
|
"eval_rewards/accuracies": 0.6425532102584839, |
|
"eval_rewards/chosen": 14.536958694458008, |
|
"eval_rewards/margins": 1.6484651565551758, |
|
"eval_rewards/rejected": 12.888493537902832, |
|
"eval_runtime": 931.7841, |
|
"eval_samples_per_second": 0.757, |
|
"eval_steps_per_second": 0.757, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 2.7294886112213135, |
|
"learning_rate": 1.1069730767295394e-06, |
|
"logits/chosen": -1.7735283374786377, |
|
"logits/rejected": -1.7543232440948486, |
|
"logps/chosen": -40.81293869018555, |
|
"logps/rejected": -259.8978576660156, |
|
"loss": 0.2463, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 31.607662200927734, |
|
"rewards/margins": 20.695466995239258, |
|
"rewards/rejected": 10.912198066711426, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 2.734895944595337, |
|
"learning_rate": 1.0527154569914472e-06, |
|
"logits/chosen": -1.5312741994857788, |
|
"logits/rejected": -1.4923324584960938, |
|
"logps/chosen": -42.63439178466797, |
|
"logps/rejected": -244.82046508789062, |
|
"loss": 0.2697, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 29.897241592407227, |
|
"rewards/margins": 18.947063446044922, |
|
"rewards/rejected": 10.950177192687988, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"grad_norm": 2.473545789718628, |
|
"learning_rate": 9.996643705161125e-07, |
|
"logits/chosen": -1.6525815725326538, |
|
"logits/rejected": -1.6200284957885742, |
|
"logps/chosen": -46.02938461303711, |
|
"logps/rejected": -261.2445068359375, |
|
"loss": 0.2916, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 29.908071517944336, |
|
"rewards/margins": 19.2839298248291, |
|
"rewards/rejected": 10.624141693115234, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 2.676964044570923, |
|
"learning_rate": 9.47836032982884e-07, |
|
"logits/chosen": -1.5494722127914429, |
|
"logits/rejected": -1.4936171770095825, |
|
"logps/chosen": -47.933658599853516, |
|
"logps/rejected": -252.27401733398438, |
|
"loss": 0.3175, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 29.43538475036621, |
|
"rewards/margins": 18.781505584716797, |
|
"rewards/rejected": 10.653879165649414, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"grad_norm": 2.895836591720581, |
|
"learning_rate": 8.972462863237341e-07, |
|
"logits/chosen": -1.6493561267852783, |
|
"logits/rejected": -1.5964713096618652, |
|
"logps/chosen": -44.10541915893555, |
|
"logps/rejected": -271.24859619140625, |
|
"loss": 0.2734, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 29.411285400390625, |
|
"rewards/margins": 19.59650993347168, |
|
"rewards/rejected": 9.814775466918945, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 1.9935178756713867, |
|
"learning_rate": 8.479105938809701e-07, |
|
"logits/chosen": -1.485527515411377, |
|
"logits/rejected": -1.435868263244629, |
|
"logps/chosen": -50.575355529785156, |
|
"logps/rejected": -253.8799591064453, |
|
"loss": 0.3184, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 29.476688385009766, |
|
"rewards/margins": 18.307661056518555, |
|
"rewards/rejected": 11.169027328491211, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"grad_norm": 2.0916085243225098, |
|
"learning_rate": 7.998440356807075e-07, |
|
"logits/chosen": -1.8527294397354126, |
|
"logits/rejected": -1.8244997262954712, |
|
"logps/chosen": -34.15217208862305, |
|
"logps/rejected": -262.5540466308594, |
|
"loss": 0.2042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 31.451465606689453, |
|
"rewards/margins": 20.44220542907715, |
|
"rewards/rejected": 11.009258270263672, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"grad_norm": 3.5937747955322266, |
|
"learning_rate": 7.530613038234646e-07, |
|
"logits/chosen": -1.5336072444915771, |
|
"logits/rejected": -1.4814928770065308, |
|
"logps/chosen": -42.957305908203125, |
|
"logps/rejected": -248.6365966796875, |
|
"loss": 0.2776, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 31.6094970703125, |
|
"rewards/margins": 20.037199020385742, |
|
"rewards/rejected": 11.572298049926758, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 2.563070774078369, |
|
"learning_rate": 7.075766979933674e-07, |
|
"logits/chosen": -1.6885433197021484, |
|
"logits/rejected": -1.663726806640625, |
|
"logps/chosen": -35.838836669921875, |
|
"logps/rejected": -253.3392333984375, |
|
"loss": 0.2286, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 32.139644622802734, |
|
"rewards/margins": 20.651351928710938, |
|
"rewards/rejected": 11.48829174041748, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 1.7424818277359009, |
|
"learning_rate": 6.634041210872743e-07, |
|
"logits/chosen": -1.8395694494247437, |
|
"logits/rejected": -1.8219817876815796, |
|
"logps/chosen": -43.62278366088867, |
|
"logps/rejected": -261.8132629394531, |
|
"loss": 0.259, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 30.96283531188965, |
|
"rewards/margins": 20.133371353149414, |
|
"rewards/rejected": 10.829463005065918, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"grad_norm": 3.5391411781311035, |
|
"learning_rate": 6.205570749652002e-07, |
|
"logits/chosen": -1.5959376096725464, |
|
"logits/rejected": -1.5644586086273193, |
|
"logps/chosen": -47.74095916748047, |
|
"logps/rejected": -254.2753448486328, |
|
"loss": 0.3061, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 30.81204605102539, |
|
"rewards/margins": 19.811317443847656, |
|
"rewards/rejected": 11.00072956085205, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"grad_norm": 2.486269235610962, |
|
"learning_rate": 5.790486563233145e-07, |
|
"logits/chosen": -1.8033177852630615, |
|
"logits/rejected": -1.7998552322387695, |
|
"logps/chosen": -35.27519226074219, |
|
"logps/rejected": -256.3056640625, |
|
"loss": 0.2171, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 31.810821533203125, |
|
"rewards/margins": 20.776798248291016, |
|
"rewards/rejected": 11.034022331237793, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 2.615795612335205, |
|
"learning_rate": 5.388915526907862e-07, |
|
"logits/chosen": -1.7600593566894531, |
|
"logits/rejected": -1.7274726629257202, |
|
"logps/chosen": -38.91143035888672, |
|
"logps/rejected": -256.5948791503906, |
|
"loss": 0.2455, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 30.60187339782715, |
|
"rewards/margins": 19.52530288696289, |
|
"rewards/rejected": 11.076571464538574, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"grad_norm": 3.510915994644165, |
|
"learning_rate": 5.000980385516935e-07, |
|
"logits/chosen": -1.6004310846328735, |
|
"logits/rejected": -1.5690761804580688, |
|
"logps/chosen": -41.87531280517578, |
|
"logps/rejected": -251.2759246826172, |
|
"loss": 0.2711, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 30.125137329101562, |
|
"rewards/margins": 18.800806045532227, |
|
"rewards/rejected": 11.32433032989502, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"grad_norm": 3.1003832817077637, |
|
"learning_rate": 4.626799715931812e-07, |
|
"logits/chosen": -1.6454312801361084, |
|
"logits/rejected": -1.6356630325317383, |
|
"logps/chosen": -47.9179801940918, |
|
"logps/rejected": -259.88311767578125, |
|
"loss": 0.293, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 30.139379501342773, |
|
"rewards/margins": 18.82354736328125, |
|
"rewards/rejected": 11.315831184387207, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 2.93013334274292, |
|
"learning_rate": 4.2664878908102556e-07, |
|
"logits/chosen": -1.88095223903656, |
|
"logits/rejected": -1.8393617868423462, |
|
"logps/chosen": -32.803287506103516, |
|
"logps/rejected": -261.6094055175781, |
|
"loss": 0.2059, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 31.75473976135254, |
|
"rewards/margins": 20.777175903320312, |
|
"rewards/rejected": 10.977563858032227, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"grad_norm": 2.920027017593384, |
|
"learning_rate": 3.9201550436370026e-07, |
|
"logits/chosen": -1.7541553974151611, |
|
"logits/rejected": -1.7304025888442993, |
|
"logps/chosen": -40.56893539428711, |
|
"logps/rejected": -264.2108459472656, |
|
"loss": 0.2521, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 31.008968353271484, |
|
"rewards/margins": 20.572416305541992, |
|
"rewards/rejected": 10.436552047729492, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"grad_norm": 2.7568492889404297, |
|
"learning_rate": 3.587907035060195e-07, |
|
"logits/chosen": -1.5946696996688843, |
|
"logits/rejected": -1.577462911605835, |
|
"logps/chosen": -43.67282485961914, |
|
"logps/rejected": -259.1739807128906, |
|
"loss": 0.2793, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 29.5866641998291, |
|
"rewards/margins": 18.64238166809082, |
|
"rewards/rejected": 10.944284439086914, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 2.2731993198394775, |
|
"learning_rate": 3.269845420533824e-07, |
|
"logits/chosen": -1.7446489334106445, |
|
"logits/rejected": -1.701433777809143, |
|
"logps/chosen": -37.443397521972656, |
|
"logps/rejected": -254.092529296875, |
|
"loss": 0.279, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 31.358057022094727, |
|
"rewards/margins": 19.601125717163086, |
|
"rewards/rejected": 11.756929397583008, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"grad_norm": 3.0889647006988525, |
|
"learning_rate": 2.9660674192761753e-07, |
|
"logits/chosen": -1.5739749670028687, |
|
"logits/rejected": -1.5466270446777344, |
|
"logps/chosen": -46.05111312866211, |
|
"logps/rejected": -245.4317626953125, |
|
"loss": 0.2881, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 29.035449981689453, |
|
"rewards/margins": 18.32761573791504, |
|
"rewards/rejected": 10.707832336425781, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_logits/chosen": -0.8468231558799744, |
|
"eval_logits/rejected": -0.8503096103668213, |
|
"eval_logps/chosen": -167.3941192626953, |
|
"eval_logps/rejected": -209.29457092285156, |
|
"eval_loss": 2.6357526779174805, |
|
"eval_rewards/accuracies": 0.6468085050582886, |
|
"eval_rewards/chosen": 14.38486385345459, |
|
"eval_rewards/margins": 1.6875391006469727, |
|
"eval_rewards/rejected": 12.697324752807617, |
|
"eval_runtime": 934.2741, |
|
"eval_samples_per_second": 0.755, |
|
"eval_steps_per_second": 0.755, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"grad_norm": 3.4763405323028564, |
|
"learning_rate": 2.676665884553559e-07, |
|
"logits/chosen": -1.6974531412124634, |
|
"logits/rejected": -1.6716630458831787, |
|
"logps/chosen": -45.91952133178711, |
|
"logps/rejected": -240.70156860351562, |
|
"loss": 0.288, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 30.833568572998047, |
|
"rewards/margins": 19.545482635498047, |
|
"rewards/rejected": 11.288084030151367, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 2.333751916885376, |
|
"learning_rate": 2.401729275298753e-07, |
|
"logits/chosen": -1.663021445274353, |
|
"logits/rejected": -1.6606937646865845, |
|
"logps/chosen": -41.728370666503906, |
|
"logps/rejected": -257.09600830078125, |
|
"loss": 0.2824, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 31.06747817993164, |
|
"rewards/margins": 21.039501190185547, |
|
"rewards/rejected": 10.02797794342041, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"grad_norm": 3.9069831371307373, |
|
"learning_rate": 2.1413416290723966e-07, |
|
"logits/chosen": -1.7814449071884155, |
|
"logits/rejected": -1.7398021221160889, |
|
"logps/chosen": -39.289886474609375, |
|
"logps/rejected": -252.5143585205078, |
|
"loss": 0.2494, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 31.424646377563477, |
|
"rewards/margins": 20.74696922302246, |
|
"rewards/rejected": 10.677675247192383, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"grad_norm": 2.8962225914001465, |
|
"learning_rate": 1.8955825363760172e-07, |
|
"logits/chosen": -1.6453288793563843, |
|
"logits/rejected": -1.6407811641693115, |
|
"logps/chosen": -48.07708740234375, |
|
"logps/rejected": -257.2157287597656, |
|
"loss": 0.2834, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 30.098657608032227, |
|
"rewards/margins": 19.977779388427734, |
|
"rewards/rejected": 10.120881080627441, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"grad_norm": 3.3172779083251953, |
|
"learning_rate": 1.6645271163242106e-07, |
|
"logits/chosen": -1.6090644598007202, |
|
"logits/rejected": -1.5861512422561646, |
|
"logps/chosen": -46.873756408691406, |
|
"logps/rejected": -253.38778686523438, |
|
"loss": 0.2934, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 29.051366806030273, |
|
"rewards/margins": 18.360876083374023, |
|
"rewards/rejected": 10.6904935836792, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"grad_norm": 3.810804605484009, |
|
"learning_rate": 1.448245993683639e-07, |
|
"logits/chosen": -1.6912416219711304, |
|
"logits/rejected": -1.6398060321807861, |
|
"logps/chosen": -42.75129318237305, |
|
"logps/rejected": -255.9859619140625, |
|
"loss": 0.2546, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 31.15047836303711, |
|
"rewards/margins": 19.870410919189453, |
|
"rewards/rejected": 11.280068397521973, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"grad_norm": 2.5805416107177734, |
|
"learning_rate": 1.2468052772857786e-07, |
|
"logits/chosen": -1.6538400650024414, |
|
"logits/rejected": -1.6332565546035767, |
|
"logps/chosen": -39.50425338745117, |
|
"logps/rejected": -243.5617218017578, |
|
"loss": 0.2568, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 29.246089935302734, |
|
"rewards/margins": 18.635494232177734, |
|
"rewards/rejected": 10.610593795776367, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 3.1274917125701904, |
|
"learning_rate": 1.060266539819932e-07, |
|
"logits/chosen": -1.642760992050171, |
|
"logits/rejected": -1.6645612716674805, |
|
"logps/chosen": -52.89581298828125, |
|
"logps/rejected": -240.6681671142578, |
|
"loss": 0.3194, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 29.198993682861328, |
|
"rewards/margins": 18.33091926574707, |
|
"rewards/rejected": 10.868074417114258, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"grad_norm": 4.011273384094238, |
|
"learning_rate": 8.886867990128722e-08, |
|
"logits/chosen": -1.5743844509124756, |
|
"logits/rejected": -1.5511356592178345, |
|
"logps/chosen": -42.92226028442383, |
|
"logps/rejected": -239.4049072265625, |
|
"loss": 0.2702, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 31.1669921875, |
|
"rewards/margins": 20.256168365478516, |
|
"rewards/rejected": 10.91082763671875, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 3.313141345977783, |
|
"learning_rate": 7.321185002006848e-08, |
|
"logits/chosen": -1.7491827011108398, |
|
"logits/rejected": -1.710752248764038, |
|
"logps/chosen": -36.811859130859375, |
|
"logps/rejected": -276.8099365234375, |
|
"loss": 0.2412, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 30.183746337890625, |
|
"rewards/margins": 20.148881912231445, |
|
"rewards/rejected": 10.034868240356445, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"grad_norm": 2.1705501079559326, |
|
"learning_rate": 5.906095002982615e-08, |
|
"logits/chosen": -1.6982301473617554, |
|
"logits/rejected": -1.6695678234100342, |
|
"logps/chosen": -39.64902877807617, |
|
"logps/rejected": -265.86370849609375, |
|
"loss": 0.258, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 31.876617431640625, |
|
"rewards/margins": 21.336835861206055, |
|
"rewards/rejected": 10.539777755737305, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"grad_norm": 3.416759729385376, |
|
"learning_rate": 4.642030531712582e-08, |
|
"logits/chosen": -1.6781730651855469, |
|
"logits/rejected": -1.6320860385894775, |
|
"logps/chosen": -44.278289794921875, |
|
"logps/rejected": -270.3473205566406, |
|
"loss": 0.2723, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 31.628204345703125, |
|
"rewards/margins": 20.757869720458984, |
|
"rewards/rejected": 10.87033748626709, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"grad_norm": 2.6415388584136963, |
|
"learning_rate": 3.5293779641508156e-08, |
|
"logits/chosen": -1.7743009328842163, |
|
"logits/rejected": -1.7475459575653076, |
|
"logps/chosen": -41.21794891357422, |
|
"logps/rejected": -267.9061584472656, |
|
"loss": 0.2469, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 31.46852684020996, |
|
"rewards/margins": 19.723846435546875, |
|
"rewards/rejected": 11.74467945098877, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 3.2879960536956787, |
|
"learning_rate": 2.5684773954482433e-08, |
|
"logits/chosen": -1.7669696807861328, |
|
"logits/rejected": -1.705436110496521, |
|
"logps/chosen": -38.20746612548828, |
|
"logps/rejected": -279.90460205078125, |
|
"loss": 0.2434, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 31.641986846923828, |
|
"rewards/margins": 21.152555465698242, |
|
"rewards/rejected": 10.489428520202637, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"grad_norm": 2.513577461242676, |
|
"learning_rate": 1.7596225359988728e-08, |
|
"logits/chosen": -1.8220970630645752, |
|
"logits/rejected": -1.7889318466186523, |
|
"logps/chosen": -40.94774627685547, |
|
"logps/rejected": -259.34320068359375, |
|
"loss": 0.2511, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 30.275482177734375, |
|
"rewards/margins": 19.547029495239258, |
|
"rewards/rejected": 10.728452682495117, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"grad_norm": 3.861309766769409, |
|
"learning_rate": 1.1030606216637097e-08, |
|
"logits/chosen": -1.4674508571624756, |
|
"logits/rejected": -1.425626277923584, |
|
"logps/chosen": -48.60122299194336, |
|
"logps/rejected": -243.1677703857422, |
|
"loss": 0.3138, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 28.8472957611084, |
|
"rewards/margins": 19.466501235961914, |
|
"rewards/rejected": 9.380796432495117, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"grad_norm": 3.0707430839538574, |
|
"learning_rate": 5.989923382003216e-09, |
|
"logits/chosen": -1.5614324808120728, |
|
"logits/rejected": -1.5531564950942993, |
|
"logps/chosen": -50.88349151611328, |
|
"logps/rejected": -241.66943359375, |
|
"loss": 0.3286, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 29.8480167388916, |
|
"rewards/margins": 17.89462661743164, |
|
"rewards/rejected": 11.953387260437012, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"grad_norm": 1.1481070518493652, |
|
"learning_rate": 2.4757175992079496e-09, |
|
"logits/chosen": -1.654233694076538, |
|
"logits/rejected": -1.6192829608917236, |
|
"logps/chosen": -43.71112060546875, |
|
"logps/rejected": -246.0106201171875, |
|
"loss": 0.2606, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 30.80698013305664, |
|
"rewards/margins": 19.817432403564453, |
|
"rewards/rejected": 10.989545822143555, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"grad_norm": 1.825456976890564, |
|
"learning_rate": 4.890630259724027e-10, |
|
"logits/chosen": -1.703809380531311, |
|
"logits/rejected": -1.6901493072509766, |
|
"logps/chosen": -53.00714874267578, |
|
"logps/rejected": -266.7369384765625, |
|
"loss": 0.3145, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 30.395549774169922, |
|
"rewards/margins": 19.31685447692871, |
|
"rewards/rejected": 11.078699111938477, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1797, |
|
"total_flos": 5.920748380997222e+18, |
|
"train_loss": 0.5555986505650652, |
|
"train_runtime": 48641.0087, |
|
"train_samples_per_second": 0.296, |
|
"train_steps_per_second": 0.037 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1797, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 200, |
|
"total_flos": 5.920748380997222e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|