{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 1250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0008, |
|
"grad_norm": 1.3400232791900635, |
|
"learning_rate": 4e-08, |
|
"logits/chosen": -2.951728105545044, |
|
"logits/rejected": -3.0115513801574707, |
|
"logps/chosen": -261.5080261230469, |
|
"logps/rejected": -337.26708984375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.004, |
|
"grad_norm": 1.3120373487472534, |
|
"learning_rate": 2.0000000000000002e-07, |
|
"logits/chosen": -2.892902374267578, |
|
"logits/rejected": -2.8663315773010254, |
|
"logps/chosen": -327.0978088378906, |
|
"logps/rejected": -271.5657043457031, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.421875, |
|
"rewards/chosen": 0.0005052188062109053, |
|
"rewards/margins": 0.0005590975051745772, |
|
"rewards/rejected": -5.3878684411756694e-05, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.008, |
|
"grad_norm": 1.4112803936004639, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"logits/chosen": -2.8438823223114014, |
|
"logits/rejected": -2.8227336406707764, |
|
"logps/chosen": -278.82293701171875, |
|
"logps/rejected": -225.83895874023438, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.000398232601583004, |
|
"rewards/margins": 0.0003281077661085874, |
|
"rewards/rejected": -0.0007263403385877609, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.012, |
|
"grad_norm": 1.446423888206482, |
|
"learning_rate": 6.000000000000001e-07, |
|
"logits/chosen": -2.9413020610809326, |
|
"logits/rejected": -2.9195456504821777, |
|
"logps/chosen": -338.17449951171875, |
|
"logps/rejected": -264.4447021484375, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.00040648109279572964, |
|
"rewards/margins": -0.00015654772869311273, |
|
"rewards/rejected": -0.0002499335096217692, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.016, |
|
"grad_norm": 1.2574288845062256, |
|
"learning_rate": 8.000000000000001e-07, |
|
"logits/chosen": -2.8474764823913574, |
|
"logits/rejected": -2.80472993850708, |
|
"logps/chosen": -284.499755859375, |
|
"logps/rejected": -265.3078918457031, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.00043911076500080526, |
|
"rewards/margins": 0.000450963998446241, |
|
"rewards/rejected": -1.1853216165036429e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.0634217262268066, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"logits/chosen": -2.9212899208068848, |
|
"logits/rejected": -2.8857905864715576, |
|
"logps/chosen": -282.70477294921875, |
|
"logps/rejected": -250.54092407226562, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 4.3422729504527524e-05, |
|
"rewards/margins": 0.0001133212135755457, |
|
"rewards/rejected": -6.989858957240358e-05, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.024, |
|
"grad_norm": 1.285556435585022, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"logits/chosen": -2.8694255352020264, |
|
"logits/rejected": -2.821131944656372, |
|
"logps/chosen": -248.3980255126953, |
|
"logps/rejected": -239.7887420654297, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.0004804051131941378, |
|
"rewards/margins": 0.0003015303227584809, |
|
"rewards/rejected": 0.00017887470312416553, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.028, |
|
"grad_norm": 1.469935417175293, |
|
"learning_rate": 1.4000000000000001e-06, |
|
"logits/chosen": -2.8213143348693848, |
|
"logits/rejected": -2.830141305923462, |
|
"logps/chosen": -260.638916015625, |
|
"logps/rejected": -252.28701782226562, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.0009641313808970153, |
|
"rewards/margins": -0.0007172044133767486, |
|
"rewards/rejected": -0.00024692711303941905, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.032, |
|
"grad_norm": 1.6270555257797241, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"logits/chosen": -2.841813802719116, |
|
"logits/rejected": -2.8207640647888184, |
|
"logps/chosen": -225.63058471679688, |
|
"logps/rejected": -254.8463592529297, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 5.770945062977262e-05, |
|
"rewards/margins": 0.0010281356517225504, |
|
"rewards/rejected": -0.0009704261319711804, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.036, |
|
"grad_norm": 1.2644410133361816, |
|
"learning_rate": 1.8000000000000001e-06, |
|
"logits/chosen": -2.8922643661499023, |
|
"logits/rejected": -2.8955490589141846, |
|
"logps/chosen": -262.701416015625, |
|
"logps/rejected": -257.9744567871094, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0004659576225094497, |
|
"rewards/margins": 0.0013163817347958684, |
|
"rewards/rejected": -0.0008504241704940796, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.38533353805542, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"logits/chosen": -2.8122503757476807, |
|
"logits/rejected": -2.768099069595337, |
|
"logps/chosen": -246.9253387451172, |
|
"logps/rejected": -221.22207641601562, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.00014323795039672405, |
|
"rewards/margins": 0.0004161189717706293, |
|
"rewards/rejected": -0.00027288100682199, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.044, |
|
"grad_norm": 1.1605815887451172, |
|
"learning_rate": 2.2e-06, |
|
"logits/chosen": -2.8503968715667725, |
|
"logits/rejected": -2.8304195404052734, |
|
"logps/chosen": -289.88092041015625, |
|
"logps/rejected": -304.974609375, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.00040247183642350137, |
|
"rewards/margins": -0.00022780350991524756, |
|
"rewards/rejected": -0.00017466834106016904, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.048, |
|
"grad_norm": 1.273954153060913, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"logits/chosen": -2.89465594291687, |
|
"logits/rejected": -2.875718832015991, |
|
"logps/chosen": -265.1037902832031, |
|
"logps/rejected": -274.2173767089844, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0005437713698484004, |
|
"rewards/margins": 0.001443098415620625, |
|
"rewards/rejected": -0.0008993271621875465, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.052, |
|
"grad_norm": 1.0502583980560303, |
|
"learning_rate": 2.6e-06, |
|
"logits/chosen": -2.862156391143799, |
|
"logits/rejected": -2.837881565093994, |
|
"logps/chosen": -242.22341918945312, |
|
"logps/rejected": -258.85699462890625, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.000915366574190557, |
|
"rewards/margins": 0.001800536410883069, |
|
"rewards/rejected": -0.0008851696038618684, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.056, |
|
"grad_norm": 1.203967571258545, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"logits/chosen": -2.8611202239990234, |
|
"logits/rejected": -2.8584847450256348, |
|
"logps/chosen": -256.2646179199219, |
|
"logps/rejected": -239.5492401123047, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.0021997836884111166, |
|
"rewards/margins": 0.0023759277537465096, |
|
"rewards/rejected": -0.00017614415264688432, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.4282208681106567, |
|
"learning_rate": 3e-06, |
|
"logits/chosen": -2.921938180923462, |
|
"logits/rejected": -2.8600423336029053, |
|
"logps/chosen": -286.9979553222656, |
|
"logps/rejected": -258.0650939941406, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.0020814109593629837, |
|
"rewards/margins": 0.0028700605034828186, |
|
"rewards/rejected": -0.0007886493694968522, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.064, |
|
"grad_norm": 1.4035444259643555, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"logits/chosen": -2.8771731853485107, |
|
"logits/rejected": -2.8718912601470947, |
|
"logps/chosen": -257.95562744140625, |
|
"logps/rejected": -248.6924285888672, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.004527095705270767, |
|
"rewards/margins": 0.003182282205671072, |
|
"rewards/rejected": 0.0013448137324303389, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.068, |
|
"grad_norm": 1.4235211610794067, |
|
"learning_rate": 3.4000000000000005e-06, |
|
"logits/chosen": -2.9201653003692627, |
|
"logits/rejected": -2.8719587326049805, |
|
"logps/chosen": -307.7834777832031, |
|
"logps/rejected": -268.5202941894531, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.005866583436727524, |
|
"rewards/margins": 0.0034526665695011616, |
|
"rewards/rejected": 0.0024139168672263622, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.072, |
|
"grad_norm": 1.1721140146255493, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"logits/chosen": -2.8801143169403076, |
|
"logits/rejected": -2.844945192337036, |
|
"logps/chosen": -238.00100708007812, |
|
"logps/rejected": -238.72006225585938, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.005659895483404398, |
|
"rewards/margins": 0.0026023960672318935, |
|
"rewards/rejected": 0.003057498950511217, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.076, |
|
"grad_norm": 1.2834060192108154, |
|
"learning_rate": 3.8000000000000005e-06, |
|
"logits/chosen": -2.896768093109131, |
|
"logits/rejected": -2.9003615379333496, |
|
"logps/chosen": -268.2308654785156, |
|
"logps/rejected": -241.99667358398438, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.0076117864809930325, |
|
"rewards/margins": 0.005503328982740641, |
|
"rewards/rejected": 0.0021084570325911045, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.233703374862671, |
|
"learning_rate": 4.000000000000001e-06, |
|
"logits/chosen": -2.8716769218444824, |
|
"logits/rejected": -2.8608012199401855, |
|
"logps/chosen": -292.55133056640625, |
|
"logps/rejected": -255.74276733398438, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.009478461928665638, |
|
"rewards/margins": 0.007306996732950211, |
|
"rewards/rejected": 0.00217146473005414, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_logits/chosen": -2.890129566192627, |
|
"eval_logits/rejected": -2.8480653762817383, |
|
"eval_logps/chosen": -282.2447204589844, |
|
"eval_logps/rejected": -247.7537384033203, |
|
"eval_loss": 0.6896404027938843, |
|
"eval_rewards/accuracies": 0.6626983880996704, |
|
"eval_rewards/chosen": 0.00993373617529869, |
|
"eval_rewards/margins": 0.007174656726419926, |
|
"eval_rewards/rejected": 0.0027590803802013397, |
|
"eval_runtime": 167.6825, |
|
"eval_samples_per_second": 2.982, |
|
"eval_steps_per_second": 0.376, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.084, |
|
"grad_norm": 1.3204169273376465, |
|
"learning_rate": 4.2000000000000004e-06, |
|
"logits/chosen": -2.8663666248321533, |
|
"logits/rejected": -2.820469856262207, |
|
"logps/chosen": -272.216552734375, |
|
"logps/rejected": -261.4291076660156, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.008843997493386269, |
|
"rewards/margins": 0.005469636060297489, |
|
"rewards/rejected": 0.003374360501766205, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.088, |
|
"grad_norm": 1.2558304071426392, |
|
"learning_rate": 4.4e-06, |
|
"logits/chosen": -2.935101270675659, |
|
"logits/rejected": -2.903439521789551, |
|
"logps/chosen": -251.76089477539062, |
|
"logps/rejected": -246.2103729248047, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.010050037875771523, |
|
"rewards/margins": 0.00540496688336134, |
|
"rewards/rejected": 0.004645070992410183, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.092, |
|
"grad_norm": 2.3479883670806885, |
|
"learning_rate": 4.600000000000001e-06, |
|
"logits/chosen": -2.8293395042419434, |
|
"logits/rejected": -2.8186140060424805, |
|
"logps/chosen": -225.02096557617188, |
|
"logps/rejected": -294.81719970703125, |
|
"loss": 0.6865, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.012401686981320381, |
|
"rewards/margins": 0.01355154998600483, |
|
"rewards/rejected": -0.0011498630046844482, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.096, |
|
"grad_norm": 3.1538350582122803, |
|
"learning_rate": 4.800000000000001e-06, |
|
"logits/chosen": -2.7399497032165527, |
|
"logits/rejected": -2.749803066253662, |
|
"logps/chosen": -275.8015441894531, |
|
"logps/rejected": -249.06002807617188, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.01128525473177433, |
|
"rewards/margins": 0.009098999202251434, |
|
"rewards/rejected": 0.002186256693676114, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.5341688394546509, |
|
"learning_rate": 5e-06, |
|
"logits/chosen": -2.9408421516418457, |
|
"logits/rejected": -2.924834728240967, |
|
"logps/chosen": -310.1289367675781, |
|
"logps/rejected": -280.08770751953125, |
|
"loss": 0.6868, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.012295490130782127, |
|
"rewards/margins": 0.01287610363215208, |
|
"rewards/rejected": -0.0005806152475997806, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.104, |
|
"grad_norm": 1.4902671575546265, |
|
"learning_rate": 4.999756310023261e-06, |
|
"logits/chosen": -2.889176845550537, |
|
"logits/rejected": -2.896610975265503, |
|
"logps/chosen": -286.29925537109375, |
|
"logps/rejected": -309.28265380859375, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.00844976119697094, |
|
"rewards/margins": 0.011819533072412014, |
|
"rewards/rejected": -0.0033697723411023617, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.108, |
|
"grad_norm": 1.4190293550491333, |
|
"learning_rate": 4.999025287600886e-06, |
|
"logits/chosen": -2.892551898956299, |
|
"logits/rejected": -2.908194065093994, |
|
"logps/chosen": -274.4278869628906, |
|
"logps/rejected": -265.5429382324219, |
|
"loss": 0.6806, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.012322185561060905, |
|
"rewards/margins": 0.025636225938796997, |
|
"rewards/rejected": -0.013314038515090942, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.112, |
|
"grad_norm": 1.3311868906021118, |
|
"learning_rate": 4.997807075247147e-06, |
|
"logits/chosen": -2.8810911178588867, |
|
"logits/rejected": -2.859647750854492, |
|
"logps/chosen": -247.0091094970703, |
|
"logps/rejected": -236.913818359375, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.004415545146912336, |
|
"rewards/margins": 0.009412359446287155, |
|
"rewards/rejected": -0.013827905058860779, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.116, |
|
"grad_norm": 1.286658525466919, |
|
"learning_rate": 4.996101910454953e-06, |
|
"logits/chosen": -2.9036033153533936, |
|
"logits/rejected": -2.8598740100860596, |
|
"logps/chosen": -273.776123046875, |
|
"logps/rejected": -244.0740509033203, |
|
"loss": 0.6801, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.0010833339765667915, |
|
"rewards/margins": 0.026883091777563095, |
|
"rewards/rejected": -0.02579975686967373, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.6749712228775024, |
|
"learning_rate": 4.993910125649561e-06, |
|
"logits/chosen": -2.891787528991699, |
|
"logits/rejected": -2.856782913208008, |
|
"logps/chosen": -293.8242492675781, |
|
"logps/rejected": -247.73068237304688, |
|
"loss": 0.6806, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.010322624817490578, |
|
"rewards/margins": 0.025890743359923363, |
|
"rewards/rejected": -0.015568114817142487, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.124, |
|
"grad_norm": 1.5311990976333618, |
|
"learning_rate": 4.9912321481237616e-06, |
|
"logits/chosen": -2.7779576778411865, |
|
"logits/rejected": -2.774000883102417, |
|
"logps/chosen": -231.47427368164062, |
|
"logps/rejected": -290.8375549316406, |
|
"loss": 0.6839, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0004987965803593397, |
|
"rewards/margins": 0.019459182396531105, |
|
"rewards/rejected": -0.018960384652018547, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.128, |
|
"grad_norm": 1.5341625213623047, |
|
"learning_rate": 4.988068499954578e-06, |
|
"logits/chosen": -2.890897512435913, |
|
"logits/rejected": -2.889413356781006, |
|
"logps/chosen": -316.82025146484375, |
|
"logps/rejected": -312.2231140136719, |
|
"loss": 0.6716, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.031246433034539223, |
|
"rewards/margins": 0.045340072363615036, |
|
"rewards/rejected": -0.014093644917011261, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.132, |
|
"grad_norm": 1.6371203660964966, |
|
"learning_rate": 4.984419797901491e-06, |
|
"logits/chosen": -2.9241220951080322, |
|
"logits/rejected": -2.9127113819122314, |
|
"logps/chosen": -311.5802001953125, |
|
"logps/rejected": -282.0766906738281, |
|
"loss": 0.6702, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.03161744773387909, |
|
"rewards/margins": 0.04772466421127319, |
|
"rewards/rejected": -0.016107218340039253, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.136, |
|
"grad_norm": 1.5368744134902954, |
|
"learning_rate": 4.980286753286196e-06, |
|
"logits/chosen": -2.917865514755249, |
|
"logits/rejected": -2.9101455211639404, |
|
"logps/chosen": -275.5201110839844, |
|
"logps/rejected": -273.40301513671875, |
|
"loss": 0.6758, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.02875286340713501, |
|
"rewards/margins": 0.03745796158909798, |
|
"rewards/rejected": -0.008705099113285542, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.5606811046600342, |
|
"learning_rate": 4.975670171853926e-06, |
|
"logits/chosen": -2.881425380706787, |
|
"logits/rejected": -2.82181978225708, |
|
"logps/chosen": -268.76611328125, |
|
"logps/rejected": -241.20675659179688, |
|
"loss": 0.6724, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.01504159439355135, |
|
"rewards/margins": 0.04509888216853142, |
|
"rewards/rejected": -0.030057286843657494, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.144, |
|
"grad_norm": 1.5441210269927979, |
|
"learning_rate": 4.970570953616383e-06, |
|
"logits/chosen": -2.871035099029541, |
|
"logits/rejected": -2.8470711708068848, |
|
"logps/chosen": -271.70977783203125, |
|
"logps/rejected": -250.18887329101562, |
|
"loss": 0.6577, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.02740972302854061, |
|
"rewards/margins": 0.07525759935379028, |
|
"rewards/rejected": -0.04784787446260452, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.148, |
|
"grad_norm": 1.7091320753097534, |
|
"learning_rate": 4.964990092676263e-06, |
|
"logits/chosen": -2.827620506286621, |
|
"logits/rejected": -2.824796676635742, |
|
"logps/chosen": -272.52923583984375, |
|
"logps/rejected": -226.22506713867188, |
|
"loss": 0.6767, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.0010557698551565409, |
|
"rewards/margins": 0.03629080206155777, |
|
"rewards/rejected": -0.03734657168388367, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.152, |
|
"grad_norm": 1.8644754886627197, |
|
"learning_rate": 4.958928677033465e-06, |
|
"logits/chosen": -2.8307862281799316, |
|
"logits/rejected": -2.8190550804138184, |
|
"logps/chosen": -276.7012023925781, |
|
"logps/rejected": -289.3759765625, |
|
"loss": 0.6642, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.007176427636295557, |
|
"rewards/margins": 0.06312907487154007, |
|
"rewards/rejected": -0.05595264956355095, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.156, |
|
"grad_norm": 1.796454906463623, |
|
"learning_rate": 4.9523878883729794e-06, |
|
"logits/chosen": -2.8766260147094727, |
|
"logits/rejected": -2.8518166542053223, |
|
"logps/chosen": -288.412841796875, |
|
"logps/rejected": -255.20870971679688, |
|
"loss": 0.656, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.004013822879642248, |
|
"rewards/margins": 0.0801478773355484, |
|
"rewards/rejected": -0.07613405585289001, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.861384391784668, |
|
"learning_rate": 4.9453690018345144e-06, |
|
"logits/chosen": -2.8510830402374268, |
|
"logits/rejected": -2.8279526233673096, |
|
"logps/chosen": -255.71249389648438, |
|
"logps/rejected": -257.49163818359375, |
|
"loss": 0.653, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.003837744938209653, |
|
"rewards/margins": 0.08905676007270813, |
|
"rewards/rejected": -0.09289450943470001, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_logits/chosen": -2.874197244644165, |
|
"eval_logits/rejected": -2.8339242935180664, |
|
"eval_logps/chosen": -284.5635070800781, |
|
"eval_logps/rejected": -257.5692443847656, |
|
"eval_loss": 0.6569300293922424, |
|
"eval_rewards/accuracies": 0.6865079402923584, |
|
"eval_rewards/chosen": -0.013254065066576004, |
|
"eval_rewards/margins": 0.08214230835437775, |
|
"eval_rewards/rejected": -0.09539636969566345, |
|
"eval_runtime": 166.8148, |
|
"eval_samples_per_second": 2.997, |
|
"eval_steps_per_second": 0.378, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.164, |
|
"grad_norm": 1.9093883037567139, |
|
"learning_rate": 4.937873385763909e-06, |
|
"logits/chosen": -2.862431764602661, |
|
"logits/rejected": -2.829721689224243, |
|
"logps/chosen": -287.107177734375, |
|
"logps/rejected": -284.347412109375, |
|
"loss": 0.6582, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.03399428352713585, |
|
"rewards/margins": 0.07983867824077606, |
|
"rewards/rejected": -0.1138329729437828, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.168, |
|
"grad_norm": 1.8923628330230713, |
|
"learning_rate": 4.9299025014463665e-06, |
|
"logits/chosen": -2.8810172080993652, |
|
"logits/rejected": -2.8641536235809326, |
|
"logps/chosen": -248.9971466064453, |
|
"logps/rejected": -245.23355102539062, |
|
"loss": 0.6711, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.046056050807237625, |
|
"rewards/margins": 0.05324975773692131, |
|
"rewards/rejected": -0.09930581599473953, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.172, |
|
"grad_norm": 1.887384057044983, |
|
"learning_rate": 4.921457902821578e-06, |
|
"logits/chosen": -2.8637945652008057, |
|
"logits/rejected": -2.8070335388183594, |
|
"logps/chosen": -316.33355712890625, |
|
"logps/rejected": -285.98590087890625, |
|
"loss": 0.6668, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.018491100519895554, |
|
"rewards/margins": 0.06612573564052582, |
|
"rewards/rejected": -0.08461683988571167, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.176, |
|
"grad_norm": 2.3003458976745605, |
|
"learning_rate": 4.912541236180779e-06, |
|
"logits/chosen": -2.7972946166992188, |
|
"logits/rejected": -2.7600889205932617, |
|
"logps/chosen": -325.50244140625, |
|
"logps/rejected": -316.9506530761719, |
|
"loss": 0.6418, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.016769036650657654, |
|
"rewards/margins": 0.11797495931386948, |
|
"rewards/rejected": -0.13474401831626892, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.6942790746688843, |
|
"learning_rate": 4.903154239845798e-06, |
|
"logits/chosen": -2.8859641551971436, |
|
"logits/rejected": -2.8270339965820312, |
|
"logps/chosen": -271.8771667480469, |
|
"logps/rejected": -247.03146362304688, |
|
"loss": 0.6483, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.06943339854478836, |
|
"rewards/margins": 0.10388622432947159, |
|
"rewards/rejected": -0.17331962287425995, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.184, |
|
"grad_norm": 2.1547300815582275, |
|
"learning_rate": 4.893298743830168e-06, |
|
"logits/chosen": -2.793713092803955, |
|
"logits/rejected": -2.806272029876709, |
|
"logps/chosen": -302.48150634765625, |
|
"logps/rejected": -302.66973876953125, |
|
"loss": 0.6308, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.04446331784129143, |
|
"rewards/margins": 0.1429525464773178, |
|
"rewards/rejected": -0.18741586804389954, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.188, |
|
"grad_norm": 2.574212074279785, |
|
"learning_rate": 4.882976669482368e-06, |
|
"logits/chosen": -2.8080554008483887, |
|
"logits/rejected": -2.778135061264038, |
|
"logps/chosen": -275.0320129394531, |
|
"logps/rejected": -280.06610107421875, |
|
"loss": 0.6431, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.09150179475545883, |
|
"rewards/margins": 0.12502221763134003, |
|
"rewards/rejected": -0.21652403473854065, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.192, |
|
"grad_norm": 2.8856008052825928, |
|
"learning_rate": 4.8721900291112415e-06, |
|
"logits/chosen": -2.8577849864959717, |
|
"logits/rejected": -2.836282253265381, |
|
"logps/chosen": -291.05352783203125, |
|
"logps/rejected": -275.5093078613281, |
|
"loss": 0.6433, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.016148822382092476, |
|
"rewards/margins": 0.11701644957065582, |
|
"rewards/rejected": -0.13316525518894196, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.196, |
|
"grad_norm": 2.3244190216064453, |
|
"learning_rate": 4.860940925593703e-06, |
|
"logits/chosen": -2.8803956508636475, |
|
"logits/rejected": -2.8489887714385986, |
|
"logps/chosen": -288.46856689453125, |
|
"logps/rejected": -274.30364990234375, |
|
"loss": 0.637, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.012815827503800392, |
|
"rewards/margins": 0.13950756192207336, |
|
"rewards/rejected": -0.12669174373149872, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.1376349925994873, |
|
"learning_rate": 4.849231551964771e-06, |
|
"logits/chosen": -2.8547749519348145, |
|
"logits/rejected": -2.8269331455230713, |
|
"logps/chosen": -254.5790557861328, |
|
"logps/rejected": -242.4806365966797, |
|
"loss": 0.6555, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.016418656334280968, |
|
"rewards/margins": 0.09427244961261749, |
|
"rewards/rejected": -0.11069109290838242, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.204, |
|
"grad_norm": 3.7023279666900635, |
|
"learning_rate": 4.837064190990036e-06, |
|
"logits/chosen": -2.7928366661071777, |
|
"logits/rejected": -2.8065311908721924, |
|
"logps/chosen": -286.6212158203125, |
|
"logps/rejected": -284.4375, |
|
"loss": 0.6467, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.05689004808664322, |
|
"rewards/margins": 0.11735578626394272, |
|
"rewards/rejected": -0.17424583435058594, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.208, |
|
"grad_norm": 3.1189470291137695, |
|
"learning_rate": 4.824441214720629e-06, |
|
"logits/chosen": -2.826845169067383, |
|
"logits/rejected": -2.839430332183838, |
|
"logps/chosen": -330.69378662109375, |
|
"logps/rejected": -295.32781982421875, |
|
"loss": 0.6619, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.11839203536510468, |
|
"rewards/margins": 0.09098449349403381, |
|
"rewards/rejected": -0.2093765288591385, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.212, |
|
"grad_norm": 5.202083110809326, |
|
"learning_rate": 4.811365084030784e-06, |
|
"logits/chosen": -2.7906274795532227, |
|
"logits/rejected": -2.7402591705322266, |
|
"logps/chosen": -240.4190673828125, |
|
"logps/rejected": -258.24639892578125, |
|
"loss": 0.6288, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.13684885203838348, |
|
"rewards/margins": 0.1477588713169098, |
|
"rewards/rejected": -0.28460773825645447, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.216, |
|
"grad_norm": 4.058558940887451, |
|
"learning_rate": 4.7978383481380865e-06, |
|
"logits/chosen": -2.8255257606506348, |
|
"logits/rejected": -2.827247381210327, |
|
"logps/chosen": -284.66058349609375, |
|
"logps/rejected": -326.15008544921875, |
|
"loss": 0.6278, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1267203986644745, |
|
"rewards/margins": 0.1748858392238617, |
|
"rewards/rejected": -0.3016062378883362, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 2.6630849838256836, |
|
"learning_rate": 4.783863644106502e-06, |
|
"logits/chosen": -2.8801255226135254, |
|
"logits/rejected": -2.8717246055603027, |
|
"logps/chosen": -279.7027893066406, |
|
"logps/rejected": -273.7590026855469, |
|
"loss": 0.6354, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.12922950088977814, |
|
"rewards/margins": 0.1460111439228058, |
|
"rewards/rejected": -0.2752406597137451, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.224, |
|
"grad_norm": 3.321779251098633, |
|
"learning_rate": 4.769443696332272e-06, |
|
"logits/chosen": -2.8768434524536133, |
|
"logits/rejected": -2.8461215496063232, |
|
"logps/chosen": -292.112548828125, |
|
"logps/rejected": -293.89886474609375, |
|
"loss": 0.6301, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.08245956897735596, |
|
"rewards/margins": 0.16300079226493835, |
|
"rewards/rejected": -0.24546034634113312, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.228, |
|
"grad_norm": 2.9796993732452393, |
|
"learning_rate": 4.754581316012785e-06, |
|
"logits/chosen": -2.875678300857544, |
|
"logits/rejected": -2.8014907836914062, |
|
"logps/chosen": -321.65411376953125, |
|
"logps/rejected": -297.4300842285156, |
|
"loss": 0.6025, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.09748764336109161, |
|
"rewards/margins": 0.22937169671058655, |
|
"rewards/rejected": -0.32685935497283936, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.232, |
|
"grad_norm": 3.133993148803711, |
|
"learning_rate": 4.7392794005985324e-06, |
|
"logits/chosen": -2.8018627166748047, |
|
"logits/rejected": -2.7942662239074707, |
|
"logps/chosen": -292.52667236328125, |
|
"logps/rejected": -271.00030517578125, |
|
"loss": 0.5949, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.21974220871925354, |
|
"rewards/margins": 0.23409290611743927, |
|
"rewards/rejected": -0.4538350999355316, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.236, |
|
"grad_norm": 3.11313796043396, |
|
"learning_rate": 4.723540933228245e-06, |
|
"logits/chosen": -2.820415496826172, |
|
"logits/rejected": -2.796581745147705, |
|
"logps/chosen": -329.7557678222656, |
|
"logps/rejected": -321.95233154296875, |
|
"loss": 0.6621, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3443993926048279, |
|
"rewards/margins": 0.10115940868854523, |
|
"rewards/rejected": -0.4455588757991791, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 3.609873056411743, |
|
"learning_rate": 4.707368982147318e-06, |
|
"logits/chosen": -2.8759727478027344, |
|
"logits/rejected": -2.825862407684326, |
|
"logps/chosen": -333.0445556640625, |
|
"logps/rejected": -285.5638122558594, |
|
"loss": 0.6385, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3127234876155853, |
|
"rewards/margins": 0.1507173478603363, |
|
"rewards/rejected": -0.46344083547592163, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_logits/chosen": -2.8398690223693848, |
|
"eval_logits/rejected": -2.803138494491577, |
|
"eval_logps/chosen": -310.65655517578125, |
|
"eval_logps/rejected": -295.55364990234375, |
|
"eval_loss": 0.6190334558486938, |
|
"eval_rewards/accuracies": 0.6904761791229248, |
|
"eval_rewards/chosen": -0.27418458461761475, |
|
"eval_rewards/margins": 0.20105606317520142, |
|
"eval_rewards/rejected": -0.47524064779281616, |
|
"eval_runtime": 167.0651, |
|
"eval_samples_per_second": 2.993, |
|
"eval_steps_per_second": 0.377, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.244, |
|
"grad_norm": 5.130188941955566, |
|
"learning_rate": 4.690766700109659e-06, |
|
"logits/chosen": -2.814621925354004, |
|
"logits/rejected": -2.7681021690368652, |
|
"logps/chosen": -254.0991973876953, |
|
"logps/rejected": -227.88961791992188, |
|
"loss": 0.6404, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.3078501522541046, |
|
"rewards/margins": 0.14654883742332458, |
|
"rewards/rejected": -0.4543989598751068, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.248, |
|
"grad_norm": 2.648085832595825, |
|
"learning_rate": 4.673737323763048e-06, |
|
"logits/chosen": -2.870044708251953, |
|
"logits/rejected": -2.8921992778778076, |
|
"logps/chosen": -322.07037353515625, |
|
"logps/rejected": -309.27874755859375, |
|
"loss": 0.5945, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.23309031128883362, |
|
"rewards/margins": 0.25641053915023804, |
|
"rewards/rejected": -0.48950082063674927, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.252, |
|
"grad_norm": 2.9251554012298584, |
|
"learning_rate": 4.656284173018144e-06, |
|
"logits/chosen": -2.796626567840576, |
|
"logits/rejected": -2.777113437652588, |
|
"logps/chosen": -303.8788146972656, |
|
"logps/rejected": -336.8778991699219, |
|
"loss": 0.6197, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.22741813957691193, |
|
"rewards/margins": 0.18721428513526917, |
|
"rewards/rejected": -0.4146324098110199, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.256, |
|
"grad_norm": 3.248079776763916, |
|
"learning_rate": 4.638410650401267e-06, |
|
"logits/chosen": -2.868063449859619, |
|
"logits/rejected": -2.8760440349578857, |
|
"logps/chosen": -310.02423095703125, |
|
"logps/rejected": -324.77301025390625, |
|
"loss": 0.6175, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.21097686886787415, |
|
"rewards/margins": 0.21348261833190918, |
|
"rewards/rejected": -0.42445945739746094, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 3.228531837463379, |
|
"learning_rate": 4.620120240391065e-06, |
|
"logits/chosen": -2.8391566276550293, |
|
"logits/rejected": -2.864546775817871, |
|
"logps/chosen": -333.9083251953125, |
|
"logps/rejected": -310.19256591796875, |
|
"loss": 0.6103, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.18001362681388855, |
|
"rewards/margins": 0.2388877123594284, |
|
"rewards/rejected": -0.41890135407447815, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.264, |
|
"grad_norm": 4.000072479248047, |
|
"learning_rate": 4.601416508739211e-06, |
|
"logits/chosen": -2.768284320831299, |
|
"logits/rejected": -2.7349295616149902, |
|
"logps/chosen": -294.70703125, |
|
"logps/rejected": -288.8208923339844, |
|
"loss": 0.61, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.18951205909252167, |
|
"rewards/margins": 0.23551206290721893, |
|
"rewards/rejected": -0.4250241816043854, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.268, |
|
"grad_norm": 4.16896915435791, |
|
"learning_rate": 4.582303101775249e-06, |
|
"logits/chosen": -2.7774980068206787, |
|
"logits/rejected": -2.754678726196289, |
|
"logps/chosen": -302.11602783203125, |
|
"logps/rejected": -277.4075012207031, |
|
"loss": 0.613, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.25999313592910767, |
|
"rewards/margins": 0.24224285781383514, |
|
"rewards/rejected": -0.502236008644104, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.272, |
|
"grad_norm": 3.032811403274536, |
|
"learning_rate": 4.562783745695738e-06, |
|
"logits/chosen": -2.7642288208007812, |
|
"logits/rejected": -2.8096585273742676, |
|
"logps/chosen": -216.1359100341797, |
|
"logps/rejected": -250.99014282226562, |
|
"loss": 0.6156, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.3114262819290161, |
|
"rewards/margins": 0.21660414338111877, |
|
"rewards/rejected": -0.5280304551124573, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.276, |
|
"grad_norm": 4.874217987060547, |
|
"learning_rate": 4.542862245837821e-06, |
|
"logits/chosen": -2.868809700012207, |
|
"logits/rejected": -2.817004680633545, |
|
"logps/chosen": -327.43060302734375, |
|
"logps/rejected": -330.1990966796875, |
|
"loss": 0.5829, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.24460521340370178, |
|
"rewards/margins": 0.30477675795555115, |
|
"rewards/rejected": -0.5493819117546082, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 4.413153648376465, |
|
"learning_rate": 4.522542485937369e-06, |
|
"logits/chosen": -2.7310781478881836, |
|
"logits/rejected": -2.7017111778259277, |
|
"logps/chosen": -269.2838134765625, |
|
"logps/rejected": -288.1488952636719, |
|
"loss": 0.6175, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.31993380188941956, |
|
"rewards/margins": 0.23448209464550018, |
|
"rewards/rejected": -0.5544158220291138, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.284, |
|
"grad_norm": 3.4240481853485107, |
|
"learning_rate": 4.501828427371834e-06, |
|
"logits/chosen": -2.8249359130859375, |
|
"logits/rejected": -2.7783515453338623, |
|
"logps/chosen": -279.47833251953125, |
|
"logps/rejected": -266.2778015136719, |
|
"loss": 0.6243, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2718459963798523, |
|
"rewards/margins": 0.22681903839111328, |
|
"rewards/rejected": -0.4986650049686432, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.288, |
|
"grad_norm": 4.139309406280518, |
|
"learning_rate": 4.4807241083879774e-06, |
|
"logits/chosen": -2.8446147441864014, |
|
"logits/rejected": -2.8624658584594727, |
|
"logps/chosen": -301.0242614746094, |
|
"logps/rejected": -329.738037109375, |
|
"loss": 0.6215, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.29846441745758057, |
|
"rewards/margins": 0.22665563225746155, |
|
"rewards/rejected": -0.5251200199127197, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.292, |
|
"grad_norm": 3.9606006145477295, |
|
"learning_rate": 4.4592336433146e-06, |
|
"logits/chosen": -2.829144239425659, |
|
"logits/rejected": -2.8264288902282715, |
|
"logps/chosen": -306.7838439941406, |
|
"logps/rejected": -312.4410095214844, |
|
"loss": 0.6144, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.3578701913356781, |
|
"rewards/margins": 0.22671571373939514, |
|
"rewards/rejected": -0.5845859050750732, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.296, |
|
"grad_norm": 4.797597885131836, |
|
"learning_rate": 4.437361221760449e-06, |
|
"logits/chosen": -2.8679544925689697, |
|
"logits/rejected": -2.8508830070495605, |
|
"logps/chosen": -314.5845947265625, |
|
"logps/rejected": -294.0763244628906, |
|
"loss": 0.5933, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.30855458974838257, |
|
"rewards/margins": 0.29460665583610535, |
|
"rewards/rejected": -0.6031612157821655, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 4.122452259063721, |
|
"learning_rate": 4.415111107797445e-06, |
|
"logits/chosen": -2.7813663482666016, |
|
"logits/rejected": -2.6964869499206543, |
|
"logps/chosen": -304.09637451171875, |
|
"logps/rejected": -295.1402587890625, |
|
"loss": 0.635, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.27096718549728394, |
|
"rewards/margins": 0.20372644066810608, |
|
"rewards/rejected": -0.4746936857700348, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.304, |
|
"grad_norm": 3.014270305633545, |
|
"learning_rate": 4.3924876391293915e-06, |
|
"logits/chosen": -2.813854217529297, |
|
"logits/rejected": -2.7791194915771484, |
|
"logps/chosen": -274.0520935058594, |
|
"logps/rejected": -276.63140869140625, |
|
"loss": 0.6145, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.14454008638858795, |
|
"rewards/margins": 0.22425620257854462, |
|
"rewards/rejected": -0.36879628896713257, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.308, |
|
"grad_norm": 3.01690936088562, |
|
"learning_rate": 4.36949522624633e-06, |
|
"logits/chosen": -2.8495421409606934, |
|
"logits/rejected": -2.8250906467437744, |
|
"logps/chosen": -324.4741516113281, |
|
"logps/rejected": -310.1029968261719, |
|
"loss": 0.5852, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.11460743099451065, |
|
"rewards/margins": 0.2905317544937134, |
|
"rewards/rejected": -0.4051392078399658, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.312, |
|
"grad_norm": 6.7993245124816895, |
|
"learning_rate": 4.346138351564711e-06, |
|
"logits/chosen": -2.8487377166748047, |
|
"logits/rejected": -2.778311252593994, |
|
"logps/chosen": -363.527587890625, |
|
"logps/rejected": -311.5059509277344, |
|
"loss": 0.6315, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.21368345618247986, |
|
"rewards/margins": 0.20435233414173126, |
|
"rewards/rejected": -0.4180358052253723, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.316, |
|
"grad_norm": 4.2425737380981445, |
|
"learning_rate": 4.322421568553529e-06, |
|
"logits/chosen": -2.8648476600646973, |
|
"logits/rejected": -2.8160643577575684, |
|
"logps/chosen": -383.7563781738281, |
|
"logps/rejected": -340.1937561035156, |
|
"loss": 0.6138, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1808895766735077, |
|
"rewards/margins": 0.22633692622184753, |
|
"rewards/rejected": -0.40722647309303284, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 3.8491086959838867, |
|
"learning_rate": 4.2983495008466285e-06, |
|
"logits/chosen": -2.879483461380005, |
|
"logits/rejected": -2.8419952392578125, |
|
"logps/chosen": -320.96197509765625, |
|
"logps/rejected": -318.02056884765625, |
|
"loss": 0.5689, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.22649240493774414, |
|
"rewards/margins": 0.3563699424266815, |
|
"rewards/rejected": -0.5828623175621033, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_logits/chosen": -2.8437469005584717, |
|
"eval_logits/rejected": -2.8083014488220215, |
|
"eval_logps/chosen": -312.95733642578125, |
|
"eval_logps/rejected": -305.2159423828125, |
|
"eval_loss": 0.6026533246040344, |
|
"eval_rewards/accuracies": 0.6944444179534912, |
|
"eval_rewards/chosen": -0.29719212651252747, |
|
"eval_rewards/margins": 0.27467086911201477, |
|
"eval_rewards/rejected": -0.5718629360198975, |
|
"eval_runtime": 166.7775, |
|
"eval_samples_per_second": 2.998, |
|
"eval_steps_per_second": 0.378, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.324, |
|
"grad_norm": 5.923827648162842, |
|
"learning_rate": 4.273926841341303e-06, |
|
"logits/chosen": -2.8288912773132324, |
|
"logits/rejected": -2.811527967453003, |
|
"logps/chosen": -270.85467529296875, |
|
"logps/rejected": -300.5176086425781, |
|
"loss": 0.6137, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3265763223171234, |
|
"rewards/margins": 0.2849840521812439, |
|
"rewards/rejected": -0.6115604639053345, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.328, |
|
"grad_norm": 4.179617404937744, |
|
"learning_rate": 4.249158351283414e-06, |
|
"logits/chosen": -2.830479860305786, |
|
"logits/rejected": -2.7938389778137207, |
|
"logps/chosen": -298.1662902832031, |
|
"logps/rejected": -311.0763244628906, |
|
"loss": 0.6207, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3962728977203369, |
|
"rewards/margins": 0.26946958899497986, |
|
"rewards/rejected": -0.6657425165176392, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.332, |
|
"grad_norm": 4.05703592300415, |
|
"learning_rate": 4.224048859339175e-06, |
|
"logits/chosen": -2.8106844425201416, |
|
"logits/rejected": -2.793975591659546, |
|
"logps/chosen": -320.8597717285156, |
|
"logps/rejected": -314.65643310546875, |
|
"loss": 0.5808, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.2955939471721649, |
|
"rewards/margins": 0.317967027425766, |
|
"rewards/rejected": -0.6135609745979309, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.336, |
|
"grad_norm": 3.603731632232666, |
|
"learning_rate": 4.198603260653792e-06, |
|
"logits/chosen": -2.828977108001709, |
|
"logits/rejected": -2.8087880611419678, |
|
"logps/chosen": -318.9561767578125, |
|
"logps/rejected": -296.2648010253906, |
|
"loss": 0.6219, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.312124639749527, |
|
"rewards/margins": 0.2338072955608368, |
|
"rewards/rejected": -0.5459319353103638, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 3.502253293991089, |
|
"learning_rate": 4.172826515897146e-06, |
|
"logits/chosen": -2.840507984161377, |
|
"logits/rejected": -2.8030855655670166, |
|
"logps/chosen": -284.6482238769531, |
|
"logps/rejected": -301.00970458984375, |
|
"loss": 0.5747, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.24788649380207062, |
|
"rewards/margins": 0.3554701507091522, |
|
"rewards/rejected": -0.6033565998077393, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.344, |
|
"grad_norm": 3.493734359741211, |
|
"learning_rate": 4.146723650296701e-06, |
|
"logits/chosen": -2.842768430709839, |
|
"logits/rejected": -2.8307838439941406, |
|
"logps/chosen": -303.18511962890625, |
|
"logps/rejected": -299.94586181640625, |
|
"loss": 0.5988, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.238745778799057, |
|
"rewards/margins": 0.2844436764717102, |
|
"rewards/rejected": -0.5231894254684448, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.348, |
|
"grad_norm": 4.337499618530273, |
|
"learning_rate": 4.120299752657828e-06, |
|
"logits/chosen": -2.820674180984497, |
|
"logits/rejected": -2.8131017684936523, |
|
"logps/chosen": -309.2215270996094, |
|
"logps/rejected": -302.0687561035156, |
|
"loss": 0.5918, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.20002242922782898, |
|
"rewards/margins": 0.3058350086212158, |
|
"rewards/rejected": -0.5058574080467224, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.352, |
|
"grad_norm": 4.533146381378174, |
|
"learning_rate": 4.093559974371725e-06, |
|
"logits/chosen": -2.8260464668273926, |
|
"logits/rejected": -2.8369338512420654, |
|
"logps/chosen": -313.92413330078125, |
|
"logps/rejected": -341.5757141113281, |
|
"loss": 0.5944, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.24169588088989258, |
|
"rewards/margins": 0.32093319296836853, |
|
"rewards/rejected": -0.562628984451294, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.356, |
|
"grad_norm": 6.319594860076904, |
|
"learning_rate": 4.066509528411151e-06, |
|
"logits/chosen": -2.7408385276794434, |
|
"logits/rejected": -2.70278000831604, |
|
"logps/chosen": -280.9513854980469, |
|
"logps/rejected": -315.61260986328125, |
|
"loss": 0.5481, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.34108084440231323, |
|
"rewards/margins": 0.41155967116355896, |
|
"rewards/rejected": -0.7526406049728394, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 7.433955192565918, |
|
"learning_rate": 4.039153688314146e-06, |
|
"logits/chosen": -2.865485906600952, |
|
"logits/rejected": -2.8098623752593994, |
|
"logps/chosen": -350.09820556640625, |
|
"logps/rejected": -329.5234680175781, |
|
"loss": 0.6008, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.49973931908607483, |
|
"rewards/margins": 0.3033995032310486, |
|
"rewards/rejected": -0.8031389117240906, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.364, |
|
"grad_norm": 4.991713047027588, |
|
"learning_rate": 4.011497787155938e-06, |
|
"logits/chosen": -2.777055501937866, |
|
"logits/rejected": -2.717963695526123, |
|
"logps/chosen": -340.73223876953125, |
|
"logps/rejected": -334.4681091308594, |
|
"loss": 0.5899, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.6034590005874634, |
|
"rewards/margins": 0.3287494480609894, |
|
"rewards/rejected": -0.9322085380554199, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.368, |
|
"grad_norm": 4.300924301147461, |
|
"learning_rate": 3.983547216509254e-06, |
|
"logits/chosen": -2.8502197265625, |
|
"logits/rejected": -2.8119924068450928, |
|
"logps/chosen": -382.5280456542969, |
|
"logps/rejected": -336.8903503417969, |
|
"loss": 0.5755, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.48744526505470276, |
|
"rewards/margins": 0.3749392628669739, |
|
"rewards/rejected": -0.8623844981193542, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.372, |
|
"grad_norm": 4.1107025146484375, |
|
"learning_rate": 3.955307425393224e-06, |
|
"logits/chosen": -2.875734329223633, |
|
"logits/rejected": -2.834575891494751, |
|
"logps/chosen": -360.67852783203125, |
|
"logps/rejected": -366.3440856933594, |
|
"loss": 0.5174, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.29663369059562683, |
|
"rewards/margins": 0.5048123002052307, |
|
"rewards/rejected": -0.8014459609985352, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.376, |
|
"grad_norm": 5.430475234985352, |
|
"learning_rate": 3.92678391921108e-06, |
|
"logits/chosen": -2.725780963897705, |
|
"logits/rejected": -2.7029075622558594, |
|
"logps/chosen": -364.1918640136719, |
|
"logps/rejected": -378.3152160644531, |
|
"loss": 0.5719, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.42064231634140015, |
|
"rewards/margins": 0.38249263167381287, |
|
"rewards/rejected": -0.8031350374221802, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 6.495006561279297, |
|
"learning_rate": 3.897982258676867e-06, |
|
"logits/chosen": -2.775744915008545, |
|
"logits/rejected": -2.769195318222046, |
|
"logps/chosen": -308.8157043457031, |
|
"logps/rejected": -341.82037353515625, |
|
"loss": 0.5823, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.36774376034736633, |
|
"rewards/margins": 0.3316097855567932, |
|
"rewards/rejected": -0.6993535161018372, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.384, |
|
"grad_norm": 7.480860233306885, |
|
"learning_rate": 3.868908058731376e-06, |
|
"logits/chosen": -2.7915594577789307, |
|
"logits/rejected": -2.742253065109253, |
|
"logps/chosen": -352.02996826171875, |
|
"logps/rejected": -327.95928955078125, |
|
"loss": 0.6819, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.5273100733757019, |
|
"rewards/margins": 0.15378466248512268, |
|
"rewards/rejected": -0.681094765663147, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.388, |
|
"grad_norm": 8.06702709197998, |
|
"learning_rate": 3.839566987447492e-06, |
|
"logits/chosen": -2.7685608863830566, |
|
"logits/rejected": -2.7417702674865723, |
|
"logps/chosen": -345.76495361328125, |
|
"logps/rejected": -346.05426025390625, |
|
"loss": 0.573, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5824685096740723, |
|
"rewards/margins": 0.387678325176239, |
|
"rewards/rejected": -0.9701469540596008, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.392, |
|
"grad_norm": 5.584991931915283, |
|
"learning_rate": 3.8099647649251984e-06, |
|
"logits/chosen": -2.816774845123291, |
|
"logits/rejected": -2.766758441925049, |
|
"logps/chosen": -344.7107849121094, |
|
"logps/rejected": -350.0550842285156, |
|
"loss": 0.621, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6366808414459229, |
|
"rewards/margins": 0.28556036949157715, |
|
"rewards/rejected": -0.9222410917282104, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.396, |
|
"grad_norm": 8.032232284545898, |
|
"learning_rate": 3.780107162176429e-06, |
|
"logits/chosen": -2.7901835441589355, |
|
"logits/rejected": -2.771435260772705, |
|
"logps/chosen": -359.55804443359375, |
|
"logps/rejected": -320.9138488769531, |
|
"loss": 0.5898, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.7569794058799744, |
|
"rewards/margins": 0.3364938199520111, |
|
"rewards/rejected": -1.0934733152389526, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 6.636690616607666, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"logits/chosen": -2.7686400413513184, |
|
"logits/rejected": -2.747738838195801, |
|
"logps/chosen": -380.00469970703125, |
|
"logps/rejected": -391.73797607421875, |
|
"loss": 0.5689, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6767264008522034, |
|
"rewards/margins": 0.42483949661254883, |
|
"rewards/rejected": -1.1015657186508179, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_logits/chosen": -2.7560079097747803, |
|
"eval_logits/rejected": -2.7151803970336914, |
|
"eval_logps/chosen": -349.3812255859375, |
|
"eval_logps/rejected": -355.066162109375, |
|
"eval_loss": 0.5749732851982117, |
|
"eval_rewards/accuracies": 0.7242063283920288, |
|
"eval_rewards/chosen": -0.6614311933517456, |
|
"eval_rewards/margins": 0.40893420577049255, |
|
"eval_rewards/rejected": -1.0703654289245605, |
|
"eval_runtime": 166.5841, |
|
"eval_samples_per_second": 3.001, |
|
"eval_steps_per_second": 0.378, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.404, |
|
"grad_norm": 5.858213901519775, |
|
"learning_rate": 3.7196491478468322e-06, |
|
"logits/chosen": -2.6860787868499756, |
|
"logits/rejected": -2.702346086502075, |
|
"logps/chosen": -351.7030944824219, |
|
"logps/rejected": -389.79852294921875, |
|
"loss": 0.5747, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7111638784408569, |
|
"rewards/margins": 0.4305337369441986, |
|
"rewards/rejected": -1.141697645187378, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.408, |
|
"grad_norm": 6.421969890594482, |
|
"learning_rate": 3.689060522675689e-06, |
|
"logits/chosen": -2.7720227241516113, |
|
"logits/rejected": -2.7550883293151855, |
|
"logps/chosen": -335.5453186035156, |
|
"logps/rejected": -353.2554931640625, |
|
"loss": 0.6084, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.47883662581443787, |
|
"rewards/margins": 0.3589397370815277, |
|
"rewards/rejected": -0.8377763628959656, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.412, |
|
"grad_norm": 6.96730375289917, |
|
"learning_rate": 3.658240087799655e-06, |
|
"logits/chosen": -2.702028751373291, |
|
"logits/rejected": -2.715606689453125, |
|
"logps/chosen": -309.8144836425781, |
|
"logps/rejected": -362.7301330566406, |
|
"loss": 0.5446, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4930266737937927, |
|
"rewards/margins": 0.47733306884765625, |
|
"rewards/rejected": -0.9703596830368042, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.416, |
|
"grad_norm": 4.9304680824279785, |
|
"learning_rate": 3.627193851723577e-06, |
|
"logits/chosen": -2.7465412616729736, |
|
"logits/rejected": -2.7204620838165283, |
|
"logps/chosen": -352.72674560546875, |
|
"logps/rejected": -372.55242919921875, |
|
"loss": 0.6349, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.7522457242012024, |
|
"rewards/margins": 0.3193085789680481, |
|
"rewards/rejected": -1.0715543031692505, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 6.871084213256836, |
|
"learning_rate": 3.595927866972694e-06, |
|
"logits/chosen": -2.708409309387207, |
|
"logits/rejected": -2.7058815956115723, |
|
"logps/chosen": -292.47271728515625, |
|
"logps/rejected": -334.9390869140625, |
|
"loss": 0.5762, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6716808080673218, |
|
"rewards/margins": 0.46342235803604126, |
|
"rewards/rejected": -1.1351032257080078, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.424, |
|
"grad_norm": 6.238135814666748, |
|
"learning_rate": 3.564448228912682e-06, |
|
"logits/chosen": -2.6425788402557373, |
|
"logits/rejected": -2.6337852478027344, |
|
"logps/chosen": -382.01910400390625, |
|
"logps/rejected": -381.7391662597656, |
|
"loss": 0.6097, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.7499979734420776, |
|
"rewards/margins": 0.37163788080215454, |
|
"rewards/rejected": -1.1216356754302979, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.428, |
|
"grad_norm": 14.889227867126465, |
|
"learning_rate": 3.532761074561355e-06, |
|
"logits/chosen": -2.6252596378326416, |
|
"logits/rejected": -2.573930501937866, |
|
"logps/chosen": -389.1572265625, |
|
"logps/rejected": -428.70489501953125, |
|
"loss": 0.5708, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.7006493806838989, |
|
"rewards/margins": 0.508830189704895, |
|
"rewards/rejected": -1.209479570388794, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.432, |
|
"grad_norm": 5.182321548461914, |
|
"learning_rate": 3.5008725813922383e-06, |
|
"logits/chosen": -2.7459521293640137, |
|
"logits/rejected": -2.672208309173584, |
|
"logps/chosen": -356.2322998046875, |
|
"logps/rejected": -392.16021728515625, |
|
"loss": 0.5393, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6245570182800293, |
|
"rewards/margins": 0.5389561057090759, |
|
"rewards/rejected": -1.1635130643844604, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.436, |
|
"grad_norm": 8.57190990447998, |
|
"learning_rate": 3.4687889661302577e-06, |
|
"logits/chosen": -2.6495566368103027, |
|
"logits/rejected": -2.649183511734009, |
|
"logps/chosen": -315.9193115234375, |
|
"logps/rejected": -356.46417236328125, |
|
"loss": 0.5273, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6088639497756958, |
|
"rewards/margins": 0.5826314687728882, |
|
"rewards/rejected": -1.1914955377578735, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 6.0855207443237305, |
|
"learning_rate": 3.436516483539781e-06, |
|
"logits/chosen": -2.6810402870178223, |
|
"logits/rejected": -2.6679680347442627, |
|
"logps/chosen": -341.8032531738281, |
|
"logps/rejected": -360.6465759277344, |
|
"loss": 0.6281, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7102030515670776, |
|
"rewards/margins": 0.3800693154335022, |
|
"rewards/rejected": -1.090272307395935, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.444, |
|
"grad_norm": 6.288063049316406, |
|
"learning_rate": 3.4040614252052305e-06, |
|
"logits/chosen": -2.672840118408203, |
|
"logits/rejected": -2.665916919708252, |
|
"logps/chosen": -357.5086975097656, |
|
"logps/rejected": -378.4448547363281, |
|
"loss": 0.6071, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7066811919212341, |
|
"rewards/margins": 0.38481634855270386, |
|
"rewards/rejected": -1.091497540473938, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.448, |
|
"grad_norm": 6.352199554443359, |
|
"learning_rate": 3.3714301183045382e-06, |
|
"logits/chosen": -2.631598711013794, |
|
"logits/rejected": -2.574202299118042, |
|
"logps/chosen": -298.14923095703125, |
|
"logps/rejected": -323.47528076171875, |
|
"loss": 0.61, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6164783239364624, |
|
"rewards/margins": 0.34816139936447144, |
|
"rewards/rejected": -0.9646397829055786, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.452, |
|
"grad_norm": 4.746293067932129, |
|
"learning_rate": 3.338628924375638e-06, |
|
"logits/chosen": -2.759838581085205, |
|
"logits/rejected": -2.7202701568603516, |
|
"logps/chosen": -286.07159423828125, |
|
"logps/rejected": -346.5191955566406, |
|
"loss": 0.5194, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.4790007174015045, |
|
"rewards/margins": 0.5368759036064148, |
|
"rewards/rejected": -1.0158765316009521, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.456, |
|
"grad_norm": 4.951190948486328, |
|
"learning_rate": 3.3056642380762783e-06, |
|
"logits/chosen": -2.7108511924743652, |
|
"logits/rejected": -2.6962389945983887, |
|
"logps/chosen": -269.0116271972656, |
|
"logps/rejected": -301.1402893066406, |
|
"loss": 0.5847, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.3556315302848816, |
|
"rewards/margins": 0.43046459555625916, |
|
"rewards/rejected": -0.7860961556434631, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 6.543835639953613, |
|
"learning_rate": 3.272542485937369e-06, |
|
"logits/chosen": -2.6798412799835205, |
|
"logits/rejected": -2.6179356575012207, |
|
"logps/chosen": -285.6194152832031, |
|
"logps/rejected": -290.96771240234375, |
|
"loss": 0.5738, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.43146055936813354, |
|
"rewards/margins": 0.3925173878669739, |
|
"rewards/rejected": -0.8239779472351074, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.464, |
|
"grad_norm": 4.029048442840576, |
|
"learning_rate": 3.2392701251101172e-06, |
|
"logits/chosen": -2.7277991771698, |
|
"logits/rejected": -2.6769065856933594, |
|
"logps/chosen": -338.96826171875, |
|
"logps/rejected": -356.6761169433594, |
|
"loss": 0.517, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.38630399107933044, |
|
"rewards/margins": 0.5923217535018921, |
|
"rewards/rejected": -0.9786256551742554, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.468, |
|
"grad_norm": 6.441251277923584, |
|
"learning_rate": 3.205853642107192e-06, |
|
"logits/chosen": -2.6596102714538574, |
|
"logits/rejected": -2.6377530097961426, |
|
"logps/chosen": -295.38067626953125, |
|
"logps/rejected": -321.3651428222656, |
|
"loss": 0.6042, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.518380343914032, |
|
"rewards/margins": 0.3751484155654907, |
|
"rewards/rejected": -0.8935287594795227, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.472, |
|
"grad_norm": 8.161737442016602, |
|
"learning_rate": 3.1722995515381644e-06, |
|
"logits/chosen": -2.6348493099212646, |
|
"logits/rejected": -2.622722625732422, |
|
"logps/chosen": -333.249755859375, |
|
"logps/rejected": -362.6484069824219, |
|
"loss": 0.5217, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.5237576365470886, |
|
"rewards/margins": 0.5885855555534363, |
|
"rewards/rejected": -1.1123430728912354, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.476, |
|
"grad_norm": 5.944140911102295, |
|
"learning_rate": 3.1386143948394764e-06, |
|
"logits/chosen": -2.648714780807495, |
|
"logits/rejected": -2.6416144371032715, |
|
"logps/chosen": -329.64849853515625, |
|
"logps/rejected": -402.10260009765625, |
|
"loss": 0.5543, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.7717787027359009, |
|
"rewards/margins": 0.5048314332962036, |
|
"rewards/rejected": -1.2766101360321045, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 7.663615703582764, |
|
"learning_rate": 3.1048047389991693e-06, |
|
"logits/chosen": -2.652611494064331, |
|
"logits/rejected": -2.5794761180877686, |
|
"logps/chosen": -387.5853576660156, |
|
"logps/rejected": -337.8640441894531, |
|
"loss": 0.5884, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6710079908370972, |
|
"rewards/margins": 0.47154727578163147, |
|
"rewards/rejected": -1.1425553560256958, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_logits/chosen": -2.6723644733428955, |
|
"eval_logits/rejected": -2.6321890354156494, |
|
"eval_logps/chosen": -352.8876647949219, |
|
"eval_logps/rejected": -375.1052551269531, |
|
"eval_loss": 0.547924816608429, |
|
"eval_rewards/accuracies": 0.7123016119003296, |
|
"eval_rewards/chosen": -0.6964960694313049, |
|
"eval_rewards/margins": 0.5742600560188293, |
|
"eval_rewards/rejected": -1.2707562446594238, |
|
"eval_runtime": 166.6085, |
|
"eval_samples_per_second": 3.001, |
|
"eval_steps_per_second": 0.378, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.484, |
|
"grad_norm": 7.883712291717529, |
|
"learning_rate": 3.0708771752766397e-06, |
|
"logits/chosen": -2.699237585067749, |
|
"logits/rejected": -2.656472682952881, |
|
"logps/chosen": -396.58935546875, |
|
"logps/rejected": -403.0001525878906, |
|
"loss": 0.5645, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.6690188050270081, |
|
"rewards/margins": 0.4926017224788666, |
|
"rewards/rejected": -1.1616204977035522, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.488, |
|
"grad_norm": 7.566442489624023, |
|
"learning_rate": 3.0368383179176584e-06, |
|
"logits/chosen": -2.6338205337524414, |
|
"logits/rejected": -2.5693295001983643, |
|
"logps/chosen": -367.86016845703125, |
|
"logps/rejected": -435.5393981933594, |
|
"loss": 0.513, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.7284402847290039, |
|
"rewards/margins": 0.6670497059822083, |
|
"rewards/rejected": -1.395490050315857, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.492, |
|
"grad_norm": 8.526657104492188, |
|
"learning_rate": 3.002694802864912e-06, |
|
"logits/chosen": -2.618443727493286, |
|
"logits/rejected": -2.598315715789795, |
|
"logps/chosen": -354.45587158203125, |
|
"logps/rejected": -394.5972900390625, |
|
"loss": 0.5754, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6152892112731934, |
|
"rewards/margins": 0.5398699045181274, |
|
"rewards/rejected": -1.1551592350006104, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.496, |
|
"grad_norm": 6.0026044845581055, |
|
"learning_rate": 2.9684532864643123e-06, |
|
"logits/chosen": -2.611154317855835, |
|
"logits/rejected": -2.594029188156128, |
|
"logps/chosen": -322.4052734375, |
|
"logps/rejected": -360.61016845703125, |
|
"loss": 0.5007, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.49695730209350586, |
|
"rewards/margins": 0.6773632764816284, |
|
"rewards/rejected": -1.1743205785751343, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 19.88625144958496, |
|
"learning_rate": 2.9341204441673267e-06, |
|
"logits/chosen": -2.6633830070495605, |
|
"logits/rejected": -2.670114040374756, |
|
"logps/chosen": -366.9710388183594, |
|
"logps/rejected": -338.7015075683594, |
|
"loss": 0.667, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.5960913300514221, |
|
"rewards/margins": 0.2840344309806824, |
|
"rewards/rejected": -0.8801258206367493, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.504, |
|
"grad_norm": 14.67087173461914, |
|
"learning_rate": 2.8997029692295875e-06, |
|
"logits/chosen": -2.6533138751983643, |
|
"logits/rejected": -2.623382329940796, |
|
"logps/chosen": -282.66400146484375, |
|
"logps/rejected": -314.5120849609375, |
|
"loss": 0.6115, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.46091246604919434, |
|
"rewards/margins": 0.3925257921218872, |
|
"rewards/rejected": -0.8534382581710815, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.508, |
|
"grad_norm": 5.918943405151367, |
|
"learning_rate": 2.8652075714060296e-06, |
|
"logits/chosen": -2.6715197563171387, |
|
"logits/rejected": -2.688370704650879, |
|
"logps/chosen": -284.8374938964844, |
|
"logps/rejected": -335.3628845214844, |
|
"loss": 0.5706, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3965570032596588, |
|
"rewards/margins": 0.44703879952430725, |
|
"rewards/rejected": -0.8435958027839661, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.512, |
|
"grad_norm": 8.843329429626465, |
|
"learning_rate": 2.8306409756428067e-06, |
|
"logits/chosen": -2.623734951019287, |
|
"logits/rejected": -2.5950891971588135, |
|
"logps/chosen": -270.8127746582031, |
|
"logps/rejected": -276.7856750488281, |
|
"loss": 0.5817, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.30773457884788513, |
|
"rewards/margins": 0.39293187856674194, |
|
"rewards/rejected": -0.7006665468215942, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.516, |
|
"grad_norm": 7.708199501037598, |
|
"learning_rate": 2.7960099207662535e-06, |
|
"logits/chosen": -2.634336471557617, |
|
"logits/rejected": -2.602273941040039, |
|
"logps/chosen": -293.1026611328125, |
|
"logps/rejected": -321.48004150390625, |
|
"loss": 0.5566, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.4055629372596741, |
|
"rewards/margins": 0.4888216555118561, |
|
"rewards/rejected": -0.8943845629692078, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 5.657192230224609, |
|
"learning_rate": 2.761321158169134e-06, |
|
"logits/chosen": -2.6778979301452637, |
|
"logits/rejected": -2.681678533554077, |
|
"logps/chosen": -330.0281066894531, |
|
"logps/rejected": -331.8005065917969, |
|
"loss": 0.5934, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.48604241013526917, |
|
"rewards/margins": 0.386027991771698, |
|
"rewards/rejected": -0.8720704317092896, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.524, |
|
"grad_norm": 6.853002071380615, |
|
"learning_rate": 2.726581450494451e-06, |
|
"logits/chosen": -2.6293766498565674, |
|
"logits/rejected": -2.621476173400879, |
|
"logps/chosen": -326.91363525390625, |
|
"logps/rejected": -339.2771911621094, |
|
"loss": 0.529, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.31295642256736755, |
|
"rewards/margins": 0.5676072835922241, |
|
"rewards/rejected": -0.8805637359619141, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.528, |
|
"grad_norm": 5.255690574645996, |
|
"learning_rate": 2.6917975703170466e-06, |
|
"logits/chosen": -2.6737003326416016, |
|
"logits/rejected": -2.660823106765747, |
|
"logps/chosen": -332.20001220703125, |
|
"logps/rejected": -395.01177978515625, |
|
"loss": 0.4708, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.47576016187667847, |
|
"rewards/margins": 0.7829462885856628, |
|
"rewards/rejected": -1.2587064504623413, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.532, |
|
"grad_norm": 6.153021335601807, |
|
"learning_rate": 2.6569762988232838e-06, |
|
"logits/chosen": -2.596040725708008, |
|
"logits/rejected": -2.6018855571746826, |
|
"logps/chosen": -314.1906433105469, |
|
"logps/rejected": -368.1581115722656, |
|
"loss": 0.5719, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.6212738752365112, |
|
"rewards/margins": 0.5093709826469421, |
|
"rewards/rejected": -1.1306449174880981, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.536, |
|
"grad_norm": 12.637703895568848, |
|
"learning_rate": 2.6221244244890336e-06, |
|
"logits/chosen": -2.648380756378174, |
|
"logits/rejected": -2.5612568855285645, |
|
"logps/chosen": -381.6895751953125, |
|
"logps/rejected": -417.946044921875, |
|
"loss": 0.5273, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9110687375068665, |
|
"rewards/margins": 0.6113173961639404, |
|
"rewards/rejected": -1.5223863124847412, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 15.107666015625, |
|
"learning_rate": 2.587248741756253e-06, |
|
"logits/chosen": -2.6709227561950684, |
|
"logits/rejected": -2.6540889739990234, |
|
"logps/chosen": -357.17413330078125, |
|
"logps/rejected": -416.6585998535156, |
|
"loss": 0.5336, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7724355459213257, |
|
"rewards/margins": 0.6448327898979187, |
|
"rewards/rejected": -1.41726815700531, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.544, |
|
"grad_norm": 5.885760307312012, |
|
"learning_rate": 2.5523560497083927e-06, |
|
"logits/chosen": -2.681267023086548, |
|
"logits/rejected": -2.6446001529693604, |
|
"logps/chosen": -372.6912536621094, |
|
"logps/rejected": -416.1136779785156, |
|
"loss": 0.5489, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.9625101089477539, |
|
"rewards/margins": 0.5871840715408325, |
|
"rewards/rejected": -1.549694299697876, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.548, |
|
"grad_norm": 9.579086303710938, |
|
"learning_rate": 2.517453150744904e-06, |
|
"logits/chosen": -2.6798925399780273, |
|
"logits/rejected": -2.6284408569335938, |
|
"logps/chosen": -407.7876892089844, |
|
"logps/rejected": -411.8699645996094, |
|
"loss": 0.6101, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.9496753811836243, |
|
"rewards/margins": 0.48396366834640503, |
|
"rewards/rejected": -1.4336390495300293, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.552, |
|
"grad_norm": 6.947661399841309, |
|
"learning_rate": 2.482546849255096e-06, |
|
"logits/chosen": -2.6654515266418457, |
|
"logits/rejected": -2.622973918914795, |
|
"logps/chosen": -384.18719482421875, |
|
"logps/rejected": -447.71356201171875, |
|
"loss": 0.5146, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9958160519599915, |
|
"rewards/margins": 0.7850648760795593, |
|
"rewards/rejected": -1.7808809280395508, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.556, |
|
"grad_norm": 5.574069023132324, |
|
"learning_rate": 2.447643950291608e-06, |
|
"logits/chosen": -2.5033955574035645, |
|
"logits/rejected": -2.449953556060791, |
|
"logps/chosen": -345.85723876953125, |
|
"logps/rejected": -342.82061767578125, |
|
"loss": 0.5562, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8573344349861145, |
|
"rewards/margins": 0.5808383226394653, |
|
"rewards/rejected": -1.438172698020935, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 5.758213520050049, |
|
"learning_rate": 2.4127512582437486e-06, |
|
"logits/chosen": -2.626072406768799, |
|
"logits/rejected": -2.6115658283233643, |
|
"logps/chosen": -373.35693359375, |
|
"logps/rejected": -407.4380187988281, |
|
"loss": 0.5366, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.8556938171386719, |
|
"rewards/margins": 0.6256057620048523, |
|
"rewards/rejected": -1.4812995195388794, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_logits/chosen": -2.6541435718536377, |
|
"eval_logits/rejected": -2.6143555641174316, |
|
"eval_logps/chosen": -355.7809143066406, |
|
"eval_logps/rejected": -381.54388427734375, |
|
"eval_loss": 0.5462217330932617, |
|
"eval_rewards/accuracies": 0.7123016119003296, |
|
"eval_rewards/chosen": -0.7254281044006348, |
|
"eval_rewards/margins": 0.6097148060798645, |
|
"eval_rewards/rejected": -1.335142970085144, |
|
"eval_runtime": 166.5614, |
|
"eval_samples_per_second": 3.002, |
|
"eval_steps_per_second": 0.378, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.564, |
|
"grad_norm": 13.666606903076172, |
|
"learning_rate": 2.377875575510967e-06, |
|
"logits/chosen": -2.5781383514404297, |
|
"logits/rejected": -2.516011953353882, |
|
"logps/chosen": -360.7132873535156, |
|
"logps/rejected": -367.84564208984375, |
|
"loss": 0.587, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.8543895483016968, |
|
"rewards/margins": 0.5572708249092102, |
|
"rewards/rejected": -1.4116604328155518, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.568, |
|
"grad_norm": 15.681051254272461, |
|
"learning_rate": 2.3430237011767166e-06, |
|
"logits/chosen": -2.6835620403289795, |
|
"logits/rejected": -2.6478209495544434, |
|
"logps/chosen": -351.29498291015625, |
|
"logps/rejected": -375.1694030761719, |
|
"loss": 0.579, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7802821397781372, |
|
"rewards/margins": 0.47849854826927185, |
|
"rewards/rejected": -1.2587807178497314, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.572, |
|
"grad_norm": 8.994268417358398, |
|
"learning_rate": 2.3082024296829538e-06, |
|
"logits/chosen": -2.6145269870758057, |
|
"logits/rejected": -2.5766470432281494, |
|
"logps/chosen": -295.27313232421875, |
|
"logps/rejected": -390.36279296875, |
|
"loss": 0.4613, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.6295270919799805, |
|
"rewards/margins": 0.8818346858024597, |
|
"rewards/rejected": -1.511361837387085, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.576, |
|
"grad_norm": 7.825490474700928, |
|
"learning_rate": 2.2734185495055503e-06, |
|
"logits/chosen": -2.672497272491455, |
|
"logits/rejected": -2.5945022106170654, |
|
"logps/chosen": -352.99005126953125, |
|
"logps/rejected": -351.3237609863281, |
|
"loss": 0.5693, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6252859830856323, |
|
"rewards/margins": 0.4798332750797272, |
|
"rewards/rejected": -1.1051193475723267, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 7.579111099243164, |
|
"learning_rate": 2.238678841830867e-06, |
|
"logits/chosen": -2.6132864952087402, |
|
"logits/rejected": -2.5825414657592773, |
|
"logps/chosen": -348.1910705566406, |
|
"logps/rejected": -377.7493896484375, |
|
"loss": 0.5637, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.5163500905036926, |
|
"rewards/margins": 0.5300666689872742, |
|
"rewards/rejected": -1.0464167594909668, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.584, |
|
"grad_norm": 8.520023345947266, |
|
"learning_rate": 2.2039900792337477e-06, |
|
"logits/chosen": -2.6524438858032227, |
|
"logits/rejected": -2.6290249824523926, |
|
"logps/chosen": -356.4406433105469, |
|
"logps/rejected": -374.92156982421875, |
|
"loss": 0.5875, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6103891730308533, |
|
"rewards/margins": 0.5113776922225952, |
|
"rewards/rejected": -1.1217668056488037, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.588, |
|
"grad_norm": 4.65096378326416, |
|
"learning_rate": 2.1693590243571937e-06, |
|
"logits/chosen": -2.6716837882995605, |
|
"logits/rejected": -2.613300323486328, |
|
"logps/chosen": -326.7117614746094, |
|
"logps/rejected": -364.78997802734375, |
|
"loss": 0.5516, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6327379941940308, |
|
"rewards/margins": 0.6464030742645264, |
|
"rewards/rejected": -1.2791410684585571, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.592, |
|
"grad_norm": 8.322911262512207, |
|
"learning_rate": 2.134792428593971e-06, |
|
"logits/chosen": -2.5958149433135986, |
|
"logits/rejected": -2.5660290718078613, |
|
"logps/chosen": -307.79656982421875, |
|
"logps/rejected": -354.429443359375, |
|
"loss": 0.5574, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7369760870933533, |
|
"rewards/margins": 0.45006656646728516, |
|
"rewards/rejected": -1.1870427131652832, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.596, |
|
"grad_norm": 7.646263599395752, |
|
"learning_rate": 2.1002970307704134e-06, |
|
"logits/chosen": -2.7141809463500977, |
|
"logits/rejected": -2.65040922164917, |
|
"logps/chosen": -403.737548828125, |
|
"logps/rejected": -440.7948303222656, |
|
"loss": 0.5606, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.694787859916687, |
|
"rewards/margins": 0.6611425280570984, |
|
"rewards/rejected": -1.3559304475784302, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 6.563836097717285, |
|
"learning_rate": 2.0658795558326745e-06, |
|
"logits/chosen": -2.6275224685668945, |
|
"logits/rejected": -2.6394450664520264, |
|
"logps/chosen": -353.59765625, |
|
"logps/rejected": -409.802001953125, |
|
"loss": 0.517, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.674094021320343, |
|
"rewards/margins": 0.7538131475448608, |
|
"rewards/rejected": -1.427907109260559, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.604, |
|
"grad_norm": 5.990789890289307, |
|
"learning_rate": 2.031546713535688e-06, |
|
"logits/chosen": -2.631734848022461, |
|
"logits/rejected": -2.5736083984375, |
|
"logps/chosen": -346.9231872558594, |
|
"logps/rejected": -393.51727294921875, |
|
"loss": 0.535, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6008895039558411, |
|
"rewards/margins": 0.6397835612297058, |
|
"rewards/rejected": -1.240673303604126, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.608, |
|
"grad_norm": 10.244848251342773, |
|
"learning_rate": 1.997305197135089e-06, |
|
"logits/chosen": -2.5579869747161865, |
|
"logits/rejected": -2.5684762001037598, |
|
"logps/chosen": -278.13470458984375, |
|
"logps/rejected": -336.5936584472656, |
|
"loss": 0.5631, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6302391886711121, |
|
"rewards/margins": 0.5114450454711914, |
|
"rewards/rejected": -1.1416842937469482, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.612, |
|
"grad_norm": 7.769974708557129, |
|
"learning_rate": 1.963161682082342e-06, |
|
"logits/chosen": -2.536005735397339, |
|
"logits/rejected": -2.584683656692505, |
|
"logps/chosen": -334.3309020996094, |
|
"logps/rejected": -362.7339782714844, |
|
"loss": 0.548, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.5386423468589783, |
|
"rewards/margins": 0.5726658701896667, |
|
"rewards/rejected": -1.111308217048645, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.616, |
|
"grad_norm": 4.755601406097412, |
|
"learning_rate": 1.9291228247233607e-06, |
|
"logits/chosen": -2.5625104904174805, |
|
"logits/rejected": -2.5171782970428467, |
|
"logps/chosen": -320.0146484375, |
|
"logps/rejected": -359.59197998046875, |
|
"loss": 0.543, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3926158845424652, |
|
"rewards/margins": 0.5113145112991333, |
|
"rewards/rejected": -0.9039304852485657, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 8.86306095123291, |
|
"learning_rate": 1.895195261000831e-06, |
|
"logits/chosen": -2.6329500675201416, |
|
"logits/rejected": -2.58791446685791, |
|
"logps/chosen": -342.7969665527344, |
|
"logps/rejected": -399.5611267089844, |
|
"loss": 0.5278, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.41523486375808716, |
|
"rewards/margins": 0.5993978977203369, |
|
"rewards/rejected": -1.0146328210830688, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.624, |
|
"grad_norm": 7.572498321533203, |
|
"learning_rate": 1.8613856051605242e-06, |
|
"logits/chosen": -2.4784908294677734, |
|
"logits/rejected": -2.511237621307373, |
|
"logps/chosen": -283.90972900390625, |
|
"logps/rejected": -327.43121337890625, |
|
"loss": 0.5407, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.45636481046676636, |
|
"rewards/margins": 0.4989432394504547, |
|
"rewards/rejected": -0.9553079605102539, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.628, |
|
"grad_norm": 4.849424362182617, |
|
"learning_rate": 1.827700448461836e-06, |
|
"logits/chosen": -2.6728811264038086, |
|
"logits/rejected": -2.5995230674743652, |
|
"logps/chosen": -365.33587646484375, |
|
"logps/rejected": -384.79388427734375, |
|
"loss": 0.5746, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6461849212646484, |
|
"rewards/margins": 0.504442572593689, |
|
"rewards/rejected": -1.150627613067627, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.632, |
|
"grad_norm": 4.748244285583496, |
|
"learning_rate": 1.7941463578928088e-06, |
|
"logits/chosen": -2.594512939453125, |
|
"logits/rejected": -2.5624542236328125, |
|
"logps/chosen": -407.58221435546875, |
|
"logps/rejected": -416.52752685546875, |
|
"loss": 0.5731, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.626873791217804, |
|
"rewards/margins": 0.539578914642334, |
|
"rewards/rejected": -1.1664526462554932, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.636, |
|
"grad_norm": 6.187716007232666, |
|
"learning_rate": 1.7607298748898844e-06, |
|
"logits/chosen": -2.6329944133758545, |
|
"logits/rejected": -2.623441219329834, |
|
"logps/chosen": -341.26910400390625, |
|
"logps/rejected": -384.6199645996094, |
|
"loss": 0.5801, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.59977126121521, |
|
"rewards/margins": 0.5203127264976501, |
|
"rewards/rejected": -1.1200840473175049, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 7.730658054351807, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"logits/chosen": -2.5077157020568848, |
|
"logits/rejected": -2.4561638832092285, |
|
"logps/chosen": -329.1211853027344, |
|
"logps/rejected": -394.10760498046875, |
|
"loss": 0.542, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.562198281288147, |
|
"rewards/margins": 0.5235614776611328, |
|
"rewards/rejected": -1.0857596397399902, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_logits/chosen": -2.616276264190674, |
|
"eval_logits/rejected": -2.5757086277008057, |
|
"eval_logps/chosen": -352.43634033203125, |
|
"eval_logps/rejected": -374.8915100097656, |
|
"eval_loss": 0.5450887680053711, |
|
"eval_rewards/accuracies": 0.726190447807312, |
|
"eval_rewards/chosen": -0.6919824481010437, |
|
"eval_rewards/margins": 0.5766366124153137, |
|
"eval_rewards/rejected": -1.2686189413070679, |
|
"eval_runtime": 166.5658, |
|
"eval_samples_per_second": 3.002, |
|
"eval_steps_per_second": 0.378, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.644, |
|
"grad_norm": 5.391225337982178, |
|
"learning_rate": 1.6943357619237227e-06, |
|
"logits/chosen": -2.571699619293213, |
|
"logits/rejected": -2.5610146522521973, |
|
"logps/chosen": -340.8096618652344, |
|
"logps/rejected": -366.74652099609375, |
|
"loss": 0.4952, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.6746693849563599, |
|
"rewards/margins": 0.645226240158081, |
|
"rewards/rejected": -1.319895625114441, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.648, |
|
"grad_norm": 7.21277379989624, |
|
"learning_rate": 1.661371075624363e-06, |
|
"logits/chosen": -2.5949785709381104, |
|
"logits/rejected": -2.6454100608825684, |
|
"logps/chosen": -344.26763916015625, |
|
"logps/rejected": -466.04693603515625, |
|
"loss": 0.5694, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9223111867904663, |
|
"rewards/margins": 0.545967161655426, |
|
"rewards/rejected": -1.468278169631958, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.652, |
|
"grad_norm": 10.09349536895752, |
|
"learning_rate": 1.6285698816954626e-06, |
|
"logits/chosen": -2.614025115966797, |
|
"logits/rejected": -2.5793397426605225, |
|
"logps/chosen": -371.01141357421875, |
|
"logps/rejected": -389.6199035644531, |
|
"loss": 0.5079, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.7361218333244324, |
|
"rewards/margins": 0.6544822454452515, |
|
"rewards/rejected": -1.3906042575836182, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.656, |
|
"grad_norm": 6.835726737976074, |
|
"learning_rate": 1.5959385747947697e-06, |
|
"logits/chosen": -2.546776294708252, |
|
"logits/rejected": -2.489529848098755, |
|
"logps/chosen": -324.41766357421875, |
|
"logps/rejected": -338.0316162109375, |
|
"loss": 0.5675, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.912179172039032, |
|
"rewards/margins": 0.5451852083206177, |
|
"rewards/rejected": -1.4573644399642944, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 9.413851737976074, |
|
"learning_rate": 1.56348351646022e-06, |
|
"logits/chosen": -2.4420504570007324, |
|
"logits/rejected": -2.3995440006256104, |
|
"logps/chosen": -330.77532958984375, |
|
"logps/rejected": -375.870849609375, |
|
"loss": 0.5706, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9180534482002258, |
|
"rewards/margins": 0.5257332921028137, |
|
"rewards/rejected": -1.443786859512329, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.664, |
|
"grad_norm": 7.294904708862305, |
|
"learning_rate": 1.5312110338697427e-06, |
|
"logits/chosen": -2.534151792526245, |
|
"logits/rejected": -2.457547426223755, |
|
"logps/chosen": -343.48895263671875, |
|
"logps/rejected": -404.5125427246094, |
|
"loss": 0.5288, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.9970987439155579, |
|
"rewards/margins": 0.7042299509048462, |
|
"rewards/rejected": -1.7013286352157593, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.668, |
|
"grad_norm": 7.913896083831787, |
|
"learning_rate": 1.4991274186077632e-06, |
|
"logits/chosen": -2.5275869369506836, |
|
"logits/rejected": -2.5172677040100098, |
|
"logps/chosen": -372.0498046875, |
|
"logps/rejected": -428.5511779785156, |
|
"loss": 0.5155, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8607346415519714, |
|
"rewards/margins": 0.6735731959342957, |
|
"rewards/rejected": -1.5343079566955566, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.672, |
|
"grad_norm": 9.02523136138916, |
|
"learning_rate": 1.467238925438646e-06, |
|
"logits/chosen": -2.542417287826538, |
|
"logits/rejected": -2.4911322593688965, |
|
"logps/chosen": -409.1072082519531, |
|
"logps/rejected": -441.38818359375, |
|
"loss": 0.6043, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8746612668037415, |
|
"rewards/margins": 0.6565597653388977, |
|
"rewards/rejected": -1.5312209129333496, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.676, |
|
"grad_norm": 7.484790325164795, |
|
"learning_rate": 1.4355517710873184e-06, |
|
"logits/chosen": -2.505631446838379, |
|
"logits/rejected": -2.4716596603393555, |
|
"logps/chosen": -369.3199157714844, |
|
"logps/rejected": -385.47271728515625, |
|
"loss": 0.5162, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8367518186569214, |
|
"rewards/margins": 0.6746172904968262, |
|
"rewards/rejected": -1.5113691091537476, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 14.492733001708984, |
|
"learning_rate": 1.4040721330273063e-06, |
|
"logits/chosen": -2.4774107933044434, |
|
"logits/rejected": -2.4899094104766846, |
|
"logps/chosen": -358.19085693359375, |
|
"logps/rejected": -413.7503967285156, |
|
"loss": 0.6545, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9458563923835754, |
|
"rewards/margins": 0.5173967480659485, |
|
"rewards/rejected": -1.4632532596588135, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.684, |
|
"grad_norm": 9.713050842285156, |
|
"learning_rate": 1.3728061482764238e-06, |
|
"logits/chosen": -2.6106739044189453, |
|
"logits/rejected": -2.6073949337005615, |
|
"logps/chosen": -393.0316162109375, |
|
"logps/rejected": -462.81964111328125, |
|
"loss": 0.6226, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7931762933731079, |
|
"rewards/margins": 0.5521876215934753, |
|
"rewards/rejected": -1.3453638553619385, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.688, |
|
"grad_norm": 8.317811012268066, |
|
"learning_rate": 1.3417599122003464e-06, |
|
"logits/chosen": -2.5949435234069824, |
|
"logits/rejected": -2.5823256969451904, |
|
"logps/chosen": -336.19964599609375, |
|
"logps/rejected": -373.7811279296875, |
|
"loss": 0.6114, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7611545324325562, |
|
"rewards/margins": 0.44461917877197266, |
|
"rewards/rejected": -1.2057737112045288, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.692, |
|
"grad_norm": 7.705052852630615, |
|
"learning_rate": 1.3109394773243117e-06, |
|
"logits/chosen": -2.5213277339935303, |
|
"logits/rejected": -2.5227763652801514, |
|
"logps/chosen": -378.220458984375, |
|
"logps/rejected": -433.517822265625, |
|
"loss": 0.5362, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.8036476373672485, |
|
"rewards/margins": 0.7761087417602539, |
|
"rewards/rejected": -1.5797563791275024, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.696, |
|
"grad_norm": 13.364889144897461, |
|
"learning_rate": 1.280350852153168e-06, |
|
"logits/chosen": -2.5971102714538574, |
|
"logits/rejected": -2.5233542919158936, |
|
"logps/chosen": -357.8860778808594, |
|
"logps/rejected": -374.4933776855469, |
|
"loss": 0.5301, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7840938568115234, |
|
"rewards/margins": 0.6052092909812927, |
|
"rewards/rejected": -1.3893029689788818, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 9.326637268066406, |
|
"learning_rate": 1.2500000000000007e-06, |
|
"logits/chosen": -2.5254058837890625, |
|
"logits/rejected": -2.5030879974365234, |
|
"logps/chosen": -361.384521484375, |
|
"logps/rejected": -415.76226806640625, |
|
"loss": 0.5189, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.733026385307312, |
|
"rewards/margins": 0.6997817754745483, |
|
"rewards/rejected": -1.4328081607818604, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.704, |
|
"grad_norm": 10.961359024047852, |
|
"learning_rate": 1.2198928378235717e-06, |
|
"logits/chosen": -2.572036027908325, |
|
"logits/rejected": -2.561758518218994, |
|
"logps/chosen": -297.1030578613281, |
|
"logps/rejected": -387.46685791015625, |
|
"loss": 0.5146, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5809527635574341, |
|
"rewards/margins": 0.6962177753448486, |
|
"rewards/rejected": -1.2771704196929932, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.708, |
|
"grad_norm": 6.878293514251709, |
|
"learning_rate": 1.1900352350748026e-06, |
|
"logits/chosen": -2.54471492767334, |
|
"logits/rejected": -2.507836103439331, |
|
"logps/chosen": -375.3175354003906, |
|
"logps/rejected": -410.13555908203125, |
|
"loss": 0.5035, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7230747938156128, |
|
"rewards/margins": 0.817081093788147, |
|
"rewards/rejected": -1.5401558876037598, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.712, |
|
"grad_norm": 10.813645362854004, |
|
"learning_rate": 1.160433012552508e-06, |
|
"logits/chosen": -2.4845707416534424, |
|
"logits/rejected": -2.4921040534973145, |
|
"logps/chosen": -331.4786682128906, |
|
"logps/rejected": -384.5702819824219, |
|
"loss": 0.5406, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.7888541221618652, |
|
"rewards/margins": 0.5834736824035645, |
|
"rewards/rejected": -1.3723278045654297, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.716, |
|
"grad_norm": 8.971263885498047, |
|
"learning_rate": 1.1310919412686248e-06, |
|
"logits/chosen": -2.5675597190856934, |
|
"logits/rejected": -2.566880464553833, |
|
"logps/chosen": -377.36639404296875, |
|
"logps/rejected": -408.0566711425781, |
|
"loss": 0.5503, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8293668627738953, |
|
"rewards/margins": 0.5729758739471436, |
|
"rewards/rejected": -1.402342677116394, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 6.206206321716309, |
|
"learning_rate": 1.1020177413231334e-06, |
|
"logits/chosen": -2.5698678493499756, |
|
"logits/rejected": -2.544900417327881, |
|
"logps/chosen": -358.0696716308594, |
|
"logps/rejected": -385.30047607421875, |
|
"loss": 0.5282, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.8030437231063843, |
|
"rewards/margins": 0.6273149251937866, |
|
"rewards/rejected": -1.430358648300171, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_logits/chosen": -2.5715651512145996, |
|
"eval_logits/rejected": -2.526627779006958, |
|
"eval_logps/chosen": -362.9278869628906, |
|
"eval_logps/rejected": -390.782470703125, |
|
"eval_loss": 0.541217029094696, |
|
"eval_rewards/accuracies": 0.7083333134651184, |
|
"eval_rewards/chosen": -0.7968972325325012, |
|
"eval_rewards/margins": 0.6306313872337341, |
|
"eval_rewards/rejected": -1.427528738975525, |
|
"eval_runtime": 166.5843, |
|
"eval_samples_per_second": 3.001, |
|
"eval_steps_per_second": 0.378, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.724, |
|
"grad_norm": 6.695834636688232, |
|
"learning_rate": 1.073216080788921e-06, |
|
"logits/chosen": -2.584725856781006, |
|
"logits/rejected": -2.5650360584259033, |
|
"logps/chosen": -371.1759033203125, |
|
"logps/rejected": -381.4906311035156, |
|
"loss": 0.6249, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.8290435671806335, |
|
"rewards/margins": 0.34561312198638916, |
|
"rewards/rejected": -1.1746567487716675, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.728, |
|
"grad_norm": 9.597143173217773, |
|
"learning_rate": 1.0446925746067768e-06, |
|
"logits/chosen": -2.5334725379943848, |
|
"logits/rejected": -2.476060152053833, |
|
"logps/chosen": -324.6567687988281, |
|
"logps/rejected": -336.16448974609375, |
|
"loss": 0.5035, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.7902948260307312, |
|
"rewards/margins": 0.6815158724784851, |
|
"rewards/rejected": -1.4718106985092163, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.732, |
|
"grad_norm": 8.9435396194458, |
|
"learning_rate": 1.0164527834907468e-06, |
|
"logits/chosen": -2.4533045291900635, |
|
"logits/rejected": -2.449897050857544, |
|
"logps/chosen": -347.75250244140625, |
|
"logps/rejected": -426.3138122558594, |
|
"loss": 0.4745, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8129841685295105, |
|
"rewards/margins": 0.7864343523979187, |
|
"rewards/rejected": -1.5994184017181396, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.736, |
|
"grad_norm": 9.8597993850708, |
|
"learning_rate": 9.88502212844063e-07, |
|
"logits/chosen": -2.54966139793396, |
|
"logits/rejected": -2.551966428756714, |
|
"logps/chosen": -350.10125732421875, |
|
"logps/rejected": -414.47265625, |
|
"loss": 0.6307, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.8708189129829407, |
|
"rewards/margins": 0.4037759304046631, |
|
"rewards/rejected": -1.274594783782959, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 11.812902450561523, |
|
"learning_rate": 9.608463116858544e-07, |
|
"logits/chosen": -2.5522665977478027, |
|
"logits/rejected": -2.516507625579834, |
|
"logps/chosen": -359.7716064453125, |
|
"logps/rejected": -395.00726318359375, |
|
"loss": 0.5559, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8488423228263855, |
|
"rewards/margins": 0.5956013798713684, |
|
"rewards/rejected": -1.444443702697754, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.744, |
|
"grad_norm": 23.08143424987793, |
|
"learning_rate": 9.334904715888496e-07, |
|
"logits/chosen": -2.4799532890319824, |
|
"logits/rejected": -2.480583667755127, |
|
"logps/chosen": -345.7176208496094, |
|
"logps/rejected": -404.23931884765625, |
|
"loss": 0.5426, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.837591826915741, |
|
"rewards/margins": 0.6740916967391968, |
|
"rewards/rejected": -1.5116835832595825, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.748, |
|
"grad_norm": 9.993616104125977, |
|
"learning_rate": 9.064400256282757e-07, |
|
"logits/chosen": -2.557299852371216, |
|
"logits/rejected": -2.5293102264404297, |
|
"logps/chosen": -361.8759765625, |
|
"logps/rejected": -390.130859375, |
|
"loss": 0.549, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.7642291784286499, |
|
"rewards/margins": 0.6016563773155212, |
|
"rewards/rejected": -1.3658854961395264, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.752, |
|
"grad_norm": 8.252609252929688, |
|
"learning_rate": 8.797002473421729e-07, |
|
"logits/chosen": -2.5289082527160645, |
|
"logits/rejected": -2.5367226600646973, |
|
"logps/chosen": -387.4449462890625, |
|
"logps/rejected": -411.28057861328125, |
|
"loss": 0.5075, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.6062986254692078, |
|
"rewards/margins": 0.6882797479629517, |
|
"rewards/rejected": -1.2945783138275146, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.756, |
|
"grad_norm": 15.335317611694336, |
|
"learning_rate": 8.532763497032987e-07, |
|
"logits/chosen": -2.4454190731048584, |
|
"logits/rejected": -2.431666851043701, |
|
"logps/chosen": -370.828857421875, |
|
"logps/rejected": -450.534423828125, |
|
"loss": 0.5028, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7790817022323608, |
|
"rewards/margins": 0.7363722324371338, |
|
"rewards/rejected": -1.5154539346694946, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 8.324295997619629, |
|
"learning_rate": 8.271734841028553e-07, |
|
"logits/chosen": -2.608813762664795, |
|
"logits/rejected": -2.6157126426696777, |
|
"logps/chosen": -339.1045227050781, |
|
"logps/rejected": -371.37139892578125, |
|
"loss": 0.5261, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.7105227708816528, |
|
"rewards/margins": 0.6277570128440857, |
|
"rewards/rejected": -1.3382797241210938, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.764, |
|
"grad_norm": 9.136221885681152, |
|
"learning_rate": 8.013967393462094e-07, |
|
"logits/chosen": -2.4687538146972656, |
|
"logits/rejected": -2.490540027618408, |
|
"logps/chosen": -356.60186767578125, |
|
"logps/rejected": -391.22430419921875, |
|
"loss": 0.6001, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8641288876533508, |
|
"rewards/margins": 0.5564495921134949, |
|
"rewards/rejected": -1.4205783605575562, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.768, |
|
"grad_norm": 6.628035068511963, |
|
"learning_rate": 7.759511406608255e-07, |
|
"logits/chosen": -2.5683350563049316, |
|
"logits/rejected": -2.500274181365967, |
|
"logps/chosen": -406.158447265625, |
|
"logps/rejected": -411.00360107421875, |
|
"loss": 0.4962, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9044888615608215, |
|
"rewards/margins": 0.8631958961486816, |
|
"rewards/rejected": -1.7676846981048584, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.772, |
|
"grad_norm": 8.616596221923828, |
|
"learning_rate": 7.508416487165862e-07, |
|
"logits/chosen": -2.4850573539733887, |
|
"logits/rejected": -2.4977798461914062, |
|
"logps/chosen": -373.7191467285156, |
|
"logps/rejected": -406.65887451171875, |
|
"loss": 0.5838, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8553136587142944, |
|
"rewards/margins": 0.5251432657241821, |
|
"rewards/rejected": -1.3804569244384766, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.776, |
|
"grad_norm": 13.198709487915039, |
|
"learning_rate": 7.260731586586983e-07, |
|
"logits/chosen": -2.459974527359009, |
|
"logits/rejected": -2.463366746902466, |
|
"logps/chosen": -344.30938720703125, |
|
"logps/rejected": -415.8106994628906, |
|
"loss": 0.6061, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.9792687296867371, |
|
"rewards/margins": 0.5259648561477661, |
|
"rewards/rejected": -1.505233645439148, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 8.697169303894043, |
|
"learning_rate": 7.016504991533727e-07, |
|
"logits/chosen": -2.5883092880249023, |
|
"logits/rejected": -2.5588154792785645, |
|
"logps/chosen": -385.4810791015625, |
|
"logps/rejected": -427.9122619628906, |
|
"loss": 0.4801, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.6248154044151306, |
|
"rewards/margins": 0.7402738928794861, |
|
"rewards/rejected": -1.3650894165039062, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.784, |
|
"grad_norm": 5.463360786437988, |
|
"learning_rate": 6.775784314464717e-07, |
|
"logits/chosen": -2.490438461303711, |
|
"logits/rejected": -2.511699914932251, |
|
"logps/chosen": -339.4949645996094, |
|
"logps/rejected": -422.260498046875, |
|
"loss": 0.4897, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.7619988918304443, |
|
"rewards/margins": 0.7652215957641602, |
|
"rewards/rejected": -1.5272204875946045, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.788, |
|
"grad_norm": 9.091290473937988, |
|
"learning_rate": 6.538616484352902e-07, |
|
"logits/chosen": -2.534930944442749, |
|
"logits/rejected": -2.5236055850982666, |
|
"logps/chosen": -342.08734130859375, |
|
"logps/rejected": -378.9286804199219, |
|
"loss": 0.4952, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.758894145488739, |
|
"rewards/margins": 0.7211824655532837, |
|
"rewards/rejected": -1.480076551437378, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.792, |
|
"grad_norm": 8.22352123260498, |
|
"learning_rate": 6.305047737536707e-07, |
|
"logits/chosen": -2.502777576446533, |
|
"logits/rejected": -2.4551658630371094, |
|
"logps/chosen": -347.10638427734375, |
|
"logps/rejected": -367.161376953125, |
|
"loss": 0.555, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.8488829731941223, |
|
"rewards/margins": 0.6530700325965881, |
|
"rewards/rejected": -1.501952886581421, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.796, |
|
"grad_norm": 19.538936614990234, |
|
"learning_rate": 6.075123608706093e-07, |
|
"logits/chosen": -2.542541980743408, |
|
"logits/rejected": -2.5630409717559814, |
|
"logps/chosen": -365.1338806152344, |
|
"logps/rejected": -388.2347106933594, |
|
"loss": 0.5486, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7868693470954895, |
|
"rewards/margins": 0.5855667591094971, |
|
"rewards/rejected": -1.3724360466003418, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 7.248394012451172, |
|
"learning_rate": 5.848888922025553e-07, |
|
"logits/chosen": -2.4549734592437744, |
|
"logits/rejected": -2.4434893131256104, |
|
"logps/chosen": -330.5204162597656, |
|
"logps/rejected": -420.1793518066406, |
|
"loss": 0.5873, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9319890141487122, |
|
"rewards/margins": 0.5578263401985168, |
|
"rewards/rejected": -1.4898154735565186, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_logits/chosen": -2.5692970752716064, |
|
"eval_logits/rejected": -2.5253682136535645, |
|
"eval_logps/chosen": -365.5719909667969, |
|
"eval_logps/rejected": -399.30718994140625, |
|
"eval_loss": 0.5368649363517761, |
|
"eval_rewards/accuracies": 0.7083333134651184, |
|
"eval_rewards/chosen": -0.8233387470245361, |
|
"eval_rewards/margins": 0.689436674118042, |
|
"eval_rewards/rejected": -1.5127756595611572, |
|
"eval_runtime": 166.5941, |
|
"eval_samples_per_second": 3.001, |
|
"eval_steps_per_second": 0.378, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.804, |
|
"grad_norm": 11.21343994140625, |
|
"learning_rate": 5.626387782395512e-07, |
|
"logits/chosen": -2.5684406757354736, |
|
"logits/rejected": -2.536818265914917, |
|
"logps/chosen": -391.46490478515625, |
|
"logps/rejected": -438.1607971191406, |
|
"loss": 0.5785, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9650428891181946, |
|
"rewards/margins": 0.6077659726142883, |
|
"rewards/rejected": -1.5728086233139038, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.808, |
|
"grad_norm": 7.5349884033203125, |
|
"learning_rate": 5.407663566854008e-07, |
|
"logits/chosen": -2.5107808113098145, |
|
"logits/rejected": -2.4654345512390137, |
|
"logps/chosen": -377.01324462890625, |
|
"logps/rejected": -436.71728515625, |
|
"loss": 0.4949, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.7660607695579529, |
|
"rewards/margins": 0.7943149209022522, |
|
"rewards/rejected": -1.5603755712509155, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.812, |
|
"grad_norm": 18.765304565429688, |
|
"learning_rate": 5.192758916120236e-07, |
|
"logits/chosen": -2.5258936882019043, |
|
"logits/rejected": -2.496175527572632, |
|
"logps/chosen": -376.3697509765625, |
|
"logps/rejected": -422.98529052734375, |
|
"loss": 0.5471, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.8881324529647827, |
|
"rewards/margins": 0.6989415884017944, |
|
"rewards/rejected": -1.5870741605758667, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.816, |
|
"grad_norm": 9.102749824523926, |
|
"learning_rate": 4.981715726281666e-07, |
|
"logits/chosen": -2.5151665210723877, |
|
"logits/rejected": -2.5129122734069824, |
|
"logps/chosen": -376.29168701171875, |
|
"logps/rejected": -392.8097229003906, |
|
"loss": 0.6469, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9464821815490723, |
|
"rewards/margins": 0.4086835980415344, |
|
"rewards/rejected": -1.355165958404541, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 5.625443935394287, |
|
"learning_rate": 4.774575140626317e-07, |
|
"logits/chosen": -2.5486044883728027, |
|
"logits/rejected": -2.55946683883667, |
|
"logps/chosen": -376.15631103515625, |
|
"logps/rejected": -424.7813415527344, |
|
"loss": 0.5013, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.7685045003890991, |
|
"rewards/margins": 0.8368158340454102, |
|
"rewards/rejected": -1.6053203344345093, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.824, |
|
"grad_norm": 8.010042190551758, |
|
"learning_rate": 4.5713775416217884e-07, |
|
"logits/chosen": -2.535797357559204, |
|
"logits/rejected": -2.5063552856445312, |
|
"logps/chosen": -369.76129150390625, |
|
"logps/rejected": -407.69281005859375, |
|
"loss": 0.494, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8518115282058716, |
|
"rewards/margins": 0.8361819982528687, |
|
"rewards/rejected": -1.6879936456680298, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.828, |
|
"grad_norm": 10.202120780944824, |
|
"learning_rate": 4.372162543042624e-07, |
|
"logits/chosen": -2.5730769634246826, |
|
"logits/rejected": -2.5308516025543213, |
|
"logps/chosen": -331.8706359863281, |
|
"logps/rejected": -356.06585693359375, |
|
"loss": 0.6335, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.003354787826538, |
|
"rewards/margins": 0.4766755998134613, |
|
"rewards/rejected": -1.4800306558609009, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.832, |
|
"grad_norm": 6.7073163986206055, |
|
"learning_rate": 4.1769689822475147e-07, |
|
"logits/chosen": -2.527463912963867, |
|
"logits/rejected": -2.508131980895996, |
|
"logps/chosen": -340.91851806640625, |
|
"logps/rejected": -381.59246826171875, |
|
"loss": 0.5364, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.8549755215644836, |
|
"rewards/margins": 0.6245896220207214, |
|
"rewards/rejected": -1.479565143585205, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.836, |
|
"grad_norm": 11.138327598571777, |
|
"learning_rate": 3.9858349126078945e-07, |
|
"logits/chosen": -2.407921314239502, |
|
"logits/rejected": -2.4323601722717285, |
|
"logps/chosen": -367.8681640625, |
|
"logps/rejected": -432.36505126953125, |
|
"loss": 0.5952, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9345201253890991, |
|
"rewards/margins": 0.5518704652786255, |
|
"rewards/rejected": -1.4863905906677246, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 18.084115982055664, |
|
"learning_rate": 3.798797596089351e-07, |
|
"logits/chosen": -2.5881738662719727, |
|
"logits/rejected": -2.557568073272705, |
|
"logps/chosen": -387.9930725097656, |
|
"logps/rejected": -412.61773681640625, |
|
"loss": 0.5676, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9383655786514282, |
|
"rewards/margins": 0.635163426399231, |
|
"rewards/rejected": -1.5735290050506592, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.844, |
|
"grad_norm": 8.753804206848145, |
|
"learning_rate": 3.615893495987335e-07, |
|
"logits/chosen": -2.493389129638672, |
|
"logits/rejected": -2.512821912765503, |
|
"logps/chosen": -358.10015869140625, |
|
"logps/rejected": -455.28802490234375, |
|
"loss": 0.5181, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7339892983436584, |
|
"rewards/margins": 0.7650957703590393, |
|
"rewards/rejected": -1.4990851879119873, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.848, |
|
"grad_norm": 10.629037857055664, |
|
"learning_rate": 3.4371582698185636e-07, |
|
"logits/chosen": -2.5065648555755615, |
|
"logits/rejected": -2.5203864574432373, |
|
"logps/chosen": -394.06390380859375, |
|
"logps/rejected": -438.9784240722656, |
|
"loss": 0.4584, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9274409413337708, |
|
"rewards/margins": 0.83965003490448, |
|
"rewards/rejected": -1.7670910358428955, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.852, |
|
"grad_norm": 12.203771591186523, |
|
"learning_rate": 3.262626762369525e-07, |
|
"logits/chosen": -2.5433928966522217, |
|
"logits/rejected": -2.4640583992004395, |
|
"logps/chosen": -335.0285339355469, |
|
"logps/rejected": -358.05206298828125, |
|
"loss": 0.5235, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8205272555351257, |
|
"rewards/margins": 0.7125735282897949, |
|
"rewards/rejected": -1.5331008434295654, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.856, |
|
"grad_norm": 10.726627349853516, |
|
"learning_rate": 3.092332998903416e-07, |
|
"logits/chosen": -2.5511505603790283, |
|
"logits/rejected": -2.555680990219116, |
|
"logps/chosen": -387.1297912597656, |
|
"logps/rejected": -438.8545837402344, |
|
"loss": 0.5697, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7436598539352417, |
|
"rewards/margins": 0.5584943890571594, |
|
"rewards/rejected": -1.302154302597046, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 10.780182838439941, |
|
"learning_rate": 2.9263101785268253e-07, |
|
"logits/chosen": -2.5446937084198, |
|
"logits/rejected": -2.5189390182495117, |
|
"logps/chosen": -376.8099670410156, |
|
"logps/rejected": -390.14105224609375, |
|
"loss": 0.6353, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.9379167556762695, |
|
"rewards/margins": 0.45755672454833984, |
|
"rewards/rejected": -1.3954734802246094, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.864, |
|
"grad_norm": 9.165637969970703, |
|
"learning_rate": 2.764590667717562e-07, |
|
"logits/chosen": -2.520571231842041, |
|
"logits/rejected": -2.5012898445129395, |
|
"logps/chosen": -348.20050048828125, |
|
"logps/rejected": -430.0379333496094, |
|
"loss": 0.4818, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.767501175403595, |
|
"rewards/margins": 0.8683005571365356, |
|
"rewards/rejected": -1.6358016729354858, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.868, |
|
"grad_norm": 15.374898910522461, |
|
"learning_rate": 2.6072059940146775e-07, |
|
"logits/chosen": -2.4875528812408447, |
|
"logits/rejected": -2.4620859622955322, |
|
"logps/chosen": -358.6385192871094, |
|
"logps/rejected": -375.6526794433594, |
|
"loss": 0.641, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9842912554740906, |
|
"rewards/margins": 0.40321025252342224, |
|
"rewards/rejected": -1.3875017166137695, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.872, |
|
"grad_norm": 16.681074142456055, |
|
"learning_rate": 2.454186839872158e-07, |
|
"logits/chosen": -2.4695944786071777, |
|
"logits/rejected": -2.4315385818481445, |
|
"logps/chosen": -369.12884521484375, |
|
"logps/rejected": -432.47467041015625, |
|
"loss": 0.5636, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8553116917610168, |
|
"rewards/margins": 0.5986126065254211, |
|
"rewards/rejected": -1.453924298286438, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.876, |
|
"grad_norm": 12.826825141906738, |
|
"learning_rate": 2.3055630366772857e-07, |
|
"logits/chosen": -2.560246229171753, |
|
"logits/rejected": -2.5458738803863525, |
|
"logps/chosen": -357.64227294921875, |
|
"logps/rejected": -396.8709411621094, |
|
"loss": 0.5257, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.7637326717376709, |
|
"rewards/margins": 0.737055242061615, |
|
"rewards/rejected": -1.5007880926132202, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 10.747795104980469, |
|
"learning_rate": 2.1613635589349756e-07, |
|
"logits/chosen": -2.552135944366455, |
|
"logits/rejected": -2.548779010772705, |
|
"logps/chosen": -344.90142822265625, |
|
"logps/rejected": -395.3324279785156, |
|
"loss": 0.5152, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8041820526123047, |
|
"rewards/margins": 0.7682543396949768, |
|
"rewards/rejected": -1.5724363327026367, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_logits/chosen": -2.5619542598724365, |
|
"eval_logits/rejected": -2.518826723098755, |
|
"eval_logps/chosen": -357.70245361328125, |
|
"eval_logps/rejected": -389.9855041503906, |
|
"eval_loss": 0.5383636951446533, |
|
"eval_rewards/accuracies": 0.7142857313156128, |
|
"eval_rewards/chosen": -0.7446432113647461, |
|
"eval_rewards/margins": 0.6749160289764404, |
|
"eval_rewards/rejected": -1.4195590019226074, |
|
"eval_runtime": 166.7191, |
|
"eval_samples_per_second": 2.999, |
|
"eval_steps_per_second": 0.378, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.884, |
|
"grad_norm": 15.551572799682617, |
|
"learning_rate": 2.0216165186191406e-07, |
|
"logits/chosen": -2.529543399810791, |
|
"logits/rejected": -2.511012554168701, |
|
"logps/chosen": -359.8935546875, |
|
"logps/rejected": -426.3463439941406, |
|
"loss": 0.5322, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7537140846252441, |
|
"rewards/margins": 0.747148871421814, |
|
"rewards/rejected": -1.5008628368377686, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.888, |
|
"grad_norm": 10.654102325439453, |
|
"learning_rate": 1.8863491596921745e-07, |
|
"logits/chosen": -2.531877040863037, |
|
"logits/rejected": -2.5003228187561035, |
|
"logps/chosen": -394.6542053222656, |
|
"logps/rejected": -419.4615173339844, |
|
"loss": 0.6303, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9292774200439453, |
|
"rewards/margins": 0.46585217118263245, |
|
"rewards/rejected": -1.3951294422149658, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.892, |
|
"grad_norm": 12.998001098632812, |
|
"learning_rate": 1.7555878527937164e-07, |
|
"logits/chosen": -2.6154770851135254, |
|
"logits/rejected": -2.573152542114258, |
|
"logps/chosen": -378.4177551269531, |
|
"logps/rejected": -407.0361328125, |
|
"loss": 0.4692, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.8625243902206421, |
|
"rewards/margins": 0.8568865060806274, |
|
"rewards/rejected": -1.7194106578826904, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.896, |
|
"grad_norm": 9.31609058380127, |
|
"learning_rate": 1.629358090099639e-07, |
|
"logits/chosen": -2.4966423511505127, |
|
"logits/rejected": -2.4891440868377686, |
|
"logps/chosen": -389.1834411621094, |
|
"logps/rejected": -428.36199951171875, |
|
"loss": 0.5027, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8896617889404297, |
|
"rewards/margins": 0.7314151525497437, |
|
"rewards/rejected": -1.6210769414901733, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 10.968253135681152, |
|
"learning_rate": 1.507684480352292e-07, |
|
"logits/chosen": -2.5237255096435547, |
|
"logits/rejected": -2.5314764976501465, |
|
"logps/chosen": -367.54150390625, |
|
"logps/rejected": -418.0208435058594, |
|
"loss": 0.5305, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9517295956611633, |
|
"rewards/margins": 0.6929836869239807, |
|
"rewards/rejected": -1.6447131633758545, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.904, |
|
"grad_norm": 9.858650207519531, |
|
"learning_rate": 1.3905907440629752e-07, |
|
"logits/chosen": -2.551881790161133, |
|
"logits/rejected": -2.5350139141082764, |
|
"logps/chosen": -366.10174560546875, |
|
"logps/rejected": -396.09478759765625, |
|
"loss": 0.551, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8920289874076843, |
|
"rewards/margins": 0.6426266431808472, |
|
"rewards/rejected": -1.5346556901931763, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.908, |
|
"grad_norm": 9.662504196166992, |
|
"learning_rate": 1.278099708887587e-07, |
|
"logits/chosen": -2.5561671257019043, |
|
"logits/rejected": -2.5373544692993164, |
|
"logps/chosen": -344.9742431640625, |
|
"logps/rejected": -458.02313232421875, |
|
"loss": 0.5248, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.779849112033844, |
|
"rewards/margins": 0.7551368474960327, |
|
"rewards/rejected": -1.534985899925232, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.912, |
|
"grad_norm": 12.876849174499512, |
|
"learning_rate": 1.1702333051763271e-07, |
|
"logits/chosen": -2.5694494247436523, |
|
"logits/rejected": -2.5618202686309814, |
|
"logps/chosen": -399.39739990234375, |
|
"logps/rejected": -404.67535400390625, |
|
"loss": 0.541, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.8758378028869629, |
|
"rewards/margins": 0.7352150082588196, |
|
"rewards/rejected": -1.6110527515411377, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.916, |
|
"grad_norm": 10.277182579040527, |
|
"learning_rate": 1.067012561698319e-07, |
|
"logits/chosen": -2.5358214378356934, |
|
"logits/rejected": -2.5226242542266846, |
|
"logps/chosen": -378.08331298828125, |
|
"logps/rejected": -406.0291442871094, |
|
"loss": 0.6472, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.8850030899047852, |
|
"rewards/margins": 0.44015589356422424, |
|
"rewards/rejected": -1.3251588344573975, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 17.924530029296875, |
|
"learning_rate": 9.684576015420277e-08, |
|
"logits/chosen": -2.4903244972229004, |
|
"logits/rejected": -2.4617245197296143, |
|
"logps/chosen": -328.5221252441406, |
|
"logps/rejected": -361.15728759765625, |
|
"loss": 0.5055, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7505359053611755, |
|
"rewards/margins": 0.7073702812194824, |
|
"rewards/rejected": -1.4579061269760132, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.924, |
|
"grad_norm": 14.877918243408203, |
|
"learning_rate": 8.745876381922147e-08, |
|
"logits/chosen": -2.4926960468292236, |
|
"logits/rejected": -2.525510549545288, |
|
"logps/chosen": -342.7877197265625, |
|
"logps/rejected": -372.467041015625, |
|
"loss": 0.5811, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8225164413452148, |
|
"rewards/margins": 0.6331211924552917, |
|
"rewards/rejected": -1.4556376934051514, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.928, |
|
"grad_norm": 14.825825691223145, |
|
"learning_rate": 7.854209717842231e-08, |
|
"logits/chosen": -2.560148239135742, |
|
"logits/rejected": -2.536252975463867, |
|
"logps/chosen": -389.2018127441406, |
|
"logps/rejected": -387.626220703125, |
|
"loss": 0.6612, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.9954813122749329, |
|
"rewards/margins": 0.3569512963294983, |
|
"rewards/rejected": -1.3524326086044312, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.932, |
|
"grad_norm": 14.72794246673584, |
|
"learning_rate": 7.009749855363457e-08, |
|
"logits/chosen": -2.5346550941467285, |
|
"logits/rejected": -2.5153279304504395, |
|
"logps/chosen": -340.6593322753906, |
|
"logps/rejected": -409.87274169921875, |
|
"loss": 0.5073, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.70791095495224, |
|
"rewards/margins": 0.6868191361427307, |
|
"rewards/rejected": -1.3947298526763916, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.936, |
|
"grad_norm": 12.192831993103027, |
|
"learning_rate": 6.212661423609184e-08, |
|
"logits/chosen": -2.601175308227539, |
|
"logits/rejected": -2.5409445762634277, |
|
"logps/chosen": -391.4974060058594, |
|
"logps/rejected": -429.298583984375, |
|
"loss": 0.5721, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.9178189039230347, |
|
"rewards/margins": 0.643661379814148, |
|
"rewards/rejected": -1.5614804029464722, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 16.621145248413086, |
|
"learning_rate": 5.463099816548578e-08, |
|
"logits/chosen": -2.517141819000244, |
|
"logits/rejected": -2.512964963912964, |
|
"logps/chosen": -360.8076171875, |
|
"logps/rejected": -446.977294921875, |
|
"loss": 0.5067, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.9305839538574219, |
|
"rewards/margins": 0.7509930729866028, |
|
"rewards/rejected": -1.6815770864486694, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.944, |
|
"grad_norm": 9.267435073852539, |
|
"learning_rate": 4.761211162702117e-08, |
|
"logits/chosen": -2.5712485313415527, |
|
"logits/rejected": -2.508033037185669, |
|
"logps/chosen": -396.81658935546875, |
|
"logps/rejected": -446.858154296875, |
|
"loss": 0.5266, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.7593949437141418, |
|
"rewards/margins": 0.6189205050468445, |
|
"rewards/rejected": -1.3783155679702759, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.948, |
|
"grad_norm": 6.9370436668396, |
|
"learning_rate": 4.1071322966535487e-08, |
|
"logits/chosen": -2.588221788406372, |
|
"logits/rejected": -2.5175411701202393, |
|
"logps/chosen": -418.30474853515625, |
|
"logps/rejected": -404.17840576171875, |
|
"loss": 0.4887, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.7452821135520935, |
|
"rewards/margins": 0.8689033389091492, |
|
"rewards/rejected": -1.6141853332519531, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.952, |
|
"grad_norm": 4.675684452056885, |
|
"learning_rate": 3.5009907323737826e-08, |
|
"logits/chosen": -2.5127742290496826, |
|
"logits/rejected": -2.58686900138855, |
|
"logps/chosen": -371.75390625, |
|
"logps/rejected": -481.8087463378906, |
|
"loss": 0.4452, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.6970502138137817, |
|
"rewards/margins": 0.9857767820358276, |
|
"rewards/rejected": -1.6828269958496094, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.956, |
|
"grad_norm": 8.315820693969727, |
|
"learning_rate": 2.9429046383618042e-08, |
|
"logits/chosen": -2.4666857719421387, |
|
"logits/rejected": -2.4609508514404297, |
|
"logps/chosen": -369.11480712890625, |
|
"logps/rejected": -399.99835205078125, |
|
"loss": 0.4684, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.7124398946762085, |
|
"rewards/margins": 0.7592355608940125, |
|
"rewards/rejected": -1.4716756343841553, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 13.105049133300781, |
|
"learning_rate": 2.4329828146074096e-08, |
|
"logits/chosen": -2.531951904296875, |
|
"logits/rejected": -2.5043699741363525, |
|
"logps/chosen": -376.4866943359375, |
|
"logps/rejected": -373.62109375, |
|
"loss": 0.5213, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8802105188369751, |
|
"rewards/margins": 0.7113884091377258, |
|
"rewards/rejected": -1.5915989875793457, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_logits/chosen": -2.556800365447998, |
|
"eval_logits/rejected": -2.513496160507202, |
|
"eval_logps/chosen": -362.12188720703125, |
|
"eval_logps/rejected": -395.5133361816406, |
|
"eval_loss": 0.5369879007339478, |
|
"eval_rewards/accuracies": 0.7063491940498352, |
|
"eval_rewards/chosen": -0.7888382077217102, |
|
"eval_rewards/margins": 0.6859992146492004, |
|
"eval_rewards/rejected": -1.4748374223709106, |
|
"eval_runtime": 166.4514, |
|
"eval_samples_per_second": 3.004, |
|
"eval_steps_per_second": 0.378, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.964, |
|
"grad_norm": 8.366193771362305, |
|
"learning_rate": 1.9713246713805588e-08, |
|
"logits/chosen": -2.4164879322052, |
|
"logits/rejected": -2.395017623901367, |
|
"logps/chosen": -332.5264892578125, |
|
"logps/rejected": -406.001953125, |
|
"loss": 0.4566, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.618050217628479, |
|
"rewards/margins": 0.9078122973442078, |
|
"rewards/rejected": -1.525862455368042, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.968, |
|
"grad_norm": 9.794474601745605, |
|
"learning_rate": 1.5580202098509078e-08, |
|
"logits/chosen": -2.498256206512451, |
|
"logits/rejected": -2.455946207046509, |
|
"logps/chosen": -413.91802978515625, |
|
"logps/rejected": -458.69671630859375, |
|
"loss": 0.6104, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.9604536890983582, |
|
"rewards/margins": 0.4910499155521393, |
|
"rewards/rejected": -1.4515035152435303, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.972, |
|
"grad_norm": 9.097041130065918, |
|
"learning_rate": 1.193150004542204e-08, |
|
"logits/chosen": -2.531789779663086, |
|
"logits/rejected": -2.5278313159942627, |
|
"logps/chosen": -356.40386962890625, |
|
"logps/rejected": -406.5818786621094, |
|
"loss": 0.5828, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.6918624639511108, |
|
"rewards/margins": 0.5878754258155823, |
|
"rewards/rejected": -1.2797380685806274, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.976, |
|
"grad_norm": 10.505363464355469, |
|
"learning_rate": 8.767851876239075e-09, |
|
"logits/chosen": -2.512781858444214, |
|
"logits/rejected": -2.4617748260498047, |
|
"logps/chosen": -325.30657958984375, |
|
"logps/rejected": -373.95013427734375, |
|
"loss": 0.5752, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7784557342529297, |
|
"rewards/margins": 0.626113772392273, |
|
"rewards/rejected": -1.4045695066452026, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 9.12982177734375, |
|
"learning_rate": 6.089874350439507e-09, |
|
"logits/chosen": -2.5082201957702637, |
|
"logits/rejected": -2.4916832447052, |
|
"logps/chosen": -437.98638916015625, |
|
"logps/rejected": -452.052001953125, |
|
"loss": 0.5096, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8293756246566772, |
|
"rewards/margins": 0.6941211819648743, |
|
"rewards/rejected": -1.5234968662261963, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.984, |
|
"grad_norm": 9.886011123657227, |
|
"learning_rate": 3.8980895450474455e-09, |
|
"logits/chosen": -2.47562313079834, |
|
"logits/rejected": -2.472003221511841, |
|
"logps/chosen": -375.73370361328125, |
|
"logps/rejected": -489.0946350097656, |
|
"loss": 0.4262, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7051881551742554, |
|
"rewards/margins": 0.9877891540527344, |
|
"rewards/rejected": -1.6929775476455688, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.988, |
|
"grad_norm": 8.874799728393555, |
|
"learning_rate": 2.192924752854042e-09, |
|
"logits/chosen": -2.579641819000244, |
|
"logits/rejected": -2.561047077178955, |
|
"logps/chosen": -357.88421630859375, |
|
"logps/rejected": -407.43438720703125, |
|
"loss": 0.5599, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.7861407995223999, |
|
"rewards/margins": 0.5933399796485901, |
|
"rewards/rejected": -1.3794807195663452, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.992, |
|
"grad_norm": 8.425810813903809, |
|
"learning_rate": 9.747123991141193e-10, |
|
"logits/chosen": -2.442471981048584, |
|
"logits/rejected": -2.426997661590576, |
|
"logps/chosen": -376.2283020019531, |
|
"logps/rejected": -400.55718994140625, |
|
"loss": 0.575, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9100795984268188, |
|
"rewards/margins": 0.6220163702964783, |
|
"rewards/rejected": -1.5320959091186523, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.996, |
|
"grad_norm": 9.747213363647461, |
|
"learning_rate": 2.43689976739403e-10, |
|
"logits/chosen": -2.400465250015259, |
|
"logits/rejected": -2.4487109184265137, |
|
"logps/chosen": -408.04913330078125, |
|
"logps/rejected": -413.04443359375, |
|
"loss": 0.5348, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8299952745437622, |
|
"rewards/margins": 0.6040025353431702, |
|
"rewards/rejected": -1.4339977502822876, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 9.561023712158203, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -2.478743076324463, |
|
"logits/rejected": -2.454713821411133, |
|
"logps/chosen": -397.37994384765625, |
|
"logps/rejected": -446.76275634765625, |
|
"loss": 0.5129, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9160435795783997, |
|
"rewards/margins": 0.6437736749649048, |
|
"rewards/rejected": -1.5598171949386597, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1250, |
|
"total_flos": 0.0, |
|
"train_loss": 0.0, |
|
"train_runtime": 0.0105, |
|
"train_samples_per_second": 1898476.441, |
|
"train_steps_per_second": 118654.778 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1250, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|