SELM-Llama-3-8B-Instruct-iter-3 / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984301412872841,
"eval_steps": 500,
"global_step": 159,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.006279434850863423,
"grad_norm": 44.82313301644843,
"learning_rate": 6.25e-09,
"logits/chosen": 0.8539759516716003,
"logits/rejected": 1.0286259651184082,
"logps/chosen": -335.678466796875,
"logps/pi_response": -136.65570068359375,
"logps/ref_response": -136.65570068359375,
"logps/rejected": -607.512451171875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.06279434850863422,
"grad_norm": 37.3559401152222,
"learning_rate": 6.25e-08,
"logits/chosen": 0.5866143703460693,
"logits/rejected": 0.9108358025550842,
"logps/chosen": -331.2261657714844,
"logps/pi_response": -115.99703216552734,
"logps/ref_response": -116.07170867919922,
"logps/rejected": -542.704833984375,
"loss": 0.6928,
"rewards/accuracies": 0.4791666567325592,
"rewards/chosen": -0.0002141093573300168,
"rewards/margins": -9.146899537881836e-05,
"rewards/rejected": -0.0001226404565386474,
"step": 10
},
{
"epoch": 0.12558869701726844,
"grad_norm": 32.618665621131484,
"learning_rate": 9.980706626858606e-08,
"logits/chosen": 0.6913329362869263,
"logits/rejected": 1.0521622896194458,
"logps/chosen": -266.18450927734375,
"logps/pi_response": -121.02943420410156,
"logps/ref_response": -121.02423095703125,
"logps/rejected": -550.6239624023438,
"loss": 0.6868,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -0.007379357703030109,
"rewards/margins": 0.01433458924293518,
"rewards/rejected": -0.021713946014642715,
"step": 20
},
{
"epoch": 0.18838304552590268,
"grad_norm": 28.038145324089726,
"learning_rate": 9.765362502737097e-08,
"logits/chosen": 0.7383168935775757,
"logits/rejected": 1.0771757364273071,
"logps/chosen": -336.5181579589844,
"logps/pi_response": -119.6182861328125,
"logps/ref_response": -119.22322845458984,
"logps/rejected": -517.7554321289062,
"loss": 0.6645,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.05802997946739197,
"rewards/margins": 0.036242544651031494,
"rewards/rejected": -0.09427253156900406,
"step": 30
},
{
"epoch": 0.25117739403453687,
"grad_norm": 28.144482930637814,
"learning_rate": 9.320944188084241e-08,
"logits/chosen": 0.6981341242790222,
"logits/rejected": 1.0143299102783203,
"logps/chosen": -312.6896057128906,
"logps/pi_response": -114.02241516113281,
"logps/ref_response": -113.06685638427734,
"logps/rejected": -570.7511596679688,
"loss": 0.6223,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.1473049521446228,
"rewards/margins": 0.1629556119441986,
"rewards/rejected": -0.3102605938911438,
"step": 40
},
{
"epoch": 0.3139717425431711,
"grad_norm": 24.826701070167935,
"learning_rate": 8.668815171119019e-08,
"logits/chosen": 0.7617167234420776,
"logits/rejected": 1.1797912120819092,
"logps/chosen": -300.81610107421875,
"logps/pi_response": -123.53846740722656,
"logps/ref_response": -122.34455871582031,
"logps/rejected": -541.4984130859375,
"loss": 0.5948,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.16112999618053436,
"rewards/margins": 0.23305478692054749,
"rewards/rejected": -0.39418473839759827,
"step": 50
},
{
"epoch": 0.37676609105180536,
"grad_norm": 19.513203799747355,
"learning_rate": 7.840323733655779e-08,
"logits/chosen": 0.766559898853302,
"logits/rejected": 1.1061646938323975,
"logps/chosen": -339.07257080078125,
"logps/pi_response": -115.84468841552734,
"logps/ref_response": -112.74748229980469,
"logps/rejected": -619.0446166992188,
"loss": 0.5697,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.33040159940719604,
"rewards/margins": 0.37622275948524475,
"rewards/rejected": -0.7066243886947632,
"step": 60
},
{
"epoch": 0.43956043956043955,
"grad_norm": 17.524688561630104,
"learning_rate": 6.87529601804781e-08,
"logits/chosen": 0.9022890329360962,
"logits/rejected": 1.2815022468566895,
"logps/chosen": -287.8564758300781,
"logps/pi_response": -117.10661315917969,
"logps/ref_response": -110.9754409790039,
"logps/rejected": -641.4493408203125,
"loss": 0.5495,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.3896563947200775,
"rewards/margins": 0.6181625723838806,
"rewards/rejected": -1.0078189373016357,
"step": 70
},
{
"epoch": 0.5023547880690737,
"grad_norm": 22.238613223093857,
"learning_rate": 5.8201215576551086e-08,
"logits/chosen": 0.7839330434799194,
"logits/rejected": 1.2842929363250732,
"logps/chosen": -352.50201416015625,
"logps/pi_response": -128.81784057617188,
"logps/ref_response": -121.79289245605469,
"logps/rejected": -704.5844116210938,
"loss": 0.5423,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.48940858244895935,
"rewards/margins": 0.7611938118934631,
"rewards/rejected": -1.2506022453308105,
"step": 80
},
{
"epoch": 0.565149136577708,
"grad_norm": 20.05399901390604,
"learning_rate": 4.725523300678362e-08,
"logits/chosen": 0.9467741250991821,
"logits/rejected": 1.3314892053604126,
"logps/chosen": -390.7951965332031,
"logps/pi_response": -120.1943359375,
"logps/ref_response": -111.21858978271484,
"logps/rejected": -724.9862060546875,
"loss": 0.5435,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.5949150919914246,
"rewards/margins": 0.7683295011520386,
"rewards/rejected": -1.363244652748108,
"step": 90
},
{
"epoch": 0.6279434850863422,
"grad_norm": 15.488053965952703,
"learning_rate": 3.644119323817915e-08,
"logits/chosen": 0.9162198901176453,
"logits/rejected": 1.237339973449707,
"logps/chosen": -372.86065673828125,
"logps/pi_response": -127.0195083618164,
"logps/ref_response": -119.08543395996094,
"logps/rejected": -618.2140502929688,
"loss": 0.5384,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.5800319910049438,
"rewards/margins": 0.5143734812736511,
"rewards/rejected": -1.0944055318832397,
"step": 100
},
{
"epoch": 0.6907378335949764,
"grad_norm": 19.97941872789063,
"learning_rate": 2.6278934458271994e-08,
"logits/chosen": 0.8850401043891907,
"logits/rejected": 1.172430157661438,
"logps/chosen": -350.63067626953125,
"logps/pi_response": -121.9805908203125,
"logps/ref_response": -114.52159118652344,
"logps/rejected": -664.2578735351562,
"loss": 0.5634,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.5176302194595337,
"rewards/margins": 0.6778607368469238,
"rewards/rejected": -1.1954909563064575,
"step": 110
},
{
"epoch": 0.7535321821036107,
"grad_norm": 18.5617857059109,
"learning_rate": 1.725696330273575e-08,
"logits/chosen": 0.8677975535392761,
"logits/rejected": 1.1380926370620728,
"logps/chosen": -314.06951904296875,
"logps/pi_response": -123.45503234863281,
"logps/ref_response": -116.88655853271484,
"logps/rejected": -674.5084228515625,
"loss": 0.5261,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -0.43471240997314453,
"rewards/margins": 0.7433832287788391,
"rewards/rejected": -1.1780955791473389,
"step": 120
},
{
"epoch": 0.8163265306122449,
"grad_norm": 22.823575286285628,
"learning_rate": 9.808972011828054e-09,
"logits/chosen": 0.7576395273208618,
"logits/rejected": 1.283482313156128,
"logps/chosen": -336.9859619140625,
"logps/pi_response": -129.77108764648438,
"logps/ref_response": -124.2276382446289,
"logps/rejected": -663.2884521484375,
"loss": 0.547,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.43467482924461365,
"rewards/margins": 0.6862285733222961,
"rewards/rejected": -1.1209033727645874,
"step": 130
},
{
"epoch": 0.8791208791208791,
"grad_norm": 16.657603305669547,
"learning_rate": 4.2929905518041705e-09,
"logits/chosen": 0.8893574476242065,
"logits/rejected": 1.3175649642944336,
"logps/chosen": -363.91876220703125,
"logps/pi_response": -123.624267578125,
"logps/ref_response": -117.37095642089844,
"logps/rejected": -586.3743896484375,
"loss": 0.5436,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.4913572371006012,
"rewards/margins": 0.5228551030158997,
"rewards/rejected": -1.0142122507095337,
"step": 140
},
{
"epoch": 0.9419152276295133,
"grad_norm": 19.65034837624422,
"learning_rate": 9.741758728888216e-10,
"logits/chosen": 0.7108520269393921,
"logits/rejected": 1.1452034711837769,
"logps/chosen": -374.49493408203125,
"logps/pi_response": -138.4927978515625,
"logps/ref_response": -132.68472290039062,
"logps/rejected": -669.5167236328125,
"loss": 0.5336,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.45191335678100586,
"rewards/margins": 0.7034718990325928,
"rewards/rejected": -1.1553852558135986,
"step": 150
},
{
"epoch": 0.9984301412872841,
"step": 159,
"total_flos": 0.0,
"train_loss": 0.5769100609065602,
"train_runtime": 4399.847,
"train_samples_per_second": 4.632,
"train_steps_per_second": 0.036
}
],
"logging_steps": 10,
"max_steps": 159,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
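
A minimal sketch (not part of the repository) of how this trainer_state.json could be inspected after download: it filters the per-step entries in "log_history" and plots the DPO loss and reward margins against "step". The file name, the use of matplotlib, and the choice of plotted keys are assumptions for illustration only.

# Sketch: load the trainer state and plot loss / reward margins per logged step.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:  # assumed local path
    state = json.load(f)

# Keep only the per-step logging entries; the final summary record
# (train_loss, train_runtime, ...) has no "loss" key and is skipped.
logs = [e for e in state["log_history"] if "loss" in e]

steps = [e["step"] for e in logs]
loss = [e["loss"] for e in logs]
margins = [e["rewards/margins"] for e in logs]

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.plot(steps, loss)
ax1.set_xlabel("step")
ax1.set_ylabel("loss")
ax2.plot(steps, margins)
ax2.set_xlabel("step")
ax2.set_ylabel("rewards/margins")
fig.tight_layout()
plt.show()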