|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984, |
|
"eval_steps": 500, |
|
"global_step": 156, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 16.074478059343143, |
|
"learning_rate": 3.125e-08, |
|
"logits/chosen": -1.9564645290374756, |
|
"logits/rejected": -2.1290814876556396, |
|
"logps/chosen": -144.1077423095703, |
|
"logps/pi_response": -268.6929931640625, |
|
"logps/ref_response": -268.6929931640625, |
|
"logps/rejected": -144.41493225097656, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 17.576222912928348, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.241427183151245, |
|
"logits/rejected": -2.282970666885376, |
|
"logps/chosen": -171.37808227539062, |
|
"logps/pi_response": -273.0738525390625, |
|
"logps/ref_response": -271.9916687011719, |
|
"logps/rejected": -176.56832885742188, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.3923611044883728, |
|
"rewards/chosen": -0.004230719991028309, |
|
"rewards/margins": -0.0005770567222498357, |
|
"rewards/rejected": -0.0036536632105708122, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 18.23257699755048, |
|
"learning_rate": 4.989935734988097e-07, |
|
"logits/chosen": -2.2886428833007812, |
|
"logits/rejected": -2.1147801876068115, |
|
"logps/chosen": -194.26535034179688, |
|
"logps/pi_response": -308.6405029296875, |
|
"logps/ref_response": -274.3199157714844, |
|
"logps/rejected": -196.698974609375, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -0.2213359773159027, |
|
"rewards/margins": 0.020678246393799782, |
|
"rewards/rejected": -0.24201424419879913, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 21.359473410005467, |
|
"learning_rate": 4.877641290737883e-07, |
|
"logits/chosen": -2.213491916656494, |
|
"logits/rejected": -2.1212565898895264, |
|
"logps/chosen": -213.91452026367188, |
|
"logps/pi_response": -317.0865783691406, |
|
"logps/ref_response": -260.5080261230469, |
|
"logps/rejected": -215.670166015625, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.35239773988723755, |
|
"rewards/margins": 0.03320372849702835, |
|
"rewards/rejected": -0.3856014609336853, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 19.54689711054047, |
|
"learning_rate": 4.646121984004665e-07, |
|
"logits/chosen": -2.36901593208313, |
|
"logits/rejected": -2.241117000579834, |
|
"logps/chosen": -191.35202026367188, |
|
"logps/pi_response": -293.92608642578125, |
|
"logps/ref_response": -255.9798126220703, |
|
"logps/rejected": -191.24124145507812, |
|
"loss": 0.6947, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.22946178913116455, |
|
"rewards/margins": 0.020651038736104965, |
|
"rewards/rejected": -0.2501128315925598, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 19.66181931005281, |
|
"learning_rate": 4.3069871595684787e-07, |
|
"logits/chosen": -2.2629857063293457, |
|
"logits/rejected": -2.1153407096862793, |
|
"logps/chosen": -225.6036834716797, |
|
"logps/pi_response": -330.4422912597656, |
|
"logps/ref_response": -266.11285400390625, |
|
"logps/rejected": -226.37161254882812, |
|
"loss": 0.6836, |
|
"rewards/accuracies": 0.49687498807907104, |
|
"rewards/chosen": -0.4082844853401184, |
|
"rewards/margins": 0.03162597864866257, |
|
"rewards/rejected": -0.4399104118347168, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 34.081390496400246, |
|
"learning_rate": 3.877242453630256e-07, |
|
"logits/chosen": -2.3039848804473877, |
|
"logits/rejected": -2.3428866863250732, |
|
"logps/chosen": -220.15634155273438, |
|
"logps/pi_response": -319.7514953613281, |
|
"logps/ref_response": -254.2370147705078, |
|
"logps/rejected": -226.6223907470703, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -0.3762189447879791, |
|
"rewards/margins": 0.03814256191253662, |
|
"rewards/rejected": -0.41436153650283813, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 17.20872152727463, |
|
"learning_rate": 3.378437060203357e-07, |
|
"logits/chosen": -2.387434959411621, |
|
"logits/rejected": -2.2482728958129883, |
|
"logps/chosen": -199.58290100097656, |
|
"logps/pi_response": -299.43707275390625, |
|
"logps/ref_response": -256.967529296875, |
|
"logps/rejected": -197.93199157714844, |
|
"loss": 0.6856, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.21634867787361145, |
|
"rewards/margins": 0.03388797491788864, |
|
"rewards/rejected": -0.2502366304397583, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 18.125911483507668, |
|
"learning_rate": 2.8355831645441387e-07, |
|
"logits/chosen": -2.2822232246398926, |
|
"logits/rejected": -2.355548620223999, |
|
"logps/chosen": -211.54409790039062, |
|
"logps/pi_response": -338.62335205078125, |
|
"logps/ref_response": -268.83172607421875, |
|
"logps/rejected": -212.7510223388672, |
|
"loss": 0.6795, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.39336004853248596, |
|
"rewards/margins": 0.008717315271496773, |
|
"rewards/rejected": -0.4020773470401764, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 18.45235135252255, |
|
"learning_rate": 2.2759017277414164e-07, |
|
"logits/chosen": -2.3078341484069824, |
|
"logits/rejected": -2.3145835399627686, |
|
"logps/chosen": -221.66226196289062, |
|
"logps/pi_response": -324.65771484375, |
|
"logps/ref_response": -253.67257690429688, |
|
"logps/rejected": -230.7862091064453, |
|
"loss": 0.6837, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4485122263431549, |
|
"rewards/margins": 0.015239333733916283, |
|
"rewards/rejected": -0.46375155448913574, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 21.755132830081727, |
|
"learning_rate": 1.7274575140626315e-07, |
|
"logits/chosen": -2.324589729309082, |
|
"logits/rejected": -2.312774181365967, |
|
"logps/chosen": -224.4755401611328, |
|
"logps/pi_response": -331.3367919921875, |
|
"logps/ref_response": -261.8123474121094, |
|
"logps/rejected": -226.1329345703125, |
|
"loss": 0.6807, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.5234971642494202, |
|
"rewards/margins": 0.04355122521519661, |
|
"rewards/rejected": -0.5670484304428101, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 17.643769449739274, |
|
"learning_rate": 1.2177518064852348e-07, |
|
"logits/chosen": -2.396841526031494, |
|
"logits/rejected": -2.2907986640930176, |
|
"logps/chosen": -216.90243530273438, |
|
"logps/pi_response": -318.94024658203125, |
|
"logps/ref_response": -251.3756561279297, |
|
"logps/rejected": -215.78512573242188, |
|
"loss": 0.6808, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.42508357763290405, |
|
"rewards/margins": 0.0596102774143219, |
|
"rewards/rejected": -0.48469385504722595, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 19.96301055359274, |
|
"learning_rate": 7.723433775328384e-08, |
|
"logits/chosen": -2.3210701942443848, |
|
"logits/rejected": -2.387702465057373, |
|
"logps/chosen": -209.82119750976562, |
|
"logps/pi_response": -329.6842956542969, |
|
"logps/ref_response": -276.03692626953125, |
|
"logps/rejected": -222.03341674804688, |
|
"loss": 0.6769, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": -0.32728347182273865, |
|
"rewards/margins": 0.0734453871846199, |
|
"rewards/rejected": -0.40072885155677795, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 20.360241724578835, |
|
"learning_rate": 4.1356686569674335e-08, |
|
"logits/chosen": -2.3041348457336426, |
|
"logits/rejected": -2.2705655097961426, |
|
"logps/chosen": -210.88119506835938, |
|
"logps/pi_response": -328.033203125, |
|
"logps/ref_response": -266.6432189941406, |
|
"logps/rejected": -211.0803680419922, |
|
"loss": 0.6748, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": -0.3793022036552429, |
|
"rewards/margins": 0.05234457924962044, |
|
"rewards/rejected": -0.43164676427841187, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 18.16724101735529, |
|
"learning_rate": 1.5941282340065697e-08, |
|
"logits/chosen": -2.401698350906372, |
|
"logits/rejected": -2.386355400085449, |
|
"logps/chosen": -201.68978881835938, |
|
"logps/pi_response": -315.3774719238281, |
|
"logps/ref_response": -254.541259765625, |
|
"logps/rejected": -215.79934692382812, |
|
"loss": 0.668, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3424326777458191, |
|
"rewards/margins": 0.09304080158472061, |
|
"rewards/rejected": -0.4354734420776367, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 21.61601513002701, |
|
"learning_rate": 2.2625595580163247e-09, |
|
"logits/chosen": -2.2798948287963867, |
|
"logits/rejected": -2.293689489364624, |
|
"logps/chosen": -211.88253784179688, |
|
"logps/pi_response": -325.4713439941406, |
|
"logps/ref_response": -264.48388671875, |
|
"logps/rejected": -218.7720489501953, |
|
"loss": 0.6717, |
|
"rewards/accuracies": 0.528124988079071, |
|
"rewards/chosen": -0.3841695487499237, |
|
"rewards/margins": 0.059767745435237885, |
|
"rewards/rejected": -0.4439373016357422, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 156, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6820480842620898, |
|
"train_runtime": 31897.7284, |
|
"train_samples_per_second": 0.627, |
|
"train_steps_per_second": 0.005 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 156, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 200, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|