|
{ |
|
"best_metric": 0.6304358839988708, |
|
"best_model_checkpoint": "../outputs/7b_chat_lora_0.05/checkpoint-1600", |
|
"epoch": 0.9644364074743822, |
|
"global_step": 1600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.9999999999999997e-05, |
|
"loss": 2.544, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.9999999999999995e-05, |
|
"loss": 2.4187, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.999999999999999e-05, |
|
"loss": 2.019, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00011999999999999999, |
|
"loss": 1.4918, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00015, |
|
"loss": 1.0844, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00017999999999999998, |
|
"loss": 0.9015, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00020999999999999998, |
|
"loss": 0.8297, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 0.798, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00027, |
|
"loss": 0.7381, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0003, |
|
"loss": 0.7017, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002980756895445798, |
|
"loss": 0.7003, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002961513790891597, |
|
"loss": 0.6933, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002942270686337396, |
|
"loss": 0.6878, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00029230275817831937, |
|
"loss": 0.685, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00029037844772289927, |
|
"loss": 0.6865, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002884541372674791, |
|
"loss": 0.6768, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00028652982681205897, |
|
"loss": 0.6737, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002846055163566389, |
|
"loss": 0.6704, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002826812059012187, |
|
"loss": 0.6741, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00028075689544579857, |
|
"loss": 0.6699, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 0.6688060164451599, |
|
"eval_runtime": 25.909, |
|
"eval_samples_per_second": 77.193, |
|
"eval_steps_per_second": 2.432, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002788325849903784, |
|
"loss": 0.6796, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00027690827453495827, |
|
"loss": 0.6694, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00027498396407953817, |
|
"loss": 0.6727, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.000273059653624118, |
|
"loss": 0.6636, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00027113534316869787, |
|
"loss": 0.6602, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002692110327132777, |
|
"loss": 0.6617, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00026728672225785757, |
|
"loss": 0.6475, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002653624118024374, |
|
"loss": 0.6634, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002634381013470173, |
|
"loss": 0.6611, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00026151379089159717, |
|
"loss": 0.6636, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.000259589480436177, |
|
"loss": 0.6565, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00025766516998075687, |
|
"loss": 0.6583, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002557408595253367, |
|
"loss": 0.6515, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00025381654906991656, |
|
"loss": 0.6687, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00025189223861449647, |
|
"loss": 0.6493, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002499679281590763, |
|
"loss": 0.6608, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00024804361770365617, |
|
"loss": 0.642, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.000246119307248236, |
|
"loss": 0.6549, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00024419499679281586, |
|
"loss": 0.6521, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00024227068633739577, |
|
"loss": 0.6517, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 0.6501107811927795, |
|
"eval_runtime": 26.1226, |
|
"eval_samples_per_second": 76.562, |
|
"eval_steps_per_second": 2.412, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00024034637588197562, |
|
"loss": 0.6536, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00023842206542655546, |
|
"loss": 0.6515, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00023649775497113534, |
|
"loss": 0.6605, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002345734445157152, |
|
"loss": 0.6499, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00023264913406029504, |
|
"loss": 0.6455, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00023072482360487491, |
|
"loss": 0.6545, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00022880051314945476, |
|
"loss": 0.6523, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002268762026940346, |
|
"loss": 0.6428, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0002249518922386145, |
|
"loss": 0.6477, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00022302758178319434, |
|
"loss": 0.6512, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0002211032713277742, |
|
"loss": 0.6402, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00021917896087235406, |
|
"loss": 0.6545, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0002172546504169339, |
|
"loss": 0.6495, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002153303399615138, |
|
"loss": 0.6452, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00021340602950609364, |
|
"loss": 0.6548, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00021148171905067349, |
|
"loss": 0.6528, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00020955740859525336, |
|
"loss": 0.6514, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0002076330981398332, |
|
"loss": 0.6366, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00020570878768441306, |
|
"loss": 0.6457, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00020378447722899294, |
|
"loss": 0.6469, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 0.6427023410797119, |
|
"eval_runtime": 26.0393, |
|
"eval_samples_per_second": 76.807, |
|
"eval_steps_per_second": 2.419, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00020186016677357278, |
|
"loss": 0.654, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00019993585631815263, |
|
"loss": 0.6449, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0001980115458627325, |
|
"loss": 0.6459, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019608723540731236, |
|
"loss": 0.6454, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0001941629249518922, |
|
"loss": 0.641, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00019223861449647208, |
|
"loss": 0.64, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00019031430404105193, |
|
"loss": 0.6366, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00018838999358563178, |
|
"loss": 0.6452, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00018646568313021166, |
|
"loss": 0.6441, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0001845413726747915, |
|
"loss": 0.6441, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00018261706221937138, |
|
"loss": 0.6496, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00018069275176395123, |
|
"loss": 0.6357, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00017876844130853108, |
|
"loss": 0.6347, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00017684413085311096, |
|
"loss": 0.6345, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0001749198203976908, |
|
"loss": 0.6397, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00017299550994227065, |
|
"loss": 0.6444, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00017107119948685053, |
|
"loss": 0.64, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00016914688903143038, |
|
"loss": 0.6514, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00016722257857601023, |
|
"loss": 0.6424, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0001652982681205901, |
|
"loss": 0.643, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 0.6385083794593811, |
|
"eval_runtime": 25.9034, |
|
"eval_samples_per_second": 77.21, |
|
"eval_steps_per_second": 2.432, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00016337395766516995, |
|
"loss": 0.6329, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0001614496472097498, |
|
"loss": 0.6426, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00015952533675432968, |
|
"loss": 0.6454, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00015760102629890953, |
|
"loss": 0.6357, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00015567671584348943, |
|
"loss": 0.6392, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00015375240538806928, |
|
"loss": 0.6408, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0001518280949326491, |
|
"loss": 0.6369, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014990378447722898, |
|
"loss": 0.6354, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00014797947402180885, |
|
"loss": 0.6359, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0001460551635663887, |
|
"loss": 0.6308, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00014413085311096855, |
|
"loss": 0.6442, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00014220654265554843, |
|
"loss": 0.6394, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00014028223220012828, |
|
"loss": 0.6494, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00013835792174470812, |
|
"loss": 0.6413, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.000136433611289288, |
|
"loss": 0.6373, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00013450930083386785, |
|
"loss": 0.6367, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0001325849903784477, |
|
"loss": 0.6398, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00013066067992302757, |
|
"loss": 0.6413, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00012873636946760742, |
|
"loss": 0.6334, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0001268120590121873, |
|
"loss": 0.6331, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 0.6355478763580322, |
|
"eval_runtime": 25.8942, |
|
"eval_samples_per_second": 77.237, |
|
"eval_steps_per_second": 2.433, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00012488774855676715, |
|
"loss": 0.6412, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.000122963438101347, |
|
"loss": 0.6353, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00012103912764592687, |
|
"loss": 0.6315, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00011911481719050672, |
|
"loss": 0.638, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00011719050673508658, |
|
"loss": 0.6305, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00011526619627966645, |
|
"loss": 0.6305, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00011334188582424631, |
|
"loss": 0.6307, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00011141757536882616, |
|
"loss": 0.6394, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00010949326491340602, |
|
"loss": 0.6331, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00010756895445798588, |
|
"loss": 0.6333, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00010564464400256573, |
|
"loss": 0.6434, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0001037203335471456, |
|
"loss": 0.6349, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00010179602309172546, |
|
"loss": 0.6454, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.987171263630532e-05, |
|
"loss": 0.6365, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.794740218088517e-05, |
|
"loss": 0.6338, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.602309172546503e-05, |
|
"loss": 0.6405, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 9.40987812700449e-05, |
|
"loss": 0.6338, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 9.217447081462474e-05, |
|
"loss": 0.6363, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 9.02501603592046e-05, |
|
"loss": 0.6403, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.832584990378447e-05, |
|
"loss": 0.6391, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 0.633297860622406, |
|
"eval_runtime": 25.8864, |
|
"eval_samples_per_second": 77.261, |
|
"eval_steps_per_second": 2.434, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.640153944836432e-05, |
|
"loss": 0.6349, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 8.447722899294418e-05, |
|
"loss": 0.6294, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 8.255291853752406e-05, |
|
"loss": 0.6292, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.062860808210392e-05, |
|
"loss": 0.6371, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.870429762668377e-05, |
|
"loss": 0.6342, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.677998717126363e-05, |
|
"loss": 0.6387, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.485567671584348e-05, |
|
"loss": 0.6405, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.293136626042334e-05, |
|
"loss": 0.627, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.10070558050032e-05, |
|
"loss": 0.6369, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.908274534958307e-05, |
|
"loss": 0.6339, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.715843489416291e-05, |
|
"loss": 0.6353, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.523412443874278e-05, |
|
"loss": 0.6372, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.330981398332264e-05, |
|
"loss": 0.632, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 6.138550352790249e-05, |
|
"loss": 0.6368, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.946119307248236e-05, |
|
"loss": 0.6263, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.753688261706221e-05, |
|
"loss": 0.6305, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.5612572161642076e-05, |
|
"loss": 0.6441, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.368826170622193e-05, |
|
"loss": 0.6382, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 5.176395125080179e-05, |
|
"loss": 0.6389, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.983964079538165e-05, |
|
"loss": 0.6277, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 0.6312016844749451, |
|
"eval_runtime": 25.972, |
|
"eval_samples_per_second": 77.006, |
|
"eval_steps_per_second": 2.426, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.7915330339961505e-05, |
|
"loss": 0.6277, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.5991019884541375e-05, |
|
"loss": 0.6338, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.406670942912123e-05, |
|
"loss": 0.6328, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.2142398973701086e-05, |
|
"loss": 0.6348, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.021808851828095e-05, |
|
"loss": 0.6229, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.8293778062860804e-05, |
|
"loss": 0.6326, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.636946760744066e-05, |
|
"loss": 0.6413, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.444515715202052e-05, |
|
"loss": 0.633, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.2520846696600385e-05, |
|
"loss": 0.6201, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.059653624118024e-05, |
|
"loss": 0.6253, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.8672225785760103e-05, |
|
"loss": 0.6329, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.6747915330339955e-05, |
|
"loss": 0.6405, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.4823604874919818e-05, |
|
"loss": 0.634, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.2899294419499677e-05, |
|
"loss": 0.6355, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.0974983964079536e-05, |
|
"loss": 0.6302, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.9050673508659395e-05, |
|
"loss": 0.6372, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.7126363053239254e-05, |
|
"loss": 0.6224, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.5202052597819113e-05, |
|
"loss": 0.6335, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.3277742142398974e-05, |
|
"loss": 0.6429, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.1353431686978832e-05, |
|
"loss": 0.6368, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 0.6304358839988708, |
|
"eval_runtime": 25.8881, |
|
"eval_samples_per_second": 77.255, |
|
"eval_steps_per_second": 2.434, |
|
"step": 1600 |
|
} |
|
], |
|
"max_steps": 1659, |
|
"num_train_epochs": 1, |
|
"total_flos": 5.199220197147279e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|