|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.14619883040935672, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2695, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4e-05, |
|
"loss": 1.4688, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6e-05, |
|
"loss": 1.2927, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 8e-05, |
|
"loss": 1.2911, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1655, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00012, |
|
"loss": 1.3511, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00014, |
|
"loss": 1.2358, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00016, |
|
"loss": 1.287, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00018, |
|
"loss": 1.1919, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3329, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019999993359236733, |
|
"loss": 1.0765, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019999973436955748, |
|
"loss": 1.2112, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019999940233183508, |
|
"loss": 1.2021, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001999989374796411, |
|
"loss": 1.306, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019999833981359296, |
|
"loss": 1.2094, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019999760933448442, |
|
"loss": 1.3033, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019999674604328566, |
|
"loss": 1.2345, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019999574994114335, |
|
"loss": 1.1763, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019999462102938037, |
|
"loss": 1.2333, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019999335930949612, |
|
"loss": 1.213, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 1.181932806968689, |
|
"eval_runtime": 76.4827, |
|
"eval_samples_per_second": 0.732, |
|
"eval_steps_per_second": 0.183, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019999196478316637, |
|
"loss": 1.1985, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019999043745224323, |
|
"loss": 1.2438, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019998877731875524, |
|
"loss": 1.0714, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019998698438490736, |
|
"loss": 1.2336, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019998505865308084, |
|
"loss": 1.1357, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019998300012583333, |
|
"loss": 1.1176, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001999808088058989, |
|
"loss": 1.2537, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001999784846961879, |
|
"loss": 1.2098, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001999760277997872, |
|
"loss": 1.3226, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019997343811995984, |
|
"loss": 1.271, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019997071566014535, |
|
"loss": 1.1423, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001999678604239596, |
|
"loss": 1.1422, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019996487241519473, |
|
"loss": 1.2029, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001999617516378193, |
|
"loss": 1.1305, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019995849809597814, |
|
"loss": 1.3264, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001999551117939925, |
|
"loss": 1.2266, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001999515927363599, |
|
"loss": 1.1214, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019994794092775418, |
|
"loss": 1.2081, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019994415637302547, |
|
"loss": 1.2039, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019994023907720027, |
|
"loss": 1.2026, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 1.152363896369934, |
|
"eval_runtime": 76.6763, |
|
"eval_samples_per_second": 0.73, |
|
"eval_steps_per_second": 0.183, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019993618904548131, |
|
"loss": 1.2681, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001999320062832477, |
|
"loss": 1.0463, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019992769079605477, |
|
"loss": 1.2364, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019992324258963413, |
|
"loss": 1.1188, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019991866166989367, |
|
"loss": 1.2932, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019991394804291758, |
|
"loss": 1.1784, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019990910171496627, |
|
"loss": 1.2541, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001999041226924764, |
|
"loss": 1.1132, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019989901098206082, |
|
"loss": 1.1555, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019989376659050877, |
|
"loss": 1.3394, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001998883895247855, |
|
"loss": 1.2951, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019988287979203265, |
|
"loss": 1.2399, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001998772373995679, |
|
"loss": 1.1664, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001998714623548853, |
|
"loss": 1.117, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019986555466565493, |
|
"loss": 1.1931, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019985951433972314, |
|
"loss": 1.2058, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019985334138511237, |
|
"loss": 1.1864, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001998470358100213, |
|
"loss": 1.2206, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019984059762282467, |
|
"loss": 1.0549, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019983402683207332, |
|
"loss": 1.0102, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 1.1410892009735107, |
|
"eval_runtime": 76.6475, |
|
"eval_samples_per_second": 0.731, |
|
"eval_steps_per_second": 0.183, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019982732344649433, |
|
"loss": 1.0848, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019982048747499081, |
|
"loss": 1.3566, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019981351892664194, |
|
"loss": 1.2252, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019980641781070307, |
|
"loss": 1.2283, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019979918413660553, |
|
"loss": 1.1728, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019979181791395672, |
|
"loss": 1.0926, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019978431915254017, |
|
"loss": 1.1868, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019977668786231534, |
|
"loss": 1.0868, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019976892405341773, |
|
"loss": 1.1116, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019976102773615892, |
|
"loss": 1.1881, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019975299892102636, |
|
"loss": 1.1795, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019974483761868358, |
|
"loss": 1.1028, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019973654383997007, |
|
"loss": 0.9675, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019972811759590118, |
|
"loss": 1.2491, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019971955889766825, |
|
"loss": 1.1393, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019971086775663857, |
|
"loss": 1.1288, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019970204418435526, |
|
"loss": 1.1794, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0001996930881925374, |
|
"loss": 0.9819, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0001996839997930799, |
|
"loss": 1.1291, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0001996747789980536, |
|
"loss": 1.2502, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 1.1336756944656372, |
|
"eval_runtime": 76.5931, |
|
"eval_samples_per_second": 0.731, |
|
"eval_steps_per_second": 0.183, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000199665425819705, |
|
"loss": 1.0629, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019965594027045665, |
|
"loss": 1.0234, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019964632236290681, |
|
"loss": 1.1065, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019963657210982948, |
|
"loss": 1.1702, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0001996266895241745, |
|
"loss": 1.2096, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019961667461906743, |
|
"loss": 1.232, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019960652740780966, |
|
"loss": 1.0296, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0001995962479038782, |
|
"loss": 1.1686, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019958583612092576, |
|
"loss": 1.1196, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019957529207278082, |
|
"loss": 1.3479, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0001995646157734475, |
|
"loss": 1.1783, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0001995538072371055, |
|
"loss": 1.2225, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019954286647811027, |
|
"loss": 1.0859, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019953179351099275, |
|
"loss": 1.0763, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019952058835045957, |
|
"loss": 1.2688, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0001995092510113929, |
|
"loss": 1.1738, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019949778150885042, |
|
"loss": 1.1326, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0001994861798580654, |
|
"loss": 1.1722, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0001994744460744466, |
|
"loss": 1.2148, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019946258017357828, |
|
"loss": 1.2499, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 1.1291786432266235, |
|
"eval_runtime": 76.5934, |
|
"eval_samples_per_second": 0.731, |
|
"eval_steps_per_second": 0.183, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019945058217122016, |
|
"loss": 1.1765, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019943845208330742, |
|
"loss": 1.2101, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0001994261899259507, |
|
"loss": 1.1028, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019941379571543596, |
|
"loss": 1.2198, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019940126946822465, |
|
"loss": 1.2474, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019938861120095353, |
|
"loss": 1.1917, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0001993758209304347, |
|
"loss": 1.2239, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019936289867365556, |
|
"loss": 1.2406, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0001993498444477789, |
|
"loss": 1.2051, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019933665827014273, |
|
"loss": 1.146, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019932334015826023, |
|
"loss": 1.0871, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019930989012981992, |
|
"loss": 1.1184, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019929630820268552, |
|
"loss": 1.0224, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019928259439489589, |
|
"loss": 1.0958, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0001992687487246651, |
|
"loss": 1.1811, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019925477121038218, |
|
"loss": 1.3126, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019924066187061156, |
|
"loss": 1.0572, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0001992264207240925, |
|
"loss": 1.0597, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019921204778973944, |
|
"loss": 1.1559, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019919754308664187, |
|
"loss": 1.2146, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 1.1247767210006714, |
|
"eval_runtime": 80.1123, |
|
"eval_samples_per_second": 0.699, |
|
"eval_steps_per_second": 0.175, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0001991829066340642, |
|
"loss": 1.2127, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019916813845144587, |
|
"loss": 1.1637, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019915323855840131, |
|
"loss": 1.2383, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019913820697471985, |
|
"loss": 1.0641, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019912304372036573, |
|
"loss": 1.115, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.000199107748815478, |
|
"loss": 1.0733, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019909232228037076, |
|
"loss": 1.2392, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019907676413553267, |
|
"loss": 1.3308, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019906107440162743, |
|
"loss": 1.1723, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019904525309949334, |
|
"loss": 1.1327, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019902930025014347, |
|
"loss": 1.1309, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019901321587476574, |
|
"loss": 1.1563, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019899699999472258, |
|
"loss": 1.1824, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0001989806526315512, |
|
"loss": 1.2014, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019896417380696333, |
|
"loss": 1.182, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0001989475635428454, |
|
"loss": 1.1519, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0001989308218612584, |
|
"loss": 1.2197, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019891394878443784, |
|
"loss": 1.01, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0001988969443347937, |
|
"loss": 1.2011, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0001988798085349105, |
|
"loss": 1.1338, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 1.120542287826538, |
|
"eval_runtime": 79.9836, |
|
"eval_samples_per_second": 0.7, |
|
"eval_steps_per_second": 0.175, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019886254140754722, |
|
"loss": 1.0325, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0001988451429756372, |
|
"loss": 1.2331, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019882761326228825, |
|
"loss": 1.1751, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001988099522907825, |
|
"loss": 1.2066, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00019879216008457642, |
|
"loss": 1.1406, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00019877423666730075, |
|
"loss": 1.2226, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00019875618206276053, |
|
"loss": 1.1612, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00019873799629493508, |
|
"loss": 1.203, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001987196793879778, |
|
"loss": 1.1134, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00019870123136621638, |
|
"loss": 1.164, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00019868265225415265, |
|
"loss": 0.9973, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001986639420764624, |
|
"loss": 1.0642, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00019864510085799568, |
|
"loss": 1.1626, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00019862612862377648, |
|
"loss": 1.1529, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00019860702539900287, |
|
"loss": 1.2552, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00019858779120904678, |
|
"loss": 1.1867, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00019856842607945418, |
|
"loss": 1.2168, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019854893003594493, |
|
"loss": 1.1797, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019852930310441274, |
|
"loss": 1.2606, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019850954531092517, |
|
"loss": 1.2599, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 1.1176784038543701, |
|
"eval_runtime": 79.9641, |
|
"eval_samples_per_second": 0.7, |
|
"eval_steps_per_second": 0.175, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019848965668172356, |
|
"loss": 0.9995, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0001984696372432231, |
|
"loss": 1.2258, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019844948702201265, |
|
"loss": 1.085, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019842920604485473, |
|
"loss": 1.125, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0001984087943386856, |
|
"loss": 1.1549, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019838825193061518, |
|
"loss": 1.1749, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019836757884792683, |
|
"loss": 1.1702, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0001983467751180776, |
|
"loss": 1.2037, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019832584076869805, |
|
"loss": 1.0019, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019830477582759212, |
|
"loss": 1.1183, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019828358032273735, |
|
"loss": 1.0197, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019826225428228457, |
|
"loss": 1.107, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.000198240797734558, |
|
"loss": 1.1855, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001982192107080552, |
|
"loss": 1.2044, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019819749323144709, |
|
"loss": 1.1586, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019817564533357773, |
|
"loss": 1.1092, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019815366704346453, |
|
"loss": 1.2592, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019813155839029797, |
|
"loss": 1.1808, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019810931940344176, |
|
"loss": 1.1179, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001980869501124326, |
|
"loss": 1.1333, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 1.1160587072372437, |
|
"eval_runtime": 79.8665, |
|
"eval_samples_per_second": 0.701, |
|
"eval_steps_per_second": 0.175, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019806445054698039, |
|
"loss": 1.0633, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019804182073696793, |
|
"loss": 1.1252, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019801906071245111, |
|
"loss": 1.1522, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001979961705036587, |
|
"loss": 1.1706, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019797315014099238, |
|
"loss": 1.1142, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019794999965502672, |
|
"loss": 1.0345, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0001979267190765091, |
|
"loss": 1.1029, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019790330843635966, |
|
"loss": 1.1628, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019787976776567133, |
|
"loss": 1.1569, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019785609709570973, |
|
"loss": 1.2024, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019783229645791307, |
|
"loss": 1.1602, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019780836588389225, |
|
"loss": 1.1588, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019778430540543077, |
|
"loss": 1.1104, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019776011505448455, |
|
"loss": 1.1437, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019773579486318213, |
|
"loss": 1.2098, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019771134486382436, |
|
"loss": 1.125, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019768676508888467, |
|
"loss": 1.0576, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019766205557100868, |
|
"loss": 1.1982, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00019763721634301443, |
|
"loss": 1.1395, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001976122474378922, |
|
"loss": 1.2491, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 1.1136831045150757, |
|
"eval_runtime": 79.9646, |
|
"eval_samples_per_second": 0.7, |
|
"eval_steps_per_second": 0.175, |
|
"step": 200 |
|
} |
|
], |
|
"max_steps": 2736, |
|
"num_train_epochs": 2, |
|
"total_flos": 1.588426799910912e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|