|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.999996159001955, |
|
"eval_steps": 1000, |
|
"global_step": 130174, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0002488214003044242, |
|
"loss": 2.325, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0002943479979561561, |
|
"loss": 2.2608, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0002998153476849216, |
|
"loss": 2.2442, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00029958453229107356, |
|
"loss": 2.2311, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0002993537168972256, |
|
"loss": 2.2236, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00029912290150337756, |
|
"loss": 2.2163, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0002988920861095296, |
|
"loss": 2.2148, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00029866127071568157, |
|
"loss": 2.2093, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0002984304553218336, |
|
"loss": 2.2074, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00029819963992798557, |
|
"loss": 2.2009, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_accuracy": 0.5228380295891784, |
|
"eval_loss": 2.199307918548584, |
|
"eval_runtime": 43.726, |
|
"eval_samples_per_second": 393.793, |
|
"eval_steps_per_second": 2.356, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0002979688245341376, |
|
"loss": 2.2039, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0002977380091402896, |
|
"loss": 2.1986, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0002975095019003801, |
|
"loss": 2.203, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00029727868650653206, |
|
"loss": 2.197, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00029704787111268404, |
|
"loss": 2.1994, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00029681705571883607, |
|
"loss": 2.1945, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00029658624032498804, |
|
"loss": 2.1944, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00029635542493114007, |
|
"loss": 2.19, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00029612460953729205, |
|
"loss": 2.1917, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0002958937941434441, |
|
"loss": 2.1902, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.5246369945669794, |
|
"eval_loss": 2.184251546859741, |
|
"eval_runtime": 40.2801, |
|
"eval_samples_per_second": 427.481, |
|
"eval_steps_per_second": 2.557, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00029566297874959605, |
|
"loss": 2.1869, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0002954321633557481, |
|
"loss": 2.1867, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00029520134796190005, |
|
"loss": 2.1845, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0002949705325680521, |
|
"loss": 2.1917, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00029473971717420405, |
|
"loss": 2.1828, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00029450890178035603, |
|
"loss": 2.1825, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00029427808638650806, |
|
"loss": 2.1819, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00029404727099266003, |
|
"loss": 2.1834, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00029381645559881206, |
|
"loss": 2.179, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00029358564020496404, |
|
"loss": 2.179, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.5261414670573112, |
|
"eval_loss": 2.176321268081665, |
|
"eval_runtime": 43.6149, |
|
"eval_samples_per_second": 394.797, |
|
"eval_steps_per_second": 2.362, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00029335482481111606, |
|
"loss": 2.1805, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00029312400941726804, |
|
"loss": 2.1816, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002928955021773585, |
|
"loss": 2.1823, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00029266468678351053, |
|
"loss": 2.1809, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002924338713896625, |
|
"loss": 2.1789, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002922030559958145, |
|
"loss": 2.1788, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002919722406019665, |
|
"loss": 2.176, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002917414252081185, |
|
"loss": 2.1759, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002915106098142705, |
|
"loss": 2.1757, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002912797944204225, |
|
"loss": 2.1773, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_accuracy": 0.5269956781855605, |
|
"eval_loss": 2.1722235679626465, |
|
"eval_runtime": 43.4233, |
|
"eval_samples_per_second": 396.539, |
|
"eval_steps_per_second": 2.372, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002910489790265745, |
|
"loss": 2.1753, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002908181636327265, |
|
"loss": 2.1774, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002905873482388785, |
|
"loss": 2.1685, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002903565328450305, |
|
"loss": 2.172, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000290128025605121, |
|
"loss": 2.173, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000289897210211273, |
|
"loss": 2.1751, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000289666394817425, |
|
"loss": 2.1715, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000289435579423577, |
|
"loss": 2.1724, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000289204764029729, |
|
"loss": 2.1729, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000288973948635881, |
|
"loss": 2.1708, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.5273898591237568, |
|
"eval_loss": 2.167875289916992, |
|
"eval_runtime": 46.3712, |
|
"eval_samples_per_second": 371.33, |
|
"eval_steps_per_second": 2.221, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000288743133242033, |
|
"loss": 2.1694, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000288512317848185, |
|
"loss": 2.1714, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000288281502454337, |
|
"loss": 2.1747, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000288050687060489, |
|
"loss": 2.1678, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000287819871666641, |
|
"loss": 2.1699, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000287589056272793, |
|
"loss": 2.166, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000287358240878945, |
|
"loss": 2.1681, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000287127425485097, |
|
"loss": 2.1693, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000286896610091249, |
|
"loss": 2.1691, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000286665794697401, |
|
"loss": 2.1695, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.5279203114303783, |
|
"eval_loss": 2.164818286895752, |
|
"eval_runtime": 47.7953, |
|
"eval_samples_per_second": 360.265, |
|
"eval_steps_per_second": 2.155, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000286434979303553, |
|
"loss": 2.1693, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000286204163909705, |
|
"loss": 2.1685, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00028597565666979547, |
|
"loss": 2.1631, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002857448412759475, |
|
"loss": 2.1669, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002855140258820995, |
|
"loss": 2.1672, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002852832104882515, |
|
"loss": 2.1689, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002850523950944035, |
|
"loss": 2.1647, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002848215797005555, |
|
"loss": 2.1669, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002845907643067075, |
|
"loss": 2.1705, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00028435994891285945, |
|
"loss": 2.1662, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.5283743968963053, |
|
"eval_loss": 2.1637933254241943, |
|
"eval_runtime": 41.85, |
|
"eval_samples_per_second": 411.446, |
|
"eval_steps_per_second": 2.461, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002841291335190115, |
|
"loss": 2.1611, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00028389831812516346, |
|
"loss": 2.1607, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002836675027313155, |
|
"loss": 2.1703, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00028343899549140595, |
|
"loss": 2.1667, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002832081800975579, |
|
"loss": 2.1602, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00028297736470370995, |
|
"loss": 2.1661, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002827465493098619, |
|
"loss": 2.1574, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00028251573391601396, |
|
"loss": 2.1655, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00028228491852216593, |
|
"loss": 2.1596, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00028205410312831796, |
|
"loss": 2.1635, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.5285070099729606, |
|
"eval_loss": 2.1613388061523438, |
|
"eval_runtime": 42.5073, |
|
"eval_samples_per_second": 405.083, |
|
"eval_steps_per_second": 2.423, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00028182328773446993, |
|
"loss": 2.1609, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002815924723406219, |
|
"loss": 2.1637, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00028136165694677394, |
|
"loss": 2.1623, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002811308415529259, |
|
"loss": 2.1642, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00028090002615907794, |
|
"loss": 2.1637, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002806692107652299, |
|
"loss": 2.164, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00028044070352532043, |
|
"loss": 2.1622, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002802121962854109, |
|
"loss": 2.1666, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002799813808915629, |
|
"loss": 2.1597, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002797505654977149, |
|
"loss": 2.1668, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy": 0.5288979899058585, |
|
"eval_loss": 2.159482479095459, |
|
"eval_runtime": 43.0577, |
|
"eval_samples_per_second": 399.905, |
|
"eval_steps_per_second": 2.392, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002795197501038669, |
|
"loss": 2.1606, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002792889347100189, |
|
"loss": 2.1645, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002790581193161709, |
|
"loss": 2.1602, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002788273039223229, |
|
"loss": 2.1624, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00027859648852847493, |
|
"loss": 2.1594, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002783656731346269, |
|
"loss": 2.1601, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00027813485774077893, |
|
"loss": 2.1597, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002779040423469309, |
|
"loss": 2.1639, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002776732269530829, |
|
"loss": 2.16, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002774424115592349, |
|
"loss": 2.1597, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.5293557336635552, |
|
"eval_loss": 2.1580162048339844, |
|
"eval_runtime": 43.1367, |
|
"eval_samples_per_second": 399.173, |
|
"eval_steps_per_second": 2.388, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002772115961653869, |
|
"loss": 2.1599, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002769807807715389, |
|
"loss": 2.161, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002767499653776909, |
|
"loss": 2.1619, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002765191499838429, |
|
"loss": 2.1631, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002762883345899949, |
|
"loss": 2.1606, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002760598273500854, |
|
"loss": 2.1548, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002758290119562374, |
|
"loss": 2.1598, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00027559819656238936, |
|
"loss": 2.1633, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002753673811685414, |
|
"loss": 2.1612, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00027513656577469336, |
|
"loss": 2.1593, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.5293914020083107, |
|
"eval_loss": 2.1572113037109375, |
|
"eval_runtime": 42.0785, |
|
"eval_samples_per_second": 409.211, |
|
"eval_steps_per_second": 2.448, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002749057503808454, |
|
"loss": 2.1588, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00027467493498699736, |
|
"loss": 2.1571, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002744441195931494, |
|
"loss": 2.1563, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00027421330419930137, |
|
"loss": 2.1575, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002739824888054534, |
|
"loss": 2.1552, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00027375167341160537, |
|
"loss": 2.1645, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00027352085801775735, |
|
"loss": 2.1566, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002732900426239094, |
|
"loss": 2.161, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00027305922723006135, |
|
"loss": 2.1569, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002728284118362134, |
|
"loss": 2.1561, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_accuracy": 0.5296154723792112, |
|
"eval_loss": 2.1556365489959717, |
|
"eval_runtime": 43.0866, |
|
"eval_samples_per_second": 399.637, |
|
"eval_steps_per_second": 2.391, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00027259759644236535, |
|
"loss": 2.155, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002723667810485174, |
|
"loss": 2.1577, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00027213596565466935, |
|
"loss": 2.1558, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002719051502608214, |
|
"loss": 2.1571, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00027167433486697336, |
|
"loss": 2.1603, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00027144351947312533, |
|
"loss": 2.1558, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00027121270407927736, |
|
"loss": 2.1539, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00027098188868542934, |
|
"loss": 2.156, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00027075107329158136, |
|
"loss": 2.1619, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00027052025789773334, |
|
"loss": 2.1525, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.529815763853608, |
|
"eval_loss": 2.1542844772338867, |
|
"eval_runtime": 42.2302, |
|
"eval_samples_per_second": 407.742, |
|
"eval_steps_per_second": 2.439, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00027028944250388537, |
|
"loss": 2.1572, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00027005862711003734, |
|
"loss": 2.1525, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00026982781171618937, |
|
"loss": 2.1523, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00026959699632234135, |
|
"loss": 2.1548, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002693661809284934, |
|
"loss": 2.1551, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00026913536553464535, |
|
"loss": 2.1583, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002689045501407974, |
|
"loss": 2.1535, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00026867604290088784, |
|
"loss": 2.1592, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00026844753566097836, |
|
"loss": 2.1605, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00026821672026713033, |
|
"loss": 2.1557, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.52968589449578, |
|
"eval_loss": 2.153402328491211, |
|
"eval_runtime": 42.2489, |
|
"eval_samples_per_second": 407.561, |
|
"eval_steps_per_second": 2.438, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002679859048732823, |
|
"loss": 2.1564, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00026775508947943433, |
|
"loss": 2.1512, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002675242740855863, |
|
"loss": 2.1593, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00026729345869173834, |
|
"loss": 2.1539, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002670626432978903, |
|
"loss": 2.1499, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00026683182790404234, |
|
"loss": 2.1529, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002666010125101943, |
|
"loss": 2.152, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00026637250527028483, |
|
"loss": 2.1512, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002661416898764368, |
|
"loss": 2.1537, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00026591087448258883, |
|
"loss": 2.1547, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.5298546332036621, |
|
"eval_loss": 2.1525609493255615, |
|
"eval_runtime": 45.4654, |
|
"eval_samples_per_second": 378.728, |
|
"eval_steps_per_second": 2.265, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002656800590887408, |
|
"loss": 2.155, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00026544924369489284, |
|
"loss": 2.1544, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002652184283010448, |
|
"loss": 2.1524, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002649876129071968, |
|
"loss": 2.1573, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002647567975133488, |
|
"loss": 2.1569, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002645259821195008, |
|
"loss": 2.1529, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002642951667256528, |
|
"loss": 2.1561, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002640643513318048, |
|
"loss": 2.1526, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002638335359379568, |
|
"loss": 2.1572, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002636027205441088, |
|
"loss": 2.1544, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.5303366131443338, |
|
"eval_loss": 2.151641368865967, |
|
"eval_runtime": 42.591, |
|
"eval_samples_per_second": 404.287, |
|
"eval_steps_per_second": 2.418, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002633719051502608, |
|
"loss": 2.1538, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002631410897564128, |
|
"loss": 2.156, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002629102743625648, |
|
"loss": 2.1495, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00026268176712265524, |
|
"loss": 2.154, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00026245095172880726, |
|
"loss": 2.1536, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00026222013633495924, |
|
"loss": 2.1495, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00026198932094111127, |
|
"loss": 2.1567, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00026175850554726324, |
|
"loss": 2.1533, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00026152769015341527, |
|
"loss": 2.1545, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00026129687475956725, |
|
"loss": 2.1562, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_accuracy": 0.5304024623961903, |
|
"eval_loss": 2.151214599609375, |
|
"eval_runtime": 42.6646, |
|
"eval_samples_per_second": 403.59, |
|
"eval_steps_per_second": 2.414, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002610660593657193, |
|
"loss": 2.1521, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00026083524397187125, |
|
"loss": 2.1575, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002606044285780233, |
|
"loss": 2.1486, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00026037361318417525, |
|
"loss": 2.1513, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002601427977903273, |
|
"loss": 2.1574, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00025991429055041774, |
|
"loss": 2.1556, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00025968347515656977, |
|
"loss": 2.1561, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00025945265976272175, |
|
"loss": 2.1511, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002592218443688738, |
|
"loss": 2.1519, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00025899102897502575, |
|
"loss": 2.1515, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.5303078040966466, |
|
"eval_loss": 2.150639057159424, |
|
"eval_runtime": 42.0098, |
|
"eval_samples_per_second": 409.88, |
|
"eval_steps_per_second": 2.452, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002587602135811778, |
|
"loss": 2.1539, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00025852939818732975, |
|
"loss": 2.1509, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002582985827934817, |
|
"loss": 2.1533, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00025806776739963376, |
|
"loss": 2.1507, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00025783695200578573, |
|
"loss": 2.1548, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00025760613661193776, |
|
"loss": 2.1545, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00025737532121808973, |
|
"loss": 2.1515, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00025714450582424176, |
|
"loss": 2.1553, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00025691369043039374, |
|
"loss": 2.1475, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00025668287503654577, |
|
"loss": 2.1516, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_accuracy": 0.5307376533795986, |
|
"eval_loss": 2.1488122940063477, |
|
"eval_runtime": 41.9221, |
|
"eval_samples_per_second": 410.738, |
|
"eval_steps_per_second": 2.457, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00025645205964269774, |
|
"loss": 2.1494, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00025622124424884977, |
|
"loss": 2.1481, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00025599042885500174, |
|
"loss": 2.1473, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00025575961346115377, |
|
"loss": 2.1558, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00025552879806730575, |
|
"loss": 2.1495, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00025530029082739626, |
|
"loss": 2.1554, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00025506947543354824, |
|
"loss": 2.1501, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002548386600397002, |
|
"loss": 2.1493, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00025460784464585224, |
|
"loss": 2.1515, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002543770292520042, |
|
"loss": 2.1519, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_accuracy": 0.530503522706331, |
|
"eval_loss": 2.149341106414795, |
|
"eval_runtime": 42.1534, |
|
"eval_samples_per_second": 408.484, |
|
"eval_steps_per_second": 2.443, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00025414621385815624, |
|
"loss": 2.1495, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002539153984643082, |
|
"loss": 2.1459, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00025368458307046025, |
|
"loss": 2.1492, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002534560758305507, |
|
"loss": 2.1529, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002532252604367027, |
|
"loss": 2.1504, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002529944450428547, |
|
"loss": 2.1452, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002527636296490067, |
|
"loss": 2.1553, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002525328142551587, |
|
"loss": 2.1509, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002523019988613107, |
|
"loss": 2.1521, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00025207118346746266, |
|
"loss": 2.1506, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.5311107991401185, |
|
"eval_loss": 2.1474409103393555, |
|
"eval_runtime": 40.4996, |
|
"eval_samples_per_second": 425.164, |
|
"eval_steps_per_second": 2.543, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002518403680736147, |
|
"loss": 2.1493, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00025160955267976667, |
|
"loss": 2.1486, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002513787372859187, |
|
"loss": 2.1476, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00025114792189207067, |
|
"loss": 2.1473, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002509171064982227, |
|
"loss": 2.1487, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002506862911043747, |
|
"loss": 2.145, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002504554757105267, |
|
"loss": 2.1484, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002502246603166787, |
|
"loss": 2.1514, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002499938449228307, |
|
"loss": 2.1475, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002497630295289827, |
|
"loss": 2.1484, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.5309598946046141, |
|
"eval_loss": 2.1483261585235596, |
|
"eval_runtime": 41.8845, |
|
"eval_samples_per_second": 411.107, |
|
"eval_steps_per_second": 2.459, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002495322141351347, |
|
"loss": 2.1488, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002493013987412867, |
|
"loss": 2.1457, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002490705833474387, |
|
"loss": 2.1475, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002488397679535907, |
|
"loss": 2.1505, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00024861126071368115, |
|
"loss": 2.1455, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002483804453198332, |
|
"loss": 2.1496, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00024814962992598515, |
|
"loss": 2.1494, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002479188145321372, |
|
"loss": 2.1471, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00024768799913828916, |
|
"loss": 2.1492, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002474571837444412, |
|
"loss": 2.1533, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.5312488996544286, |
|
"eval_loss": 2.1471364498138428, |
|
"eval_runtime": 40.9689, |
|
"eval_samples_per_second": 420.294, |
|
"eval_steps_per_second": 2.514, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00024722636835059316, |
|
"loss": 2.1508, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002469955529567452, |
|
"loss": 2.1488, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00024676473756289716, |
|
"loss": 2.1499, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002465339221690492, |
|
"loss": 2.1461, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00024630541492913965, |
|
"loss": 2.1491, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002460745995352917, |
|
"loss": 2.1464, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00024584378414144366, |
|
"loss": 2.1488, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002456129687475957, |
|
"loss": 2.1459, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00024538215335374766, |
|
"loss": 2.1474, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002451513379598997, |
|
"loss": 2.1471, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.5309704121934523, |
|
"eval_loss": 2.146967649459839, |
|
"eval_runtime": 42.4024, |
|
"eval_samples_per_second": 406.086, |
|
"eval_steps_per_second": 2.429, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00024492052256605166, |
|
"loss": 2.1547, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002446897071722037, |
|
"loss": 2.1432, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00024445889177835567, |
|
"loss": 2.1423, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00024422807638450764, |
|
"loss": 2.1483, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00024399726099065964, |
|
"loss": 2.1464, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00024376644559681164, |
|
"loss": 2.1498, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00024353793835690213, |
|
"loss": 2.1444, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002433071229630541, |
|
"loss": 2.1435, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00024307630756920614, |
|
"loss": 2.1454, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002428478003292966, |
|
"loss": 2.1421, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.5313138343333427, |
|
"eval_loss": 2.1454176902770996, |
|
"eval_runtime": 41.7457, |
|
"eval_samples_per_second": 412.474, |
|
"eval_steps_per_second": 2.467, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002426169849354486, |
|
"loss": 2.1441, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002423861695416006, |
|
"loss": 2.1467, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002421553541477526, |
|
"loss": 2.1464, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002419245387539046, |
|
"loss": 2.1459, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002416960315139951, |
|
"loss": 2.1492, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00024146521612014707, |
|
"loss": 2.148, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002412344007262991, |
|
"loss": 2.1419, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00024100358533245107, |
|
"loss": 2.1466, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002407727699386031, |
|
"loss": 2.1467, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00024054195454475507, |
|
"loss": 2.1452, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.5314327288158612, |
|
"eval_loss": 2.1451821327209473, |
|
"eval_runtime": 42.3, |
|
"eval_samples_per_second": 407.068, |
|
"eval_steps_per_second": 2.435, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002403111391509071, |
|
"loss": 2.1489, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00024008032375705908, |
|
"loss": 2.1481, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002398495083632111, |
|
"loss": 2.1454, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00023961869296936308, |
|
"loss": 2.1472, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002393878775755151, |
|
"loss": 2.1397, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00023915706218166708, |
|
"loss": 2.1485, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00023892624678781909, |
|
"loss": 2.1406, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002386954313939711, |
|
"loss": 2.1466, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00023846461600012306, |
|
"loss": 2.1505, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002382338006062751, |
|
"loss": 2.1481, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.5316910956721036, |
|
"eval_loss": 2.1437814235687256, |
|
"eval_runtime": 43.5176, |
|
"eval_samples_per_second": 395.679, |
|
"eval_steps_per_second": 2.367, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00023800298521242706, |
|
"loss": 2.1456, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002377721698185791, |
|
"loss": 2.144, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00023754135442473107, |
|
"loss": 2.1441, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002373105390308831, |
|
"loss": 2.1467, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00023707972363703507, |
|
"loss": 2.1481, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00023684890824318707, |
|
"loss": 2.1481, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00023661809284933907, |
|
"loss": 2.149, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00023638958560942956, |
|
"loss": 2.1456, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00023615877021558154, |
|
"loss": 2.1489, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00023592795482173357, |
|
"loss": 2.149, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.5316824072291504, |
|
"eval_loss": 2.1441478729248047, |
|
"eval_runtime": 42.9654, |
|
"eval_samples_per_second": 400.765, |
|
"eval_steps_per_second": 2.397, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00023569713942788554, |
|
"loss": 2.1434, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00023546632403403757, |
|
"loss": 2.147, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00023523550864018954, |
|
"loss": 2.1402, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00023500469324634157, |
|
"loss": 2.1398, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00023477387785249355, |
|
"loss": 2.1469, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00023454306245864558, |
|
"loss": 2.1496, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00023431224706479755, |
|
"loss": 2.1482, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00023408143167094953, |
|
"loss": 2.1462, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00023385061627710155, |
|
"loss": 2.1455, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00023361980088325353, |
|
"loss": 2.1483, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.5314555931394225, |
|
"eval_loss": 2.1434664726257324, |
|
"eval_runtime": 42.2842, |
|
"eval_samples_per_second": 407.221, |
|
"eval_steps_per_second": 2.436, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00023338898548940556, |
|
"loss": 2.1464, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00023315817009555753, |
|
"loss": 2.1391, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00023292735470170956, |
|
"loss": 2.1457, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00023269653930786154, |
|
"loss": 2.142, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00023246572391401356, |
|
"loss": 2.143, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00023223721667410403, |
|
"loss": 2.1382, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00023200640128025603, |
|
"loss": 2.1425, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00023177558588640803, |
|
"loss": 2.1487, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00023154477049256003, |
|
"loss": 2.1464, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00023131395509871203, |
|
"loss": 2.1453, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_accuracy": 0.5318639499582268, |
|
"eval_loss": 2.142831325531006, |
|
"eval_runtime": 40.8449, |
|
"eval_samples_per_second": 421.57, |
|
"eval_steps_per_second": 2.522, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00023108313970486403, |
|
"loss": 2.1419, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00023085232431101604, |
|
"loss": 2.1424, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00023062150891716804, |
|
"loss": 2.1444, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0002303930016772585, |
|
"loss": 2.1451, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00023016218628341053, |
|
"loss": 2.1441, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0002299313708895625, |
|
"loss": 2.1408, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00022970055549571453, |
|
"loss": 2.1481, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0002294697401018665, |
|
"loss": 2.1408, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00022923892470801853, |
|
"loss": 2.1444, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0002290081093141705, |
|
"loss": 2.1442, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.5319791861489757, |
|
"eval_loss": 2.1425275802612305, |
|
"eval_runtime": 44.6867, |
|
"eval_samples_per_second": 385.328, |
|
"eval_steps_per_second": 2.305, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00022877729392032248, |
|
"loss": 2.1445, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0002285464785264745, |
|
"loss": 2.1433, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0002283156631326265, |
|
"loss": 2.1411, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00022808484773877852, |
|
"loss": 2.142, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0002278540323449305, |
|
"loss": 2.1472, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00022762321695108252, |
|
"loss": 2.1456, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0002273924015572345, |
|
"loss": 2.1443, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00022716158616338652, |
|
"loss": 2.1401, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002269307707695385, |
|
"loss": 2.145, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00022669995537569052, |
|
"loss": 2.1411, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy": 0.5321625580239371, |
|
"eval_loss": 2.1413471698760986, |
|
"eval_runtime": 43.746, |
|
"eval_samples_per_second": 393.614, |
|
"eval_steps_per_second": 2.355, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002264691399818425, |
|
"loss": 2.1454, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002262383245879945, |
|
"loss": 2.1459, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002260075091941465, |
|
"loss": 2.1432, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002257766938002985, |
|
"loss": 2.1442, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00022554818656038897, |
|
"loss": 2.1422, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.000225317371166541, |
|
"loss": 2.1449, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00022508655577269297, |
|
"loss": 2.1439, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.000224855740378845, |
|
"loss": 2.1394, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00022462492498499697, |
|
"loss": 2.1431, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00022439410959114897, |
|
"loss": 2.1418, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy": 0.5322407540105166, |
|
"eval_loss": 2.1408767700195312, |
|
"eval_runtime": 42.7654, |
|
"eval_samples_per_second": 402.639, |
|
"eval_steps_per_second": 2.408, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00022416329419730098, |
|
"loss": 2.1467, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00022393247880345295, |
|
"loss": 2.1413, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00022370166340960498, |
|
"loss": 2.1456, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00022347084801575695, |
|
"loss": 2.1438, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00022324003262190898, |
|
"loss": 2.1415, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00022300921722806096, |
|
"loss": 2.144, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00022278070998815147, |
|
"loss": 2.1413, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00022254989459430345, |
|
"loss": 2.1403, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00022231907920045548, |
|
"loss": 2.1434, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00022208826380660745, |
|
"loss": 2.1394, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.5322869399441105, |
|
"eval_loss": 2.1409242153167725, |
|
"eval_runtime": 43.8368, |
|
"eval_samples_per_second": 392.798, |
|
"eval_steps_per_second": 2.35, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00022185744841275945, |
|
"loss": 2.1469, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00022162663301891145, |
|
"loss": 2.1438, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00022139581762506346, |
|
"loss": 2.1374, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00022116500223121546, |
|
"loss": 2.1442, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00022093418683736746, |
|
"loss": 2.1446, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00022070337144351946, |
|
"loss": 2.1427, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00022047255604967146, |
|
"loss": 2.1418, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00022024404880976192, |
|
"loss": 2.142, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00022001323341591395, |
|
"loss": 2.142, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00021978241802206593, |
|
"loss": 2.1415, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_accuracy": 0.5322645329070204, |
|
"eval_loss": 2.1403162479400635, |
|
"eval_runtime": 41.7315, |
|
"eval_samples_per_second": 412.614, |
|
"eval_steps_per_second": 2.468, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00021955160262821796, |
|
"loss": 2.1387, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00021932078723436993, |
|
"loss": 2.1399, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002190899718405219, |
|
"loss": 2.1348, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00021885915644667393, |
|
"loss": 2.1415, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002186283410528259, |
|
"loss": 2.1394, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00021839752565897794, |
|
"loss": 2.147, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002181667102651299, |
|
"loss": 2.1451, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00021793820302522043, |
|
"loss": 2.145, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002177073876313724, |
|
"loss": 2.144, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002174765722375244, |
|
"loss": 2.139, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy": 0.532117743949757, |
|
"eval_loss": 2.140399694442749, |
|
"eval_runtime": 43.2575, |
|
"eval_samples_per_second": 398.058, |
|
"eval_steps_per_second": 2.381, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002172457568436764, |
|
"loss": 2.1411, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002170149414498284, |
|
"loss": 2.1431, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002167841260559804, |
|
"loss": 2.1433, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002165533106621324, |
|
"loss": 2.1442, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002163224952682844, |
|
"loss": 2.1452, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00021609167987443641, |
|
"loss": 2.14, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00021586086448058842, |
|
"loss": 2.1412, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002156323572406789, |
|
"loss": 2.1395, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00021540154184683088, |
|
"loss": 2.1373, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002151707264529829, |
|
"loss": 2.1403, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy": 0.5324373871931436, |
|
"eval_loss": 2.139353036880493, |
|
"eval_runtime": 42.1409, |
|
"eval_samples_per_second": 408.605, |
|
"eval_steps_per_second": 2.444, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00021493991105913488, |
|
"loss": 2.1419, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002147090956652869, |
|
"loss": 2.1414, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00021447828027143889, |
|
"loss": 2.1376, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00021424746487759086, |
|
"loss": 2.1412, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002140166494837429, |
|
"loss": 2.1416, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00021378583408989486, |
|
"loss": 2.1368, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002135550186960469, |
|
"loss": 2.1381, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00021332420330219887, |
|
"loss": 2.1383, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002130933879083509, |
|
"loss": 2.1418, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00021286257251450287, |
|
"loss": 2.1382, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.53245842237082, |
|
"eval_loss": 2.139456272125244, |
|
"eval_runtime": 41.9317, |
|
"eval_samples_per_second": 410.644, |
|
"eval_steps_per_second": 2.456, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002126317571206549, |
|
"loss": 2.134, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00021240094172680687, |
|
"loss": 2.1363, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002121701263329589, |
|
"loss": 2.1359, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00021193931093911088, |
|
"loss": 2.1407, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00021170849554526288, |
|
"loss": 2.1406, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00021147768015141488, |
|
"loss": 2.1373, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00021124686475756688, |
|
"loss": 2.139, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00021101604936371888, |
|
"loss": 2.1396, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00021078523396987088, |
|
"loss": 2.1441, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00021055441857602289, |
|
"loss": 2.1375, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.5323290102994632, |
|
"eval_loss": 2.139159917831421, |
|
"eval_runtime": 43.2834, |
|
"eval_samples_per_second": 397.82, |
|
"eval_steps_per_second": 2.38, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00021032591133611337, |
|
"loss": 2.1411, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00021009740409620384, |
|
"loss": 2.1416, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00020986658870235587, |
|
"loss": 2.1446, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00020963577330850784, |
|
"loss": 2.1413, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00020940495791465981, |
|
"loss": 2.1361, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00020917414252081184, |
|
"loss": 2.1357, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00020894332712696382, |
|
"loss": 2.1392, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00020871251173311585, |
|
"loss": 2.1366, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00020848169633926782, |
|
"loss": 2.1429, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00020825088094541985, |
|
"loss": 2.1403, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.5327936133542283, |
|
"eval_loss": 2.138187885284424, |
|
"eval_runtime": 41.2066, |
|
"eval_samples_per_second": 417.87, |
|
"eval_steps_per_second": 2.5, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0002080223737055103, |
|
"loss": 2.141, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0002077915583116623, |
|
"loss": 2.1414, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00020756074291781431, |
|
"loss": 2.1391, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00020732992752396632, |
|
"loss": 2.1434, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00020709911213011832, |
|
"loss": 2.1401, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00020686829673627032, |
|
"loss": 2.143, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0002066374813424223, |
|
"loss": 2.1415, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00020640666594857432, |
|
"loss": 2.1412, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0002061758505547263, |
|
"loss": 2.1377, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00020594503516087833, |
|
"loss": 2.1385, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.5328018445107103, |
|
"eval_loss": 2.1378109455108643, |
|
"eval_runtime": 42.5719, |
|
"eval_samples_per_second": 404.468, |
|
"eval_steps_per_second": 2.419, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0002057142197670303, |
|
"loss": 2.1419, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00020548340437318233, |
|
"loss": 2.1367, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0002052525889793343, |
|
"loss": 2.1358, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00020502177358548633, |
|
"loss": 2.1379, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0002047909581916383, |
|
"loss": 2.1374, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00020456014279779028, |
|
"loss": 2.14, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0002043293274039423, |
|
"loss": 2.1382, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00020409851201009429, |
|
"loss": 2.1387, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00020386769661624631, |
|
"loss": 2.136, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0002036368812223983, |
|
"loss": 2.1356, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy": 0.5327972716459981, |
|
"eval_loss": 2.1371355056762695, |
|
"eval_runtime": 42.4794, |
|
"eval_samples_per_second": 405.349, |
|
"eval_steps_per_second": 2.425, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00020340606582855032, |
|
"loss": 2.1389, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0002031752504347023, |
|
"loss": 2.14, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00020294443504085432, |
|
"loss": 2.1359, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002027136196470063, |
|
"loss": 2.1368, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00020248280425315832, |
|
"loss": 2.1374, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00020225429701324879, |
|
"loss": 2.1372, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002020234816194008, |
|
"loss": 2.1342, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002017926662255528, |
|
"loss": 2.1389, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002015618508317048, |
|
"loss": 2.1383, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002013310354378568, |
|
"loss": 2.1388, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.5330181410115999, |
|
"eval_loss": 2.136991262435913, |
|
"eval_runtime": 40.3635, |
|
"eval_samples_per_second": 426.599, |
|
"eval_steps_per_second": 2.552, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002011002200440088, |
|
"loss": 2.133, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00020086940465016077, |
|
"loss": 2.1393, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002006385892563128, |
|
"loss": 2.1398, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00020040777386246477, |
|
"loss": 2.1419, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002001769584686168, |
|
"loss": 2.1371, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00019994614307476877, |
|
"loss": 2.1375, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019971763583485924, |
|
"loss": 2.1362, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019948682044101127, |
|
"loss": 2.1397, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019925600504716324, |
|
"loss": 2.1359, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019902518965331527, |
|
"loss": 2.1347, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.5329385731656068, |
|
"eval_loss": 2.136066436767578, |
|
"eval_runtime": 42.2261, |
|
"eval_samples_per_second": 407.781, |
|
"eval_steps_per_second": 2.439, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019879437425946724, |
|
"loss": 2.1414, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019856355886561927, |
|
"loss": 2.1376, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019833274347177125, |
|
"loss": 2.1355, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019810192807792328, |
|
"loss": 2.136, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019787111268407525, |
|
"loss": 2.1367, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019764029729022728, |
|
"loss": 2.1394, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019740948189637925, |
|
"loss": 2.1352, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019718097465646974, |
|
"loss": 2.1368, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019695015926262174, |
|
"loss": 2.1358, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019671934386877375, |
|
"loss": 2.1384, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_accuracy": 0.5331781912765289, |
|
"eval_loss": 2.13570237159729, |
|
"eval_runtime": 40.2734, |
|
"eval_samples_per_second": 427.553, |
|
"eval_steps_per_second": 2.558, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019648852847492572, |
|
"loss": 2.1368, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019625771308107775, |
|
"loss": 2.1374, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019602689768722972, |
|
"loss": 2.1371, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019579608229338175, |
|
"loss": 2.1371, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019556526689953373, |
|
"loss": 2.138, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019533445150568575, |
|
"loss": 2.1379, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019510363611183773, |
|
"loss": 2.1358, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019487282071798973, |
|
"loss": 2.134, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019464200532414173, |
|
"loss": 2.1383, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001944111899302937, |
|
"loss": 2.1391, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_accuracy": 0.5333117189261267, |
|
"eval_loss": 2.1352105140686035, |
|
"eval_runtime": 42.248, |
|
"eval_samples_per_second": 407.57, |
|
"eval_steps_per_second": 2.438, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019418037453644574, |
|
"loss": 2.1326, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001939495591425977, |
|
"loss": 2.136, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019372105190268823, |
|
"loss": 2.1381, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001934902365088402, |
|
"loss": 2.1371, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019325942111499223, |
|
"loss": 2.13, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001930286057211442, |
|
"loss": 2.1354, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019279779032729623, |
|
"loss": 2.1341, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001925669749334482, |
|
"loss": 2.1352, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001923361595396002, |
|
"loss": 2.1392, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001921053441457522, |
|
"loss": 2.1342, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_accuracy": 0.5330414626216324, |
|
"eval_loss": 2.135636568069458, |
|
"eval_runtime": 42.0027, |
|
"eval_samples_per_second": 409.95, |
|
"eval_steps_per_second": 2.452, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001918745287519042, |
|
"loss": 2.1356, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019164602151199467, |
|
"loss": 2.1373, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001914152061181467, |
|
"loss": 2.1319, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019118439072429868, |
|
"loss": 2.1364, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001909535753304507, |
|
"loss": 2.1347, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00019072275993660268, |
|
"loss": 2.1349, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001904919445427547, |
|
"loss": 2.136, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00019026112914890668, |
|
"loss": 2.1328, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00019003031375505866, |
|
"loss": 2.1379, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001897994983612107, |
|
"loss": 2.1319, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.5333684224485588, |
|
"eval_loss": 2.1347005367279053, |
|
"eval_runtime": 42.8346, |
|
"eval_samples_per_second": 401.988, |
|
"eval_steps_per_second": 2.405, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00018956868296736266, |
|
"loss": 2.1364, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001893378675735147, |
|
"loss": 2.1321, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00018910705217966667, |
|
"loss": 2.1344, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001888762367858187, |
|
"loss": 2.1365, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00018864542139197067, |
|
"loss": 2.1405, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001884146059981227, |
|
"loss": 2.1316, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00018818379060427467, |
|
"loss": 2.1291, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001879529752104267, |
|
"loss": 2.1364, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00018772215981657867, |
|
"loss": 2.1342, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0001874913444227307, |
|
"loss": 2.1305, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_accuracy": 0.5333807691832818, |
|
"eval_loss": 2.1345300674438477, |
|
"eval_runtime": 40.3145, |
|
"eval_samples_per_second": 427.117, |
|
"eval_steps_per_second": 2.555, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00018726052902888268, |
|
"loss": 2.1359, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0001870297136350347, |
|
"loss": 2.1353, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00018679889824118668, |
|
"loss": 2.1367, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00018656808284733868, |
|
"loss": 2.133, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00018633957560742917, |
|
"loss": 2.1344, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00018610876021358117, |
|
"loss": 2.1351, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00018587794481973315, |
|
"loss": 2.1342, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00018564712942588518, |
|
"loss": 2.1337, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00018541631403203715, |
|
"loss": 2.1372, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00018518549863818915, |
|
"loss": 2.1312, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_accuracy": 0.5334438747163109, |
|
"eval_loss": 2.133868932723999, |
|
"eval_runtime": 42.3503, |
|
"eval_samples_per_second": 406.585, |
|
"eval_steps_per_second": 2.432, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00018495468324434115, |
|
"loss": 2.1352, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00018472386785049316, |
|
"loss": 2.1352, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00018449305245664516, |
|
"loss": 2.1302, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00018426223706279716, |
|
"loss": 2.1306, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00018403603797682614, |
|
"loss": 2.1342, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0001838052225829781, |
|
"loss": 2.1312, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0001835744071891301, |
|
"loss": 2.1311, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00018334359179528211, |
|
"loss": 2.1329, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00018311277640143412, |
|
"loss": 2.1318, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00018288196100758612, |
|
"loss": 2.1352, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy": 0.5335911209600455, |
|
"eval_loss": 2.13337779045105, |
|
"eval_runtime": 42.5418, |
|
"eval_samples_per_second": 404.755, |
|
"eval_steps_per_second": 2.421, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00018265114561373812, |
|
"loss": 2.1317, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00018242033021989012, |
|
"loss": 2.1328, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00018218951482604212, |
|
"loss": 2.1339, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00018195869943219412, |
|
"loss": 2.1344, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00018172788403834613, |
|
"loss": 2.1333, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001814970686444981, |
|
"loss": 2.1352, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00018126625325065013, |
|
"loss": 2.1326, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001810354378568021, |
|
"loss": 2.137, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00018080462246295413, |
|
"loss": 2.1357, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001805738070691061, |
|
"loss": 2.1342, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.5338700657074931, |
|
"eval_loss": 2.1334917545318604, |
|
"eval_runtime": 42.0501, |
|
"eval_samples_per_second": 409.488, |
|
"eval_steps_per_second": 2.449, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001803429916752581, |
|
"loss": 2.1316, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001801121762814101, |
|
"loss": 2.1368, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001798813608875621, |
|
"loss": 2.1354, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001796505454937141, |
|
"loss": 2.1356, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001794197300998661, |
|
"loss": 2.1336, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00017918891470601812, |
|
"loss": 2.1297, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001789580993121701, |
|
"loss": 2.134, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00017872728391832212, |
|
"loss": 2.1331, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001784964685244741, |
|
"loss": 2.1307, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00017826565313062612, |
|
"loss": 2.1355, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.5338568043998275, |
|
"eval_loss": 2.1317787170410156, |
|
"eval_runtime": 49.386, |
|
"eval_samples_per_second": 348.662, |
|
"eval_steps_per_second": 2.086, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001780348377367781, |
|
"loss": 2.1265, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00017780402234293013, |
|
"loss": 2.1308, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001775732069490821, |
|
"loss": 2.1353, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00017734239155523413, |
|
"loss": 2.1333, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001771138843153246, |
|
"loss": 2.1301, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001768830689214766, |
|
"loss": 2.1322, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001766522535276286, |
|
"loss": 2.1354, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001764214381337806, |
|
"loss": 2.1333, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001761906227399326, |
|
"loss": 2.1325, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001759598073460846, |
|
"loss": 2.1333, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.5340008496382636, |
|
"eval_loss": 2.1320323944091797, |
|
"eval_runtime": 42.8801, |
|
"eval_samples_per_second": 401.562, |
|
"eval_steps_per_second": 2.402, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00017572899195223657, |
|
"loss": 2.1302, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00017549817655838858, |
|
"loss": 2.1359, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00017526736116454058, |
|
"loss": 2.129, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00017503654577069258, |
|
"loss": 2.1271, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00017480573037684458, |
|
"loss": 2.1255, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00017457491498299658, |
|
"loss": 2.1304, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00017434409958914858, |
|
"loss": 2.1358, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00017411559234923905, |
|
"loss": 2.1296, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00017388477695539107, |
|
"loss": 2.135, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00017365396156154305, |
|
"loss": 2.1315, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_accuracy": 0.5337804375591328, |
|
"eval_loss": 2.1315910816192627, |
|
"eval_runtime": 42.5737, |
|
"eval_samples_per_second": 404.451, |
|
"eval_steps_per_second": 2.419, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00017342314616769508, |
|
"loss": 2.1346, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00017319233077384705, |
|
"loss": 2.1324, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00017296151537999908, |
|
"loss": 2.1324, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00017273069998615105, |
|
"loss": 2.1303, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00017249988459230308, |
|
"loss": 2.1301, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00017226906919845506, |
|
"loss": 2.1314, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0001720382538046071, |
|
"loss": 2.1332, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00017180743841075906, |
|
"loss": 2.1307, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00017157662301691106, |
|
"loss": 2.1318, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00017134811577700152, |
|
"loss": 2.1316, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy": 0.5340438345665587, |
|
"eval_loss": 2.1310548782348633, |
|
"eval_runtime": 42.2768, |
|
"eval_samples_per_second": 407.292, |
|
"eval_steps_per_second": 2.436, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00017111730038315355, |
|
"loss": 2.1323, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00017088648498930553, |
|
"loss": 2.131, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00017065566959545753, |
|
"loss": 2.1298, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00017042485420160953, |
|
"loss": 2.1316, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0001701963469617, |
|
"loss": 2.1321, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00016996553156785202, |
|
"loss": 2.1302, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.000169734716174004, |
|
"loss": 2.1326, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00016950390078015603, |
|
"loss": 2.1306, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.000169273085386308, |
|
"loss": 2.1296, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00016904226999246003, |
|
"loss": 2.1332, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.5338810405828025, |
|
"eval_loss": 2.1308557987213135, |
|
"eval_runtime": 42.5948, |
|
"eval_samples_per_second": 404.251, |
|
"eval_steps_per_second": 2.418, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.000168811454598612, |
|
"loss": 2.1288, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00016858063920476403, |
|
"loss": 2.1303, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.000168349823810916, |
|
"loss": 2.1297, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00016811900841706803, |
|
"loss": 2.1268, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00016788819302322, |
|
"loss": 2.1309, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00016765737762937204, |
|
"loss": 2.1294, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0001674288703894625, |
|
"loss": 2.1252, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0001671980549956145, |
|
"loss": 2.1285, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00016696723960176648, |
|
"loss": 2.1297, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0001667364242079185, |
|
"loss": 2.1258, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.5340570958742242, |
|
"eval_loss": 2.1298437118530273, |
|
"eval_runtime": 46.3444, |
|
"eval_samples_per_second": 371.544, |
|
"eval_steps_per_second": 2.222, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00016650560881407048, |
|
"loss": 2.1271, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0001662747934202225, |
|
"loss": 2.132, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00016604397802637448, |
|
"loss": 2.1298, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00016581547078646494, |
|
"loss": 2.1306, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00016558465539261697, |
|
"loss": 2.1242, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00016535383999876895, |
|
"loss": 2.1351, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00016512302460492098, |
|
"loss": 2.1341, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00016489220921107295, |
|
"loss": 2.1278, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00016466139381722498, |
|
"loss": 2.1276, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00016443057842337695, |
|
"loss": 2.1302, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.5344750557089244, |
|
"eval_loss": 2.129270553588867, |
|
"eval_runtime": 43.5271, |
|
"eval_samples_per_second": 395.593, |
|
"eval_steps_per_second": 2.366, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00016419976302952898, |
|
"loss": 2.131, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00016396894763568096, |
|
"loss": 2.1272, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00016373813224183299, |
|
"loss": 2.1305, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00016350731684798496, |
|
"loss": 2.1315, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.000163276501454137, |
|
"loss": 2.1251, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00016304568606028896, |
|
"loss": 2.1284, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00016281487066644097, |
|
"loss": 2.1298, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00016258405527259297, |
|
"loss": 2.1316, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00016235323987874497, |
|
"loss": 2.1277, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00016212242448489697, |
|
"loss": 2.1318, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_accuracy": 0.5344819150059927, |
|
"eval_loss": 2.12872314453125, |
|
"eval_runtime": 40.2932, |
|
"eval_samples_per_second": 427.342, |
|
"eval_steps_per_second": 2.556, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00016189160909104897, |
|
"loss": 2.1301, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00016166079369720097, |
|
"loss": 2.1308, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00016142997830335298, |
|
"loss": 2.1292, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00016119916290950498, |
|
"loss": 2.1307, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00016096834751565695, |
|
"loss": 2.1264, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00016073753212180895, |
|
"loss": 2.1274, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00016050671672796095, |
|
"loss": 2.1284, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00016027590133411296, |
|
"loss": 2.1288, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00016004508594026496, |
|
"loss": 2.1294, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00015981427054641696, |
|
"loss": 2.1247, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_accuracy": 0.5341919953832358, |
|
"eval_loss": 2.12888765335083, |
|
"eval_runtime": 41.8552, |
|
"eval_samples_per_second": 411.394, |
|
"eval_steps_per_second": 2.461, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00015958345515256896, |
|
"loss": 2.1282, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00015935263975872096, |
|
"loss": 2.1295, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00015912182436487296, |
|
"loss": 2.1283, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00015889331712496345, |
|
"loss": 2.1314, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00015866250173111543, |
|
"loss": 2.1321, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00015843168633726746, |
|
"loss": 2.1271, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00015820087094341943, |
|
"loss": 2.1261, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00015797005554957146, |
|
"loss": 2.1281, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00015773924015572343, |
|
"loss": 2.1278, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00015750842476187546, |
|
"loss": 2.1282, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 0.5344627089742012, |
|
"eval_loss": 2.1275761127471924, |
|
"eval_runtime": 42.3322, |
|
"eval_samples_per_second": 406.759, |
|
"eval_steps_per_second": 2.433, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00015727760936802744, |
|
"loss": 2.1227, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00015704679397417944, |
|
"loss": 2.1288, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00015681597858033144, |
|
"loss": 2.1275, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00015658516318648344, |
|
"loss": 2.1288, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00015635434779263544, |
|
"loss": 2.13, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00015612353239878742, |
|
"loss": 2.1276, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00015589271700493945, |
|
"loss": 2.1277, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00015566190161109142, |
|
"loss": 2.1292, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00015543108621724345, |
|
"loss": 2.1259, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00015520027082339543, |
|
"loss": 2.1225, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 0.5345834326026048, |
|
"eval_loss": 2.127586603164673, |
|
"eval_runtime": 42.1512, |
|
"eval_samples_per_second": 408.505, |
|
"eval_steps_per_second": 2.444, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00015496945542954743, |
|
"loss": 2.1342, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00015474094818963792, |
|
"loss": 2.1263, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00015451013279578992, |
|
"loss": 2.1261, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0001542793174019419, |
|
"loss": 2.1303, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00015404850200809392, |
|
"loss": 2.1326, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0001538176866142459, |
|
"loss": 2.1278, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00015358687122039792, |
|
"loss": 2.1254, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0001533560558265499, |
|
"loss": 2.1235, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00015312524043270193, |
|
"loss": 2.1271, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0001528944250388539, |
|
"loss": 2.1288, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.5344064627382406, |
|
"eval_loss": 2.126460313796997, |
|
"eval_runtime": 41.9851, |
|
"eval_samples_per_second": 410.121, |
|
"eval_steps_per_second": 2.453, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00015266360964500593, |
|
"loss": 2.1253, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0001524327942511579, |
|
"loss": 2.1233, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00015220197885730993, |
|
"loss": 2.1252, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0001519711634634619, |
|
"loss": 2.1294, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00015174034806961388, |
|
"loss": 2.1256, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001515095326757659, |
|
"loss": 2.1267, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00015127871728191789, |
|
"loss": 2.126, |
|
"step": 64700 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00015104790188806992, |
|
"loss": 2.1258, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001508170864942219, |
|
"loss": 2.1307, |
|
"step": 64900 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00015058627110037392, |
|
"loss": 2.1281, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_accuracy": 0.5346437944168065, |
|
"eval_loss": 2.1260733604431152, |
|
"eval_runtime": 40.4059, |
|
"eval_samples_per_second": 426.15, |
|
"eval_steps_per_second": 2.549, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001503554557065259, |
|
"loss": 2.1246, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00015012464031267792, |
|
"loss": 2.1272, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00014989382491882992, |
|
"loss": 2.1288, |
|
"step": 65300 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00014966531767892039, |
|
"loss": 2.127, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001494345022850724, |
|
"loss": 2.1255, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001492036868912244, |
|
"loss": 2.1221, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001489728714973764, |
|
"loss": 2.1281, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0001487420561035284, |
|
"loss": 2.1257, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00014851124070968037, |
|
"loss": 2.1268, |
|
"step": 65900 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00014828042531583237, |
|
"loss": 2.1267, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.5348189351352859, |
|
"eval_loss": 2.125568389892578, |
|
"eval_runtime": 42.0351, |
|
"eval_samples_per_second": 409.634, |
|
"eval_steps_per_second": 2.45, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00014804960992198437, |
|
"loss": 2.1268, |
|
"step": 66100 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00014781879452813637, |
|
"loss": 2.1235, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00014758797913428837, |
|
"loss": 2.1311, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00014735716374044037, |
|
"loss": 2.1276, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00014712634834659238, |
|
"loss": 2.129, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00014689553295274438, |
|
"loss": 2.1234, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00014666471755889638, |
|
"loss": 2.1312, |
|
"step": 66700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00014643390216504838, |
|
"loss": 2.1246, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00014620308677120038, |
|
"loss": 2.1244, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00014597227137735238, |
|
"loss": 2.1252, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.534863749209466, |
|
"eval_loss": 2.125560760498047, |
|
"eval_runtime": 43.0904, |
|
"eval_samples_per_second": 399.602, |
|
"eval_steps_per_second": 2.39, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014574145598350439, |
|
"loss": 2.1254, |
|
"step": 67100 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0001455106405896564, |
|
"loss": 2.1264, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014528213334974688, |
|
"loss": 2.1272, |
|
"step": 67300 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014505131795589888, |
|
"loss": 2.1249, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014482050256205088, |
|
"loss": 2.1246, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014458968716820285, |
|
"loss": 2.1302, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014435887177435486, |
|
"loss": 2.1272, |
|
"step": 67700 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014412805638050686, |
|
"loss": 2.1264, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014389724098665886, |
|
"loss": 2.1272, |
|
"step": 67900 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014366642559281086, |
|
"loss": 2.1237, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_accuracy": 0.5348610054906386, |
|
"eval_loss": 2.1258022785186768, |
|
"eval_runtime": 43.8595, |
|
"eval_samples_per_second": 392.595, |
|
"eval_steps_per_second": 2.348, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014343561019896286, |
|
"loss": 2.1214, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014320479480511486, |
|
"loss": 2.129, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014297397941126687, |
|
"loss": 2.1197, |
|
"step": 68300 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014274547217135733, |
|
"loss": 2.1251, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014251465677750933, |
|
"loss": 2.127, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014228384138366133, |
|
"loss": 2.1215, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014205302598981333, |
|
"loss": 2.1277, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014182221059596533, |
|
"loss": 2.1293, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014159370335605582, |
|
"loss": 2.1248, |
|
"step": 68900 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014136288796220782, |
|
"loss": 2.1264, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.5352551864288351, |
|
"eval_loss": 2.1243185997009277, |
|
"eval_runtime": 42.6964, |
|
"eval_samples_per_second": 403.289, |
|
"eval_steps_per_second": 2.412, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014113207256835983, |
|
"loss": 2.1243, |
|
"step": 69100 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014090125717451183, |
|
"loss": 2.1249, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014067044178066383, |
|
"loss": 2.1231, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014043962638681583, |
|
"loss": 2.1285, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0001402088109929678, |
|
"loss": 2.1271, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0001399779955991198, |
|
"loss": 2.1254, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0001397471802052718, |
|
"loss": 2.1216, |
|
"step": 69700 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0001395163648114238, |
|
"loss": 2.1233, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0001392855494175758, |
|
"loss": 2.1232, |
|
"step": 69900 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00013905473402372781, |
|
"loss": 2.1245, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_accuracy": 0.5352295783864464, |
|
"eval_loss": 2.1242990493774414, |
|
"eval_runtime": 40.3898, |
|
"eval_samples_per_second": 426.321, |
|
"eval_steps_per_second": 2.55, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00013882391862987982, |
|
"loss": 2.1216, |
|
"step": 70100 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00013859310323603182, |
|
"loss": 2.1278, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00013836228784218382, |
|
"loss": 2.1247, |
|
"step": 70300 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0001381314724483358, |
|
"loss": 2.1261, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00013790296520842628, |
|
"loss": 2.1238, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00013767214981457828, |
|
"loss": 2.1256, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00013744133442073029, |
|
"loss": 2.1215, |
|
"step": 70700 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0001372105190268823, |
|
"loss": 2.12, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0001369797036330343, |
|
"loss": 2.1225, |
|
"step": 70900 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0001367488882391863, |
|
"loss": 2.1235, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_accuracy": 0.5352314075323313, |
|
"eval_loss": 2.123901844024658, |
|
"eval_runtime": 42.1278, |
|
"eval_samples_per_second": 408.733, |
|
"eval_steps_per_second": 2.445, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0001365180728453383, |
|
"loss": 2.1211, |
|
"step": 71100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0001362872574514903, |
|
"loss": 2.124, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0001360564420576423, |
|
"loss": 2.1224, |
|
"step": 71300 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0001358256266637943, |
|
"loss": 2.123, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0001355948112699463, |
|
"loss": 2.1242, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0001353639958760983, |
|
"loss": 2.1244, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013513548863618876, |
|
"loss": 2.1239, |
|
"step": 71700 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013490467324234076, |
|
"loss": 2.1209, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013467385784849277, |
|
"loss": 2.1213, |
|
"step": 71900 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013444304245464477, |
|
"loss": 2.1261, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_accuracy": 0.5357376236559779, |
|
"eval_loss": 2.1223719120025635, |
|
"eval_runtime": 41.9736, |
|
"eval_samples_per_second": 410.234, |
|
"eval_steps_per_second": 2.454, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013421222706079677, |
|
"loss": 2.1277, |
|
"step": 72100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013398141166694877, |
|
"loss": 2.1232, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00013375059627310074, |
|
"loss": 2.1235, |
|
"step": 72300 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00013351978087925275, |
|
"loss": 2.1244, |
|
"step": 72400 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00013328896548540475, |
|
"loss": 2.1265, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00013305815009155675, |
|
"loss": 2.1257, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00013282733469770875, |
|
"loss": 2.1212, |
|
"step": 72700 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00013259651930386075, |
|
"loss": 2.1239, |
|
"step": 72800 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00013236570391001275, |
|
"loss": 2.1214, |
|
"step": 72900 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00013213488851616476, |
|
"loss": 2.1218, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.5355108095662501, |
|
"eval_loss": 2.121886730194092, |
|
"eval_runtime": 40.4232, |
|
"eval_samples_per_second": 425.968, |
|
"eval_steps_per_second": 2.548, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00013190407312231676, |
|
"loss": 2.1235, |
|
"step": 73100 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00013167325772846876, |
|
"loss": 2.1215, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00013144244233462076, |
|
"loss": 2.1223, |
|
"step": 73300 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00013121393509471125, |
|
"loss": 2.1189, |
|
"step": 73400 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00013098311970086325, |
|
"loss": 2.124, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00013075230430701525, |
|
"loss": 2.1239, |
|
"step": 73600 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00013052148891316725, |
|
"loss": 2.1255, |
|
"step": 73700 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00013029067351931926, |
|
"loss": 2.1225, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00013005985812547126, |
|
"loss": 2.1198, |
|
"step": 73900 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00012982904273162323, |
|
"loss": 2.1205, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_accuracy": 0.5355661412292684, |
|
"eval_loss": 2.1218619346618652, |
|
"eval_runtime": 41.7364, |
|
"eval_samples_per_second": 412.566, |
|
"eval_steps_per_second": 2.468, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00012959822733777523, |
|
"loss": 2.122, |
|
"step": 74100 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00012936741194392724, |
|
"loss": 2.1219, |
|
"step": 74200 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0001291389047040177, |
|
"loss": 2.1241, |
|
"step": 74300 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0001289080893101697, |
|
"loss": 2.1195, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0001286772739163217, |
|
"loss": 2.1249, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0001284464585224737, |
|
"loss": 2.1259, |
|
"step": 74600 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0001282156431286257, |
|
"loss": 2.1228, |
|
"step": 74700 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0001279848277347777, |
|
"loss": 2.1223, |
|
"step": 74800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0001277540123409297, |
|
"loss": 2.124, |
|
"step": 74900 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0001275231969470817, |
|
"loss": 2.1229, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.5354605080544153, |
|
"eval_loss": 2.121462821960449, |
|
"eval_runtime": 41.895, |
|
"eval_samples_per_second": 411.004, |
|
"eval_steps_per_second": 2.459, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0001272923815532337, |
|
"loss": 2.1232, |
|
"step": 75100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0001270615661593857, |
|
"loss": 2.1202, |
|
"step": 75200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00012683075076553771, |
|
"loss": 2.1202, |
|
"step": 75300 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00012659993537168972, |
|
"loss": 2.1209, |
|
"step": 75400 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00012636911997784172, |
|
"loss": 2.1212, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00012613830458399372, |
|
"loss": 2.127, |
|
"step": 75600 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00012590748919014572, |
|
"loss": 2.1192, |
|
"step": 75700 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00012567667379629772, |
|
"loss": 2.1207, |
|
"step": 75800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00012544585840244972, |
|
"loss": 2.118, |
|
"step": 75900 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00012521504300860173, |
|
"loss": 2.1199, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.5357902116001688, |
|
"eval_loss": 2.1206843852996826, |
|
"eval_runtime": 42.4403, |
|
"eval_samples_per_second": 405.723, |
|
"eval_steps_per_second": 2.427, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0001249842276147537, |
|
"loss": 2.1155, |
|
"step": 76100 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001247534122209057, |
|
"loss": 2.1234, |
|
"step": 76200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001245225968270577, |
|
"loss": 2.1239, |
|
"step": 76300 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001242917814332097, |
|
"loss": 2.1228, |
|
"step": 76400 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001240632741933002, |
|
"loss": 2.1189, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001238324587994522, |
|
"loss": 2.1198, |
|
"step": 76600 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001236016434056042, |
|
"loss": 2.1209, |
|
"step": 76700 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00012337082801175617, |
|
"loss": 2.1213, |
|
"step": 76800 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00012314001261790817, |
|
"loss": 2.1269, |
|
"step": 76900 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00012290919722406017, |
|
"loss": 2.1175, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_accuracy": 0.5358331965284641, |
|
"eval_loss": 2.1204559803009033, |
|
"eval_runtime": 42.2461, |
|
"eval_samples_per_second": 407.588, |
|
"eval_steps_per_second": 2.438, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00012267838183021218, |
|
"loss": 2.1214, |
|
"step": 77100 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00012244756643636418, |
|
"loss": 2.1236, |
|
"step": 77200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00012221675104251618, |
|
"loss": 2.1185, |
|
"step": 77300 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00012198593564866818, |
|
"loss": 2.1196, |
|
"step": 77400 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00012175512025482018, |
|
"loss": 2.1193, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00012152430486097218, |
|
"loss": 2.1149, |
|
"step": 77600 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00012129348946712419, |
|
"loss": 2.1214, |
|
"step": 77700 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00012106267407327619, |
|
"loss": 2.1224, |
|
"step": 77800 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00012083416683336666, |
|
"loss": 2.1212, |
|
"step": 77900 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00012060335143951866, |
|
"loss": 2.1205, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.5359333422656624, |
|
"eval_loss": 2.120081901550293, |
|
"eval_runtime": 42.3289, |
|
"eval_samples_per_second": 406.791, |
|
"eval_steps_per_second": 2.433, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00012037253604567067, |
|
"loss": 2.1243, |
|
"step": 78100 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00012014172065182265, |
|
"loss": 2.1172, |
|
"step": 78200 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00011991090525797466, |
|
"loss": 2.12, |
|
"step": 78300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00011968008986412666, |
|
"loss": 2.1211, |
|
"step": 78400 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00011944927447027866, |
|
"loss": 2.1182, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00011921845907643066, |
|
"loss": 2.1241, |
|
"step": 78600 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00011898764368258266, |
|
"loss": 2.1215, |
|
"step": 78700 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00011875682828873465, |
|
"loss": 2.119, |
|
"step": 78800 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00011852601289488665, |
|
"loss": 2.1226, |
|
"step": 78900 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00011829750565497714, |
|
"loss": 2.1206, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.5361629000742176, |
|
"eval_loss": 2.1194353103637695, |
|
"eval_runtime": 41.81, |
|
"eval_samples_per_second": 411.84, |
|
"eval_steps_per_second": 2.464, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00011806669026112914, |
|
"loss": 2.1177, |
|
"step": 79100 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00011783587486728114, |
|
"loss": 2.123, |
|
"step": 79200 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00011760505947343313, |
|
"loss": 2.1172, |
|
"step": 79300 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00011737424407958513, |
|
"loss": 2.117, |
|
"step": 79400 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00011714342868573714, |
|
"loss": 2.1192, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00011691261329188914, |
|
"loss": 2.1204, |
|
"step": 79600 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00011668179789804114, |
|
"loss": 2.1223, |
|
"step": 79700 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00011645098250419314, |
|
"loss": 2.1175, |
|
"step": 79800 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00011622016711034514, |
|
"loss": 2.1175, |
|
"step": 79900 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00011598935171649714, |
|
"loss": 2.1183, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.5360938498170625, |
|
"eval_loss": 2.1190812587738037, |
|
"eval_runtime": 42.5392, |
|
"eval_samples_per_second": 404.779, |
|
"eval_steps_per_second": 2.421, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00011575853632264915, |
|
"loss": 2.1195, |
|
"step": 80100 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00011552772092880115, |
|
"loss": 2.1194, |
|
"step": 80200 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00011529690553495312, |
|
"loss": 2.1205, |
|
"step": 80300 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00011506609014110512, |
|
"loss": 2.1178, |
|
"step": 80400 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00011483527474725713, |
|
"loss": 2.1198, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00011460445935340913, |
|
"loss": 2.1149, |
|
"step": 80600 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00011437364395956113, |
|
"loss": 2.1232, |
|
"step": 80700 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00011414282856571313, |
|
"loss": 2.1155, |
|
"step": 80800 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00011391201317186513, |
|
"loss": 2.1183, |
|
"step": 80900 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00011368350593195561, |
|
"loss": 2.1242, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_accuracy": 0.5361350055994728, |
|
"eval_loss": 2.118927478790283, |
|
"eval_runtime": 41.7606, |
|
"eval_samples_per_second": 412.327, |
|
"eval_steps_per_second": 2.466, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00011345269053810761, |
|
"loss": 2.1192, |
|
"step": 81100 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00011322187514425961, |
|
"loss": 2.1177, |
|
"step": 81200 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00011299105975041161, |
|
"loss": 2.1175, |
|
"step": 81300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00011276024435656361, |
|
"loss": 2.1177, |
|
"step": 81400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00011253173711665409, |
|
"loss": 2.1185, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00011230092172280609, |
|
"loss": 2.1196, |
|
"step": 81600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00011207010632895809, |
|
"loss": 2.1169, |
|
"step": 81700 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0001118392909351101, |
|
"loss": 2.12, |
|
"step": 81800 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0001116084755412621, |
|
"loss": 2.1172, |
|
"step": 81900 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0001113776601474141, |
|
"loss": 2.1214, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_accuracy": 0.536094764390005, |
|
"eval_loss": 2.117910385131836, |
|
"eval_runtime": 40.337, |
|
"eval_samples_per_second": 426.879, |
|
"eval_steps_per_second": 2.553, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0001111468447535661, |
|
"loss": 2.1205, |
|
"step": 82100 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0001109160293597181, |
|
"loss": 2.1193, |
|
"step": 82200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0001106852139658701, |
|
"loss": 2.1146, |
|
"step": 82300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00011045439857202208, |
|
"loss": 2.1195, |
|
"step": 82400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00011022358317817408, |
|
"loss": 2.1165, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00010999276778432608, |
|
"loss": 2.1221, |
|
"step": 82600 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00010976195239047808, |
|
"loss": 2.1176, |
|
"step": 82700 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00010953113699663008, |
|
"loss": 2.115, |
|
"step": 82800 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00010930032160278208, |
|
"loss": 2.1198, |
|
"step": 82900 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00010906950620893409, |
|
"loss": 2.1185, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.5361555834906779, |
|
"eval_loss": 2.1171975135803223, |
|
"eval_runtime": 42.1584, |
|
"eval_samples_per_second": 408.436, |
|
"eval_steps_per_second": 2.443, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00010883869081508609, |
|
"loss": 2.118, |
|
"step": 83100 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00010860787542123809, |
|
"loss": 2.1215, |
|
"step": 83200 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00010837706002739008, |
|
"loss": 2.1185, |
|
"step": 83300 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00010814855278748057, |
|
"loss": 2.1229, |
|
"step": 83400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00010791773739363257, |
|
"loss": 2.116, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00010768923015372304, |
|
"loss": 2.1194, |
|
"step": 83600 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00010745841475987505, |
|
"loss": 2.1139, |
|
"step": 83700 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00010722759936602705, |
|
"loss": 2.1142, |
|
"step": 83800 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00010699678397217905, |
|
"loss": 2.1141, |
|
"step": 83900 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00010676596857833105, |
|
"loss": 2.1172, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.5361757040954119, |
|
"eval_loss": 2.117605447769165, |
|
"eval_runtime": 42.9428, |
|
"eval_samples_per_second": 400.976, |
|
"eval_steps_per_second": 2.399, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00010653515318448305, |
|
"loss": 2.1128, |
|
"step": 84100 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00010630433779063505, |
|
"loss": 2.1161, |
|
"step": 84200 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00010607352239678704, |
|
"loss": 2.1202, |
|
"step": 84300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00010584270700293904, |
|
"loss": 2.114, |
|
"step": 84400 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00010561189160909103, |
|
"loss": 2.1147, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00010538107621524303, |
|
"loss": 2.1156, |
|
"step": 84600 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00010515026082139503, |
|
"loss": 2.1217, |
|
"step": 84700 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00010491944542754704, |
|
"loss": 2.1207, |
|
"step": 84800 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00010468863003369904, |
|
"loss": 2.1173, |
|
"step": 84900 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00010445781463985104, |
|
"loss": 2.1159, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.5366590558954972, |
|
"eval_loss": 2.1167280673980713, |
|
"eval_runtime": 42.3863, |
|
"eval_samples_per_second": 406.239, |
|
"eval_steps_per_second": 2.43, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00010422699924600304, |
|
"loss": 2.1169, |
|
"step": 85100 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00010399618385215503, |
|
"loss": 2.1139, |
|
"step": 85200 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00010376536845830703, |
|
"loss": 2.1178, |
|
"step": 85300 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00010353455306445903, |
|
"loss": 2.1133, |
|
"step": 85400 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00010330373767061103, |
|
"loss": 2.116, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00010307292227676304, |
|
"loss": 2.1143, |
|
"step": 85600 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00010284210688291504, |
|
"loss": 2.1177, |
|
"step": 85700 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00010261129148906704, |
|
"loss": 2.1168, |
|
"step": 85800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00010238047609521904, |
|
"loss": 2.114, |
|
"step": 85900 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00010214966070137104, |
|
"loss": 2.1162, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_accuracy": 0.5367303925850084, |
|
"eval_loss": 2.115847110748291, |
|
"eval_runtime": 42.4066, |
|
"eval_samples_per_second": 406.045, |
|
"eval_steps_per_second": 2.429, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00010191884530752304, |
|
"loss": 2.1194, |
|
"step": 86100 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00010168802991367505, |
|
"loss": 2.1192, |
|
"step": 86200 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00010145721451982705, |
|
"loss": 2.119, |
|
"step": 86300 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00010122639912597904, |
|
"loss": 2.1115, |
|
"step": 86400 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00010099558373213102, |
|
"loss": 2.1163, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00010076476833828302, |
|
"loss": 2.1127, |
|
"step": 86600 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00010053395294443503, |
|
"loss": 2.1123, |
|
"step": 86700 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00010030313755058703, |
|
"loss": 2.1158, |
|
"step": 86800 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00010007232215673903, |
|
"loss": 2.1162, |
|
"step": 86900 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.98438149168295e-05, |
|
"loss": 2.1134, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.5367422820332602, |
|
"eval_loss": 2.1160144805908203, |
|
"eval_runtime": 42.1023, |
|
"eval_samples_per_second": 408.98, |
|
"eval_steps_per_second": 2.446, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.96129995229815e-05, |
|
"loss": 2.1193, |
|
"step": 87100 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.938449228307198e-05, |
|
"loss": 2.12, |
|
"step": 87200 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.915367688922398e-05, |
|
"loss": 2.1138, |
|
"step": 87300 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.892286149537599e-05, |
|
"loss": 2.116, |
|
"step": 87400 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.869204610152799e-05, |
|
"loss": 2.1137, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.846353886161846e-05, |
|
"loss": 2.1134, |
|
"step": 87600 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.823272346777046e-05, |
|
"loss": 2.1162, |
|
"step": 87700 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.800190807392247e-05, |
|
"loss": 2.1103, |
|
"step": 87800 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.777109268007447e-05, |
|
"loss": 2.1176, |
|
"step": 87900 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.754027728622647e-05, |
|
"loss": 2.1158, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.5369151363193835, |
|
"eval_loss": 2.114868640899658, |
|
"eval_runtime": 41.8639, |
|
"eval_samples_per_second": 411.309, |
|
"eval_steps_per_second": 2.46, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.730946189237847e-05, |
|
"loss": 2.1166, |
|
"step": 88100 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.707864649853047e-05, |
|
"loss": 2.1138, |
|
"step": 88200 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.684783110468247e-05, |
|
"loss": 2.1188, |
|
"step": 88300 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.661701571083448e-05, |
|
"loss": 2.1124, |
|
"step": 88400 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.638620031698648e-05, |
|
"loss": 2.1121, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.615538492313848e-05, |
|
"loss": 2.116, |
|
"step": 88600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.592456952929045e-05, |
|
"loss": 2.1127, |
|
"step": 88700 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.569375413544245e-05, |
|
"loss": 2.1189, |
|
"step": 88800 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.546293874159446e-05, |
|
"loss": 2.1155, |
|
"step": 88900 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.523212334774646e-05, |
|
"loss": 2.1183, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.5370953071890463, |
|
"eval_loss": 2.1145880222320557, |
|
"eval_runtime": 41.8951, |
|
"eval_samples_per_second": 411.002, |
|
"eval_steps_per_second": 2.459, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.500130795389846e-05, |
|
"loss": 2.1146, |
|
"step": 89100 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.477049256005046e-05, |
|
"loss": 2.1156, |
|
"step": 89200 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.453967716620246e-05, |
|
"loss": 2.1137, |
|
"step": 89300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.430886177235446e-05, |
|
"loss": 2.1145, |
|
"step": 89400 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.407804637850647e-05, |
|
"loss": 2.1128, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.384723098465847e-05, |
|
"loss": 2.1165, |
|
"step": 89600 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.361641559081046e-05, |
|
"loss": 2.1132, |
|
"step": 89700 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.338560019696246e-05, |
|
"loss": 2.1152, |
|
"step": 89800 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.315478480311446e-05, |
|
"loss": 2.1147, |
|
"step": 89900 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.292396940926646e-05, |
|
"loss": 2.1172, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_accuracy": 0.5371268599555609, |
|
"eval_loss": 2.113826036453247, |
|
"eval_runtime": 42.6539, |
|
"eval_samples_per_second": 403.691, |
|
"eval_steps_per_second": 2.415, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.269315401541846e-05, |
|
"loss": 2.1169, |
|
"step": 90100 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.246233862157046e-05, |
|
"loss": 2.1186, |
|
"step": 90200 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.223152322772247e-05, |
|
"loss": 2.1137, |
|
"step": 90300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.200070783387447e-05, |
|
"loss": 2.1142, |
|
"step": 90400 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.176989244002647e-05, |
|
"loss": 2.1215, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.153907704617847e-05, |
|
"loss": 2.1197, |
|
"step": 90600 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.130826165233045e-05, |
|
"loss": 2.1103, |
|
"step": 90700 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.107744625848245e-05, |
|
"loss": 2.1095, |
|
"step": 90800 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.084663086463445e-05, |
|
"loss": 2.112, |
|
"step": 90900 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.061581547078645e-05, |
|
"loss": 2.1192, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.5370097946189272, |
|
"eval_loss": 2.1132709980010986, |
|
"eval_runtime": 42.1032, |
|
"eval_samples_per_second": 408.971, |
|
"eval_steps_per_second": 2.446, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.038500007693845e-05, |
|
"loss": 2.1137, |
|
"step": 91100 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.015418468309045e-05, |
|
"loss": 2.1144, |
|
"step": 91200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 8.992567744318093e-05, |
|
"loss": 2.1123, |
|
"step": 91300 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 8.969486204933293e-05, |
|
"loss": 2.1098, |
|
"step": 91400 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 8.946404665548493e-05, |
|
"loss": 2.1164, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 8.923323126163693e-05, |
|
"loss": 2.1149, |
|
"step": 91600 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 8.900241586778894e-05, |
|
"loss": 2.1143, |
|
"step": 91700 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.877160047394094e-05, |
|
"loss": 2.1121, |
|
"step": 91800 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.854309323403141e-05, |
|
"loss": 2.1109, |
|
"step": 91900 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.831227784018341e-05, |
|
"loss": 2.1107, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.5372498700163205, |
|
"eval_loss": 2.11297345161438, |
|
"eval_runtime": 42.2737, |
|
"eval_samples_per_second": 407.322, |
|
"eval_steps_per_second": 2.437, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.808146244633542e-05, |
|
"loss": 2.1081, |
|
"step": 92100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.785064705248742e-05, |
|
"loss": 2.1129, |
|
"step": 92200 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.761983165863942e-05, |
|
"loss": 2.12, |
|
"step": 92300 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.738901626479142e-05, |
|
"loss": 2.1157, |
|
"step": 92400 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.715820087094342e-05, |
|
"loss": 2.1179, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.692738547709542e-05, |
|
"loss": 2.1146, |
|
"step": 92600 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.669657008324743e-05, |
|
"loss": 2.1089, |
|
"step": 92700 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.646575468939941e-05, |
|
"loss": 2.114, |
|
"step": 92800 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.623724744948989e-05, |
|
"loss": 2.1141, |
|
"step": 92900 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.600643205564188e-05, |
|
"loss": 2.1159, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.5374611363660268, |
|
"eval_loss": 2.112429618835449, |
|
"eval_runtime": 42.4437, |
|
"eval_samples_per_second": 405.69, |
|
"eval_steps_per_second": 2.427, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.577561666179388e-05, |
|
"loss": 2.1105, |
|
"step": 93100 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.554480126794588e-05, |
|
"loss": 2.108, |
|
"step": 93200 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.531398587409788e-05, |
|
"loss": 2.1121, |
|
"step": 93300 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.508317048024988e-05, |
|
"loss": 2.1102, |
|
"step": 93400 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.485235508640188e-05, |
|
"loss": 2.1134, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.462153969255389e-05, |
|
"loss": 2.1149, |
|
"step": 93600 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.439072429870589e-05, |
|
"loss": 2.112, |
|
"step": 93700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.415990890485789e-05, |
|
"loss": 2.1162, |
|
"step": 93800 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.392909351100989e-05, |
|
"loss": 2.1129, |
|
"step": 93900 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.369827811716189e-05, |
|
"loss": 2.113, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_accuracy": 0.5374259253077424, |
|
"eval_loss": 2.112016201019287, |
|
"eval_runtime": 42.7468, |
|
"eval_samples_per_second": 402.814, |
|
"eval_steps_per_second": 2.41, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.34674627233139e-05, |
|
"loss": 2.1132, |
|
"step": 94100 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.323664732946588e-05, |
|
"loss": 2.1135, |
|
"step": 94200 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.300583193561788e-05, |
|
"loss": 2.1176, |
|
"step": 94300 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.277501654176989e-05, |
|
"loss": 2.112, |
|
"step": 94400 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.254420114792189e-05, |
|
"loss": 2.1102, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.231338575407389e-05, |
|
"loss": 2.1129, |
|
"step": 94600 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.208257036022589e-05, |
|
"loss": 2.1089, |
|
"step": 94700 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.185175496637789e-05, |
|
"loss": 2.1145, |
|
"step": 94800 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.162093957252988e-05, |
|
"loss": 2.1159, |
|
"step": 94900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.139012417868188e-05, |
|
"loss": 2.1151, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_accuracy": 0.5375027494349083, |
|
"eval_loss": 2.11134672164917, |
|
"eval_runtime": 43.6521, |
|
"eval_samples_per_second": 394.46, |
|
"eval_steps_per_second": 2.36, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.115930878483387e-05, |
|
"loss": 2.1102, |
|
"step": 95100 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.092849339098587e-05, |
|
"loss": 2.1124, |
|
"step": 95200 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.069998615107636e-05, |
|
"loss": 2.1171, |
|
"step": 95300 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.046917075722836e-05, |
|
"loss": 2.112, |
|
"step": 95400 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.023835536338036e-05, |
|
"loss": 2.1095, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.000753996953235e-05, |
|
"loss": 2.1094, |
|
"step": 95600 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.977672457568435e-05, |
|
"loss": 2.1104, |
|
"step": 95700 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.954590918183636e-05, |
|
"loss": 2.1118, |
|
"step": 95800 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.931509378798836e-05, |
|
"loss": 2.1128, |
|
"step": 95900 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.908427839414036e-05, |
|
"loss": 2.1117, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.5376399353762759, |
|
"eval_loss": 2.110729932785034, |
|
"eval_runtime": 41.6046, |
|
"eval_samples_per_second": 413.872, |
|
"eval_steps_per_second": 2.476, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.885346300029236e-05, |
|
"loss": 2.1097, |
|
"step": 96100 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.862264760644436e-05, |
|
"loss": 2.1146, |
|
"step": 96200 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.839183221259636e-05, |
|
"loss": 2.1161, |
|
"step": 96300 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.816332497268684e-05, |
|
"loss": 2.1114, |
|
"step": 96400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.793250957883884e-05, |
|
"loss": 2.1112, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.770169418499084e-05, |
|
"loss": 2.1093, |
|
"step": 96600 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.747087879114284e-05, |
|
"loss": 2.1079, |
|
"step": 96700 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.724006339729485e-05, |
|
"loss": 2.1099, |
|
"step": 96800 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.700924800344685e-05, |
|
"loss": 2.1086, |
|
"step": 96900 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.677843260959882e-05, |
|
"loss": 2.1111, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_accuracy": 0.537516468029045, |
|
"eval_loss": 2.1103692054748535, |
|
"eval_runtime": 41.9692, |
|
"eval_samples_per_second": 410.277, |
|
"eval_steps_per_second": 2.454, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.654761721575082e-05, |
|
"loss": 2.1119, |
|
"step": 97100 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.631680182190283e-05, |
|
"loss": 2.1093, |
|
"step": 97200 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.608598642805483e-05, |
|
"loss": 2.1154, |
|
"step": 97300 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.585747918814532e-05, |
|
"loss": 2.1132, |
|
"step": 97400 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.56266637942973e-05, |
|
"loss": 2.1112, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.53958484004493e-05, |
|
"loss": 2.1089, |
|
"step": 97600 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.516503300660131e-05, |
|
"loss": 2.1137, |
|
"step": 97700 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.493421761275331e-05, |
|
"loss": 2.1109, |
|
"step": 97800 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.470340221890531e-05, |
|
"loss": 2.1138, |
|
"step": 97900 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.447258682505731e-05, |
|
"loss": 2.109, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_accuracy": 0.537841141423615, |
|
"eval_loss": 2.110281467437744, |
|
"eval_runtime": 41.9177, |
|
"eval_samples_per_second": 410.781, |
|
"eval_steps_per_second": 2.457, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.424177143120931e-05, |
|
"loss": 2.1103, |
|
"step": 98100 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.401095603736132e-05, |
|
"loss": 2.1111, |
|
"step": 98200 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.378014064351332e-05, |
|
"loss": 2.113, |
|
"step": 98300 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.354932524966532e-05, |
|
"loss": 2.1077, |
|
"step": 98400 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.33208180097558e-05, |
|
"loss": 2.1121, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.30900026159078e-05, |
|
"loss": 2.11, |
|
"step": 98600 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.286149537599827e-05, |
|
"loss": 2.1061, |
|
"step": 98700 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.263067998215027e-05, |
|
"loss": 2.1115, |
|
"step": 98800 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.239986458830226e-05, |
|
"loss": 2.1106, |
|
"step": 98900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.216904919445426e-05, |
|
"loss": 2.1121, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.5379024178107593, |
|
"eval_loss": 2.109757900238037, |
|
"eval_runtime": 42.1157, |
|
"eval_samples_per_second": 408.85, |
|
"eval_steps_per_second": 2.446, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.193823380060626e-05, |
|
"loss": 2.1124, |
|
"step": 99100 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.170741840675827e-05, |
|
"loss": 2.1043, |
|
"step": 99200 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.147660301291027e-05, |
|
"loss": 2.1094, |
|
"step": 99300 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.124578761906227e-05, |
|
"loss": 2.1045, |
|
"step": 99400 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.101497222521427e-05, |
|
"loss": 2.1123, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.078415683136627e-05, |
|
"loss": 2.1118, |
|
"step": 99600 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.055334143751826e-05, |
|
"loss": 2.1091, |
|
"step": 99700 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.032252604367026e-05, |
|
"loss": 2.1096, |
|
"step": 99800 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.009171064982226e-05, |
|
"loss": 2.1104, |
|
"step": 99900 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.986089525597426e-05, |
|
"loss": 2.1075, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_accuracy": 0.5377094429199022, |
|
"eval_loss": 2.108910083770752, |
|
"eval_runtime": 42.9094, |
|
"eval_samples_per_second": 401.287, |
|
"eval_steps_per_second": 2.4, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.963007986212627e-05, |
|
"loss": 2.1124, |
|
"step": 100100 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.939926446827827e-05, |
|
"loss": 2.1086, |
|
"step": 100200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.916844907443027e-05, |
|
"loss": 2.1155, |
|
"step": 100300 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.893763368058227e-05, |
|
"loss": 2.1083, |
|
"step": 100400 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.870681828673427e-05, |
|
"loss": 2.1061, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.847600289288626e-05, |
|
"loss": 2.1066, |
|
"step": 100600 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.824749565297675e-05, |
|
"loss": 2.1133, |
|
"step": 100700 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.801668025912874e-05, |
|
"loss": 2.106, |
|
"step": 100800 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.778586486528074e-05, |
|
"loss": 2.1102, |
|
"step": 100900 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.755504947143274e-05, |
|
"loss": 2.1094, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.5378210208188812, |
|
"eval_loss": 2.108694553375244, |
|
"eval_runtime": 41.8265, |
|
"eval_samples_per_second": 411.677, |
|
"eval_steps_per_second": 2.463, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.732423407758474e-05, |
|
"loss": 2.1073, |
|
"step": 101100 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.709341868373674e-05, |
|
"loss": 2.1075, |
|
"step": 101200 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.686260328988875e-05, |
|
"loss": 2.1075, |
|
"step": 101300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.663178789604075e-05, |
|
"loss": 2.1062, |
|
"step": 101400 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.640097250219274e-05, |
|
"loss": 2.1084, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.617015710834474e-05, |
|
"loss": 2.1118, |
|
"step": 101600 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.593934171449674e-05, |
|
"loss": 2.11, |
|
"step": 101700 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.570852632064874e-05, |
|
"loss": 2.1119, |
|
"step": 101800 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.547771092680074e-05, |
|
"loss": 2.1102, |
|
"step": 101900 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.524689553295273e-05, |
|
"loss": 2.1113, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.53814523692698, |
|
"eval_loss": 2.107855796813965, |
|
"eval_runtime": 43.4591, |
|
"eval_samples_per_second": 396.212, |
|
"eval_steps_per_second": 2.37, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.501608013910473e-05, |
|
"loss": 2.1084, |
|
"step": 102100 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.478757289919522e-05, |
|
"loss": 2.1073, |
|
"step": 102200 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.455675750534722e-05, |
|
"loss": 2.11, |
|
"step": 102300 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.432594211149922e-05, |
|
"loss": 2.1095, |
|
"step": 102400 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.409512671765121e-05, |
|
"loss": 2.1074, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.386431132380321e-05, |
|
"loss": 2.1089, |
|
"step": 102600 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.363349592995522e-05, |
|
"loss": 2.1111, |
|
"step": 102700 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.340268053610722e-05, |
|
"loss": 2.1094, |
|
"step": 102800 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.31718651422592e-05, |
|
"loss": 2.1094, |
|
"step": 102900 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.294104974841121e-05, |
|
"loss": 2.1065, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.5380492067680227, |
|
"eval_loss": 2.10768985748291, |
|
"eval_runtime": 41.9976, |
|
"eval_samples_per_second": 409.999, |
|
"eval_steps_per_second": 2.453, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.27125425085017e-05, |
|
"loss": 2.1077, |
|
"step": 103100 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.24817271146537e-05, |
|
"loss": 2.1112, |
|
"step": 103200 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.22509117208057e-05, |
|
"loss": 2.112, |
|
"step": 103300 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.202009632695769e-05, |
|
"loss": 2.1022, |
|
"step": 103400 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.178928093310969e-05, |
|
"loss": 2.1102, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.155846553926169e-05, |
|
"loss": 2.1036, |
|
"step": 103600 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.132765014541369e-05, |
|
"loss": 2.1095, |
|
"step": 103700 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.10968347515657e-05, |
|
"loss": 2.1067, |
|
"step": 103800 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.086601935771769e-05, |
|
"loss": 2.1115, |
|
"step": 103900 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.063520396386969e-05, |
|
"loss": 2.107, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.5381827344176205, |
|
"eval_loss": 2.107125759124756, |
|
"eval_runtime": 42.2638, |
|
"eval_samples_per_second": 407.418, |
|
"eval_steps_per_second": 2.437, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.040438857002169e-05, |
|
"loss": 2.104, |
|
"step": 104100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.017357317617369e-05, |
|
"loss": 2.1076, |
|
"step": 104200 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.994275778232569e-05, |
|
"loss": 2.1091, |
|
"step": 104300 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.971194238847769e-05, |
|
"loss": 2.0995, |
|
"step": 104400 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.948112699462969e-05, |
|
"loss": 2.1077, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.9250311600781693e-05, |
|
"loss": 2.1075, |
|
"step": 104600 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.902180436087217e-05, |
|
"loss": 2.1061, |
|
"step": 104700 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.879098896702417e-05, |
|
"loss": 2.1054, |
|
"step": 104800 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.856017357317617e-05, |
|
"loss": 2.1075, |
|
"step": 104900 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.832935817932817e-05, |
|
"loss": 2.109, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.5384561917274133, |
|
"eval_loss": 2.106707811355591, |
|
"eval_runtime": 41.8526, |
|
"eval_samples_per_second": 411.421, |
|
"eval_steps_per_second": 2.461, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.809854278548016e-05, |
|
"loss": 2.1062, |
|
"step": 105100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.7867727391632163e-05, |
|
"loss": 2.1043, |
|
"step": 105200 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.7636911997784165e-05, |
|
"loss": 2.106, |
|
"step": 105300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.740609660393617e-05, |
|
"loss": 2.1046, |
|
"step": 105400 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.717528121008817e-05, |
|
"loss": 2.1045, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.694446581624017e-05, |
|
"loss": 2.1079, |
|
"step": 105600 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.6713650422392165e-05, |
|
"loss": 2.1083, |
|
"step": 105700 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.6482835028544166e-05, |
|
"loss": 2.1055, |
|
"step": 105800 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.625201963469617e-05, |
|
"loss": 2.1116, |
|
"step": 105900 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.602120424084816e-05, |
|
"loss": 2.1049, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.5384228098150139, |
|
"eval_loss": 2.105968952178955, |
|
"eval_runtime": 43.8044, |
|
"eval_samples_per_second": 393.088, |
|
"eval_steps_per_second": 2.351, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.5790388847000165e-05, |
|
"loss": 2.1072, |
|
"step": 106100 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.555957345315216e-05, |
|
"loss": 2.1036, |
|
"step": 106200 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.532875805930416e-05, |
|
"loss": 2.108, |
|
"step": 106300 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.510025081939464e-05, |
|
"loss": 2.1048, |
|
"step": 106400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.4869435425546645e-05, |
|
"loss": 2.1059, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.4638620031698646e-05, |
|
"loss": 2.1094, |
|
"step": 106600 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.440780463785064e-05, |
|
"loss": 2.1032, |
|
"step": 106700 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.417698924400264e-05, |
|
"loss": 2.1073, |
|
"step": 106800 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.3946173850154645e-05, |
|
"loss": 2.1063, |
|
"step": 106900 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.3715358456306646e-05, |
|
"loss": 2.1071, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_accuracy": 0.5385513073134283, |
|
"eval_loss": 2.1057865619659424, |
|
"eval_runtime": 42.7724, |
|
"eval_samples_per_second": 402.573, |
|
"eval_steps_per_second": 2.408, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.3484543062458634e-05, |
|
"loss": 2.1054, |
|
"step": 107100 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.3253727668610636e-05, |
|
"loss": 2.105, |
|
"step": 107200 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.302291227476264e-05, |
|
"loss": 2.1086, |
|
"step": 107300 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.279209688091464e-05, |
|
"loss": 2.1043, |
|
"step": 107400 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.256128148706664e-05, |
|
"loss": 2.1082, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.233046609321864e-05, |
|
"loss": 2.1031, |
|
"step": 107600 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.209965069937064e-05, |
|
"loss": 2.1051, |
|
"step": 107700 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.186883530552264e-05, |
|
"loss": 2.1072, |
|
"step": 107800 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.163801991167464e-05, |
|
"loss": 2.1057, |
|
"step": 107900 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.140720451782664e-05, |
|
"loss": 2.1026, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_accuracy": 0.5385348450004641, |
|
"eval_loss": 2.1053857803344727, |
|
"eval_runtime": 42.8825, |
|
"eval_samples_per_second": 401.54, |
|
"eval_steps_per_second": 2.402, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.117638912397864e-05, |
|
"loss": 2.1031, |
|
"step": 108100 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.094557373013063e-05, |
|
"loss": 2.1087, |
|
"step": 108200 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.0714758336282634e-05, |
|
"loss": 2.1047, |
|
"step": 108300 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.0483942942434635e-05, |
|
"loss": 2.1061, |
|
"step": 108400 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.025312754858664e-05, |
|
"loss": 2.1077, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.002231215473864e-05, |
|
"loss": 2.1016, |
|
"step": 108600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.979149676089064e-05, |
|
"loss": 2.1062, |
|
"step": 108700 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.9562989520981115e-05, |
|
"loss": 2.1049, |
|
"step": 108800 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.933217412713312e-05, |
|
"loss": 2.1059, |
|
"step": 108900 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.910135873328512e-05, |
|
"loss": 2.1059, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.5388137897479117, |
|
"eval_loss": 2.104844570159912, |
|
"eval_runtime": 42.6237, |
|
"eval_samples_per_second": 403.977, |
|
"eval_steps_per_second": 2.416, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.887054333943711e-05, |
|
"loss": 2.1056, |
|
"step": 109100 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.863972794558911e-05, |
|
"loss": 2.1047, |
|
"step": 109200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.840891255174111e-05, |
|
"loss": 2.1062, |
|
"step": 109300 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.817809715789311e-05, |
|
"loss": 2.1017, |
|
"step": 109400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.794728176404511e-05, |
|
"loss": 2.105, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.7716466370197115e-05, |
|
"loss": 2.1075, |
|
"step": 109600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.748565097634911e-05, |
|
"loss": 2.1013, |
|
"step": 109700 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.725483558250111e-05, |
|
"loss": 2.1021, |
|
"step": 109800 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.702402018865311e-05, |
|
"loss": 2.1023, |
|
"step": 109900 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.6795512948743595e-05, |
|
"loss": 2.1, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_accuracy": 0.5388586038220918, |
|
"eval_loss": 2.104264259338379, |
|
"eval_runtime": 43.2709, |
|
"eval_samples_per_second": 397.935, |
|
"eval_steps_per_second": 2.38, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.65646975548956e-05, |
|
"loss": 2.1045, |
|
"step": 110100 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.6333882161047585e-05, |
|
"loss": 2.1042, |
|
"step": 110200 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.6103066767199587e-05, |
|
"loss": 2.1043, |
|
"step": 110300 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.587225137335159e-05, |
|
"loss": 2.1064, |
|
"step": 110400 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.564143597950359e-05, |
|
"loss": 2.1061, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.541062058565559e-05, |
|
"loss": 2.101, |
|
"step": 110600 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.5179805191807586e-05, |
|
"loss": 2.1052, |
|
"step": 110700 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.494898979795959e-05, |
|
"loss": 2.1033, |
|
"step": 110800 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.471817440411159e-05, |
|
"loss": 2.1032, |
|
"step": 110900 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.448735901026359e-05, |
|
"loss": 2.1017, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_accuracy": 0.5389066189015704, |
|
"eval_loss": 2.1037588119506836, |
|
"eval_runtime": 42.5584, |
|
"eval_samples_per_second": 404.597, |
|
"eval_steps_per_second": 2.42, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.425885177035407e-05, |
|
"loss": 2.1025, |
|
"step": 111100 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.402803637650607e-05, |
|
"loss": 2.1029, |
|
"step": 111200 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.379722098265806e-05, |
|
"loss": 2.1051, |
|
"step": 111300 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.3566405588810065e-05, |
|
"loss": 2.0987, |
|
"step": 111400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.3335590194962066e-05, |
|
"loss": 2.1042, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.310477480111406e-05, |
|
"loss": 2.1041, |
|
"step": 111600 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.287395940726606e-05, |
|
"loss": 2.1009, |
|
"step": 111700 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.2643144013418064e-05, |
|
"loss": 2.1073, |
|
"step": 111800 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.2412328619570066e-05, |
|
"loss": 2.1016, |
|
"step": 111900 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.218151322572207e-05, |
|
"loss": 2.107, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.5390323726811574, |
|
"eval_loss": 2.1030471324920654, |
|
"eval_runtime": 42.0291, |
|
"eval_samples_per_second": 409.692, |
|
"eval_steps_per_second": 2.451, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.195300598581254e-05, |
|
"loss": 2.1035, |
|
"step": 112100 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.1722190591964545e-05, |
|
"loss": 2.101, |
|
"step": 112200 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.149137519811654e-05, |
|
"loss": 2.1045, |
|
"step": 112300 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.126055980426854e-05, |
|
"loss": 2.1017, |
|
"step": 112400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.102974441042054e-05, |
|
"loss": 2.1007, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.079892901657254e-05, |
|
"loss": 2.1082, |
|
"step": 112600 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.056811362272454e-05, |
|
"loss": 2.1044, |
|
"step": 112700 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.033729822887654e-05, |
|
"loss": 2.1046, |
|
"step": 112800 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.010648283502854e-05, |
|
"loss": 2.1037, |
|
"step": 112900 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.9875667441180544e-05, |
|
"loss": 2.101, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_accuracy": 0.5391526390230897, |
|
"eval_loss": 2.102754831314087, |
|
"eval_runtime": 42.2499, |
|
"eval_samples_per_second": 407.551, |
|
"eval_steps_per_second": 2.438, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.9644852047332546e-05, |
|
"loss": 2.1028, |
|
"step": 113100 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.941403665348454e-05, |
|
"loss": 2.1023, |
|
"step": 113200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.9183221259636536e-05, |
|
"loss": 2.1042, |
|
"step": 113300 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.895471401972702e-05, |
|
"loss": 2.1022, |
|
"step": 113400 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.872389862587901e-05, |
|
"loss": 2.0968, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.8493083232031014e-05, |
|
"loss": 2.1042, |
|
"step": 113600 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.8262267838183016e-05, |
|
"loss": 2.1042, |
|
"step": 113700 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.803145244433502e-05, |
|
"loss": 2.1035, |
|
"step": 113800 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.780063705048702e-05, |
|
"loss": 2.1005, |
|
"step": 113900 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.756982165663902e-05, |
|
"loss": 2.0995, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.5390808450471074, |
|
"eval_loss": 2.102349042892456, |
|
"eval_runtime": 42.0963, |
|
"eval_samples_per_second": 409.038, |
|
"eval_steps_per_second": 2.447, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.7339006262791016e-05, |
|
"loss": 2.1037, |
|
"step": 114100 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.710819086894302e-05, |
|
"loss": 2.1015, |
|
"step": 114200 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.687737547509501e-05, |
|
"loss": 2.1039, |
|
"step": 114300 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.6646560081247014e-05, |
|
"loss": 2.1005, |
|
"step": 114400 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.6415744687399015e-05, |
|
"loss": 2.1021, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.618492929355101e-05, |
|
"loss": 2.0999, |
|
"step": 114600 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.595411389970301e-05, |
|
"loss": 2.104, |
|
"step": 114700 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.5723298505855013e-05, |
|
"loss": 2.1058, |
|
"step": 114800 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.5492483112007015e-05, |
|
"loss": 2.0985, |
|
"step": 114900 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.526166771815902e-05, |
|
"loss": 2.1076, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.5391476088719063, |
|
"eval_loss": 2.1017749309539795, |
|
"eval_runtime": 42.7531, |
|
"eval_samples_per_second": 402.755, |
|
"eval_steps_per_second": 2.409, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.503085232431101e-05, |
|
"loss": 2.0992, |
|
"step": 115100 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.4802345084401494e-05, |
|
"loss": 2.0993, |
|
"step": 115200 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.457152969055349e-05, |
|
"loss": 2.0979, |
|
"step": 115300 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.434071429670549e-05, |
|
"loss": 2.1048, |
|
"step": 115400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.410989890285749e-05, |
|
"loss": 2.0993, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.3879083509009494e-05, |
|
"loss": 2.0937, |
|
"step": 115600 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.364826811516149e-05, |
|
"loss": 2.107, |
|
"step": 115700 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.341745272131349e-05, |
|
"loss": 2.0987, |
|
"step": 115800 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.318663732746549e-05, |
|
"loss": 2.1053, |
|
"step": 115900 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.295582193361749e-05, |
|
"loss": 2.1011, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.5393575033621988, |
|
"eval_loss": 2.1012144088745117, |
|
"eval_runtime": 42.3952, |
|
"eval_samples_per_second": 406.155, |
|
"eval_steps_per_second": 2.43, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.272500653976949e-05, |
|
"loss": 2.0986, |
|
"step": 116100 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.249419114592149e-05, |
|
"loss": 2.1036, |
|
"step": 116200 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.2263375752073485e-05, |
|
"loss": 2.0991, |
|
"step": 116300 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.203486851216397e-05, |
|
"loss": 2.0999, |
|
"step": 116400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.180405311831597e-05, |
|
"loss": 2.1057, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.157323772446797e-05, |
|
"loss": 2.1005, |
|
"step": 116600 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.134242233061997e-05, |
|
"loss": 2.1039, |
|
"step": 116700 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.1111606936771967e-05, |
|
"loss": 2.1026, |
|
"step": 116800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.088079154292397e-05, |
|
"loss": 2.1013, |
|
"step": 116900 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.064997614907597e-05, |
|
"loss": 2.1006, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.5393684782375082, |
|
"eval_loss": 2.1007778644561768, |
|
"eval_runtime": 42.8091, |
|
"eval_samples_per_second": 402.227, |
|
"eval_steps_per_second": 2.406, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.0419160755227968e-05, |
|
"loss": 2.1035, |
|
"step": 117100 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.0188345361379966e-05, |
|
"loss": 2.0997, |
|
"step": 117200 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.9957529967531965e-05, |
|
"loss": 2.1021, |
|
"step": 117300 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.9726714573683963e-05, |
|
"loss": 2.0984, |
|
"step": 117400 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.9495899179835964e-05, |
|
"loss": 2.1008, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.9267391939926443e-05, |
|
"loss": 2.1041, |
|
"step": 117600 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.9036576546078445e-05, |
|
"loss": 2.0963, |
|
"step": 117700 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.8805761152230443e-05, |
|
"loss": 2.099, |
|
"step": 117800 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.857494575838244e-05, |
|
"loss": 2.1, |
|
"step": 117900 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.8344130364534443e-05, |
|
"loss": 2.0955, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_accuracy": 0.5395468199612862, |
|
"eval_loss": 2.1003952026367188, |
|
"eval_runtime": 42.2852, |
|
"eval_samples_per_second": 407.211, |
|
"eval_steps_per_second": 2.436, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.8113314970686445e-05, |
|
"loss": 2.1028, |
|
"step": 118100 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.7882499576838443e-05, |
|
"loss": 2.0993, |
|
"step": 118200 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.765168418299044e-05, |
|
"loss": 2.105, |
|
"step": 118300 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.742086878914244e-05, |
|
"loss": 2.0944, |
|
"step": 118400 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.719005339529444e-05, |
|
"loss": 2.0958, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.6959238001446443e-05, |
|
"loss": 2.0976, |
|
"step": 118600 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.673073076153692e-05, |
|
"loss": 2.1035, |
|
"step": 118700 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.6499915367688923e-05, |
|
"loss": 2.1029, |
|
"step": 118800 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.6269099973840918e-05, |
|
"loss": 2.0995, |
|
"step": 118900 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.603828457999292e-05, |
|
"loss": 2.1007, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_accuracy": 0.5395856893113403, |
|
"eval_loss": 2.0998544692993164, |
|
"eval_runtime": 42.7797, |
|
"eval_samples_per_second": 402.504, |
|
"eval_steps_per_second": 2.408, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.5807469186144918e-05, |
|
"loss": 2.0989, |
|
"step": 119100 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.557665379229692e-05, |
|
"loss": 2.1004, |
|
"step": 119200 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.534583839844892e-05, |
|
"loss": 2.0977, |
|
"step": 119300 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.5115023004600916e-05, |
|
"loss": 2.1038, |
|
"step": 119400 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.4886515764691398e-05, |
|
"loss": 2.0975, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.4655700370843396e-05, |
|
"loss": 2.099, |
|
"step": 119600 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.4424884976995398e-05, |
|
"loss": 2.0984, |
|
"step": 119700 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.4194069583147396e-05, |
|
"loss": 2.1004, |
|
"step": 119800 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.3965562343237878e-05, |
|
"loss": 2.1007, |
|
"step": 119900 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.3734746949389873e-05, |
|
"loss": 2.1022, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.5396016943378332, |
|
"eval_loss": 2.0994510650634766, |
|
"eval_runtime": 43.6284, |
|
"eval_samples_per_second": 394.674, |
|
"eval_steps_per_second": 2.361, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.3503931555541875e-05, |
|
"loss": 2.0971, |
|
"step": 120100 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.3273116161693876e-05, |
|
"loss": 2.0978, |
|
"step": 120200 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.3042300767845875e-05, |
|
"loss": 2.0962, |
|
"step": 120300 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.2811485373997876e-05, |
|
"loss": 2.0991, |
|
"step": 120400 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.258066998014987e-05, |
|
"loss": 2.1004, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.2349854586301873e-05, |
|
"loss": 2.1024, |
|
"step": 120600 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.2119039192453874e-05, |
|
"loss": 2.0974, |
|
"step": 120700 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.1888223798605873e-05, |
|
"loss": 2.1039, |
|
"step": 120800 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.1657408404757874e-05, |
|
"loss": 2.0992, |
|
"step": 120900 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.142659301090987e-05, |
|
"loss": 2.0978, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_accuracy": 0.5398605184805468, |
|
"eval_loss": 2.0989837646484375, |
|
"eval_runtime": 42.0243, |
|
"eval_samples_per_second": 409.74, |
|
"eval_steps_per_second": 2.451, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.119577761706187e-05, |
|
"loss": 2.0962, |
|
"step": 121100 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.0964962223213872e-05, |
|
"loss": 2.0967, |
|
"step": 121200 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.073414682936587e-05, |
|
"loss": 2.0949, |
|
"step": 121300 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.0503331435517872e-05, |
|
"loss": 2.0953, |
|
"step": 121400 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.0272516041669867e-05, |
|
"loss": 2.0981, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.004170064782187e-05, |
|
"loss": 2.0997, |
|
"step": 121600 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.981088525397387e-05, |
|
"loss": 2.1005, |
|
"step": 121700 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.958006986012587e-05, |
|
"loss": 2.1015, |
|
"step": 121800 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.934925446627787e-05, |
|
"loss": 2.0995, |
|
"step": 121900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.912074722636835e-05, |
|
"loss": 2.0981, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.5399076189870831, |
|
"eval_loss": 2.0984342098236084, |
|
"eval_runtime": 42.5799, |
|
"eval_samples_per_second": 404.393, |
|
"eval_steps_per_second": 2.419, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.8889931832520347e-05, |
|
"loss": 2.1006, |
|
"step": 122100 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.865911643867235e-05, |
|
"loss": 2.1014, |
|
"step": 122200 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.8428301044824347e-05, |
|
"loss": 2.1005, |
|
"step": 122300 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.819748565097635e-05, |
|
"loss": 2.0934, |
|
"step": 122400 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.7966670257128347e-05, |
|
"loss": 2.1023, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.773585486328035e-05, |
|
"loss": 2.0979, |
|
"step": 122600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.7505039469432347e-05, |
|
"loss": 2.1003, |
|
"step": 122700 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.7274224075584345e-05, |
|
"loss": 2.0982, |
|
"step": 122800 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.7043408681736347e-05, |
|
"loss": 2.0982, |
|
"step": 122900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.6812593287888345e-05, |
|
"loss": 2.0952, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.5399163074300363, |
|
"eval_loss": 2.097956418991089, |
|
"eval_runtime": 42.3798, |
|
"eval_samples_per_second": 406.302, |
|
"eval_steps_per_second": 2.43, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.6581777894040347e-05, |
|
"loss": 2.0975, |
|
"step": 123100 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.6350962500192345e-05, |
|
"loss": 2.0986, |
|
"step": 123200 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.6120147106344343e-05, |
|
"loss": 2.0992, |
|
"step": 123300 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.5891639866434825e-05, |
|
"loss": 2.0929, |
|
"step": 123400 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.5660824472586823e-05, |
|
"loss": 2.0973, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.5430009078738822e-05, |
|
"loss": 2.1016, |
|
"step": 123600 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.5199193684890823e-05, |
|
"loss": 2.0939, |
|
"step": 123700 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.4968378291042822e-05, |
|
"loss": 2.0971, |
|
"step": 123800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.4737562897194823e-05, |
|
"loss": 2.1004, |
|
"step": 123900 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.4506747503346821e-05, |
|
"loss": 2.0962, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.5399986189948569, |
|
"eval_loss": 2.0974485874176025, |
|
"eval_runtime": 43.0876, |
|
"eval_samples_per_second": 399.627, |
|
"eval_steps_per_second": 2.39, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.4275932109498821e-05, |
|
"loss": 2.0976, |
|
"step": 124100 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.4045116715650821e-05, |
|
"loss": 2.0955, |
|
"step": 124200 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.381430132180282e-05, |
|
"loss": 2.0998, |
|
"step": 124300 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.3583485927954821e-05, |
|
"loss": 2.0929, |
|
"step": 124400 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.335267053410682e-05, |
|
"loss": 2.0913, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.31241632941973e-05, |
|
"loss": 2.0959, |
|
"step": 124600 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.28933479003493e-05, |
|
"loss": 2.0971, |
|
"step": 124700 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.26625325065013e-05, |
|
"loss": 2.0989, |
|
"step": 124800 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.2431717112653298e-05, |
|
"loss": 2.0921, |
|
"step": 124900 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.22009017188053e-05, |
|
"loss": 2.0993, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.540155010968016, |
|
"eval_loss": 2.0971081256866455, |
|
"eval_runtime": 42.3338, |
|
"eval_samples_per_second": 406.743, |
|
"eval_steps_per_second": 2.433, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.1970086324957298e-05, |
|
"loss": 2.0997, |
|
"step": 125100 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.1739270931109298e-05, |
|
"loss": 2.0969, |
|
"step": 125200 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.1508455537261298e-05, |
|
"loss": 2.1005, |
|
"step": 125300 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.1277640143413296e-05, |
|
"loss": 2.0897, |
|
"step": 125400 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.1046824749565298e-05, |
|
"loss": 2.097, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.0816009355717296e-05, |
|
"loss": 2.0989, |
|
"step": 125600 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0585193961869296e-05, |
|
"loss": 2.0949, |
|
"step": 125700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0356686721959776e-05, |
|
"loss": 2.1004, |
|
"step": 125800 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0125871328111775e-05, |
|
"loss": 2.0969, |
|
"step": 125900 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.895055934263774e-06, |
|
"loss": 2.0982, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_accuracy": 0.5402135436363329, |
|
"eval_loss": 2.0967490673065186, |
|
"eval_runtime": 42.1939, |
|
"eval_samples_per_second": 408.092, |
|
"eval_steps_per_second": 2.441, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.664240540415776e-06, |
|
"loss": 2.0988, |
|
"step": 126100 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.433425146567774e-06, |
|
"loss": 2.0995, |
|
"step": 126200 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.202609752719774e-06, |
|
"loss": 2.0966, |
|
"step": 126300 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 8.971794358871774e-06, |
|
"loss": 2.0942, |
|
"step": 126400 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 8.740978965023772e-06, |
|
"loss": 2.0987, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 8.510163571175772e-06, |
|
"loss": 2.101, |
|
"step": 126600 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 8.279348177327772e-06, |
|
"loss": 2.0984, |
|
"step": 126700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 8.050840937418253e-06, |
|
"loss": 2.0983, |
|
"step": 126800 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 7.820025543570251e-06, |
|
"loss": 2.0973, |
|
"step": 126900 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 7.589210149722251e-06, |
|
"loss": 2.0962, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.5402798501746605, |
|
"eval_loss": 2.0963633060455322, |
|
"eval_runtime": 41.8055, |
|
"eval_samples_per_second": 411.884, |
|
"eval_steps_per_second": 2.464, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 7.358394755874252e-06, |
|
"loss": 2.0962, |
|
"step": 127100 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 7.127579362026251e-06, |
|
"loss": 2.0983, |
|
"step": 127200 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.89676396817825e-06, |
|
"loss": 2.0972, |
|
"step": 127300 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.66594857433025e-06, |
|
"loss": 2.0964, |
|
"step": 127400 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.435133180482249e-06, |
|
"loss": 2.0941, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.20431778663425e-06, |
|
"loss": 2.0946, |
|
"step": 127600 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.973502392786249e-06, |
|
"loss": 2.094, |
|
"step": 127700 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.742686998938249e-06, |
|
"loss": 2.0904, |
|
"step": 127800 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.511871605090248e-06, |
|
"loss": 2.0968, |
|
"step": 127900 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.281056211242248e-06, |
|
"loss": 2.0963, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.5404106341054311, |
|
"eval_loss": 2.0959982872009277, |
|
"eval_runtime": 41.6925, |
|
"eval_samples_per_second": 413.0, |
|
"eval_steps_per_second": 2.47, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.050240817394248e-06, |
|
"loss": 2.1005, |
|
"step": 128100 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.819425423546248e-06, |
|
"loss": 2.0951, |
|
"step": 128200 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.588610029698247e-06, |
|
"loss": 2.1026, |
|
"step": 128300 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.357794635850247e-06, |
|
"loss": 2.0978, |
|
"step": 128400 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.126979242002246e-06, |
|
"loss": 2.093, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.896163848154246e-06, |
|
"loss": 2.0972, |
|
"step": 128600 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.6653484543062457e-06, |
|
"loss": 2.0972, |
|
"step": 128700 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.4345330604582452e-06, |
|
"loss": 2.0965, |
|
"step": 128800 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.203717666610245e-06, |
|
"loss": 2.0963, |
|
"step": 128900 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.9729022727622447e-06, |
|
"loss": 2.0967, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.5404284682778089, |
|
"eval_loss": 2.095768928527832, |
|
"eval_runtime": 42.026, |
|
"eval_samples_per_second": 409.723, |
|
"eval_steps_per_second": 2.451, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.7420868789142438e-06, |
|
"loss": 2.0965, |
|
"step": 129100 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.5112714850662437e-06, |
|
"loss": 2.0971, |
|
"step": 129200 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.280456091218243e-06, |
|
"loss": 2.0966, |
|
"step": 129300 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.049640697370243e-06, |
|
"loss": 2.0939, |
|
"step": 129400 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.8188253035222427e-06, |
|
"loss": 2.0953, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.5880099096742424e-06, |
|
"loss": 2.094, |
|
"step": 129600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.3595026697647222e-06, |
|
"loss": 2.1009, |
|
"step": 129700 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.1286872759167217e-06, |
|
"loss": 2.0941, |
|
"step": 129800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 8.978718820687215e-07, |
|
"loss": 2.0952, |
|
"step": 129900 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 6.67056488220721e-07, |
|
"loss": 2.094, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5404988903943776, |
|
"eval_loss": 2.0955088138580322, |
|
"eval_runtime": 41.8167, |
|
"eval_samples_per_second": 411.773, |
|
"eval_steps_per_second": 2.463, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.362410943727207e-07, |
|
"loss": 2.1005, |
|
"step": 130100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 130174, |
|
"total_flos": 2.2194820888706993e+20, |
|
"train_loss": 2.1287676742806667, |
|
"train_runtime": 185005.5119, |
|
"train_samples_per_second": 236.418, |
|
"train_steps_per_second": 0.704 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 130174, |
|
"num_train_epochs": 1, |
|
"save_steps": 13018, |
|
"total_flos": 2.2194820888706993e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|