|
{ |
|
"best_metric": 0.7272727272727273, |
|
"best_model_checkpoint": "deit-base-distilled-patch16-224-hasta-85-fold1/checkpoint-3", |
|
"epoch": 100.0, |
|
"eval_steps": 500, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.09090909090909091, |
|
"eval_loss": 1.2771788835525513, |
|
"eval_runtime": 0.1734, |
|
"eval_samples_per_second": 63.434, |
|
"eval_steps_per_second": 5.767, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.18181818181818182, |
|
"eval_loss": 1.1447755098342896, |
|
"eval_runtime": 0.1646, |
|
"eval_samples_per_second": 66.839, |
|
"eval_steps_per_second": 6.076, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 0.9744274020195007, |
|
"eval_runtime": 0.1717, |
|
"eval_samples_per_second": 64.083, |
|
"eval_steps_per_second": 5.826, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 0.9233787655830383, |
|
"eval_runtime": 0.172, |
|
"eval_samples_per_second": 63.969, |
|
"eval_steps_per_second": 5.815, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.0760316848754883, |
|
"eval_runtime": 0.1729, |
|
"eval_samples_per_second": 63.616, |
|
"eval_steps_per_second": 5.783, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.3222259283065796, |
|
"eval_runtime": 0.1683, |
|
"eval_samples_per_second": 65.343, |
|
"eval_steps_per_second": 5.94, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.5247870683670044, |
|
"eval_runtime": 0.1317, |
|
"eval_samples_per_second": 83.521, |
|
"eval_steps_per_second": 7.593, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.6138737201690674, |
|
"eval_runtime": 0.1738, |
|
"eval_samples_per_second": 63.299, |
|
"eval_steps_per_second": 5.754, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.671146035194397, |
|
"eval_runtime": 0.1801, |
|
"eval_samples_per_second": 61.094, |
|
"eval_steps_per_second": 5.554, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 1.1924768686294556, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3554, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.7354072332382202, |
|
"eval_runtime": 0.1604, |
|
"eval_samples_per_second": 68.592, |
|
"eval_steps_per_second": 6.236, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.6721187829971313, |
|
"eval_runtime": 0.1745, |
|
"eval_samples_per_second": 63.042, |
|
"eval_steps_per_second": 5.731, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.5987907648086548, |
|
"eval_runtime": 0.1742, |
|
"eval_samples_per_second": 63.135, |
|
"eval_steps_per_second": 5.74, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.5959930419921875, |
|
"eval_runtime": 0.1674, |
|
"eval_samples_per_second": 65.72, |
|
"eval_steps_per_second": 5.975, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.583227515220642, |
|
"eval_runtime": 0.1711, |
|
"eval_samples_per_second": 64.284, |
|
"eval_steps_per_second": 5.844, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.568271517753601, |
|
"eval_runtime": 0.17, |
|
"eval_samples_per_second": 64.702, |
|
"eval_steps_per_second": 5.882, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.5774298906326294, |
|
"eval_runtime": 0.1701, |
|
"eval_samples_per_second": 64.672, |
|
"eval_steps_per_second": 5.879, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.6467945575714111, |
|
"eval_runtime": 0.1692, |
|
"eval_samples_per_second": 65.014, |
|
"eval_steps_per_second": 5.91, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.7090667486190796, |
|
"eval_runtime": 0.1688, |
|
"eval_samples_per_second": 65.178, |
|
"eval_steps_per_second": 5.925, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.7276203632354736, |
|
"eval_runtime": 0.1682, |
|
"eval_samples_per_second": 65.379, |
|
"eval_steps_per_second": 5.944, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 1.0401579141616821, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.1335, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.7052208185195923, |
|
"eval_runtime": 0.1669, |
|
"eval_samples_per_second": 65.92, |
|
"eval_steps_per_second": 5.993, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.642613172531128, |
|
"eval_runtime": 0.1706, |
|
"eval_samples_per_second": 64.477, |
|
"eval_steps_per_second": 5.862, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.5315605401992798, |
|
"eval_runtime": 0.1679, |
|
"eval_samples_per_second": 65.515, |
|
"eval_steps_per_second": 5.956, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.401681661605835, |
|
"eval_runtime": 0.2092, |
|
"eval_samples_per_second": 52.574, |
|
"eval_steps_per_second": 4.779, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.300853967666626, |
|
"eval_runtime": 0.1638, |
|
"eval_samples_per_second": 67.161, |
|
"eval_steps_per_second": 6.106, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.2863625288009644, |
|
"eval_runtime": 0.1692, |
|
"eval_samples_per_second": 64.994, |
|
"eval_steps_per_second": 5.909, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.393377423286438, |
|
"eval_runtime": 0.1719, |
|
"eval_samples_per_second": 64.001, |
|
"eval_steps_per_second": 5.818, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.4435607194900513, |
|
"eval_runtime": 0.171, |
|
"eval_samples_per_second": 64.321, |
|
"eval_steps_per_second": 5.847, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.5751649141311646, |
|
"eval_runtime": 0.1734, |
|
"eval_samples_per_second": 63.455, |
|
"eval_steps_per_second": 5.769, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.6211150884628296, |
|
"eval_runtime": 0.1701, |
|
"eval_samples_per_second": 64.665, |
|
"eval_steps_per_second": 5.879, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 1.1806550025939941, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.0769, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.5944229364395142, |
|
"eval_runtime": 0.1629, |
|
"eval_samples_per_second": 67.545, |
|
"eval_steps_per_second": 6.14, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.5282962322235107, |
|
"eval_runtime": 0.1679, |
|
"eval_samples_per_second": 65.5, |
|
"eval_steps_per_second": 5.955, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.4341241121292114, |
|
"eval_runtime": 0.1709, |
|
"eval_samples_per_second": 64.372, |
|
"eval_steps_per_second": 5.852, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.451222538948059, |
|
"eval_runtime": 0.1774, |
|
"eval_samples_per_second": 62.024, |
|
"eval_steps_per_second": 5.639, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.4979816675186157, |
|
"eval_runtime": 0.1698, |
|
"eval_samples_per_second": 64.797, |
|
"eval_steps_per_second": 5.891, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.5802719593048096, |
|
"eval_runtime": 0.1718, |
|
"eval_samples_per_second": 64.014, |
|
"eval_steps_per_second": 5.819, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.7675877809524536, |
|
"eval_runtime": 0.1737, |
|
"eval_samples_per_second": 63.319, |
|
"eval_steps_per_second": 5.756, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.8581143617630005, |
|
"eval_runtime": 0.1763, |
|
"eval_samples_per_second": 62.379, |
|
"eval_steps_per_second": 5.671, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.8816009759902954, |
|
"eval_runtime": 0.1739, |
|
"eval_samples_per_second": 63.25, |
|
"eval_steps_per_second": 5.75, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.8316550254821777, |
|
"eval_runtime": 0.1703, |
|
"eval_samples_per_second": 64.601, |
|
"eval_steps_per_second": 5.873, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 0.9434149861335754, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.0505, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.7445045709609985, |
|
"eval_runtime": 0.1655, |
|
"eval_samples_per_second": 66.473, |
|
"eval_steps_per_second": 6.043, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.6965172290802002, |
|
"eval_runtime": 0.1727, |
|
"eval_samples_per_second": 63.702, |
|
"eval_steps_per_second": 5.791, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.721003770828247, |
|
"eval_runtime": 0.1728, |
|
"eval_samples_per_second": 63.673, |
|
"eval_steps_per_second": 5.788, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.6903266906738281, |
|
"eval_runtime": 0.1704, |
|
"eval_samples_per_second": 64.544, |
|
"eval_steps_per_second": 5.868, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.694361925125122, |
|
"eval_runtime": 0.1732, |
|
"eval_samples_per_second": 63.528, |
|
"eval_steps_per_second": 5.775, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.6923259496688843, |
|
"eval_runtime": 0.1663, |
|
"eval_samples_per_second": 66.135, |
|
"eval_steps_per_second": 6.012, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.7470028400421143, |
|
"eval_runtime": 0.1683, |
|
"eval_samples_per_second": 65.348, |
|
"eval_steps_per_second": 5.941, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.7501949071884155, |
|
"eval_runtime": 0.1672, |
|
"eval_samples_per_second": 65.8, |
|
"eval_steps_per_second": 5.982, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.7738779783248901, |
|
"eval_runtime": 0.174, |
|
"eval_samples_per_second": 63.224, |
|
"eval_steps_per_second": 5.748, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.7819253206253052, |
|
"eval_runtime": 0.1701, |
|
"eval_samples_per_second": 64.658, |
|
"eval_steps_per_second": 5.878, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"grad_norm": 0.3251829743385315, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.0255, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.8199766874313354, |
|
"eval_runtime": 0.1617, |
|
"eval_samples_per_second": 68.041, |
|
"eval_steps_per_second": 6.186, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.8121682405471802, |
|
"eval_runtime": 0.1715, |
|
"eval_samples_per_second": 64.129, |
|
"eval_steps_per_second": 5.83, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.7939274311065674, |
|
"eval_runtime": 0.1789, |
|
"eval_samples_per_second": 61.501, |
|
"eval_steps_per_second": 5.591, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.7736003398895264, |
|
"eval_runtime": 0.1717, |
|
"eval_samples_per_second": 64.058, |
|
"eval_steps_per_second": 5.823, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.7411243915557861, |
|
"eval_runtime": 0.17, |
|
"eval_samples_per_second": 64.709, |
|
"eval_steps_per_second": 5.883, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.6772898435592651, |
|
"eval_runtime": 0.1718, |
|
"eval_samples_per_second": 64.013, |
|
"eval_steps_per_second": 5.819, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.6555858850479126, |
|
"eval_runtime": 0.172, |
|
"eval_samples_per_second": 63.951, |
|
"eval_steps_per_second": 5.814, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.6766685247421265, |
|
"eval_runtime": 0.1687, |
|
"eval_samples_per_second": 65.223, |
|
"eval_steps_per_second": 5.929, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.6623153686523438, |
|
"eval_runtime": 0.1813, |
|
"eval_samples_per_second": 60.676, |
|
"eval_steps_per_second": 5.516, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.6553212404251099, |
|
"eval_runtime": 0.1694, |
|
"eval_samples_per_second": 64.947, |
|
"eval_steps_per_second": 5.904, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"grad_norm": 0.4616081416606903, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.0227, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.6682250499725342, |
|
"eval_runtime": 0.1787, |
|
"eval_samples_per_second": 61.567, |
|
"eval_steps_per_second": 5.597, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.62090265750885, |
|
"eval_runtime": 0.1815, |
|
"eval_samples_per_second": 60.623, |
|
"eval_steps_per_second": 5.511, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.6187800168991089, |
|
"eval_runtime": 0.1699, |
|
"eval_samples_per_second": 64.734, |
|
"eval_steps_per_second": 5.885, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.6919440031051636, |
|
"eval_runtime": 0.1718, |
|
"eval_samples_per_second": 64.021, |
|
"eval_steps_per_second": 5.82, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.7957440614700317, |
|
"eval_runtime": 0.171, |
|
"eval_samples_per_second": 64.31, |
|
"eval_steps_per_second": 5.846, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.8749808073043823, |
|
"eval_runtime": 0.1689, |
|
"eval_samples_per_second": 65.119, |
|
"eval_steps_per_second": 5.92, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.9156414270401, |
|
"eval_runtime": 0.1703, |
|
"eval_samples_per_second": 64.59, |
|
"eval_steps_per_second": 5.872, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.9163463115692139, |
|
"eval_runtime": 0.1741, |
|
"eval_samples_per_second": 63.174, |
|
"eval_steps_per_second": 5.743, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.8968899250030518, |
|
"eval_runtime": 0.1686, |
|
"eval_samples_per_second": 65.251, |
|
"eval_steps_per_second": 5.932, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.881422519683838, |
|
"eval_runtime": 0.171, |
|
"eval_samples_per_second": 64.319, |
|
"eval_steps_per_second": 5.847, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"grad_norm": 0.3331330716609955, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.0185, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.8714560270309448, |
|
"eval_runtime": 0.1697, |
|
"eval_samples_per_second": 64.821, |
|
"eval_steps_per_second": 5.893, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.8891681432724, |
|
"eval_runtime": 0.1694, |
|
"eval_samples_per_second": 64.943, |
|
"eval_steps_per_second": 5.904, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.9382548332214355, |
|
"eval_runtime": 0.1738, |
|
"eval_samples_per_second": 63.278, |
|
"eval_steps_per_second": 5.753, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.9627383947372437, |
|
"eval_runtime": 0.1697, |
|
"eval_samples_per_second": 64.822, |
|
"eval_steps_per_second": 5.893, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.0154221057891846, |
|
"eval_runtime": 0.1705, |
|
"eval_samples_per_second": 64.527, |
|
"eval_steps_per_second": 5.866, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.032640218734741, |
|
"eval_runtime": 0.1767, |
|
"eval_samples_per_second": 62.244, |
|
"eval_steps_per_second": 5.659, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.0424911975860596, |
|
"eval_runtime": 0.1711, |
|
"eval_samples_per_second": 64.272, |
|
"eval_steps_per_second": 5.843, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.0586304664611816, |
|
"eval_runtime": 0.177, |
|
"eval_samples_per_second": 62.132, |
|
"eval_steps_per_second": 5.648, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.058166265487671, |
|
"eval_runtime": 0.1793, |
|
"eval_samples_per_second": 61.357, |
|
"eval_steps_per_second": 5.578, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.086315870285034, |
|
"eval_runtime": 0.1722, |
|
"eval_samples_per_second": 63.888, |
|
"eval_steps_per_second": 5.808, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"grad_norm": 0.5901564955711365, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.0246, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.123337507247925, |
|
"eval_runtime": 0.168, |
|
"eval_samples_per_second": 65.467, |
|
"eval_steps_per_second": 5.952, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.1527483463287354, |
|
"eval_runtime": 0.1714, |
|
"eval_samples_per_second": 64.176, |
|
"eval_steps_per_second": 5.834, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.176025152206421, |
|
"eval_runtime": 0.177, |
|
"eval_samples_per_second": 62.154, |
|
"eval_steps_per_second": 5.65, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.190654993057251, |
|
"eval_runtime": 0.1738, |
|
"eval_samples_per_second": 63.301, |
|
"eval_steps_per_second": 5.755, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.1858766078948975, |
|
"eval_runtime": 0.179, |
|
"eval_samples_per_second": 61.465, |
|
"eval_steps_per_second": 5.588, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.165440320968628, |
|
"eval_runtime": 0.1802, |
|
"eval_samples_per_second": 61.042, |
|
"eval_steps_per_second": 5.549, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.1478655338287354, |
|
"eval_runtime": 0.1681, |
|
"eval_samples_per_second": 65.441, |
|
"eval_steps_per_second": 5.949, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.1194212436676025, |
|
"eval_runtime": 0.1706, |
|
"eval_samples_per_second": 64.486, |
|
"eval_steps_per_second": 5.862, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.1058995723724365, |
|
"eval_runtime": 0.173, |
|
"eval_samples_per_second": 63.597, |
|
"eval_steps_per_second": 5.782, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.103247880935669, |
|
"eval_runtime": 0.1721, |
|
"eval_samples_per_second": 63.931, |
|
"eval_steps_per_second": 5.812, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"grad_norm": 1.2021247148513794, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.0228, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.0998663902282715, |
|
"eval_runtime": 0.1654, |
|
"eval_samples_per_second": 66.509, |
|
"eval_steps_per_second": 6.046, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.1037116050720215, |
|
"eval_runtime": 0.1718, |
|
"eval_samples_per_second": 64.033, |
|
"eval_steps_per_second": 5.821, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.1025755405426025, |
|
"eval_runtime": 0.175, |
|
"eval_samples_per_second": 62.874, |
|
"eval_steps_per_second": 5.716, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.1132094860076904, |
|
"eval_runtime": 0.1668, |
|
"eval_samples_per_second": 65.929, |
|
"eval_steps_per_second": 5.994, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.1301839351654053, |
|
"eval_runtime": 0.171, |
|
"eval_samples_per_second": 64.331, |
|
"eval_steps_per_second": 5.848, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.1452560424804688, |
|
"eval_runtime": 0.1758, |
|
"eval_samples_per_second": 62.58, |
|
"eval_steps_per_second": 5.689, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.163395643234253, |
|
"eval_runtime": 0.1756, |
|
"eval_samples_per_second": 62.64, |
|
"eval_steps_per_second": 5.695, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.1761701107025146, |
|
"eval_runtime": 0.1744, |
|
"eval_samples_per_second": 63.075, |
|
"eval_steps_per_second": 5.734, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.1859476566314697, |
|
"eval_runtime": 0.1746, |
|
"eval_samples_per_second": 63.0, |
|
"eval_steps_per_second": 5.727, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.1915647983551025, |
|
"eval_runtime": 0.1687, |
|
"eval_samples_per_second": 65.197, |
|
"eval_steps_per_second": 5.927, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"grad_norm": 1.034210205078125, |
|
"learning_rate": 0.0, |
|
"loss": 0.0142, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.193308115005493, |
|
"eval_runtime": 0.1657, |
|
"eval_samples_per_second": 66.395, |
|
"eval_steps_per_second": 6.036, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 100, |
|
"total_flos": 4.572150213593088e+17, |
|
"train_loss": 0.07446861431002617, |
|
"train_runtime": 420.0976, |
|
"train_samples_per_second": 14.044, |
|
"train_steps_per_second": 0.238 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 0.9744274020195007, |
|
"eval_runtime": 0.2198, |
|
"eval_samples_per_second": 50.054, |
|
"eval_steps_per_second": 4.55, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.572150213593088e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|