Training in progress, step 260, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d78fd5c628f31fe6f81b7e9bbd7adcf7dcd7e2069fba87a4287bb9992aa32940
 size 45118424
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:02b2f2518671db7884993614fc2de098c0047accad5c9b663a3acf6ae07fa80d
+size 23159546
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:390b86d1750c11659578b906a56e7ab9fdb42bccc921eb4cc727a15d8557be03
 size 14512
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:42476027bd2bfd54d2018c7efa4a234360e0e33427747c62bc385bfb49affb98
 size 14512
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:cb93d8c2ec6d70ccb139d9861604471242441ca47e1ff57cc12b015fe36fd3bd
 size 1064
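Each binary checkpoint file above is stored as a Git LFS pointer, so the diff only records the new sha256 oid and the byte size. A minimal sketch of checking a locally downloaded file against its pointer follows; the oid and size are the new scheduler.pt values from the diff above, and the local path is illustrative, assuming the checkpoint directory has already been fetched.

# Minimal sketch (not part of this commit): verify a downloaded checkpoint
# file against the sha256 oid and size recorded in its Git LFS pointer.
# The oid/size are the new scheduler.pt values from the diff above;
# the local path is an assumption for illustration.
import hashlib
import os

EXPECTED_OID = "cb93d8c2ec6d70ccb139d9861604471242441ca47e1ff57cc12b015fe36fd3bd"
EXPECTED_SIZE = 1064  # bytes, from the pointer's "size" line

def sha256_of(path: str) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

path = "last-checkpoint/scheduler.pt"  # assumes the checkpoint was fetched locally
assert os.path.getsize(path) == EXPECTED_SIZE, "size does not match pointer"
assert sha256_of(path) == EXPECTED_OID, "sha256 does not match pointer oid"
print("scheduler.pt matches its LFS pointer")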
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.
+  "epoch": 2.4821002386634845,
   "eval_steps": 52,
-  "global_step":
+  "global_step": 260,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1727,6 +1727,154 @@
       "learning_rate": 6.481377904428171e-05,
       "loss": 0.6306,
       "step": 240
+    },
+    {
+      "epoch": 2.300715990453461,
+      "grad_norm": 0.40222805738449097,
+      "learning_rate": 6.449424378870123e-05,
+      "loss": 0.6857,
+      "step": 241
+    },
+    {
+      "epoch": 2.3102625298329356,
+      "grad_norm": 0.4215107858181,
+      "learning_rate": 6.41740609463409e-05,
+      "loss": 0.7309,
+      "step": 242
+    },
+    {
+      "epoch": 2.3198090692124103,
+      "grad_norm": 0.4149893820285797,
+      "learning_rate": 6.385324482261597e-05,
+      "loss": 0.6562,
+      "step": 243
+    },
+    {
+      "epoch": 2.3293556085918854,
+      "grad_norm": 0.4119661748409271,
+      "learning_rate": 6.353180975123595e-05,
+      "loss": 0.7544,
+      "step": 244
+    },
+    {
+      "epoch": 2.3389021479713605,
+      "grad_norm": 0.32324427366256714,
+      "learning_rate": 6.320977009356431e-05,
+      "loss": 0.5994,
+      "step": 245
+    },
+    {
+      "epoch": 2.3484486873508352,
+      "grad_norm": 0.4508344531059265,
+      "learning_rate": 6.288714023797672e-05,
+      "loss": 0.7047,
+      "step": 246
+    },
+    {
+      "epoch": 2.3579952267303104,
+      "grad_norm": 0.3957417905330658,
+      "learning_rate": 6.256393459921824e-05,
+      "loss": 0.6364,
+      "step": 247
+    },
+    {
+      "epoch": 2.367541766109785,
+      "grad_norm": 0.4180348813533783,
+      "learning_rate": 6.224016761775933e-05,
+      "loss": 0.6511,
+      "step": 248
+    },
+    {
+      "epoch": 2.37708830548926,
+      "grad_norm": 0.46107926964759827,
+      "learning_rate": 6.191585375915055e-05,
+      "loss": 0.6736,
+      "step": 249
+    },
+    {
+      "epoch": 2.386634844868735,
+      "grad_norm": 0.43949881196022034,
+      "learning_rate": 6.159100751337642e-05,
+      "loss": 0.6639,
+      "step": 250
+    },
+    {
+      "epoch": 2.39618138424821,
+      "grad_norm": 0.4427139461040497,
+      "learning_rate": 6.126564339420784e-05,
+      "loss": 0.6581,
+      "step": 251
+    },
+    {
+      "epoch": 2.405727923627685,
+      "grad_norm": 0.4241901636123657,
+      "learning_rate": 6.093977593855375e-05,
+      "loss": 0.6738,
+      "step": 252
+    },
+    {
+      "epoch": 2.41527446300716,
+      "grad_norm": 0.41828441619873047,
+      "learning_rate": 6.061341970581165e-05,
+      "loss": 0.685,
+      "step": 253
+    },
+    {
+      "epoch": 2.424821002386635,
+      "grad_norm": 0.4712134599685669,
+      "learning_rate": 6.028658927721697e-05,
+      "loss": 0.6853,
+      "step": 254
+    },
+    {
+      "epoch": 2.4343675417661097,
+      "grad_norm": 0.47678568959236145,
+      "learning_rate": 5.99592992551918e-05,
+      "loss": 0.673,
+      "step": 255
+    },
+    {
+      "epoch": 2.443914081145585,
+      "grad_norm": 0.46318480372428894,
+      "learning_rate": 5.9631564262692274e-05,
+      "loss": 0.688,
+      "step": 256
+    },
+    {
+      "epoch": 2.4534606205250595,
+      "grad_norm": 0.4256531000137329,
+      "learning_rate": 5.930339894255532e-05,
+      "loss": 0.6521,
+      "step": 257
+    },
+    {
+      "epoch": 2.4630071599045347,
+      "grad_norm": 0.39636510610580444,
+      "learning_rate": 5.897481795684446e-05,
+      "loss": 0.6713,
+      "step": 258
+    },
+    {
+      "epoch": 2.4725536992840094,
+      "grad_norm": 0.497344434261322,
+      "learning_rate": 5.8645835986194676e-05,
+      "loss": 0.7745,
+      "step": 259
+    },
+    {
+      "epoch": 2.4821002386634845,
+      "grad_norm": 0.4814034104347229,
+      "learning_rate": 5.831646772915651e-05,
+      "loss": 0.6849,
+      "step": 260
+    },
+    {
+      "epoch": 2.4821002386634845,
+      "eval_loss": 0.7669724225997925,
+      "eval_runtime": 13.0227,
+      "eval_samples_per_second": 13.592,
+      "eval_steps_per_second": 1.766,
+      "step": 260
     }
   ],
   "logging_steps": 1,
@@ -1746,7 +1894,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 2.0128629446934528e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
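The trainer_state.json update appends per-step training records for steps 241-260 plus an eval record at step 260. A minimal sketch of reading those values back out of the checkpoint follows, assuming the standard Hugging Face Trainer state layout (the records shown in the diff live under the "log_history" key) and that last-checkpoint/ has been downloaded locally.

# Minimal sketch (illustrative, not part of this commit): read the updated
# trainer_state.json and pull out the latest train/eval log entries.
# Assumes the standard Hugging Face Trainer state layout, where the per-step
# records shown in the diff live under the "log_history" key.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["global_step"], state["epoch"])  # 260, 2.4821002386634845

train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print(train_logs[-1])  # last training record: step 260, loss 0.6849
print(eval_logs[-1])   # last eval record: step 260, eval_loss 0.7669724225997925

The optimizer, scheduler, and RNG state files in this checkpoint exist so training can be resumed; with the transformers Trainer that is typically done via trainer.train(resume_from_checkpoint="last-checkpoint").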