ncbateman committed
Commit 45cccea
1 Parent(s): 721194a

Training in progress, step 260, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6181c92989cc12c5c4bf8ebe5c826179e6c49d9a8f6fdcdac17fbc54e6be7263
+oid sha256:d78fd5c628f31fe6f81b7e9bbd7adcf7dcd7e2069fba87a4287bb9992aa32940
 size 45118424
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:201e9ee2aad5fb6422cb6bd1f1a23c9fc2c463c94ca6b58351cd891758b9db65
-size 23159290
+oid sha256:02b2f2518671db7884993614fc2de098c0047accad5c9b663a3acf6ae07fa80d
+size 23159546
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a6398556d22fe95a7e5e6c7da07ebca8075991f502dbf137933551966eb78dbd
+oid sha256:390b86d1750c11659578b906a56e7ab9fdb42bccc921eb4cc727a15d8557be03
 size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d37fad6be6ffe60fea334ab6bc75d4ae6805a5c373142ff91d27c556b05e854c
+oid sha256:42476027bd2bfd54d2018c7efa4a234360e0e33427747c62bc385bfb49affb98
 size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a2f6dcce6785392487d83864c0be888500239b6ce81b1cd85adb6f30290e683a
+oid sha256:cb93d8c2ec6d70ccb139d9861604471242441ca47e1ff57cc12b015fe36fd3bd
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.2911694510739857,
+  "epoch": 2.4821002386634845,
   "eval_steps": 52,
-  "global_step": 240,
+  "global_step": 260,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1727,6 +1727,154 @@
       "learning_rate": 6.481377904428171e-05,
       "loss": 0.6306,
       "step": 240
+    },
+    {
+      "epoch": 2.300715990453461,
+      "grad_norm": 0.40222805738449097,
+      "learning_rate": 6.449424378870123e-05,
+      "loss": 0.6857,
+      "step": 241
+    },
+    {
+      "epoch": 2.3102625298329356,
+      "grad_norm": 0.4215107858181,
+      "learning_rate": 6.41740609463409e-05,
+      "loss": 0.7309,
+      "step": 242
+    },
+    {
+      "epoch": 2.3198090692124103,
+      "grad_norm": 0.4149893820285797,
+      "learning_rate": 6.385324482261597e-05,
+      "loss": 0.6562,
+      "step": 243
+    },
+    {
+      "epoch": 2.3293556085918854,
+      "grad_norm": 0.4119661748409271,
+      "learning_rate": 6.353180975123595e-05,
+      "loss": 0.7544,
+      "step": 244
+    },
+    {
+      "epoch": 2.3389021479713605,
+      "grad_norm": 0.32324427366256714,
+      "learning_rate": 6.320977009356431e-05,
+      "loss": 0.5994,
+      "step": 245
+    },
+    {
+      "epoch": 2.3484486873508352,
+      "grad_norm": 0.4508344531059265,
+      "learning_rate": 6.288714023797672e-05,
+      "loss": 0.7047,
+      "step": 246
+    },
+    {
+      "epoch": 2.3579952267303104,
+      "grad_norm": 0.3957417905330658,
+      "learning_rate": 6.256393459921824e-05,
+      "loss": 0.6364,
+      "step": 247
+    },
+    {
+      "epoch": 2.367541766109785,
+      "grad_norm": 0.4180348813533783,
+      "learning_rate": 6.224016761775933e-05,
+      "loss": 0.6511,
+      "step": 248
+    },
+    {
+      "epoch": 2.37708830548926,
+      "grad_norm": 0.46107926964759827,
+      "learning_rate": 6.191585375915055e-05,
+      "loss": 0.6736,
+      "step": 249
+    },
+    {
+      "epoch": 2.386634844868735,
+      "grad_norm": 0.43949881196022034,
+      "learning_rate": 6.159100751337642e-05,
+      "loss": 0.6639,
+      "step": 250
+    },
+    {
+      "epoch": 2.39618138424821,
+      "grad_norm": 0.4427139461040497,
+      "learning_rate": 6.126564339420784e-05,
+      "loss": 0.6581,
+      "step": 251
+    },
+    {
+      "epoch": 2.405727923627685,
+      "grad_norm": 0.4241901636123657,
+      "learning_rate": 6.093977593855375e-05,
+      "loss": 0.6738,
+      "step": 252
+    },
+    {
+      "epoch": 2.41527446300716,
+      "grad_norm": 0.41828441619873047,
+      "learning_rate": 6.061341970581165e-05,
+      "loss": 0.685,
+      "step": 253
+    },
+    {
+      "epoch": 2.424821002386635,
+      "grad_norm": 0.4712134599685669,
+      "learning_rate": 6.028658927721697e-05,
+      "loss": 0.6853,
+      "step": 254
+    },
+    {
+      "epoch": 2.4343675417661097,
+      "grad_norm": 0.47678568959236145,
+      "learning_rate": 5.99592992551918e-05,
+      "loss": 0.673,
+      "step": 255
+    },
+    {
+      "epoch": 2.443914081145585,
+      "grad_norm": 0.46318480372428894,
+      "learning_rate": 5.9631564262692274e-05,
+      "loss": 0.688,
+      "step": 256
+    },
+    {
+      "epoch": 2.4534606205250595,
+      "grad_norm": 0.4256531000137329,
+      "learning_rate": 5.930339894255532e-05,
+      "loss": 0.6521,
+      "step": 257
+    },
+    {
+      "epoch": 2.4630071599045347,
+      "grad_norm": 0.39636510610580444,
+      "learning_rate": 5.897481795684446e-05,
+      "loss": 0.6713,
+      "step": 258
+    },
+    {
+      "epoch": 2.4725536992840094,
+      "grad_norm": 0.497344434261322,
+      "learning_rate": 5.8645835986194676e-05,
+      "loss": 0.7745,
+      "step": 259
+    },
+    {
+      "epoch": 2.4821002386634845,
+      "grad_norm": 0.4814034104347229,
+      "learning_rate": 5.831646772915651e-05,
+      "loss": 0.6849,
+      "step": 260
+    },
+    {
+      "epoch": 2.4821002386634845,
+      "eval_loss": 0.7669724225997925,
+      "eval_runtime": 13.0227,
+      "eval_samples_per_second": 13.592,
+      "eval_steps_per_second": 1.766,
+      "step": 260
     }
   ],
   "logging_steps": 1,
@@ -1746,7 +1894,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.8580273335631872e+17,
+  "total_flos": 2.0128629446934528e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null