Training in progress, step 1850000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +2 -2
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +383 -3
- last-checkpoint/training_args.bin +1 -1
- pytorch_model.bin +1 -1
- training_args.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893439185
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dfb68686d566f5019ee82a8c96b0a5544a86168b18ae5533f028d07705e256d7
|
3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64dd9bb8ac07ad494b77a3974d9a13d4d5d6c9061220ee2632308b55b6ccca8c
|
3 |
size 449471589
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b8093364ac983f02083f889c77722892d297eae3bcec837969f1e20972859470
|
3 |
+
size 21579
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:04b4f57439a544d0a66cc7c8aa509e6d07139998cf7951a6fd1fc7884297b3c7
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:203a36bdaa16f61697b76694bf2a74dc1a746df9c496ed1bca73de3ffd507a20
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -13686,11 +13686,391 @@
|
|
13686 |
"eval_samples_per_second": 83.142,
|
13687 |
"eval_steps_per_second": 0.65,
|
13688 |
"step": 1800000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13689 |
}
|
13690 |
],
|
13691 |
"max_steps": 2000000,
|
13692 |
"num_train_epochs": 9223372036854775807,
|
13693 |
-
"total_flos": 1.
|
13694 |
"trial_name": null,
|
13695 |
"trial_params": null
|
13696 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.025,
|
5 |
+
"global_step": 1850000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
13686 |
"eval_samples_per_second": 83.142,
|
13687 |
"eval_steps_per_second": 0.65,
|
13688 |
"step": 1800000
|
13689 |
+
},
|
13690 |
+
{
|
13691 |
+
"epoch": 0.0,
|
13692 |
+
"learning_rate": 1.4208113677502687e-05,
|
13693 |
+
"loss": 0.4365,
|
13694 |
+
"step": 1801000
|
13695 |
+
},
|
13696 |
+
{
|
13697 |
+
"epoch": 0.0,
|
13698 |
+
"learning_rate": 1.4166492588365344e-05,
|
13699 |
+
"loss": 0.4384,
|
13700 |
+
"step": 1802000
|
13701 |
+
},
|
13702 |
+
{
|
13703 |
+
"epoch": 0.0,
|
13704 |
+
"learning_rate": 1.4125072039508715e-05,
|
13705 |
+
"loss": 0.4379,
|
13706 |
+
"step": 1803000
|
13707 |
+
},
|
13708 |
+
{
|
13709 |
+
"epoch": 0.0,
|
13710 |
+
"learning_rate": 1.4083852157106983e-05,
|
13711 |
+
"loss": 0.4377,
|
13712 |
+
"step": 1804000
|
13713 |
+
},
|
13714 |
+
{
|
13715 |
+
"epoch": 0.0,
|
13716 |
+
"learning_rate": 1.4042833066723076e-05,
|
13717 |
+
"loss": 0.4385,
|
13718 |
+
"step": 1805000
|
13719 |
+
},
|
13720 |
+
{
|
13721 |
+
"epoch": 0.0,
|
13722 |
+
"eval_loss": 0.42015281319618225,
|
13723 |
+
"eval_runtime": 78.5938,
|
13724 |
+
"eval_samples_per_second": 81.431,
|
13725 |
+
"eval_steps_per_second": 0.636,
|
13726 |
+
"step": 1805000
|
13727 |
+
},
|
13728 |
+
{
|
13729 |
+
"epoch": 0.0,
|
13730 |
+
"learning_rate": 1.4002055611082185e-05,
|
13731 |
+
"loss": 0.4387,
|
13732 |
+
"step": 1806000
|
13733 |
+
},
|
13734 |
+
{
|
13735 |
+
"epoch": 0.0,
|
13736 |
+
"learning_rate": 1.396143827787245e-05,
|
13737 |
+
"loss": 0.4379,
|
13738 |
+
"step": 1807000
|
13739 |
+
},
|
13740 |
+
{
|
13741 |
+
"epoch": 0.0,
|
13742 |
+
"learning_rate": 1.3921022109574423e-05,
|
13743 |
+
"loss": 0.4373,
|
13744 |
+
"step": 1808000
|
13745 |
+
},
|
13746 |
+
{
|
13747 |
+
"epoch": 0.0,
|
13748 |
+
"learning_rate": 1.3880847343598854e-05,
|
13749 |
+
"loss": 0.4382,
|
13750 |
+
"step": 1809000
|
13751 |
+
},
|
13752 |
+
{
|
13753 |
+
"epoch": 0.01,
|
13754 |
+
"learning_rate": 1.384087358540966e-05,
|
13755 |
+
"loss": 0.438,
|
13756 |
+
"step": 1810000
|
13757 |
+
},
|
13758 |
+
{
|
13759 |
+
"epoch": 0.01,
|
13760 |
+
"eval_loss": 0.4181618392467499,
|
13761 |
+
"eval_runtime": 77.0873,
|
13762 |
+
"eval_samples_per_second": 83.023,
|
13763 |
+
"eval_steps_per_second": 0.649,
|
13764 |
+
"step": 1810000
|
13765 |
+
},
|
13766 |
+
{
|
13767 |
+
"epoch": 0.01,
|
13768 |
+
"learning_rate": 1.3801061244895656e-05,
|
13769 |
+
"loss": 0.4382,
|
13770 |
+
"step": 1811000
|
13771 |
+
},
|
13772 |
+
{
|
13773 |
+
"epoch": 0.01,
|
13774 |
+
"learning_rate": 1.3761450557829634e-05,
|
13775 |
+
"loss": 0.4392,
|
13776 |
+
"step": 1812000
|
13777 |
+
},
|
13778 |
+
{
|
13779 |
+
"epoch": 0.01,
|
13780 |
+
"learning_rate": 1.372204164487259e-05,
|
13781 |
+
"loss": 0.4387,
|
13782 |
+
"step": 1813000
|
13783 |
+
},
|
13784 |
+
{
|
13785 |
+
"epoch": 0.01,
|
13786 |
+
"learning_rate": 1.368283462607094e-05,
|
13787 |
+
"loss": 0.4388,
|
13788 |
+
"step": 1814000
|
13789 |
+
},
|
13790 |
+
{
|
13791 |
+
"epoch": 0.01,
|
13792 |
+
"learning_rate": 1.3643868524915881e-05,
|
13793 |
+
"loss": 0.4392,
|
13794 |
+
"step": 1815000
|
13795 |
+
},
|
13796 |
+
{
|
13797 |
+
"epoch": 0.01,
|
13798 |
+
"eval_loss": 0.42043235898017883,
|
13799 |
+
"eval_runtime": 79.2626,
|
13800 |
+
"eval_samples_per_second": 80.744,
|
13801 |
+
"eval_steps_per_second": 0.631,
|
13802 |
+
"step": 1815000
|
13803 |
+
},
|
13804 |
+
{
|
13805 |
+
"epoch": 0.01,
|
13806 |
+
"learning_rate": 1.3605065449912204e-05,
|
13807 |
+
"loss": 0.4395,
|
13808 |
+
"step": 1816000
|
13809 |
+
},
|
13810 |
+
{
|
13811 |
+
"epoch": 0.01,
|
13812 |
+
"learning_rate": 1.3566464625393676e-05,
|
13813 |
+
"loss": 0.4391,
|
13814 |
+
"step": 1817000
|
13815 |
+
},
|
13816 |
+
{
|
13817 |
+
"epoch": 0.01,
|
13818 |
+
"learning_rate": 1.352810446627972e-05,
|
13819 |
+
"loss": 0.4379,
|
13820 |
+
"step": 1818000
|
13821 |
+
},
|
13822 |
+
{
|
13823 |
+
"epoch": 0.01,
|
13824 |
+
"learning_rate": 1.3489908292326226e-05,
|
13825 |
+
"loss": 0.4377,
|
13826 |
+
"step": 1819000
|
13827 |
+
},
|
13828 |
+
{
|
13829 |
+
"epoch": 0.01,
|
13830 |
+
"learning_rate": 1.3451952611981318e-05,
|
13831 |
+
"loss": 0.4389,
|
13832 |
+
"step": 1820000
|
13833 |
+
},
|
13834 |
+
{
|
13835 |
+
"epoch": 0.01,
|
13836 |
+
"eval_loss": 0.41748473048210144,
|
13837 |
+
"eval_runtime": 77.7669,
|
13838 |
+
"eval_samples_per_second": 82.297,
|
13839 |
+
"eval_steps_per_second": 0.643,
|
13840 |
+
"step": 1820000
|
13841 |
+
},
|
13842 |
+
{
|
13843 |
+
"epoch": 0.01,
|
13844 |
+
"learning_rate": 1.3414161553535873e-05,
|
13845 |
+
"loss": 0.4386,
|
13846 |
+
"step": 1821000
|
13847 |
+
},
|
13848 |
+
{
|
13849 |
+
"epoch": 0.01,
|
13850 |
+
"learning_rate": 1.3376573327101957e-05,
|
13851 |
+
"loss": 0.4383,
|
13852 |
+
"step": 1822000
|
13853 |
+
},
|
13854 |
+
{
|
13855 |
+
"epoch": 0.01,
|
13856 |
+
"learning_rate": 1.333918804717982e-05,
|
13857 |
+
"loss": 0.4371,
|
13858 |
+
"step": 1823000
|
13859 |
+
},
|
13860 |
+
{
|
13861 |
+
"epoch": 0.01,
|
13862 |
+
"learning_rate": 1.3302079989360922e-05,
|
13863 |
+
"loss": 0.4369,
|
13864 |
+
"step": 1824000
|
13865 |
+
},
|
13866 |
+
{
|
13867 |
+
"epoch": 0.01,
|
13868 |
+
"learning_rate": 1.3265100537030001e-05,
|
13869 |
+
"loss": 0.4378,
|
13870 |
+
"step": 1825000
|
13871 |
+
},
|
13872 |
+
{
|
13873 |
+
"epoch": 0.01,
|
13874 |
+
"eval_loss": 0.4193136692047119,
|
13875 |
+
"eval_runtime": 79.079,
|
13876 |
+
"eval_samples_per_second": 80.932,
|
13877 |
+
"eval_steps_per_second": 0.632,
|
13878 |
+
"step": 1825000
|
13879 |
+
},
|
13880 |
+
{
|
13881 |
+
"epoch": 0.01,
|
13882 |
+
"learning_rate": 1.3228324370776315e-05,
|
13883 |
+
"loss": 0.4385,
|
13884 |
+
"step": 1826000
|
13885 |
+
},
|
13886 |
+
{
|
13887 |
+
"epoch": 0.01,
|
13888 |
+
"learning_rate": 1.319175160262646e-05,
|
13889 |
+
"loss": 0.4363,
|
13890 |
+
"step": 1827000
|
13891 |
+
},
|
13892 |
+
{
|
13893 |
+
"epoch": 0.01,
|
13894 |
+
"learning_rate": 1.3155418611556128e-05,
|
13895 |
+
"loss": 0.438,
|
13896 |
+
"step": 1828000
|
13897 |
+
},
|
13898 |
+
{
|
13899 |
+
"epoch": 0.01,
|
13900 |
+
"learning_rate": 1.3119252769539538e-05,
|
13901 |
+
"loss": 0.4378,
|
13902 |
+
"step": 1829000
|
13903 |
+
},
|
13904 |
+
{
|
13905 |
+
"epoch": 0.01,
|
13906 |
+
"learning_rate": 1.3083326518189592e-05,
|
13907 |
+
"loss": 0.4377,
|
13908 |
+
"step": 1830000
|
13909 |
+
},
|
13910 |
+
{
|
13911 |
+
"epoch": 0.01,
|
13912 |
+
"eval_loss": 0.4179893732070923,
|
13913 |
+
"eval_runtime": 78.6845,
|
13914 |
+
"eval_samples_per_second": 81.337,
|
13915 |
+
"eval_steps_per_second": 0.635,
|
13916 |
+
"step": 1830000
|
13917 |
+
},
|
13918 |
+
{
|
13919 |
+
"epoch": 0.02,
|
13920 |
+
"learning_rate": 1.3047568042535075e-05,
|
13921 |
+
"loss": 0.4388,
|
13922 |
+
"step": 1831000
|
13923 |
+
},
|
13924 |
+
{
|
13925 |
+
"epoch": 0.02,
|
13926 |
+
"learning_rate": 1.3012013515599501e-05,
|
13927 |
+
"loss": 0.439,
|
13928 |
+
"step": 1832000
|
13929 |
+
},
|
13930 |
+
{
|
13931 |
+
"epoch": 0.02,
|
13932 |
+
"learning_rate": 1.2976698294195656e-05,
|
13933 |
+
"loss": 0.4392,
|
13934 |
+
"step": 1833000
|
13935 |
+
},
|
13936 |
+
{
|
13937 |
+
"epoch": 0.02,
|
13938 |
+
"learning_rate": 1.2941586829267356e-05,
|
13939 |
+
"loss": 0.4378,
|
13940 |
+
"step": 1834000
|
13941 |
+
},
|
13942 |
+
{
|
13943 |
+
"epoch": 0.02,
|
13944 |
+
"learning_rate": 1.2906644387183456e-05,
|
13945 |
+
"loss": 0.4372,
|
13946 |
+
"step": 1835000
|
13947 |
+
},
|
13948 |
+
{
|
13949 |
+
"epoch": 0.02,
|
13950 |
+
"eval_loss": 0.4213528037071228,
|
13951 |
+
"eval_runtime": 77.6423,
|
13952 |
+
"eval_samples_per_second": 82.429,
|
13953 |
+
"eval_steps_per_second": 0.644,
|
13954 |
+
"step": 1835000
|
13955 |
+
},
|
13956 |
+
{
|
13957 |
+
"epoch": 0.02,
|
13958 |
+
"learning_rate": 1.287194095903841e-05,
|
13959 |
+
"loss": 0.4367,
|
13960 |
+
"step": 1836000
|
13961 |
+
},
|
13962 |
+
{
|
13963 |
+
"epoch": 0.02,
|
13964 |
+
"learning_rate": 1.2837407174229876e-05,
|
13965 |
+
"loss": 0.437,
|
13966 |
+
"step": 1837000
|
13967 |
+
},
|
13968 |
+
{
|
13969 |
+
"epoch": 0.02,
|
13970 |
+
"learning_rate": 1.2803077978326747e-05,
|
13971 |
+
"loss": 0.4377,
|
13972 |
+
"step": 1838000
|
13973 |
+
},
|
13974 |
+
{
|
13975 |
+
"epoch": 0.02,
|
13976 |
+
"learning_rate": 1.2768953475901701e-05,
|
13977 |
+
"loss": 0.4383,
|
13978 |
+
"step": 1839000
|
13979 |
+
},
|
13980 |
+
{
|
13981 |
+
"epoch": 0.02,
|
13982 |
+
"learning_rate": 1.2735101405857255e-05,
|
13983 |
+
"loss": 0.4379,
|
13984 |
+
"step": 1840000
|
13985 |
+
},
|
13986 |
+
{
|
13987 |
+
"epoch": 0.02,
|
13988 |
+
"eval_loss": 0.41641688346862793,
|
13989 |
+
"eval_runtime": 79.1386,
|
13990 |
+
"eval_samples_per_second": 80.871,
|
13991 |
+
"eval_steps_per_second": 0.632,
|
13992 |
+
"step": 1840000
|
13993 |
+
},
|
13994 |
+
{
|
13995 |
+
"epoch": 0.02,
|
13996 |
+
"learning_rate": 1.2701386191707756e-05,
|
13997 |
+
"loss": 0.4379,
|
13998 |
+
"step": 1841000
|
13999 |
+
},
|
14000 |
+
{
|
14001 |
+
"epoch": 0.02,
|
14002 |
+
"learning_rate": 1.2667875980807157e-05,
|
14003 |
+
"loss": 0.4384,
|
14004 |
+
"step": 1842000
|
14005 |
+
},
|
14006 |
+
{
|
14007 |
+
"epoch": 0.02,
|
14008 |
+
"learning_rate": 1.2634570875233356e-05,
|
14009 |
+
"loss": 0.4379,
|
14010 |
+
"step": 1843000
|
14011 |
+
},
|
14012 |
+
{
|
14013 |
+
"epoch": 0.02,
|
14014 |
+
"learning_rate": 1.2601470976439498e-05,
|
14015 |
+
"loss": 0.4368,
|
14016 |
+
"step": 1844000
|
14017 |
+
},
|
14018 |
+
{
|
14019 |
+
"epoch": 0.02,
|
14020 |
+
"learning_rate": 1.2568576385253613e-05,
|
14021 |
+
"loss": 0.4379,
|
14022 |
+
"step": 1845000
|
14023 |
+
},
|
14024 |
+
{
|
14025 |
+
"epoch": 0.02,
|
14026 |
+
"eval_loss": 0.41581401228904724,
|
14027 |
+
"eval_runtime": 81.6085,
|
14028 |
+
"eval_samples_per_second": 78.423,
|
14029 |
+
"eval_steps_per_second": 0.613,
|
14030 |
+
"step": 1845000
|
14031 |
+
},
|
14032 |
+
{
|
14033 |
+
"epoch": 0.02,
|
14034 |
+
"learning_rate": 1.2535919788427315e-05,
|
14035 |
+
"loss": 0.4365,
|
14036 |
+
"step": 1846000
|
14037 |
+
},
|
14038 |
+
{
|
14039 |
+
"epoch": 0.02,
|
14040 |
+
"learning_rate": 1.2503435906882624e-05,
|
14041 |
+
"loss": 0.4374,
|
14042 |
+
"step": 1847000
|
14043 |
+
},
|
14044 |
+
{
|
14045 |
+
"epoch": 0.02,
|
14046 |
+
"learning_rate": 1.247115763157773e-05,
|
14047 |
+
"loss": 0.4381,
|
14048 |
+
"step": 1848000
|
14049 |
+
},
|
14050 |
+
{
|
14051 |
+
"epoch": 0.02,
|
14052 |
+
"learning_rate": 1.2439117030626584e-05,
|
14053 |
+
"loss": 0.4368,
|
14054 |
+
"step": 1849000
|
14055 |
+
},
|
14056 |
+
{
|
14057 |
+
"epoch": 0.03,
|
14058 |
+
"learning_rate": 1.2407250056299487e-05,
|
14059 |
+
"loss": 0.4383,
|
14060 |
+
"step": 1850000
|
14061 |
+
},
|
14062 |
+
{
|
14063 |
+
"epoch": 0.03,
|
14064 |
+
"eval_loss": 0.4171139597892761,
|
14065 |
+
"eval_runtime": 78.6968,
|
14066 |
+
"eval_samples_per_second": 81.325,
|
14067 |
+
"eval_steps_per_second": 0.635,
|
14068 |
+
"step": 1850000
|
14069 |
}
|
14070 |
],
|
14071 |
"max_steps": 2000000,
|
14072 |
"num_train_epochs": 9223372036854775807,
|
14073 |
+
"total_flos": 1.6210685567434752e+22,
|
14074 |
"trial_name": null,
|
14075 |
"trial_params": null
|
14076 |
}
|
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5551
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ffbace6af33e15cfb1f1ee5cd7d43fec11995860b2c004e4c591e320c40cf9b
|
3 |
size 5551
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64dd9bb8ac07ad494b77a3974d9a13d4d5d6c9061220ee2632308b55b6ccca8c
|
3 |
size 449471589
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5551
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ffbace6af33e15cfb1f1ee5cd7d43fec11995860b2c004e4c591e320c40cf9b
|
3 |
size 5551
|