Training in progress, step 2000000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +383 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893439185
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:38f36466b9f2b124ce3950f4272937ae40e2fa26880ec00a4e1f83639190fb7d
|
3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5fc35de7c7ab795f6ce22b4d822a3c81dd28eb6da159fa0e6bc70e2d249fbce8
|
3 |
size 449471589
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 21579
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c86960e82d428869302623bd9f7002f37b98a8296d67cde31b64acf1793fdd0e
|
3 |
size 21579
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:26c2c5dcfeda6d6eb5b101bdcd99b94aa97e0eb4affa75fa0e151082e701b9eb
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:994a0fabdb31bb0426e3f82b99b32aaddcc1766fdd4539450b1f928f65099fb8
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -14826,11 +14826,391 @@
|
|
14826 |
"eval_samples_per_second": 82.821,
|
14827 |
"eval_steps_per_second": 0.647,
|
14828 |
"step": 1950000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14829 |
}
|
14830 |
],
|
14831 |
"max_steps": 2000000,
|
14832 |
"num_train_epochs": 9223372036854775807,
|
14833 |
-
"total_flos": 1.
|
14834 |
"trial_name": null,
|
14835 |
"trial_params": null
|
14836 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.1,
|
5 |
+
"global_step": 2000000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
14826 |
"eval_samples_per_second": 82.821,
|
14827 |
"eval_steps_per_second": 0.647,
|
14828 |
"step": 1950000
|
14829 |
+
},
|
14830 |
+
{
|
14831 |
+
"epoch": 0.08,
|
14832 |
+
"learning_rate": 1.026354625870075e-05,
|
14833 |
+
"loss": 0.4364,
|
14834 |
+
"step": 1951000
|
14835 |
+
},
|
14836 |
+
{
|
14837 |
+
"epoch": 0.08,
|
14838 |
+
"learning_rate": 1.0253060901106556e-05,
|
14839 |
+
"loss": 0.4361,
|
14840 |
+
"step": 1952000
|
14841 |
+
},
|
14842 |
+
{
|
14843 |
+
"epoch": 0.08,
|
14844 |
+
"learning_rate": 1.0242798171546145e-05,
|
14845 |
+
"loss": 0.4365,
|
14846 |
+
"step": 1953000
|
14847 |
+
},
|
14848 |
+
{
|
14849 |
+
"epoch": 0.08,
|
14850 |
+
"learning_rate": 1.0232747509747644e-05,
|
14851 |
+
"loss": 0.4373,
|
14852 |
+
"step": 1954000
|
14853 |
+
},
|
14854 |
+
{
|
14855 |
+
"epoch": 0.08,
|
14856 |
+
"learning_rate": 1.0222899204125646e-05,
|
14857 |
+
"loss": 0.4362,
|
14858 |
+
"step": 1955000
|
14859 |
+
},
|
14860 |
+
{
|
14861 |
+
"epoch": 0.08,
|
14862 |
+
"eval_loss": 0.4164978265762329,
|
14863 |
+
"eval_runtime": 80.0596,
|
14864 |
+
"eval_samples_per_second": 79.94,
|
14865 |
+
"eval_steps_per_second": 0.625,
|
14866 |
+
"step": 1955000
|
14867 |
+
},
|
14868 |
+
{
|
14869 |
+
"epoch": 0.08,
|
14870 |
+
"learning_rate": 1.0213263451653737e-05,
|
14871 |
+
"loss": 0.4367,
|
14872 |
+
"step": 1956000
|
14873 |
+
},
|
14874 |
+
{
|
14875 |
+
"epoch": 0.08,
|
14876 |
+
"learning_rate": 1.0203849598659497e-05,
|
14877 |
+
"loss": 0.4367,
|
14878 |
+
"step": 1957000
|
14879 |
+
},
|
14880 |
+
{
|
14881 |
+
"epoch": 0.08,
|
14882 |
+
"learning_rate": 1.0194638827271399e-05,
|
14883 |
+
"loss": 0.4364,
|
14884 |
+
"step": 1958000
|
14885 |
+
},
|
14886 |
+
{
|
14887 |
+
"epoch": 0.08,
|
14888 |
+
"learning_rate": 1.0185640695119401e-05,
|
14889 |
+
"loss": 0.4363,
|
14890 |
+
"step": 1959000
|
14891 |
+
},
|
14892 |
+
{
|
14893 |
+
"epoch": 0.08,
|
14894 |
+
"learning_rate": 1.017685522961337e-05,
|
14895 |
+
"loss": 0.4362,
|
14896 |
+
"step": 1960000
|
14897 |
+
},
|
14898 |
+
{
|
14899 |
+
"epoch": 0.08,
|
14900 |
+
"eval_loss": 0.42052188515663147,
|
14901 |
+
"eval_runtime": 77.8558,
|
14902 |
+
"eval_samples_per_second": 82.203,
|
14903 |
+
"eval_steps_per_second": 0.642,
|
14904 |
+
"step": 1960000
|
14905 |
+
},
|
14906 |
+
{
|
14907 |
+
"epoch": 0.08,
|
14908 |
+
"learning_rate": 1.0168282457515363e-05,
|
14909 |
+
"loss": 0.4369,
|
14910 |
+
"step": 1961000
|
14911 |
+
},
|
14912 |
+
{
|
14913 |
+
"epoch": 0.08,
|
14914 |
+
"learning_rate": 1.0159930658730172e-05,
|
14915 |
+
"loss": 0.4364,
|
14916 |
+
"step": 1962000
|
14917 |
+
},
|
14918 |
+
{
|
14919 |
+
"epoch": 0.08,
|
14920 |
+
"learning_rate": 1.0151791179631108e-05,
|
14921 |
+
"loss": 0.4359,
|
14922 |
+
"step": 1963000
|
14923 |
+
},
|
14924 |
+
{
|
14925 |
+
"epoch": 0.08,
|
14926 |
+
"learning_rate": 1.0143856216286122e-05,
|
14927 |
+
"loss": 0.4368,
|
14928 |
+
"step": 1964000
|
14929 |
+
},
|
14930 |
+
{
|
14931 |
+
"epoch": 0.08,
|
14932 |
+
"learning_rate": 1.0136134046869866e-05,
|
14933 |
+
"loss": 0.4357,
|
14934 |
+
"step": 1965000
|
14935 |
+
},
|
14936 |
+
{
|
14937 |
+
"epoch": 0.08,
|
14938 |
+
"eval_loss": 0.41740044951438904,
|
14939 |
+
"eval_runtime": 78.1991,
|
14940 |
+
"eval_samples_per_second": 81.842,
|
14941 |
+
"eval_steps_per_second": 0.639,
|
14942 |
+
"step": 1965000
|
14943 |
+
},
|
14944 |
+
{
|
14945 |
+
"epoch": 0.08,
|
14946 |
+
"learning_rate": 1.0128632097947403e-05,
|
14947 |
+
"loss": 0.4365,
|
14948 |
+
"step": 1966000
|
14949 |
+
},
|
14950 |
+
{
|
14951 |
+
"epoch": 0.08,
|
14952 |
+
"learning_rate": 1.0121335373458022e-05,
|
14953 |
+
"loss": 0.4362,
|
14954 |
+
"step": 1967000
|
14955 |
+
},
|
14956 |
+
{
|
14957 |
+
"epoch": 0.08,
|
14958 |
+
"learning_rate": 1.011425151149977e-05,
|
14959 |
+
"loss": 0.4361,
|
14960 |
+
"step": 1968000
|
14961 |
+
},
|
14962 |
+
{
|
14963 |
+
"epoch": 0.08,
|
14964 |
+
"learning_rate": 1.010738729828653e-05,
|
14965 |
+
"loss": 0.4375,
|
14966 |
+
"step": 1969000
|
14967 |
+
},
|
14968 |
+
{
|
14969 |
+
"epoch": 0.09,
|
14970 |
+
"learning_rate": 1.0100729012562797e-05,
|
14971 |
+
"loss": 0.4372,
|
14972 |
+
"step": 1970000
|
14973 |
+
},
|
14974 |
+
{
|
14975 |
+
"epoch": 0.09,
|
14976 |
+
"eval_loss": 0.4145086705684662,
|
14977 |
+
"eval_runtime": 79.8319,
|
14978 |
+
"eval_samples_per_second": 80.168,
|
14979 |
+
"eval_steps_per_second": 0.626,
|
14980 |
+
"step": 1970000
|
14981 |
+
},
|
14982 |
+
{
|
14983 |
+
"epoch": 0.09,
|
14984 |
+
"learning_rate": 1.0094289991138392e-05,
|
14985 |
+
"loss": 0.4363,
|
14986 |
+
"step": 1971000
|
14987 |
+
},
|
14988 |
+
{
|
14989 |
+
"epoch": 0.09,
|
14990 |
+
"learning_rate": 1.0088057362697175e-05,
|
14991 |
+
"loss": 0.4375,
|
14992 |
+
"step": 1972000
|
14993 |
+
},
|
14994 |
+
{
|
14995 |
+
"epoch": 0.09,
|
14996 |
+
"learning_rate": 1.0082049524936494e-05,
|
14997 |
+
"loss": 0.4372,
|
14998 |
+
"step": 1973000
|
14999 |
+
},
|
15000 |
+
{
|
15001 |
+
"epoch": 0.09,
|
15002 |
+
"learning_rate": 1.0076242416653332e-05,
|
15003 |
+
"loss": 0.4349,
|
15004 |
+
"step": 1974000
|
15005 |
+
},
|
15006 |
+
{
|
15007 |
+
"epoch": 0.09,
|
15008 |
+
"learning_rate": 1.0070648308262255e-05,
|
15009 |
+
"loss": 0.436,
|
15010 |
+
"step": 1975000
|
15011 |
+
},
|
15012 |
+
{
|
15013 |
+
"epoch": 0.09,
|
15014 |
+
"eval_loss": 0.4151042103767395,
|
15015 |
+
"eval_runtime": 79.0273,
|
15016 |
+
"eval_samples_per_second": 80.985,
|
15017 |
+
"eval_steps_per_second": 0.633,
|
15018 |
+
"step": 1975000
|
15019 |
+
},
|
15020 |
+
{
|
15021 |
+
"epoch": 0.09,
|
15022 |
+
"learning_rate": 1.006526721680391e-05,
|
15023 |
+
"loss": 0.4342,
|
15024 |
+
"step": 1976000
|
15025 |
+
},
|
15026 |
+
{
|
15027 |
+
"epoch": 0.09,
|
15028 |
+
"learning_rate": 1.0060099158670026e-05,
|
15029 |
+
"loss": 0.4363,
|
15030 |
+
"step": 1977000
|
15031 |
+
},
|
15032 |
+
{
|
15033 |
+
"epoch": 0.09,
|
15034 |
+
"learning_rate": 1.0055148998189381e-05,
|
15035 |
+
"loss": 0.437,
|
15036 |
+
"step": 1978000
|
15037 |
+
},
|
15038 |
+
{
|
15039 |
+
"epoch": 0.09,
|
15040 |
+
"learning_rate": 1.0050411475939925e-05,
|
15041 |
+
"loss": 0.436,
|
15042 |
+
"step": 1979000
|
15043 |
+
},
|
15044 |
+
{
|
15045 |
+
"epoch": 0.09,
|
15046 |
+
"learning_rate": 1.0045882183469046e-05,
|
15047 |
+
"loss": 0.4355,
|
15048 |
+
"step": 1980000
|
15049 |
+
},
|
15050 |
+
{
|
15051 |
+
"epoch": 0.09,
|
15052 |
+
"eval_loss": 0.4141569435596466,
|
15053 |
+
"eval_runtime": 79.5726,
|
15054 |
+
"eval_samples_per_second": 80.43,
|
15055 |
+
"eval_steps_per_second": 0.628,
|
15056 |
+
"step": 1980000
|
15057 |
+
},
|
15058 |
+
{
|
15059 |
+
"epoch": 0.09,
|
15060 |
+
"learning_rate": 1.0041565983372807e-05,
|
15061 |
+
"loss": 0.4359,
|
15062 |
+
"step": 1981000
|
15063 |
+
},
|
15064 |
+
{
|
15065 |
+
"epoch": 0.09,
|
15066 |
+
"learning_rate": 1.0037462888799093e-05,
|
15067 |
+
"loss": 0.4362,
|
15068 |
+
"step": 1982000
|
15069 |
+
},
|
15070 |
+
{
|
15071 |
+
"epoch": 0.09,
|
15072 |
+
"learning_rate": 1.0033576695766748e-05,
|
15073 |
+
"loss": 0.4376,
|
15074 |
+
"step": 1983000
|
15075 |
+
},
|
15076 |
+
{
|
15077 |
+
"epoch": 0.09,
|
15078 |
+
"learning_rate": 1.0029899635949539e-05,
|
15079 |
+
"loss": 0.4373,
|
15080 |
+
"step": 1984000
|
15081 |
+
},
|
15082 |
+
{
|
15083 |
+
"epoch": 0.09,
|
15084 |
+
"learning_rate": 1.0026435717192568e-05,
|
15085 |
+
"loss": 0.4367,
|
15086 |
+
"step": 1985000
|
15087 |
+
},
|
15088 |
+
{
|
15089 |
+
"epoch": 0.09,
|
15090 |
+
"eval_loss": 0.4171934127807617,
|
15091 |
+
"eval_runtime": 77.9474,
|
15092 |
+
"eval_samples_per_second": 82.107,
|
15093 |
+
"eval_steps_per_second": 0.641,
|
15094 |
+
"step": 1985000
|
15095 |
+
},
|
15096 |
+
{
|
15097 |
+
"epoch": 0.09,
|
15098 |
+
"learning_rate": 1.0023184950047551e-05,
|
15099 |
+
"loss": 0.4361,
|
15100 |
+
"step": 1986000
|
15101 |
+
},
|
15102 |
+
{
|
15103 |
+
"epoch": 0.09,
|
15104 |
+
"learning_rate": 1.002015027554519e-05,
|
15105 |
+
"loss": 0.4377,
|
15106 |
+
"step": 1987000
|
15107 |
+
},
|
15108 |
+
{
|
15109 |
+
"epoch": 0.09,
|
15110 |
+
"learning_rate": 1.0017325627506754e-05,
|
15111 |
+
"loss": 0.4373,
|
15112 |
+
"step": 1988000
|
15113 |
+
},
|
15114 |
+
{
|
15115 |
+
"epoch": 0.09,
|
15116 |
+
"learning_rate": 1.0014716663814055e-05,
|
15117 |
+
"loss": 0.4368,
|
15118 |
+
"step": 1989000
|
15119 |
+
},
|
15120 |
+
{
|
15121 |
+
"epoch": 0.1,
|
15122 |
+
"learning_rate": 1.0012320461270247e-05,
|
15123 |
+
"loss": 0.4358,
|
15124 |
+
"step": 1990000
|
15125 |
+
},
|
15126 |
+
{
|
15127 |
+
"epoch": 0.1,
|
15128 |
+
"eval_loss": 0.41612717509269714,
|
15129 |
+
"eval_runtime": 80.5577,
|
15130 |
+
"eval_samples_per_second": 79.446,
|
15131 |
+
"eval_steps_per_second": 0.621,
|
15132 |
+
"step": 1990000
|
15133 |
+
},
|
15134 |
+
{
|
15135 |
+
"epoch": 0.1,
|
15136 |
+
"learning_rate": 1.0010134948139825e-05,
|
15137 |
+
"loss": 0.4366,
|
15138 |
+
"step": 1991000
|
15139 |
+
},
|
15140 |
+
{
|
15141 |
+
"epoch": 0.1,
|
15142 |
+
"learning_rate": 1.0008162636276321e-05,
|
15143 |
+
"loss": 0.4369,
|
15144 |
+
"step": 1992000
|
15145 |
+
},
|
15146 |
+
{
|
15147 |
+
"epoch": 0.1,
|
15148 |
+
"learning_rate": 1.0006403531687724e-05,
|
15149 |
+
"loss": 0.4372,
|
15150 |
+
"step": 1993000
|
15151 |
+
},
|
15152 |
+
{
|
15153 |
+
"epoch": 0.1,
|
15154 |
+
"learning_rate": 1.0004859079123212e-05,
|
15155 |
+
"loss": 0.4361,
|
15156 |
+
"step": 1994000
|
15157 |
+
},
|
15158 |
+
{
|
15159 |
+
"epoch": 0.1,
|
15160 |
+
"learning_rate": 1.0003526191291106e-05,
|
15161 |
+
"loss": 0.4369,
|
15162 |
+
"step": 1995000
|
15163 |
+
},
|
15164 |
+
{
|
15165 |
+
"epoch": 0.1,
|
15166 |
+
"eval_loss": 0.4170204997062683,
|
15167 |
+
"eval_runtime": 80.1918,
|
15168 |
+
"eval_samples_per_second": 79.809,
|
15169 |
+
"eval_steps_per_second": 0.624,
|
15170 |
+
"step": 1995000
|
15171 |
+
},
|
15172 |
+
{
|
15173 |
+
"epoch": 0.1,
|
15174 |
+
"learning_rate": 1.0002406524857334e-05,
|
15175 |
+
"loss": 0.436,
|
15176 |
+
"step": 1996000
|
15177 |
+
},
|
15178 |
+
{
|
15179 |
+
"epoch": 0.1,
|
15180 |
+
"learning_rate": 1.0001500883167451e-05,
|
15181 |
+
"loss": 0.4372,
|
15182 |
+
"step": 1997000
|
15183 |
+
},
|
15184 |
+
{
|
15185 |
+
"epoch": 0.1,
|
15186 |
+
"learning_rate": 1.0000807455884181e-05,
|
15187 |
+
"loss": 0.4369,
|
15188 |
+
"step": 1998000
|
15189 |
+
},
|
15190 |
+
{
|
15191 |
+
"epoch": 0.1,
|
15192 |
+
"learning_rate": 1.0000327631969819e-05,
|
15193 |
+
"loss": 0.4362,
|
15194 |
+
"step": 1999000
|
15195 |
+
},
|
15196 |
+
{
|
15197 |
+
"epoch": 0.1,
|
15198 |
+
"learning_rate": 1.00000604522778e-05,
|
15199 |
+
"loss": 0.4363,
|
15200 |
+
"step": 2000000
|
15201 |
+
},
|
15202 |
+
{
|
15203 |
+
"epoch": 0.1,
|
15204 |
+
"eval_loss": 0.41442054510116577,
|
15205 |
+
"eval_runtime": 79.9098,
|
15206 |
+
"eval_samples_per_second": 80.09,
|
15207 |
+
"eval_steps_per_second": 0.626,
|
15208 |
+
"step": 2000000
|
15209 |
}
|
15210 |
],
|
15211 |
"max_steps": 2000000,
|
15212 |
"num_train_epochs": 9223372036854775807,
|
15213 |
+
"total_flos": 1.752506547830784e+22,
|
15214 |
"trial_name": null,
|
15215 |
"trial_params": null
|
15216 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5fc35de7c7ab795f6ce22b4d822a3c81dd28eb6da159fa0e6bc70e2d249fbce8
|
3 |
size 449471589
|