Training in progress, step 980000
Browse files
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893441093
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f583ea7c93bc66a9ef8a4e308c8dc630c9ff1c01fbc3af28ec5ab02ff4f79681
|
3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449474181
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3aff5625e82bdff15b343b27e9d6a9b3efa1fcccca6fcaa49aaf091e12be1731
|
3 |
size 449474181
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17577
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70a20325b16659225e3cd01033074400c13a25610958f69aa9e3e72a98f16147
|
3 |
size 17577
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:43c4cc1073d6660901cb3dc90d6cc9fa2b9e92470c9b5b2cfa7f8fe0038e768d
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -7184,11 +7184,85 @@
|
|
7184 |
"eval_samples_per_second": 168.183,
|
7185 |
"eval_steps_per_second": 5.257,
|
7186 |
"step": 970000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7187 |
}
|
7188 |
],
|
7189 |
"max_steps": 1000000,
|
7190 |
"num_train_epochs": 72,
|
7191 |
-
"total_flos": 3.
|
7192 |
"trial_name": null,
|
7193 |
"trial_params": null
|
7194 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 69.69136680415303,
|
5 |
+
"global_step": 980000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
7184 |
"eval_samples_per_second": 168.183,
|
7185 |
"eval_steps_per_second": 5.257,
|
7186 |
"step": 970000
|
7187 |
+
},
|
7188 |
+
{
|
7189 |
+
"epoch": 69.05,
|
7190 |
+
"learning_rate": 1.0149337548616087e-05,
|
7191 |
+
"loss": 0.1075,
|
7192 |
+
"step": 971000
|
7193 |
+
},
|
7194 |
+
{
|
7195 |
+
"epoch": 69.12,
|
7196 |
+
"learning_rate": 1.0139223211606578e-05,
|
7197 |
+
"loss": 0.1076,
|
7198 |
+
"step": 972000
|
7199 |
+
},
|
7200 |
+
{
|
7201 |
+
"epoch": 69.19,
|
7202 |
+
"learning_rate": 1.012946276686737e-05,
|
7203 |
+
"loss": 0.1078,
|
7204 |
+
"step": 973000
|
7205 |
+
},
|
7206 |
+
{
|
7207 |
+
"epoch": 69.26,
|
7208 |
+
"learning_rate": 1.0120056321137129e-05,
|
7209 |
+
"loss": 0.1079,
|
7210 |
+
"step": 974000
|
7211 |
+
},
|
7212 |
+
{
|
7213 |
+
"epoch": 69.34,
|
7214 |
+
"learning_rate": 1.0111003977283231e-05,
|
7215 |
+
"loss": 0.1074,
|
7216 |
+
"step": 975000
|
7217 |
+
},
|
7218 |
+
{
|
7219 |
+
"epoch": 69.34,
|
7220 |
+
"eval_runtime": 524.9926,
|
7221 |
+
"eval_samples_per_second": 167.456,
|
7222 |
+
"eval_steps_per_second": 5.234,
|
7223 |
+
"step": 975000
|
7224 |
+
},
|
7225 |
+
{
|
7226 |
+
"epoch": 69.41,
|
7227 |
+
"learning_rate": 1.010230583430066e-05,
|
7228 |
+
"loss": 0.1073,
|
7229 |
+
"step": 976000
|
7230 |
+
},
|
7231 |
+
{
|
7232 |
+
"epoch": 69.48,
|
7233 |
+
"learning_rate": 1.009396198731092e-05,
|
7234 |
+
"loss": 0.1079,
|
7235 |
+
"step": 977000
|
7236 |
+
},
|
7237 |
+
{
|
7238 |
+
"epoch": 69.55,
|
7239 |
+
"learning_rate": 1.0085972527560981e-05,
|
7240 |
+
"loss": 0.1072,
|
7241 |
+
"step": 978000
|
7242 |
+
},
|
7243 |
+
{
|
7244 |
+
"epoch": 69.62,
|
7245 |
+
"learning_rate": 1.0078337542422287e-05,
|
7246 |
+
"loss": 0.1077,
|
7247 |
+
"step": 979000
|
7248 |
+
},
|
7249 |
+
{
|
7250 |
+
"epoch": 69.69,
|
7251 |
+
"learning_rate": 1.0071057115389828e-05,
|
7252 |
+
"loss": 0.108,
|
7253 |
+
"step": 980000
|
7254 |
+
},
|
7255 |
+
{
|
7256 |
+
"epoch": 69.69,
|
7257 |
+
"eval_runtime": 537.8359,
|
7258 |
+
"eval_samples_per_second": 163.457,
|
7259 |
+
"eval_steps_per_second": 5.109,
|
7260 |
+
"step": 980000
|
7261 |
}
|
7262 |
],
|
7263 |
"max_steps": 1000000,
|
7264 |
"num_train_epochs": 72,
|
7265 |
+
"total_flos": 3.4348958289582365e+22,
|
7266 |
"trial_name": null,
|
7267 |
"trial_params": null
|
7268 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449474181
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3aff5625e82bdff15b343b27e9d6a9b3efa1fcccca6fcaa49aaf091e12be1731
|
3 |
size 449474181
|