Training in progress, step 990000
Browse files
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893441093
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa56c6244a84a771cf2984aa63900b7128ad40dd69818d54b918bf9b35954fe3
|
3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449474181
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45e8cc279c772df75f8d61748adcf8cab41db1f2f7207255b9b48edc3dca234c
|
3 |
size 449474181
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d5b60e0a007a854b2858ef033ce2307ff2527680a1a2fca671d1fe620a358a8
|
3 |
+
size 17641
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28cacbced11de35b0d6bed5b65452853709959052dcbd10ec95163f3114bda17
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -7258,11 +7258,85 @@
|
|
7258 |
"eval_samples_per_second": 163.457,
|
7259 |
"eval_steps_per_second": 5.109,
|
7260 |
"step": 980000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7261 |
}
|
7262 |
],
|
7263 |
"max_steps": 1000000,
|
7264 |
"num_train_epochs": 72,
|
7265 |
-
"total_flos": 3.
|
7266 |
"trial_name": null,
|
7267 |
"trial_params": null
|
7268 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 70.40250320011378,
|
5 |
+
"global_step": 990000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
7258 |
"eval_samples_per_second": 163.457,
|
7259 |
"eval_steps_per_second": 5.109,
|
7260 |
"step": 980000
|
7261 |
+
},
|
7262 |
+
{
|
7263 |
+
"epoch": 69.76,
|
7264 |
+
"learning_rate": 1.0064131326081175e-05,
|
7265 |
+
"loss": 0.1074,
|
7266 |
+
"step": 981000
|
7267 |
+
},
|
7268 |
+
{
|
7269 |
+
"epoch": 69.83,
|
7270 |
+
"learning_rate": 1.0057560250235656e-05,
|
7271 |
+
"loss": 0.1075,
|
7272 |
+
"step": 982000
|
7273 |
+
},
|
7274 |
+
{
|
7275 |
+
"epoch": 69.9,
|
7276 |
+
"learning_rate": 1.0051343959713499e-05,
|
7277 |
+
"loss": 0.1075,
|
7278 |
+
"step": 983000
|
7279 |
+
},
|
7280 |
+
{
|
7281 |
+
"epoch": 69.98,
|
7282 |
+
"learning_rate": 1.0045482522495065e-05,
|
7283 |
+
"loss": 0.1075,
|
7284 |
+
"step": 984000
|
7285 |
+
},
|
7286 |
+
{
|
7287 |
+
"epoch": 70.05,
|
7288 |
+
"learning_rate": 1.0039976002680097e-05,
|
7289 |
+
"loss": 0.1076,
|
7290 |
+
"step": 985000
|
7291 |
+
},
|
7292 |
+
{
|
7293 |
+
"epoch": 70.05,
|
7294 |
+
"eval_runtime": 519.6166,
|
7295 |
+
"eval_samples_per_second": 169.188,
|
7296 |
+
"eval_steps_per_second": 5.289,
|
7297 |
+
"step": 985000
|
7298 |
+
},
|
7299 |
+
{
|
7300 |
+
"epoch": 70.12,
|
7301 |
+
"learning_rate": 1.0034824460487e-05,
|
7302 |
+
"loss": 0.1072,
|
7303 |
+
"step": 986000
|
7304 |
+
},
|
7305 |
+
{
|
7306 |
+
"epoch": 70.19,
|
7307 |
+
"learning_rate": 1.003002795225223e-05,
|
7308 |
+
"loss": 0.1071,
|
7309 |
+
"step": 987000
|
7310 |
+
},
|
7311 |
+
{
|
7312 |
+
"epoch": 70.26,
|
7313 |
+
"learning_rate": 1.0025586530429617e-05,
|
7314 |
+
"loss": 0.1076,
|
7315 |
+
"step": 988000
|
7316 |
+
},
|
7317 |
+
{
|
7318 |
+
"epoch": 70.33,
|
7319 |
+
"learning_rate": 1.0021500243589852e-05,
|
7320 |
+
"loss": 0.1073,
|
7321 |
+
"step": 989000
|
7322 |
+
},
|
7323 |
+
{
|
7324 |
+
"epoch": 70.4,
|
7325 |
+
"learning_rate": 1.0017769136419904e-05,
|
7326 |
+
"loss": 0.107,
|
7327 |
+
"step": 990000
|
7328 |
+
},
|
7329 |
+
{
|
7330 |
+
"epoch": 70.4,
|
7331 |
+
"eval_runtime": 526.375,
|
7332 |
+
"eval_samples_per_second": 167.016,
|
7333 |
+
"eval_steps_per_second": 5.221,
|
7334 |
+
"step": 990000
|
7335 |
}
|
7336 |
],
|
7337 |
"max_steps": 1000000,
|
7338 |
"num_train_epochs": 72,
|
7339 |
+
"total_flos": 3.4699457134686187e+22,
|
7340 |
"trial_name": null,
|
7341 |
"trial_params": null
|
7342 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449474181
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45e8cc279c772df75f8d61748adcf8cab41db1f2f7207255b9b48edc3dca234c
|
3 |
size 449474181
|