HealthTeam
committed on
Commit
•
e55d09d
1
Parent(s):
27ca8bc
Training in progress, step 44192
Browse files
- last-checkpoint/generation_config.json +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +144 -3
- pytorch_model.bin +1 -1
- runs/Feb14_18-29-07_39730b194efc/events.out.tfevents.1676399391.39730b194efc.3664.0 +2 -2
last-checkpoint/generation_config.json
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"_from_model_config": true,
|
3 |
"decoder_start_token_id": 0,
|
4 |
"eos_token_id": 1,
|
5 |
-
"max_length":
|
6 |
"pad_token_id": 0,
|
7 |
"transformers_version": "4.26.1"
|
8 |
}
|
|
|
2 |
"_from_model_config": true,
|
3 |
"decoder_start_token_id": 0,
|
4 |
"eos_token_id": 1,
|
5 |
+
"max_length": 1024,
|
6 |
"pad_token_id": 0,
|
7 |
"transformers_version": "4.26.1"
|
8 |
}
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2401461253
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:735d18dafec0eb0d88dc8f14c6729fbeaedd8e691030fc2db5b2412727e9aee7
|
3 |
size 2401461253
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1200739717
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:34ff48fe0ad9782c0ff82a7ab7cbb2a6863c809cb138abf98a7aebec9da7688d
|
3 |
size 1200739717
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9985ba16d61e95c50d1b9cec08c60d6a3a483569fabd9a37aabc0a1c5f05bc9b
|
3 |
size 14575
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:932a3fd2a2c6fac0cf60b74f87ec91fc889b1355ae07661a0816ca1fa6dd613f
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -402,11 +402,152 @@
|
|
402 |
"learning_rate": 1.3454523816607659e-05,
|
403 |
"loss": 2.4452,
|
404 |
"step": 33000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
405 |
}
|
406 |
],
|
407 |
"max_steps": 100833,
|
408 |
"num_train_epochs": 3,
|
409 |
-
"total_flos":
|
410 |
"trial_name": null,
|
411 |
"trial_params": null
|
412 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.3148076522567016,
|
5 |
+
"global_step": 44192,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
402 |
"learning_rate": 1.3454523816607659e-05,
|
403 |
"loss": 2.4452,
|
404 |
"step": 33000
|
405 |
+
},
|
406 |
+
{
|
407 |
+
"epoch": 1.0,
|
408 |
+
"learning_rate": 1.3355349935041108e-05,
|
409 |
+
"loss": 2.4438,
|
410 |
+
"step": 33500
|
411 |
+
},
|
412 |
+
{
|
413 |
+
"epoch": 1.0,
|
414 |
+
"eval_bleu": 16.655484084216944,
|
415 |
+
"eval_loss": 2.04453706741333,
|
416 |
+
"eval_runtime": 4558.2525,
|
417 |
+
"eval_samples_per_second": 8.87,
|
418 |
+
"eval_steps_per_second": 0.277,
|
419 |
+
"step": 33611
|
420 |
+
},
|
421 |
+
{
|
422 |
+
"epoch": 1.01,
|
423 |
+
"learning_rate": 1.3256176053474558e-05,
|
424 |
+
"loss": 2.44,
|
425 |
+
"step": 34000
|
426 |
+
},
|
427 |
+
{
|
428 |
+
"epoch": 1.03,
|
429 |
+
"learning_rate": 1.3157002171908007e-05,
|
430 |
+
"loss": 2.4188,
|
431 |
+
"step": 34500
|
432 |
+
},
|
433 |
+
{
|
434 |
+
"epoch": 1.04,
|
435 |
+
"learning_rate": 1.3057828290341456e-05,
|
436 |
+
"loss": 2.4264,
|
437 |
+
"step": 35000
|
438 |
+
},
|
439 |
+
{
|
440 |
+
"epoch": 1.06,
|
441 |
+
"learning_rate": 1.2958654408774906e-05,
|
442 |
+
"loss": 2.4337,
|
443 |
+
"step": 35500
|
444 |
+
},
|
445 |
+
{
|
446 |
+
"epoch": 1.07,
|
447 |
+
"learning_rate": 1.2859480527208354e-05,
|
448 |
+
"loss": 2.433,
|
449 |
+
"step": 36000
|
450 |
+
},
|
451 |
+
{
|
452 |
+
"epoch": 1.09,
|
453 |
+
"learning_rate": 1.2760306645641804e-05,
|
454 |
+
"loss": 2.4277,
|
455 |
+
"step": 36500
|
456 |
+
},
|
457 |
+
{
|
458 |
+
"epoch": 1.1,
|
459 |
+
"learning_rate": 1.2661132764075254e-05,
|
460 |
+
"loss": 2.43,
|
461 |
+
"step": 37000
|
462 |
+
},
|
463 |
+
{
|
464 |
+
"epoch": 1.12,
|
465 |
+
"learning_rate": 1.2561958882508702e-05,
|
466 |
+
"loss": 2.4343,
|
467 |
+
"step": 37500
|
468 |
+
},
|
469 |
+
{
|
470 |
+
"epoch": 1.13,
|
471 |
+
"learning_rate": 1.2462785000942152e-05,
|
472 |
+
"loss": 2.4278,
|
473 |
+
"step": 38000
|
474 |
+
},
|
475 |
+
{
|
476 |
+
"epoch": 1.15,
|
477 |
+
"learning_rate": 1.2363611119375602e-05,
|
478 |
+
"loss": 2.4462,
|
479 |
+
"step": 38500
|
480 |
+
},
|
481 |
+
{
|
482 |
+
"epoch": 1.16,
|
483 |
+
"learning_rate": 1.2264437237809052e-05,
|
484 |
+
"loss": 2.4115,
|
485 |
+
"step": 39000
|
486 |
+
},
|
487 |
+
{
|
488 |
+
"epoch": 1.18,
|
489 |
+
"learning_rate": 1.21652633562425e-05,
|
490 |
+
"loss": 2.426,
|
491 |
+
"step": 39500
|
492 |
+
},
|
493 |
+
{
|
494 |
+
"epoch": 1.19,
|
495 |
+
"learning_rate": 1.206608947467595e-05,
|
496 |
+
"loss": 2.4384,
|
497 |
+
"step": 40000
|
498 |
+
},
|
499 |
+
{
|
500 |
+
"epoch": 1.2,
|
501 |
+
"learning_rate": 1.19669155931094e-05,
|
502 |
+
"loss": 2.418,
|
503 |
+
"step": 40500
|
504 |
+
},
|
505 |
+
{
|
506 |
+
"epoch": 1.22,
|
507 |
+
"learning_rate": 1.1867741711542848e-05,
|
508 |
+
"loss": 2.4409,
|
509 |
+
"step": 41000
|
510 |
+
},
|
511 |
+
{
|
512 |
+
"epoch": 1.23,
|
513 |
+
"learning_rate": 1.1768567829976298e-05,
|
514 |
+
"loss": 2.4147,
|
515 |
+
"step": 41500
|
516 |
+
},
|
517 |
+
{
|
518 |
+
"epoch": 1.25,
|
519 |
+
"learning_rate": 1.1669393948409748e-05,
|
520 |
+
"loss": 2.4325,
|
521 |
+
"step": 42000
|
522 |
+
},
|
523 |
+
{
|
524 |
+
"epoch": 1.26,
|
525 |
+
"learning_rate": 1.1570220066843196e-05,
|
526 |
+
"loss": 2.4317,
|
527 |
+
"step": 42500
|
528 |
+
},
|
529 |
+
{
|
530 |
+
"epoch": 1.28,
|
531 |
+
"learning_rate": 1.1471046185276646e-05,
|
532 |
+
"loss": 2.4253,
|
533 |
+
"step": 43000
|
534 |
+
},
|
535 |
+
{
|
536 |
+
"epoch": 1.29,
|
537 |
+
"learning_rate": 1.1371872303710096e-05,
|
538 |
+
"loss": 2.4249,
|
539 |
+
"step": 43500
|
540 |
+
},
|
541 |
+
{
|
542 |
+
"epoch": 1.31,
|
543 |
+
"learning_rate": 1.1272698422143544e-05,
|
544 |
+
"loss": 2.4322,
|
545 |
+
"step": 44000
|
546 |
}
|
547 |
],
|
548 |
"max_steps": 100833,
|
549 |
"num_train_epochs": 3,
|
550 |
+
"total_flos": 1.1901350172465562e+17,
|
551 |
"trial_name": null,
|
552 |
"trial_params": null
|
553 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1200739717
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:34ff48fe0ad9782c0ff82a7ab7cbb2a6863c809cb138abf98a7aebec9da7688d
|
3 |
size 1200739717
|
runs/Feb14_18-29-07_39730b194efc/events.out.tfevents.1676399391.39730b194efc.3664.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d9dfbf22943453ae081381839560fdfbdba7c8a881a29e0a7f925e30d18ae48
|
3 |
+
size 18672
|