Training in progress, step 1950000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +383 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893439185
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:24480137122a3ca1298b2aa2acbf1d8e05d75ba9f182abd41ff9618c60e00071
|
3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4158aaedff079b2378ceb72199c920ad399c00fbc03838dbc3a2204ee0d64219
|
3 |
size 449471589
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 21579
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a16c585a386790723cc51bc4a838a254dc71110b475f7ebf887ed7011d90a8f
|
3 |
size 21579
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:abaeb1638369c701afb9b3b4e706b5c028681adb6ebf26ba2bfe37402d287efd
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e2c8322c0057a49117b93f76b6d690bf483c56843cf994e2b3614611effcb47d
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -14446,11 +14446,391 @@
|
|
14446 |
"eval_samples_per_second": 79.467,
|
14447 |
"eval_steps_per_second": 0.621,
|
14448 |
"step": 1900000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14449 |
}
|
14450 |
],
|
14451 |
"max_steps": 2000000,
|
14452 |
"num_train_epochs": 9223372036854775807,
|
14453 |
-
"total_flos": 1.
|
14454 |
"trial_name": null,
|
14455 |
"trial_params": null
|
14456 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.075,
|
5 |
+
"global_step": 1950000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
14446 |
"eval_samples_per_second": 79.467,
|
14447 |
"eval_steps_per_second": 0.621,
|
14448 |
"step": 1900000
|
14449 |
+
},
|
14450 |
+
{
|
14451 |
+
"epoch": 0.05,
|
14452 |
+
"learning_rate": 1.105744066188684e-05,
|
14453 |
+
"loss": 0.4371,
|
14454 |
+
"step": 1901000
|
14455 |
+
},
|
14456 |
+
{
|
14457 |
+
"epoch": 0.05,
|
14458 |
+
"learning_rate": 1.1036390191576373e-05,
|
14459 |
+
"loss": 0.4379,
|
14460 |
+
"step": 1902000
|
14461 |
+
},
|
14462 |
+
{
|
14463 |
+
"epoch": 0.05,
|
14464 |
+
"learning_rate": 1.1015549796381372e-05,
|
14465 |
+
"loss": 0.4373,
|
14466 |
+
"step": 1903000
|
14467 |
+
},
|
14468 |
+
{
|
14469 |
+
"epoch": 0.05,
|
14470 |
+
"learning_rate": 1.0994960590538279e-05,
|
14471 |
+
"loss": 0.4375,
|
14472 |
+
"step": 1904000
|
14473 |
+
},
|
14474 |
+
{
|
14475 |
+
"epoch": 0.05,
|
14476 |
+
"learning_rate": 1.0974540114919287e-05,
|
14477 |
+
"loss": 0.4363,
|
14478 |
+
"step": 1905000
|
14479 |
+
},
|
14480 |
+
{
|
14481 |
+
"epoch": 0.05,
|
14482 |
+
"eval_loss": 0.4150693416595459,
|
14483 |
+
"eval_runtime": 80.8571,
|
14484 |
+
"eval_samples_per_second": 79.152,
|
14485 |
+
"eval_steps_per_second": 0.618,
|
14486 |
+
"step": 1905000
|
14487 |
+
},
|
14488 |
+
{
|
14489 |
+
"epoch": 0.05,
|
14490 |
+
"learning_rate": 1.0954329902821809e-05,
|
14491 |
+
"loss": 0.4375,
|
14492 |
+
"step": 1906000
|
14493 |
+
},
|
14494 |
+
{
|
14495 |
+
"epoch": 0.05,
|
14496 |
+
"learning_rate": 1.0934330015809674e-05,
|
14497 |
+
"loss": 0.437,
|
14498 |
+
"step": 1907000
|
14499 |
+
},
|
14500 |
+
{
|
14501 |
+
"epoch": 0.05,
|
14502 |
+
"learning_rate": 1.0914560199199067e-05,
|
14503 |
+
"loss": 0.4379,
|
14504 |
+
"step": 1908000
|
14505 |
+
},
|
14506 |
+
{
|
14507 |
+
"epoch": 0.05,
|
14508 |
+
"learning_rate": 1.0894980934009906e-05,
|
14509 |
+
"loss": 0.4372,
|
14510 |
+
"step": 1909000
|
14511 |
+
},
|
14512 |
+
{
|
14513 |
+
"epoch": 0.06,
|
14514 |
+
"learning_rate": 1.0875612174693328e-05,
|
14515 |
+
"loss": 0.437,
|
14516 |
+
"step": 1910000
|
14517 |
+
},
|
14518 |
+
{
|
14519 |
+
"epoch": 0.06,
|
14520 |
+
"eval_loss": 0.4164562225341797,
|
14521 |
+
"eval_runtime": 79.4864,
|
14522 |
+
"eval_samples_per_second": 80.517,
|
14523 |
+
"eval_steps_per_second": 0.629,
|
14524 |
+
"step": 1910000
|
14525 |
+
},
|
14526 |
+
{
|
14527 |
+
"epoch": 0.06,
|
14528 |
+
"learning_rate": 1.0856473033247752e-05,
|
14529 |
+
"loss": 0.4369,
|
14530 |
+
"step": 1911000
|
14531 |
+
},
|
14532 |
+
{
|
14533 |
+
"epoch": 0.06,
|
14534 |
+
"learning_rate": 1.0837525251384567e-05,
|
14535 |
+
"loss": 0.4367,
|
14536 |
+
"step": 1912000
|
14537 |
+
},
|
14538 |
+
{
|
14539 |
+
"epoch": 0.06,
|
14540 |
+
"learning_rate": 1.0818806782260748e-05,
|
14541 |
+
"loss": 0.4367,
|
14542 |
+
"step": 1913000
|
14543 |
+
},
|
14544 |
+
{
|
14545 |
+
"epoch": 0.06,
|
14546 |
+
"learning_rate": 1.0800280208492865e-05,
|
14547 |
+
"loss": 0.4379,
|
14548 |
+
"step": 1914000
|
14549 |
+
},
|
14550 |
+
{
|
14551 |
+
"epoch": 0.06,
|
14552 |
+
"learning_rate": 1.0781982639541429e-05,
|
14553 |
+
"loss": 0.4373,
|
14554 |
+
"step": 1915000
|
14555 |
+
},
|
14556 |
+
{
|
14557 |
+
"epoch": 0.06,
|
14558 |
+
"eval_loss": 0.4175663888454437,
|
14559 |
+
"eval_runtime": 80.7249,
|
14560 |
+
"eval_samples_per_second": 79.282,
|
14561 |
+
"eval_steps_per_second": 0.619,
|
14562 |
+
"step": 1915000
|
14563 |
+
},
|
14564 |
+
{
|
14565 |
+
"epoch": 0.06,
|
14566 |
+
"learning_rate": 1.0763895499185767e-05,
|
14567 |
+
"loss": 0.4362,
|
14568 |
+
"step": 1916000
|
14569 |
+
},
|
14570 |
+
{
|
14571 |
+
"epoch": 0.06,
|
14572 |
+
"learning_rate": 1.0746001053331784e-05,
|
14573 |
+
"loss": 0.4367,
|
14574 |
+
"step": 1917000
|
14575 |
+
},
|
14576 |
+
{
|
14577 |
+
"epoch": 0.06,
|
14578 |
+
"learning_rate": 1.0728317567168942e-05,
|
14579 |
+
"loss": 0.4373,
|
14580 |
+
"step": 1918000
|
14581 |
+
},
|
14582 |
+
{
|
14583 |
+
"epoch": 0.06,
|
14584 |
+
"learning_rate": 1.0710845094564199e-05,
|
14585 |
+
"loss": 0.4377,
|
14586 |
+
"step": 1919000
|
14587 |
+
},
|
14588 |
+
{
|
14589 |
+
"epoch": 0.06,
|
14590 |
+
"learning_rate": 1.0693583688741745e-05,
|
14591 |
+
"loss": 0.4364,
|
14592 |
+
"step": 1920000
|
14593 |
+
},
|
14594 |
+
{
|
14595 |
+
"epoch": 0.06,
|
14596 |
+
"eval_loss": 0.4133068919181824,
|
14597 |
+
"eval_runtime": 78.4611,
|
14598 |
+
"eval_samples_per_second": 81.569,
|
14599 |
+
"eval_steps_per_second": 0.637,
|
14600 |
+
"step": 1920000
|
14601 |
+
},
|
14602 |
+
{
|
14603 |
+
"epoch": 0.06,
|
14604 |
+
"learning_rate": 1.0676550347097805e-05,
|
14605 |
+
"loss": 0.4376,
|
14606 |
+
"step": 1921000
|
14607 |
+
},
|
14608 |
+
{
|
14609 |
+
"epoch": 0.06,
|
14610 |
+
"learning_rate": 1.06597110207435e-05,
|
14611 |
+
"loss": 0.437,
|
14612 |
+
"step": 1922000
|
14613 |
+
},
|
14614 |
+
{
|
14615 |
+
"epoch": 0.06,
|
14616 |
+
"learning_rate": 1.0643082916934733e-05,
|
14617 |
+
"loss": 0.4378,
|
14618 |
+
"step": 1923000
|
14619 |
+
},
|
14620 |
+
{
|
14621 |
+
"epoch": 0.06,
|
14622 |
+
"learning_rate": 1.0626682397606544e-05,
|
14623 |
+
"loss": 0.4365,
|
14624 |
+
"step": 1924000
|
14625 |
+
},
|
14626 |
+
{
|
14627 |
+
"epoch": 0.06,
|
14628 |
+
"learning_rate": 1.0610492778999931e-05,
|
14629 |
+
"loss": 0.4366,
|
14630 |
+
"step": 1925000
|
14631 |
+
},
|
14632 |
+
{
|
14633 |
+
"epoch": 0.06,
|
14634 |
+
"eval_loss": 0.41611722111701965,
|
14635 |
+
"eval_runtime": 81.6547,
|
14636 |
+
"eval_samples_per_second": 78.379,
|
14637 |
+
"eval_steps_per_second": 0.612,
|
14638 |
+
"step": 1925000
|
14639 |
+
},
|
14640 |
+
{
|
14641 |
+
"epoch": 0.06,
|
14642 |
+
"learning_rate": 1.059449822137189e-05,
|
14643 |
+
"loss": 0.4372,
|
14644 |
+
"step": 1926000
|
14645 |
+
},
|
14646 |
+
{
|
14647 |
+
"epoch": 0.06,
|
14648 |
+
"learning_rate": 1.0578715084938887e-05,
|
14649 |
+
"loss": 0.4374,
|
14650 |
+
"step": 1927000
|
14651 |
+
},
|
14652 |
+
{
|
14653 |
+
"epoch": 0.06,
|
14654 |
+
"learning_rate": 1.0563143417779096e-05,
|
14655 |
+
"loss": 0.4366,
|
14656 |
+
"step": 1928000
|
14657 |
+
},
|
14658 |
+
{
|
14659 |
+
"epoch": 0.06,
|
14660 |
+
"learning_rate": 1.0547798521808734e-05,
|
14661 |
+
"loss": 0.437,
|
14662 |
+
"step": 1929000
|
14663 |
+
},
|
14664 |
+
{
|
14665 |
+
"epoch": 0.07,
|
14666 |
+
"learning_rate": 1.0532649723266384e-05,
|
14667 |
+
"loss": 0.4365,
|
14668 |
+
"step": 1930000
|
14669 |
+
},
|
14670 |
+
{
|
14671 |
+
"epoch": 0.07,
|
14672 |
+
"eval_loss": 0.4162156581878662,
|
14673 |
+
"eval_runtime": 80.1168,
|
14674 |
+
"eval_samples_per_second": 79.883,
|
14675 |
+
"eval_steps_per_second": 0.624,
|
14676 |
+
"step": 1930000
|
14677 |
+
},
|
14678 |
+
{
|
14679 |
+
"epoch": 0.07,
|
14680 |
+
"learning_rate": 1.0517727365795085e-05,
|
14681 |
+
"loss": 0.4369,
|
14682 |
+
"step": 1931000
|
14683 |
+
},
|
14684 |
+
{
|
14685 |
+
"epoch": 0.07,
|
14686 |
+
"learning_rate": 1.0503001620268975e-05,
|
14687 |
+
"loss": 0.4373,
|
14688 |
+
"step": 1932000
|
14689 |
+
},
|
14690 |
+
{
|
14691 |
+
"epoch": 0.07,
|
14692 |
+
"learning_rate": 1.0488487574652423e-05,
|
14693 |
+
"loss": 0.4374,
|
14694 |
+
"step": 1933000
|
14695 |
+
},
|
14696 |
+
{
|
14697 |
+
"epoch": 0.07,
|
14698 |
+
"learning_rate": 1.0474199469678468e-05,
|
14699 |
+
"loss": 0.437,
|
14700 |
+
"step": 1934000
|
14701 |
+
},
|
14702 |
+
{
|
14703 |
+
"epoch": 0.07,
|
14704 |
+
"learning_rate": 1.0460108744063674e-05,
|
14705 |
+
"loss": 0.4369,
|
14706 |
+
"step": 1935000
|
14707 |
+
},
|
14708 |
+
{
|
14709 |
+
"epoch": 0.07,
|
14710 |
+
"eval_loss": 0.4142652451992035,
|
14711 |
+
"eval_runtime": 77.8865,
|
14712 |
+
"eval_samples_per_second": 82.171,
|
14713 |
+
"eval_steps_per_second": 0.642,
|
14714 |
+
"step": 1935000
|
14715 |
+
},
|
14716 |
+
{
|
14717 |
+
"epoch": 0.07,
|
14718 |
+
"learning_rate": 1.0446243622089129e-05,
|
14719 |
+
"loss": 0.4389,
|
14720 |
+
"step": 1936000
|
14721 |
+
},
|
14722 |
+
{
|
14723 |
+
"epoch": 0.07,
|
14724 |
+
"learning_rate": 1.0432576387995491e-05,
|
14725 |
+
"loss": 0.4371,
|
14726 |
+
"step": 1937000
|
14727 |
+
},
|
14728 |
+
{
|
14729 |
+
"epoch": 0.07,
|
14730 |
+
"learning_rate": 1.0419121068338878e-05,
|
14731 |
+
"loss": 0.4372,
|
14732 |
+
"step": 1938000
|
14733 |
+
},
|
14734 |
+
{
|
14735 |
+
"epoch": 0.07,
|
14736 |
+
"learning_rate": 1.0405877704106532e-05,
|
14737 |
+
"loss": 0.4366,
|
14738 |
+
"step": 1939000
|
14739 |
+
},
|
14740 |
+
{
|
14741 |
+
"epoch": 0.07,
|
14742 |
+
"learning_rate": 1.0392859261103349e-05,
|
14743 |
+
"loss": 0.4355,
|
14744 |
+
"step": 1940000
|
14745 |
+
},
|
14746 |
+
{
|
14747 |
+
"epoch": 0.07,
|
14748 |
+
"eval_loss": 0.4190742075443268,
|
14749 |
+
"eval_runtime": 80.8959,
|
14750 |
+
"eval_samples_per_second": 79.114,
|
14751 |
+
"eval_steps_per_second": 0.618,
|
14752 |
+
"step": 1940000
|
14753 |
+
},
|
14754 |
+
{
|
14755 |
+
"epoch": 0.07,
|
14756 |
+
"learning_rate": 1.0380039716043426e-05,
|
14757 |
+
"loss": 0.4357,
|
14758 |
+
"step": 1941000
|
14759 |
+
},
|
14760 |
+
{
|
14761 |
+
"epoch": 0.07,
|
14762 |
+
"learning_rate": 1.0367432245456347e-05,
|
14763 |
+
"loss": 0.4362,
|
14764 |
+
"step": 1942000
|
14765 |
+
},
|
14766 |
+
{
|
14767 |
+
"epoch": 0.07,
|
14768 |
+
"learning_rate": 1.0355049177141353e-05,
|
14769 |
+
"loss": 0.4362,
|
14770 |
+
"step": 1943000
|
14771 |
+
},
|
14772 |
+
{
|
14773 |
+
"epoch": 0.07,
|
14774 |
+
"learning_rate": 1.0342865757898152e-05,
|
14775 |
+
"loss": 0.437,
|
14776 |
+
"step": 1944000
|
14777 |
+
},
|
14778 |
+
{
|
14779 |
+
"epoch": 0.07,
|
14780 |
+
"learning_rate": 1.0330906391597708e-05,
|
14781 |
+
"loss": 0.4357,
|
14782 |
+
"step": 1945000
|
14783 |
+
},
|
14784 |
+
{
|
14785 |
+
"epoch": 0.07,
|
14786 |
+
"eval_loss": 0.4155297577381134,
|
14787 |
+
"eval_runtime": 77.8064,
|
14788 |
+
"eval_samples_per_second": 82.255,
|
14789 |
+
"eval_steps_per_second": 0.643,
|
14790 |
+
"step": 1945000
|
14791 |
+
},
|
14792 |
+
{
|
14793 |
+
"epoch": 0.07,
|
14794 |
+
"learning_rate": 1.0319147172001108e-05,
|
14795 |
+
"loss": 0.4367,
|
14796 |
+
"step": 1946000
|
14797 |
+
},
|
14798 |
+
{
|
14799 |
+
"epoch": 0.07,
|
14800 |
+
"learning_rate": 1.0307600212366596e-05,
|
14801 |
+
"loss": 0.4355,
|
14802 |
+
"step": 1947000
|
14803 |
+
},
|
14804 |
+
{
|
14805 |
+
"epoch": 0.07,
|
14806 |
+
"learning_rate": 1.029627677647975e-05,
|
14807 |
+
"loss": 0.4367,
|
14808 |
+
"step": 1948000
|
14809 |
+
},
|
14810 |
+
{
|
14811 |
+
"epoch": 0.07,
|
14812 |
+
"learning_rate": 1.0285154229298157e-05,
|
14813 |
+
"loss": 0.4369,
|
14814 |
+
"step": 1949000
|
14815 |
+
},
|
14816 |
+
{
|
14817 |
+
"epoch": 0.07,
|
14818 |
+
"learning_rate": 1.0274244045627054e-05,
|
14819 |
+
"loss": 0.4373,
|
14820 |
+
"step": 1950000
|
14821 |
+
},
|
14822 |
+
{
|
14823 |
+
"epoch": 0.07,
|
14824 |
+
"eval_loss": 0.41703131794929504,
|
14825 |
+
"eval_runtime": 77.2751,
|
14826 |
+
"eval_samples_per_second": 82.821,
|
14827 |
+
"eval_steps_per_second": 0.647,
|
14828 |
+
"step": 1950000
|
14829 |
}
|
14830 |
],
|
14831 |
"max_steps": 2000000,
|
14832 |
"num_train_epochs": 9223372036854775807,
|
14833 |
+
"total_flos": 1.7086938841350144e+22,
|
14834 |
"trial_name": null,
|
14835 |
"trial_params": null
|
14836 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4158aaedff079b2378ceb72199c920ad399c00fbc03838dbc3a2204ee0d64219
|
3 |
size 449471589
|