3v324v23 committed on
Commit
dce943d
•
1 Parent(s): d191936

update new checkpoint 15K

Browse files
.gitattributes CHANGED
@@ -38,3 +38,4 @@ checkpoint-5000/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
38
  old/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
39
  checkpoint-10000/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
40
  checkpoint-15000/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
 
 
38
  old/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
39
  checkpoint-10000/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
40
  checkpoint-15000/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
41
+ checkpoint-20000/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
{checkpoint-15000 → checkpoint-20000}/config.json RENAMED
File without changes
{checkpoint-15000 → checkpoint-20000}/generation_config.json RENAMED
File without changes
{checkpoint-15000 → checkpoint-20000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e164f9fad0641e64a32b9d367cf6e92483eb5cd7df7a4dd42c3ddddc0cadebe1
3
  size 2371333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faf89a989d29c28e1adcc3475b1c824fe5329491ea1430eecc8d0e670a8fbcd3
3
  size 2371333
{checkpoint-15000 → checkpoint-20000}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:42f1dac77bf4d254203f8f6dd684ada1cf998f26b81addd2e8dd06b2eeab8cd6
3
  size 990408885
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a68794e67b4a15b3adf98c8974799b4e08a27b56f9154e118dd392087fffa56
3
  size 990408885
{checkpoint-15000 → checkpoint-20000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fca6bd4f2027e9f6a64120d4dd9cfacab4778f895ae586fd5c13f7cff62aac59
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1ff062526219ffde93f9f292e9365f17879a9afff6660d569e10c57e0a90df4
3
  size 14575
{checkpoint-15000 → checkpoint-20000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62b793f9ea6eba39185d82063b1e7434411e2aeca1bab5a010024f955d1696b3
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f58a0653df4cbbcf9d6cc03d846193b654e5e1cc8a7d6462c99377d7fbe445ea
3
  size 627
{checkpoint-15000 → checkpoint-20000}/trainer_state.json RENAMED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.8896447467876039,
5
  "eval_steps": 500,
6
- "global_step": 15000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -427,13 +427,153 @@
427
  "eval_samples_per_second": 74.876,
428
  "eval_steps_per_second": 37.438,
429
  "step": 15000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
430
  }
431
  ],
432
  "logging_steps": 500,
433
  "max_steps": 20000,
434
  "num_train_epochs": 3,
435
  "save_steps": 5000,
436
- "total_flos": 2.66437480937472e+16,
437
  "trial_name": null,
438
  "trial_params": null
439
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.5195263290501386,
5
  "eval_steps": 500,
6
+ "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
427
  "eval_samples_per_second": 74.876,
428
  "eval_steps_per_second": 37.438,
429
  "step": 15000
430
+ },
431
+ {
432
+ "epoch": 1.95,
433
+ "learning_rate": 0.00022500000000000002,
434
+ "loss": 0.5422,
435
+ "step": 15500
436
+ },
437
+ {
438
+ "epoch": 1.95,
439
+ "eval_loss": 0.6929380297660828,
440
+ "eval_runtime": 4.107,
441
+ "eval_samples_per_second": 73.047,
442
+ "eval_steps_per_second": 36.523,
443
+ "step": 15500
444
+ },
445
+ {
446
+ "epoch": 2.02,
447
+ "learning_rate": 0.0002,
448
+ "loss": 0.5104,
449
+ "step": 16000
450
+ },
451
+ {
452
+ "epoch": 2.02,
453
+ "eval_loss": 0.7098422050476074,
454
+ "eval_runtime": 4.0323,
455
+ "eval_samples_per_second": 74.4,
456
+ "eval_steps_per_second": 37.2,
457
+ "step": 16000
458
+ },
459
+ {
460
+ "epoch": 2.08,
461
+ "learning_rate": 0.000175,
462
+ "loss": 0.3835,
463
+ "step": 16500
464
+ },
465
+ {
466
+ "epoch": 2.08,
467
+ "eval_loss": 0.7105218768119812,
468
+ "eval_runtime": 4.0594,
469
+ "eval_samples_per_second": 73.903,
470
+ "eval_steps_per_second": 36.952,
471
+ "step": 16500
472
+ },
473
+ {
474
+ "epoch": 2.14,
475
+ "learning_rate": 0.00015,
476
+ "loss": 0.3805,
477
+ "step": 17000
478
+ },
479
+ {
480
+ "epoch": 2.14,
481
+ "eval_loss": 0.7144222855567932,
482
+ "eval_runtime": 4.0853,
483
+ "eval_samples_per_second": 73.434,
484
+ "eval_steps_per_second": 36.717,
485
+ "step": 17000
486
+ },
487
+ {
488
+ "epoch": 2.2,
489
+ "learning_rate": 0.000125,
490
+ "loss": 0.3718,
491
+ "step": 17500
492
+ },
493
+ {
494
+ "epoch": 2.2,
495
+ "eval_loss": 0.7210414409637451,
496
+ "eval_runtime": 5.0511,
497
+ "eval_samples_per_second": 59.393,
498
+ "eval_steps_per_second": 29.697,
499
+ "step": 17500
500
+ },
501
+ {
502
+ "epoch": 2.27,
503
+ "learning_rate": 0.0001,
504
+ "loss": 0.3688,
505
+ "step": 18000
506
+ },
507
+ {
508
+ "epoch": 2.27,
509
+ "eval_loss": 0.7145898342132568,
510
+ "eval_runtime": 4.7793,
511
+ "eval_samples_per_second": 62.77,
512
+ "eval_steps_per_second": 31.385,
513
+ "step": 18000
514
+ },
515
+ {
516
+ "epoch": 2.33,
517
+ "learning_rate": 7.5e-05,
518
+ "loss": 0.3645,
519
+ "step": 18500
520
+ },
521
+ {
522
+ "epoch": 2.33,
523
+ "eval_loss": 0.7136221528053284,
524
+ "eval_runtime": 4.0171,
525
+ "eval_samples_per_second": 74.681,
526
+ "eval_steps_per_second": 37.34,
527
+ "step": 18500
528
+ },
529
+ {
530
+ "epoch": 2.39,
531
+ "learning_rate": 5e-05,
532
+ "loss": 0.3865,
533
+ "step": 19000
534
+ },
535
+ {
536
+ "epoch": 2.39,
537
+ "eval_loss": 0.7075753808021545,
538
+ "eval_runtime": 3.9658,
539
+ "eval_samples_per_second": 75.646,
540
+ "eval_steps_per_second": 37.823,
541
+ "step": 19000
542
+ },
543
+ {
544
+ "epoch": 2.46,
545
+ "learning_rate": 2.5e-05,
546
+ "loss": 0.3633,
547
+ "step": 19500
548
+ },
549
+ {
550
+ "epoch": 2.46,
551
+ "eval_loss": 0.7097809314727783,
552
+ "eval_runtime": 4.0163,
553
+ "eval_samples_per_second": 74.696,
554
+ "eval_steps_per_second": 37.348,
555
+ "step": 19500
556
+ },
557
+ {
558
+ "epoch": 2.52,
559
+ "learning_rate": 0.0,
560
+ "loss": 0.3674,
561
+ "step": 20000
562
+ },
563
+ {
564
+ "epoch": 2.52,
565
+ "eval_loss": 0.7079904079437256,
566
+ "eval_runtime": 4.0804,
567
+ "eval_samples_per_second": 73.522,
568
+ "eval_steps_per_second": 36.761,
569
+ "step": 20000
570
  }
571
  ],
572
  "logging_steps": 500,
573
  "max_steps": 20000,
574
  "num_train_epochs": 3,
575
  "save_steps": 5000,
576
+ "total_flos": 3.549121832463667e+16,
577
  "trial_name": null,
578
  "trial_params": null
579
  }
{checkpoint-15000 → checkpoint-20000}/training_args.bin RENAMED
File without changes
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:faf89a989d29c28e1adcc3475b1c824fe5329491ea1430eecc8d0e670a8fbcd3
3
  size 2371333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e164f9fad0641e64a32b9d367cf6e92483eb5cd7df7a4dd42c3ddddc0cadebe1
3
  size 2371333
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a68794e67b4a15b3adf98c8974799b4e08a27b56f9154e118dd392087fffa56
3
  size 990408885
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42f1dac77bf4d254203f8f6dd684ada1cf998f26b81addd2e8dd06b2eeab8cd6
3
  size 990408885
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1ff062526219ffde93f9f292e9365f17879a9afff6660d569e10c57e0a90df4
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fca6bd4f2027e9f6a64120d4dd9cfacab4778f895ae586fd5c13f7cff62aac59
3
  size 14575
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f58a0653df4cbbcf9d6cc03d846193b654e5e1cc8a7d6462c99377d7fbe445ea
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62b793f9ea6eba39185d82063b1e7434411e2aeca1bab5a010024f955d1696b3
3
  size 627
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.5195263290501386,
5
  "eval_steps": 500,
6
- "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -427,153 +427,13 @@
427
  "eval_samples_per_second": 74.876,
428
  "eval_steps_per_second": 37.438,
429
  "step": 15000
430
- },
431
- {
432
- "epoch": 1.95,
433
- "learning_rate": 0.00022500000000000002,
434
- "loss": 0.5422,
435
- "step": 15500
436
- },
437
- {
438
- "epoch": 1.95,
439
- "eval_loss": 0.6929380297660828,
440
- "eval_runtime": 4.107,
441
- "eval_samples_per_second": 73.047,
442
- "eval_steps_per_second": 36.523,
443
- "step": 15500
444
- },
445
- {
446
- "epoch": 2.02,
447
- "learning_rate": 0.0002,
448
- "loss": 0.5104,
449
- "step": 16000
450
- },
451
- {
452
- "epoch": 2.02,
453
- "eval_loss": 0.7098422050476074,
454
- "eval_runtime": 4.0323,
455
- "eval_samples_per_second": 74.4,
456
- "eval_steps_per_second": 37.2,
457
- "step": 16000
458
- },
459
- {
460
- "epoch": 2.08,
461
- "learning_rate": 0.000175,
462
- "loss": 0.3835,
463
- "step": 16500
464
- },
465
- {
466
- "epoch": 2.08,
467
- "eval_loss": 0.7105218768119812,
468
- "eval_runtime": 4.0594,
469
- "eval_samples_per_second": 73.903,
470
- "eval_steps_per_second": 36.952,
471
- "step": 16500
472
- },
473
- {
474
- "epoch": 2.14,
475
- "learning_rate": 0.00015,
476
- "loss": 0.3805,
477
- "step": 17000
478
- },
479
- {
480
- "epoch": 2.14,
481
- "eval_loss": 0.7144222855567932,
482
- "eval_runtime": 4.0853,
483
- "eval_samples_per_second": 73.434,
484
- "eval_steps_per_second": 36.717,
485
- "step": 17000
486
- },
487
- {
488
- "epoch": 2.2,
489
- "learning_rate": 0.000125,
490
- "loss": 0.3718,
491
- "step": 17500
492
- },
493
- {
494
- "epoch": 2.2,
495
- "eval_loss": 0.7210414409637451,
496
- "eval_runtime": 5.0511,
497
- "eval_samples_per_second": 59.393,
498
- "eval_steps_per_second": 29.697,
499
- "step": 17500
500
- },
501
- {
502
- "epoch": 2.27,
503
- "learning_rate": 0.0001,
504
- "loss": 0.3688,
505
- "step": 18000
506
- },
507
- {
508
- "epoch": 2.27,
509
- "eval_loss": 0.7145898342132568,
510
- "eval_runtime": 4.7793,
511
- "eval_samples_per_second": 62.77,
512
- "eval_steps_per_second": 31.385,
513
- "step": 18000
514
- },
515
- {
516
- "epoch": 2.33,
517
- "learning_rate": 7.5e-05,
518
- "loss": 0.3645,
519
- "step": 18500
520
- },
521
- {
522
- "epoch": 2.33,
523
- "eval_loss": 0.7136221528053284,
524
- "eval_runtime": 4.0171,
525
- "eval_samples_per_second": 74.681,
526
- "eval_steps_per_second": 37.34,
527
- "step": 18500
528
- },
529
- {
530
- "epoch": 2.39,
531
- "learning_rate": 5e-05,
532
- "loss": 0.3865,
533
- "step": 19000
534
- },
535
- {
536
- "epoch": 2.39,
537
- "eval_loss": 0.7075753808021545,
538
- "eval_runtime": 3.9658,
539
- "eval_samples_per_second": 75.646,
540
- "eval_steps_per_second": 37.823,
541
- "step": 19000
542
- },
543
- {
544
- "epoch": 2.46,
545
- "learning_rate": 2.5e-05,
546
- "loss": 0.3633,
547
- "step": 19500
548
- },
549
- {
550
- "epoch": 2.46,
551
- "eval_loss": 0.7097809314727783,
552
- "eval_runtime": 4.0163,
553
- "eval_samples_per_second": 74.696,
554
- "eval_steps_per_second": 37.348,
555
- "step": 19500
556
- },
557
- {
558
- "epoch": 2.52,
559
- "learning_rate": 0.0,
560
- "loss": 0.3674,
561
- "step": 20000
562
- },
563
- {
564
- "epoch": 2.52,
565
- "eval_loss": 0.7079904079437256,
566
- "eval_runtime": 4.0804,
567
- "eval_samples_per_second": 73.522,
568
- "eval_steps_per_second": 36.761,
569
- "step": 20000
570
  }
571
  ],
572
  "logging_steps": 500,
573
  "max_steps": 20000,
574
  "num_train_epochs": 3,
575
  "save_steps": 5000,
576
- "total_flos": 3.549121832463667e+16,
577
  "trial_name": null,
578
  "trial_params": null
579
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.8896447467876039,
5
  "eval_steps": 500,
6
+ "global_step": 15000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
427
  "eval_samples_per_second": 74.876,
428
  "eval_steps_per_second": 37.438,
429
  "step": 15000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
430
  }
431
  ],
432
  "logging_steps": 500,
433
  "max_steps": 20000,
434
  "num_train_epochs": 3,
435
  "save_steps": 5000,
436
+ "total_flos": 2.66437480937472e+16,
437
  "trial_name": null,
438
  "trial_params": null
439
  }