besimray commited on
Commit
fed4a95
1 Parent(s): 4d89d1a

Training in progress, step 53, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e00c7e2ca2f7201416ee0db7548754ff0b7dad3573e16f5759f340b0edf91035
3
  size 22573704
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:708c6aadb9f8e545ae09f3bba3ce8a552f118e32dfe7c81df3bcce7d994d683e
3
  size 22573704
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80645b0c4593e3ffa5f299466cc99572408a8e260842bda9dc94271f7e380e52
3
  size 11710970
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22f988dddbf3a0b9f9e83b72b1af9a63168ed0e2c88f9190a770903f57b79541
3
  size 11710970
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f59a5254780d448320ee839ee90bfa0ce534a45ff625b1caabb810d268fac3ee
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b4464714bdff05cee69b1f8c55cac8fa8134b444386c23de5a63fd21314eaab
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7e76384fe2e1907e44b199e48722ea251cbbcfea1285f875115318fffa6d887
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62186d98e2e1a228b1a77580a02e85af5559d427fe6308d24fc721da049c3720
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.704846203327179,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
- "epoch": 2.857142857142857,
5
  "eval_steps": 10,
6
- "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -405,6 +405,27 @@
405
  "eval_samples_per_second": 7.18,
406
  "eval_steps_per_second": 1.915,
407
  "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
408
  }
409
  ],
410
  "logging_steps": 1,
@@ -428,12 +449,12 @@
428
  "should_evaluate": false,
429
  "should_log": false,
430
  "should_save": true,
431
- "should_training_stop": false
432
  },
433
  "attributes": {}
434
  }
435
  },
436
- "total_flos": 1.92436412350464e+16,
437
  "train_batch_size": 4,
438
  "trial_name": null,
439
  "trial_params": null
 
1
  {
2
  "best_metric": 0.704846203327179,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
+ "epoch": 3.0285714285714285,
5
  "eval_steps": 10,
6
+ "global_step": 53,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
405
  "eval_samples_per_second": 7.18,
406
  "eval_steps_per_second": 1.915,
407
  "step": 50
408
+ },
409
+ {
410
+ "epoch": 2.914285714285714,
411
+ "grad_norm": 0.2923741936683655,
412
+ "learning_rate": 5.328315962444874e-07,
413
+ "loss": 0.6826,
414
+ "step": 51
415
+ },
416
+ {
417
+ "epoch": 2.9714285714285715,
418
+ "grad_norm": 0.29426372051239014,
419
+ "learning_rate": 1.333858168224178e-07,
420
+ "loss": 0.6043,
421
+ "step": 52
422
+ },
423
+ {
424
+ "epoch": 3.0285714285714285,
425
+ "grad_norm": 0.29648035764694214,
426
+ "learning_rate": 0.0,
427
+ "loss": 0.6566,
428
+ "step": 53
429
  }
430
  ],
431
  "logging_steps": 1,
 
449
  "should_evaluate": false,
450
  "should_log": false,
451
  "should_save": true,
452
+ "should_training_stop": true
453
  },
454
  "attributes": {}
455
  }
456
  },
457
+ "total_flos": 2.0398259709149184e+16,
458
  "train_batch_size": 4,
459
  "trial_name": null,
460
  "trial_params": null