besimray commited on
Commit
fab6de2
1 Parent(s): 9ba18bf

Training in progress, step 280, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05f6ed908b6c0a5b0bf09c0573053a4af39c4c92f5bf61fe05d9df2519c97031
3
  size 90207248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bedd12fb64e67b0ba54ce9a65703cf9231d8f3947a3a2421c5324ea3c4f4a458
3
  size 90207248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2e3c04601c84fd5ba7eeb5da4ff1003918f386a2d1833589198a44ba65f0d8d
3
  size 46057338
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13aa5702b220dfabda9ee206f9a7825c21d198ab9f22812c322f93e88794d492
3
  size 46057338
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b90b2977b40565c7b47786677726b21587b954d614cb5eddcc13e2d79ccfddfd
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fed915c84bf19420142cd5928514948593dacfab134a8c615244e7726ec07b27
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24163fa211a98380eaaf8162c38702de00babc1b46887461983dfe21c7fd7b23
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62b01704910ed42611ba769dd8b7cb883a0e572e8ef2ce5c29f4f8f6102196ba
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.151181697845459,
3
  "best_model_checkpoint": "miner_id_besimray/checkpoint-80",
4
- "epoch": 5.473684210526316,
5
  "eval_steps": 20,
6
- "global_step": 260,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1939,6 +1939,154 @@
1939
  "eval_samples_per_second": 6.625,
1940
  "eval_steps_per_second": 0.662,
1941
  "step": 260
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1942
  }
1943
  ],
1944
  "logging_steps": 1,
@@ -1953,7 +2101,7 @@
1953
  "early_stopping_threshold": 0.0
1954
  },
1955
  "attributes": {
1956
- "early_stopping_patience_counter": 9
1957
  }
1958
  },
1959
  "TrainerControl": {
@@ -1962,12 +2110,12 @@
1962
  "should_evaluate": false,
1963
  "should_log": false,
1964
  "should_save": true,
1965
- "should_training_stop": false
1966
  },
1967
  "attributes": {}
1968
  }
1969
  },
1970
- "total_flos": 2.544889321488384e+17,
1971
  "train_batch_size": 10,
1972
  "trial_name": null,
1973
  "trial_params": null
 
1
  {
2
  "best_metric": 1.151181697845459,
3
  "best_model_checkpoint": "miner_id_besimray/checkpoint-80",
4
+ "epoch": 5.894736842105263,
5
  "eval_steps": 20,
6
+ "global_step": 280,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1939
  "eval_samples_per_second": 6.625,
1940
  "eval_steps_per_second": 0.662,
1941
  "step": 260
1942
+ },
1943
+ {
1944
+ "epoch": 5.494736842105263,
1945
+ "grad_norm": 0.541333019733429,
1946
+ "learning_rate": 0.00019861942921134298,
1947
+ "loss": 0.5321,
1948
+ "step": 261
1949
+ },
1950
+ {
1951
+ "epoch": 5.515789473684211,
1952
+ "grad_norm": 0.45463690161705017,
1953
+ "learning_rate": 0.0001986084323691701,
1954
+ "loss": 0.5239,
1955
+ "step": 262
1956
+ },
1957
+ {
1958
+ "epoch": 5.536842105263158,
1959
+ "grad_norm": 0.5732460618019104,
1960
+ "learning_rate": 0.0001985973922101239,
1961
+ "loss": 0.4861,
1962
+ "step": 263
1963
+ },
1964
+ {
1965
+ "epoch": 5.557894736842105,
1966
+ "grad_norm": 0.4361143112182617,
1967
+ "learning_rate": 0.00019858630873905418,
1968
+ "loss": 0.5427,
1969
+ "step": 264
1970
+ },
1971
+ {
1972
+ "epoch": 5.578947368421053,
1973
+ "grad_norm": 0.48954471945762634,
1974
+ "learning_rate": 0.00019857518196082964,
1975
+ "loss": 0.5614,
1976
+ "step": 265
1977
+ },
1978
+ {
1979
+ "epoch": 5.6,
1980
+ "grad_norm": 0.5832586884498596,
1981
+ "learning_rate": 0.0001985640118803381,
1982
+ "loss": 0.4603,
1983
+ "step": 266
1984
+ },
1985
+ {
1986
+ "epoch": 5.621052631578947,
1987
+ "grad_norm": 0.5026202201843262,
1988
+ "learning_rate": 0.0001985527985024864,
1989
+ "loss": 0.6399,
1990
+ "step": 267
1991
+ },
1992
+ {
1993
+ "epoch": 5.6421052631578945,
1994
+ "grad_norm": 0.4579145908355713,
1995
+ "learning_rate": 0.0001985415418322003,
1996
+ "loss": 0.5354,
1997
+ "step": 268
1998
+ },
1999
+ {
2000
+ "epoch": 5.663157894736842,
2001
+ "grad_norm": 0.545054018497467,
2002
+ "learning_rate": 0.00019853024187442472,
2003
+ "loss": 0.5158,
2004
+ "step": 269
2005
+ },
2006
+ {
2007
+ "epoch": 5.684210526315789,
2008
+ "grad_norm": 0.48174452781677246,
2009
+ "learning_rate": 0.00019851889863412345,
2010
+ "loss": 0.5014,
2011
+ "step": 270
2012
+ },
2013
+ {
2014
+ "epoch": 5.705263157894737,
2015
+ "grad_norm": 0.5417779684066772,
2016
+ "learning_rate": 0.00019850751211627945,
2017
+ "loss": 0.54,
2018
+ "step": 271
2019
+ },
2020
+ {
2021
+ "epoch": 5.726315789473684,
2022
+ "grad_norm": 0.46869099140167236,
2023
+ "learning_rate": 0.00019849608232589457,
2024
+ "loss": 0.5416,
2025
+ "step": 272
2026
+ },
2027
+ {
2028
+ "epoch": 5.747368421052632,
2029
+ "grad_norm": 0.6471317410469055,
2030
+ "learning_rate": 0.00019848460926798968,
2031
+ "loss": 0.5962,
2032
+ "step": 273
2033
+ },
2034
+ {
2035
+ "epoch": 5.768421052631579,
2036
+ "grad_norm": 0.5855197310447693,
2037
+ "learning_rate": 0.00019847309294760473,
2038
+ "loss": 0.6314,
2039
+ "step": 274
2040
+ },
2041
+ {
2042
+ "epoch": 5.7894736842105265,
2043
+ "grad_norm": 0.5380208492279053,
2044
+ "learning_rate": 0.00019846153336979856,
2045
+ "loss": 0.5651,
2046
+ "step": 275
2047
+ },
2048
+ {
2049
+ "epoch": 5.810526315789474,
2050
+ "grad_norm": 0.46017733216285706,
2051
+ "learning_rate": 0.00019844993053964917,
2052
+ "loss": 0.5575,
2053
+ "step": 276
2054
+ },
2055
+ {
2056
+ "epoch": 5.831578947368421,
2057
+ "grad_norm": 0.49735313653945923,
2058
+ "learning_rate": 0.00019843828446225342,
2059
+ "loss": 0.5628,
2060
+ "step": 277
2061
+ },
2062
+ {
2063
+ "epoch": 5.852631578947369,
2064
+ "grad_norm": 0.5164270401000977,
2065
+ "learning_rate": 0.0001984265951427272,
2066
+ "loss": 0.5026,
2067
+ "step": 278
2068
+ },
2069
+ {
2070
+ "epoch": 5.873684210526315,
2071
+ "grad_norm": 0.5263252258300781,
2072
+ "learning_rate": 0.00019841486258620545,
2073
+ "loss": 0.5588,
2074
+ "step": 279
2075
+ },
2076
+ {
2077
+ "epoch": 5.894736842105263,
2078
+ "grad_norm": 0.47757405042648315,
2079
+ "learning_rate": 0.00019840308679784207,
2080
+ "loss": 0.5671,
2081
+ "step": 280
2082
+ },
2083
+ {
2084
+ "epoch": 5.894736842105263,
2085
+ "eval_loss": 1.5009753704071045,
2086
+ "eval_runtime": 15.1017,
2087
+ "eval_samples_per_second": 6.622,
2088
+ "eval_steps_per_second": 0.662,
2089
+ "step": 280
2090
  }
2091
  ],
2092
  "logging_steps": 1,
 
2101
  "early_stopping_threshold": 0.0
2102
  },
2103
  "attributes": {
2104
+ "early_stopping_patience_counter": 10
2105
  }
2106
  },
2107
  "TrainerControl": {
 
2110
  "should_evaluate": false,
2111
  "should_log": false,
2112
  "should_save": true,
2113
+ "should_training_stop": true
2114
  },
2115
  "attributes": {}
2116
  }
2117
  },
2118
+ "total_flos": 2.740650038525952e+17,
2119
  "train_batch_size": 10,
2120
  "trial_name": null,
2121
  "trial_params": null