sheepy928 committed on
Commit
f82fd37
1 Parent(s): fa1f50d

Training in progress, step 100, checkpoint

Browse files
checkpoint-100/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dca14abc2231924cd6f53b4a1029a4db1b0e36b9bffb54cd66d4b80c435a97e2
3
- size 4747538
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d4db39d5938238acd56dde9187a5e93434a86acab863d24bd3ecad16485eb70
3
+ size 997351674
checkpoint-100/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4cc52dc4cca2eddcf0a58171fa91263a8b35d1dca5b277cef6b03ac7f02c0c38
3
  size 498661166
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ac491627d7b469328f54e32f7f1f9e2b671c5716f6b5a2408b041d27185e33b
3
  size 498661166
checkpoint-100/tokenizer.json CHANGED
@@ -1,21 +1,7 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 512,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
- "padding": {
10
- "strategy": {
11
- "Fixed": 512
12
- },
13
- "direction": "Right",
14
- "pad_to_multiple_of": null,
15
- "pad_id": 1,
16
- "pad_type_id": 0,
17
- "pad_token": "<pad>"
18
- },
19
  "added_tokens": [
20
  {
21
  "id": 0,
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
checkpoint-100/trainer_state.json CHANGED
@@ -11,13 +11,13 @@
11
  {
12
  "epoch": 0.05,
13
  "learning_rate": 0.0004995563442768412,
14
- "loss": 1.2913,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.11,
19
  "learning_rate": 0.0004986690328305235,
20
- "loss": 1.1578,
21
  "step": 20
22
  },
23
  {
@@ -25,24 +25,24 @@
25
  "eval_accuracy": 0.7386666666666667,
26
  "eval_combined_score": 0.6626504648943422,
27
  "eval_f1": 0.6276400817995911,
28
- "eval_loss": 0.7813256978988647,
29
  "eval_precision": 0.5456284444444445,
30
  "eval_recall": 0.7386666666666667,
31
- "eval_runtime": 5.7155,
32
- "eval_samples_per_second": 262.444,
33
- "eval_steps_per_second": 8.223,
34
  "step": 20
35
  },
36
  {
37
  "epoch": 0.16,
38
  "learning_rate": 0.0004977817213842058,
39
- "loss": 0.8742,
40
  "step": 30
41
  },
42
  {
43
  "epoch": 0.21,
44
  "learning_rate": 0.0004968944099378882,
45
- "loss": 0.7537,
46
  "step": 40
47
  },
48
  {
@@ -50,24 +50,24 @@
50
  "eval_accuracy": 0.7386666666666667,
51
  "eval_combined_score": 0.6626504648943422,
52
  "eval_f1": 0.6276400817995911,
53
- "eval_loss": 0.792127251625061,
54
  "eval_precision": 0.5456284444444445,
55
  "eval_recall": 0.7386666666666667,
56
- "eval_runtime": 5.6846,
57
- "eval_samples_per_second": 263.869,
58
- "eval_steps_per_second": 8.268,
59
  "step": 40
60
  },
61
  {
62
  "epoch": 0.27,
63
  "learning_rate": 0.0004960070984915705,
64
- "loss": 0.8076,
65
  "step": 50
66
  },
67
  {
68
  "epoch": 0.32,
69
  "learning_rate": 0.0004951197870452529,
70
- "loss": 0.7436,
71
  "step": 60
72
  },
73
  {
@@ -75,24 +75,24 @@
75
  "eval_accuracy": 0.7386666666666667,
76
  "eval_combined_score": 0.6626504648943422,
77
  "eval_f1": 0.6276400817995911,
78
- "eval_loss": 0.7419535517692566,
79
  "eval_precision": 0.5456284444444445,
80
  "eval_recall": 0.7386666666666667,
81
- "eval_runtime": 5.7977,
82
- "eval_samples_per_second": 258.725,
83
- "eval_steps_per_second": 8.107,
84
  "step": 60
85
  },
86
  {
87
  "epoch": 0.37,
88
  "learning_rate": 0.0004942324755989353,
89
- "loss": 0.7465,
90
  "step": 70
91
  },
92
  {
93
  "epoch": 0.43,
94
  "learning_rate": 0.0004933451641526176,
95
- "loss": 0.6516,
96
  "step": 80
97
  },
98
  {
@@ -100,24 +100,24 @@
100
  "eval_accuracy": 0.7386666666666667,
101
  "eval_combined_score": 0.6626504648943422,
102
  "eval_f1": 0.6276400817995911,
103
- "eval_loss": 0.7484750747680664,
104
  "eval_precision": 0.5456284444444445,
105
  "eval_recall": 0.7386666666666667,
106
- "eval_runtime": 5.7051,
107
- "eval_samples_per_second": 262.923,
108
- "eval_steps_per_second": 8.238,
109
  "step": 80
110
  },
111
  {
112
  "epoch": 0.48,
113
  "learning_rate": 0.0004924578527063,
114
- "loss": 0.9634,
115
  "step": 90
116
  },
117
  {
118
  "epoch": 0.53,
119
  "learning_rate": 0.0004915705412599822,
120
- "loss": 0.8011,
121
  "step": 100
122
  },
123
  {
@@ -125,12 +125,12 @@
125
  "eval_accuracy": 0.7386666666666667,
126
  "eval_combined_score": 0.6626504648943422,
127
  "eval_f1": 0.6276400817995911,
128
- "eval_loss": 0.7428026795387268,
129
  "eval_precision": 0.5456284444444445,
130
  "eval_recall": 0.7386666666666667,
131
- "eval_runtime": 5.7042,
132
- "eval_samples_per_second": 262.962,
133
- "eval_steps_per_second": 8.239,
134
  "step": 100
135
  }
136
  ],
 
11
  {
12
  "epoch": 0.05,
13
  "learning_rate": 0.0004995563442768412,
14
+ "loss": 1.3139,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.11,
19
  "learning_rate": 0.0004986690328305235,
20
+ "loss": 1.2812,
21
  "step": 20
22
  },
23
  {
 
25
  "eval_accuracy": 0.7386666666666667,
26
  "eval_combined_score": 0.6626504648943422,
27
  "eval_f1": 0.6276400817995911,
28
+ "eval_loss": 0.8499402403831482,
29
  "eval_precision": 0.5456284444444445,
30
  "eval_recall": 0.7386666666666667,
31
+ "eval_runtime": 125.9807,
32
+ "eval_samples_per_second": 11.907,
33
+ "eval_steps_per_second": 0.373,
34
  "step": 20
35
  },
36
  {
37
  "epoch": 0.16,
38
  "learning_rate": 0.0004977817213842058,
39
+ "loss": 0.832,
40
  "step": 30
41
  },
42
  {
43
  "epoch": 0.21,
44
  "learning_rate": 0.0004968944099378882,
45
+ "loss": 0.8156,
46
  "step": 40
47
  },
48
  {
 
50
  "eval_accuracy": 0.7386666666666667,
51
  "eval_combined_score": 0.6626504648943422,
52
  "eval_f1": 0.6276400817995911,
53
+ "eval_loss": 0.8848057985305786,
54
  "eval_precision": 0.5456284444444445,
55
  "eval_recall": 0.7386666666666667,
56
+ "eval_runtime": 46.3365,
57
+ "eval_samples_per_second": 32.372,
58
+ "eval_steps_per_second": 1.014,
59
  "step": 40
60
  },
61
  {
62
  "epoch": 0.27,
63
  "learning_rate": 0.0004960070984915705,
64
+ "loss": 0.7475,
65
  "step": 50
66
  },
67
  {
68
  "epoch": 0.32,
69
  "learning_rate": 0.0004951197870452529,
70
+ "loss": 0.7478,
71
  "step": 60
72
  },
73
  {
 
75
  "eval_accuracy": 0.7386666666666667,
76
  "eval_combined_score": 0.6626504648943422,
77
  "eval_f1": 0.6276400817995911,
78
+ "eval_loss": 0.7691774964332581,
79
  "eval_precision": 0.5456284444444445,
80
  "eval_recall": 0.7386666666666667,
81
+ "eval_runtime": 6.0551,
82
+ "eval_samples_per_second": 247.726,
83
+ "eval_steps_per_second": 7.762,
84
  "step": 60
85
  },
86
  {
87
  "epoch": 0.37,
88
  "learning_rate": 0.0004942324755989353,
89
+ "loss": 0.7663,
90
  "step": 70
91
  },
92
  {
93
  "epoch": 0.43,
94
  "learning_rate": 0.0004933451641526176,
95
+ "loss": 0.6034,
96
  "step": 80
97
  },
98
  {
 
100
  "eval_accuracy": 0.7386666666666667,
101
  "eval_combined_score": 0.6626504648943422,
102
  "eval_f1": 0.6276400817995911,
103
+ "eval_loss": 0.7553095817565918,
104
  "eval_precision": 0.5456284444444445,
105
  "eval_recall": 0.7386666666666667,
106
+ "eval_runtime": 6.025,
107
+ "eval_samples_per_second": 248.962,
108
+ "eval_steps_per_second": 7.801,
109
  "step": 80
110
  },
111
  {
112
  "epoch": 0.48,
113
  "learning_rate": 0.0004924578527063,
114
+ "loss": 0.9511,
115
  "step": 90
116
  },
117
  {
118
  "epoch": 0.53,
119
  "learning_rate": 0.0004915705412599822,
120
+ "loss": 0.7548,
121
  "step": 100
122
  },
123
  {
 
125
  "eval_accuracy": 0.7386666666666667,
126
  "eval_combined_score": 0.6626504648943422,
127
  "eval_f1": 0.6276400817995911,
128
+ "eval_loss": 0.7768574357032776,
129
  "eval_precision": 0.5456284444444445,
130
  "eval_recall": 0.7386666666666667,
131
+ "eval_runtime": 5.9796,
132
+ "eval_samples_per_second": 250.851,
133
+ "eval_steps_per_second": 7.86,
134
  "step": 100
135
  }
136
  ],
checkpoint-100/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94f7cf02005ce236de7bb1fda33a06f2c3053bafb778c1f449d2ec5279f3a3ec
3
  size 4472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5774ca6e2ea30af0574f0df2eee8b6bf7c47cb150d6cc3a84e7f1fdeba63d725
3
  size 4472