adamjweintraut committed
Commit 7f44867
Parent: 449c305

Training in progress, step 5000, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f04b87a850c8e1978cbcc919472d319a69545cea7249b6850d0c66548817b58c
+oid sha256:30bf6de385d691535d4c1d22fdfb3ba49448e62983d75bf56c54f75c3c1a8b1d
 size 1625545896
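
Like the other checkpoint artifacts below, this file is stored via Git LFS, so the diff only touches the three-line pointer (spec version, `oid sha256:<hash>`, `size` in bytes); the ~1.6 GB weights themselves live in LFS storage. A minimal sketch of checking a downloaded blob against such a pointer, assuming hypothetical local paths for the pointer text and the blob (neither path is part of this commit):

```python
"""Minimal sketch: verify a downloaded blob against its Git LFS pointer.
Both paths are hypothetical local files, not part of this commit."""
import hashlib
from pathlib import Path

POINTER = Path("model.safetensors.pointer")  # the 3-line pointer text shown in the diff
BLOB = Path("model.safetensors")             # the actual weights fetched from LFS

# Pointer format: "version <url>", "oid sha256:<hex>", "size <bytes>".
fields = dict(line.split(" ", 1) for line in POINTER.read_text().splitlines())
expected_oid = fields["oid"].split(":", 1)[1]
expected_size = int(fields["size"])

sha = hashlib.sha256()
with BLOB.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha.update(chunk)

assert BLOB.stat().st_size == expected_size, "size mismatch"
assert sha.hexdigest() == expected_oid, "sha256 mismatch"
print("pointer and blob match:", expected_oid)
```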
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:111fc2c7739ba419a0649fff6c653fe1ac707bedea828f93414bbd7128ac5a24
+oid sha256:fbd44a9b8822712623754f153b3e10a8a4606b43a9ed80e250d8fa95644c6bbb
 size 3250997519
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b85836ff314397b6e9cda5d18b4e61802951efe26d6873089b5a8ec8a7fb5ae8
-size 14308
+oid sha256:2e0dee2946d739abedea828dc5f18e5164afb995d6abef97fe5c41e0ee0ce17c
+size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dbabefdd804dedc5c232b0c700653c9873b9270f3c9973815bb3914f4fa8dadb
+oid sha256:004fc499cb40e3b895611c295aa6eacd35ec3d64c3847db0861c8802b04353a4
 size 1064
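
model.safetensors, optimizer.pt, scheduler.pt and rng_state.pth (together with trainer_state.json below) are what `transformers.Trainer` reloads when a run is resumed. A minimal sketch of resuming from this checkpoint, assuming a local copy of `last-checkpoint/`, `facebook/bart-large` as the base model (a guess from the 1.6 GB weight file), and a toy in-memory dataset standing in for the real kwsylgen data; none of these names are taken from the commit itself:

```python
"""Minimal sketch of resuming this run with transformers.Trainer.
Assumptions (not part of this commit): transformers/datasets/torch installed,
facebook/bart-large as the base model, a local copy of last-checkpoint/, and a
toy in-memory dataset standing in for the real kwsylgen data."""
from datasets import Dataset
from transformers import (AutoModelForSeq2SeqLM, AutoTokenizer, Trainer,
                          TrainingArguments)

CHECKPOINT_DIR = "last-checkpoint"   # hypothetical local path to this checkpoint
BASE_MODEL = "facebook/bart-large"   # assumption, inferred from the weight file size

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
model = AutoModelForSeq2SeqLM.from_pretrained(BASE_MODEL)

def encode(example):
    enc = tokenizer(example["text"], truncation=True, padding="max_length", max_length=32)
    enc["labels"] = list(enc["input_ids"])  # toy target = input, just to make the sketch run
    return enc

toy = Dataset.from_dict({"text": ["placeholder lyric line"] * 8}).map(
    encode, remove_columns=["text"])

args = TrainingArguments(
    output_dir="bart-finetuned-kwsylgen-64",  # mirrors the run in trainer_state.json
    per_device_train_batch_size=64,           # "train_batch_size": 64
    num_train_epochs=10,                      # "num_train_epochs": 10
    save_steps=500,                           # "save_steps": 500
    logging_steps=500,                        # "logging_steps": 500
)

trainer = Trainer(model=model, args=args, train_dataset=toy)

# Reloads the weights, optimizer, LR scheduler and RNG state saved above before
# training; with the real dataset this continues from global_step 5000.
trainer.train(resume_from_checkpoint=CHECKPOINT_DIR)
```

With the original tokenized dataset in place of the toy one, `resume_from_checkpoint` picks the run up at global_step 5000 instead of starting over.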
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.45187193155288696,
-  "best_model_checkpoint": "/content/drive/MyDrive/W210 Capstone - Lyric Generation with Melody/loaf/models/kwsylgen/bart/bart-finetuned-kwsylgen-64/checkpoint-500",
-  "epoch": 0.17940437746681018,
+  "best_metric": 0.40889972448349,
+  "best_model_checkpoint": "/content/drive/MyDrive/W210 Capstone - Lyric Generation with Melody/loaf/models/kwsylgen/bart/bart-finetuned-kwsylgen-64/checkpoint-5000",
+  "epoch": 1.794043774668102,
   "eval_steps": 500,
-  "global_step": 500,
+  "global_step": 5000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -22,6 +22,141 @@
       "eval_samples_per_second": 112.166,
       "eval_steps_per_second": 1.757,
       "step": 500
+    },
+    {
+      "epoch": 0.36,
+      "grad_norm": 0.4796440005302429,
+      "learning_rate": 4.82113383566559e-05,
+      "loss": 0.4133,
+      "step": 1000
+    },
+    {
+      "epoch": 0.36,
+      "eval_loss": 0.43858015537261963,
+      "eval_runtime": 212.329,
+      "eval_samples_per_second": 111.539,
+      "eval_steps_per_second": 1.747,
+      "step": 1000
+    },
+    {
+      "epoch": 0.54,
+      "grad_norm": 1.4028351306915283,
+      "learning_rate": 4.731431646932185e-05,
+      "loss": 0.3907,
+      "step": 1500
+    },
+    {
+      "epoch": 0.54,
+      "eval_loss": 0.43201687932014465,
+      "eval_runtime": 213.8428,
+      "eval_samples_per_second": 110.75,
+      "eval_steps_per_second": 1.735,
+      "step": 1500
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 0.6824278235435486,
+      "learning_rate": 4.6417294581987804e-05,
+      "loss": 0.3752,
+      "step": 2000
+    },
+    {
+      "epoch": 0.72,
+      "eval_loss": 0.4287361800670624,
+      "eval_runtime": 211.3439,
+      "eval_samples_per_second": 112.059,
+      "eval_steps_per_second": 1.755,
+      "step": 2000
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 0.4877581298351288,
+      "learning_rate": 4.5520272694653755e-05,
+      "loss": 0.362,
+      "step": 2500
+    },
+    {
+      "epoch": 0.9,
+      "eval_loss": 0.42700710892677307,
+      "eval_runtime": 211.5555,
+      "eval_samples_per_second": 111.947,
+      "eval_steps_per_second": 1.754,
+      "step": 2500
+    },
+    {
+      "epoch": 1.08,
+      "grad_norm": 0.5196011066436768,
+      "learning_rate": 4.46232508073197e-05,
+      "loss": 0.3496,
+      "step": 3000
+    },
+    {
+      "epoch": 1.08,
+      "eval_loss": 0.4205136299133301,
+      "eval_runtime": 217.1988,
+      "eval_samples_per_second": 109.038,
+      "eval_steps_per_second": 1.708,
+      "step": 3000
+    },
+    {
+      "epoch": 1.26,
+      "grad_norm": 0.6311954855918884,
+      "learning_rate": 4.372622891998565e-05,
+      "loss": 0.3413,
+      "step": 3500
+    },
+    {
+      "epoch": 1.26,
+      "eval_loss": 0.41408446431159973,
+      "eval_runtime": 217.6178,
+      "eval_samples_per_second": 108.828,
+      "eval_steps_per_second": 1.705,
+      "step": 3500
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 0.6342439651489258,
+      "learning_rate": 4.2829207032651594e-05,
+      "loss": 0.3315,
+      "step": 4000
+    },
+    {
+      "epoch": 1.44,
+      "eval_loss": 0.4208415150642395,
+      "eval_runtime": 221.8644,
+      "eval_samples_per_second": 106.745,
+      "eval_steps_per_second": 1.672,
+      "step": 4000
+    },
+    {
+      "epoch": 1.61,
+      "grad_norm": 0.7352110743522644,
+      "learning_rate": 4.1932185145317545e-05,
+      "loss": 0.3235,
+      "step": 4500
+    },
+    {
+      "epoch": 1.61,
+      "eval_loss": 0.41450121998786926,
+      "eval_runtime": 212.3824,
+      "eval_samples_per_second": 111.511,
+      "eval_steps_per_second": 1.747,
+      "step": 4500
+    },
+    {
+      "epoch": 1.79,
+      "grad_norm": 0.7556421160697937,
+      "learning_rate": 4.1035163257983496e-05,
+      "loss": 0.3171,
+      "step": 5000
+    },
+    {
+      "epoch": 1.79,
+      "eval_loss": 0.40889972448349,
+      "eval_runtime": 213.3008,
+      "eval_samples_per_second": 111.031,
+      "eval_steps_per_second": 1.739,
+      "step": 5000
     }
   ],
   "logging_steps": 500,
@@ -29,7 +164,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 10,
   "save_steps": 500,
-  "total_flos": 4334209204224000.0,
+  "total_flos": 4.334019582571315e+16,
   "train_batch_size": 64,
   "trial_name": null,
   "trial_params": null