[GHA] experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb result notebook & reports

#147
.gitattributes CHANGED
@@ -92,3 +92,4 @@ experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/part1.ipynb filter=lfs d
92
  experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage2.ipynb filter=lfs diff=lfs merge=lfs -text
93
  experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage2.ipynb filter=lfs diff=lfs merge=lfs -text
94
  experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb filter=lfs diff=lfs merge=lfs -text
 
 
92
  experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage2.ipynb filter=lfs diff=lfs merge=lfs -text
93
  experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage2.ipynb filter=lfs diff=lfs merge=lfs -text
94
  experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/stage3.ipynb filter=lfs diff=lfs merge=lfs -text
95
+ experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb filter=lfs diff=lfs merge=lfs -text
experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb CHANGED
@@ -1,2450 +1,3 @@
1
- {
2
- "cells": [
3
- {
4
- "attachments": {},
5
- "cell_type": "markdown",
6
- "id": "83b29667",
7
- "metadata": {
8
- "papermill": {
9
- "duration": 0.004177,
10
- "end_time": "2023-09-14T04:09:42.595329",
11
- "exception": false,
12
- "start_time": "2023-09-14T04:09:42.591152",
13
- "status": "completed"
14
- },
15
- "tags": []
16
- },
17
- "source": [
18
- "# RWKV v5\n",
19
- "\n",
20
- "Simple memory training for a small model\n",
21
- "\n",
22
- "**Note:** This project assumes you have the rwkv-infctx conda env setup"
23
- ]
24
- },
25
- {
26
- "attachments": {},
27
- "cell_type": "markdown",
28
- "id": "e81e130e",
29
- "metadata": {
30
- "papermill": {
31
- "duration": 0.00248,
32
- "end_time": "2023-09-14T04:09:42.600743",
33
- "exception": false,
34
- "start_time": "2023-09-14T04:09:42.598263",
35
- "status": "completed"
36
- },
37
- "tags": []
38
- },
39
- "source": [
40
- "# Basic Setup"
41
- ]
42
- },
43
- {
44
- "cell_type": "code",
45
- "execution_count": 1,
46
- "id": "30eb35b6",
47
- "metadata": {
48
- "execution": {
49
- "iopub.execute_input": "2023-09-14T04:09:42.604796Z",
50
- "iopub.status.busy": "2023-09-14T04:09:42.604323Z",
51
- "iopub.status.idle": "2023-09-14T04:09:43.475229Z",
52
- "shell.execute_reply": "2023-09-14T04:09:43.474410Z"
53
- },
54
- "papermill": {
55
- "duration": 0.874952,
56
- "end_time": "2023-09-14T04:09:43.477084",
57
- "exception": false,
58
- "start_time": "2023-09-14T04:09:42.602132",
59
- "status": "completed"
60
- },
61
- "tags": []
62
- },
63
- "outputs": [
64
- {
65
- "name": "stdout",
66
- "output_type": "stream",
67
- "text": [
68
- "CITATION.cff RWKV-v4wavenet\t RWKV-v5headsize2x checkpoint\tnotebook\r\n",
69
- "LICENSE RWKV-v5\t\t RWKV-v5headsize32 datapath\toutput\r\n",
70
- "README.md RWKV-v5-beta2\t RWKV-v5rstack\t docker\r\n",
71
- "RWKV-v4neo RWKV-v5altwavenet RWKV-v5wavenet model\r\n"
72
- ]
73
- }
74
- ],
75
- "source": [
76
- "# First lets setup the various directories, and init the model\n",
77
- "!ls ../../../../../\n",
78
- "!mkdir -p ../../../../../model/\n",
79
- "!mkdir -p ../../../../../datapath/\n",
80
- "!mkdir -p ../../../../../checkpoint/"
81
- ]
82
- },
83
- {
84
- "cell_type": "code",
85
- "execution_count": 2,
86
- "id": "62846b47",
87
- "metadata": {
88
- "execution": {
89
- "iopub.execute_input": "2023-09-14T04:09:43.484470Z",
90
- "iopub.status.busy": "2023-09-14T04:09:43.484106Z",
91
- "iopub.status.idle": "2023-09-14T04:09:45.594235Z",
92
- "shell.execute_reply": "2023-09-14T04:09:45.593478Z"
93
- },
94
- "papermill": {
95
- "duration": 2.115687,
96
- "end_time": "2023-09-14T04:09:45.595985",
97
- "exception": false,
98
- "start_time": "2023-09-14T04:09:43.480298",
99
- "status": "completed"
100
- },
101
- "tags": []
102
- },
103
- "outputs": [
104
- {
105
- "name": "stdout",
106
- "output_type": "stream",
107
- "text": [
108
- "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\r\n",
109
- "\u001b[0m"
110
- ]
111
- }
112
- ],
113
- "source": [
114
- "# Additional dependencies for eval stuff\n",
115
- "!pip3 install -q aiocsv aiofiles"
116
- ]
117
- },
118
- {
119
- "cell_type": "code",
120
- "execution_count": 3,
121
- "id": "8b76286e",
122
- "metadata": {
123
- "execution": {
124
- "iopub.execute_input": "2023-09-14T04:09:45.603947Z",
125
- "iopub.status.busy": "2023-09-14T04:09:45.603582Z",
126
- "iopub.status.idle": "2023-09-14T04:09:45.612565Z",
127
- "shell.execute_reply": "2023-09-14T04:09:45.611873Z"
128
- },
129
- "papermill": {
130
- "duration": 0.01509,
131
- "end_time": "2023-09-14T04:09:45.614344",
132
- "exception": false,
133
- "start_time": "2023-09-14T04:09:45.599254",
134
- "status": "completed"
135
- },
136
- "tags": []
137
- },
138
- "outputs": [
139
- {
140
- "name": "stdout",
141
- "output_type": "stream",
142
- "text": [
143
- "DEEPSPEED_STRAT: deepspeed_stage_1\n",
144
- "ENABLE_WANDB: True\n",
145
- "GPU_DEVICES: auto\n",
146
- "DIR_NAME: L6-D2048-E1e-1-ctx4k\n",
147
- "NOTEBOOK_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k\n",
148
- "INFERENCE_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n",
149
- "TRAINER_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5\n",
150
- "PROJECT_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer\n"
151
- ]
152
- }
153
- ],
154
- "source": [
155
- "DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n",
156
- "GPU_DEVICES=\"auto\"\n",
157
- "ENABLE_WANDB=True\n",
158
- "\n",
159
- "# Layer count and embed dim to start with\n",
160
- "LAYER_COUNT=6\n",
161
- "EMBED_DIM=2048\n",
162
- "\n",
163
- "EMBED_SCALE=0.1\n",
164
- "EMBED_SCALE_LABEL=str(EMBED_SCALE).replace(\".\", \"_\")\n",
165
- "\n",
166
- "WANDB_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE}\"\n",
167
- "FILENAME_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE_LABEL}\"\n",
168
- "\n",
169
- "print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n",
170
- "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n",
171
- "print(\"GPU_DEVICES:\", GPU_DEVICES)\n",
172
- "\n",
173
- "if ENABLE_WANDB:\n",
174
- " WANDB_MODE=\"online\"\n",
175
- "else:\n",
176
- " WANDB_MODE=\"disabled\"\n",
177
- "\n",
178
- "# Computing the notebook, and various paths\n",
179
- "import os\n",
180
- "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n",
181
- "CONFIG_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../\"))\n",
182
- "PROJECT_DIR=os.path.abspath(os.path.join(CONFIG_DIR, \"../../../../\"))\n",
183
- "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n",
184
- "INFERENCE_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n",
185
- "\n",
186
- "# Get the notebook dir name\n",
187
- "DIR_NAME=os.path.basename(NOTEBOOK_DIR)\n",
188
- "\n",
189
- "# Log names and dir\n",
190
- "print(\"DIR_NAME:\", DIR_NAME)\n",
191
- "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n",
192
- "print(\"INFERENCE_DIR:\", INFERENCE_DIR)\n",
193
- "print(\"TRAINER_DIR:\", TRAINER_DIR)\n",
194
- "print(\"PROJECT_DIR:\", PROJECT_DIR)"
195
- ]
196
- },
197
- {
198
- "cell_type": "code",
199
- "execution_count": 4,
200
- "id": "bf170d72",
201
- "metadata": {
202
- "execution": {
203
- "iopub.execute_input": "2023-09-14T04:09:45.622383Z",
204
- "iopub.status.busy": "2023-09-14T04:09:45.621825Z"
205
- },
206
- "papermill": {
207
- "duration": null,
208
- "end_time": null,
209
- "exception": false,
210
- "start_time": "2023-09-14T04:09:45.618141",
211
- "status": "running"
212
- },
213
- "tags": []
214
- },
215
- "outputs": [
216
- {
217
- "name": "stdout",
218
- "output_type": "stream",
219
- "text": [
220
- "--2023-09-14 04:09:45-- https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-mem-ctx-512.pth\r\n",
221
- "Resolving huggingface.co (huggingface.co)... 13.33.33.20, 13.33.33.55, 13.33.33.110, ...\r\n",
222
- "Connecting to huggingface.co (huggingface.co)|13.33.33.20|:443... connected.\r\n",
223
- "HTTP request sent, awaiting response... "
224
- ]
225
- },
226
- {
227
- "name": "stdout",
228
- "output_type": "stream",
229
- "text": [
230
- "302 Found\r\n",
231
- "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/92bee66e66bfcba8c592c785b63cb88f4e4889d78d7cdc49c33bd53bf0e3c31f?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L6-D2048-E0_1-mem-ctx-512.pth%3B+filename%3D%22v5r3-L6-D2048-E0_1-mem-ctx-512.pth%22%3B&Expires=1694923785&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDkyMzc4NX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzkyYmVlNjZlNjZiZmNiYThjNTkyYzc4NWI2M2NiODhmNGU0ODg5ZDc4ZDdjZGM0OWMzM2JkNTNiZjBlM2MzMWY%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=AzuHbkYMQgD0pHP1CFX1tdK-cjGb4Y8LvRXLbAmb9kT0d-Ldc2HT5Sr%7EokrLW-0-EumdDAJHoXxv-2KjYIVh702%7EN2hmQ5TxXxSZinQa%7EBXn9iUEVcmUvmqCtQ6b-4WiKBLmLPPzodpRs7hB0oXKrMI0rYgAhw6ue1xCaRlraEb75ZWUio0oP122zwSYVC3pXuhIsFkUUI0mONTWh5r4weCBpAXNgRFgKkR4cSm9yXJb6519Fy-nCqbXrmKDJdami5QOa--SPZ3bBwK7MJdZnMe2Ekl1DKwKEzbIHRiJ1vmap2oVTW3Yj2LjB8qTWbmUSNgc8DLKevXf2GyW9qhXHQ__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n",
232
- "--2023-09-14 04:09:46-- https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/92bee66e66bfcba8c592c785b63cb88f4e4889d78d7cdc49c33bd53bf0e3c31f?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L6-D2048-E0_1-mem-ctx-512.pth%3B+filename%3D%22v5r3-L6-D2048-E0_1-mem-ctx-512.pth%22%3B&Expires=1694923785&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NDkyMzc4NX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzkyYmVlNjZlNjZiZmNiYThjNTkyYzc4NWI2M2NiODhmNGU0ODg5ZDc4ZDdjZGM0OWMzM2JkNTNiZjBlM2MzMWY%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=AzuHbkYMQgD0pHP1CFX1tdK-cjGb4Y8LvRXLbAmb9kT0d-Ldc2HT5Sr%7EokrLW-0-EumdDAJHoXxv-2KjYIVh702%7EN2hmQ5TxXxSZinQa%7EBXn9iUEVcmUvmqCtQ6b-4WiKBLmLPPzodpRs7hB0oXKrMI0rYgAhw6ue1xCaRlraEb75ZWUio0oP122zwSYVC3pXuhIsFkUUI0mONTWh5r4weCBpAXNgRFgKkR4cSm9yXJb6519Fy-nCqbXrmKDJdami5QOa--SPZ3bBwK7MJdZnMe2Ekl1DKwKEzbIHRiJ1vmap2oVTW3Yj2LjB8qTWbmUSNgc8DLKevXf2GyW9qhXHQ__&Key-Pair-Id=KVTP0A1DKRTAX\r\n",
233
- "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... "
234
- ]
235
- },
236
- {
237
- "name": "stdout",
238
- "output_type": "stream",
239
- "text": [
240
- "18.155.68.73, 18.155.68.98, 18.155.68.94, ...\r\n",
241
- "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|18.155.68.73|:443... connected.\r\n",
242
- "HTTP request sent, awaiting response... "
243
- ]
244
- },
245
- {
246
- "name": "stdout",
247
- "output_type": "stream",
248
- "text": [
249
- "200 OK\r\n",
250
- "Length: 1066537217 (1017M) [binary/octet-stream]\r\n",
251
- "Saving to: ‘v5r3-L6-D2048-E0_1-mem-ctx-512.pth’\r\n",
252
- "\r\n",
253
- "\r",
254
- " v5r3-L6-D 0%[ ] 0 --.-KB/s "
255
- ]
256
- },
257
- {
258
- "name": "stdout",
259
- "output_type": "stream",
260
- "text": [
261
- "\r",
262
- " v5r3-L6-D2 0%[ ] 26.24K 111KB/s "
263
- ]
264
- },
265
- {
266
- "name": "stdout",
267
- "output_type": "stream",
268
- "text": [
269
- "\r",
270
- " v5r3-L6-D20 0%[ ] 60.24K 128KB/s "
271
- ]
272
- },
273
- {
274
- "name": "stdout",
275
- "output_type": "stream",
276
- "text": [
277
- "\r",
278
- " v5r3-L6-D204 0%[ ] 111.24K 157KB/s "
279
- ]
280
- },
281
- {
282
- "name": "stdout",
283
- "output_type": "stream",
284
- "text": [
285
- "\r",
286
- " v5r3-L6-D2048 0%[ ] 170.49K 181KB/s "
287
- ]
288
- },
289
- {
290
- "name": "stdout",
291
- "output_type": "stream",
292
- "text": [
293
- "\r",
294
- " v5r3-L6-D2048- 0%[ ] 217.59K 185KB/s "
295
- ]
296
- },
297
- {
298
- "name": "stdout",
299
- "output_type": "stream",
300
- "text": [
301
- "\r",
302
- " v5r3-L6-D2048-E 0%[ ] 270.33K 191KB/s "
303
- ]
304
- },
305
- {
306
- "name": "stdout",
307
- "output_type": "stream",
308
- "text": [
309
- "\r",
310
- " v5r3-L6-D2048-E0 0%[ ] 323.64K 200KB/s "
311
- ]
312
- },
313
- {
314
- "name": "stdout",
315
- "output_type": "stream",
316
- "text": [
317
- "\r",
318
- " v5r3-L6-D2048-E0_ 0%[ ] 385.17K 204KB/s "
319
- ]
320
- },
321
- {
322
- "name": "stdout",
323
- "output_type": "stream",
324
- "text": [
325
- "\r",
326
- " v5r3-L6-D2048-E0_1 0%[ ] 455.48K 215KB/s "
327
- ]
328
- },
329
- {
330
- "name": "stdout",
331
- "output_type": "stream",
332
- "text": [
333
- "\r",
334
- "v5r3-L6-D2048-E0_1- 0%[ ] 534.00K 226KB/s "
335
- ]
336
- },
337
- {
338
- "name": "stdout",
339
- "output_type": "stream",
340
- "text": [
341
- "\r",
342
- "5r3-L6-D2048-E0_1-m 0%[ ] 595.52K 230KB/s "
343
- ]
344
- },
345
- {
346
- "name": "stdout",
347
- "output_type": "stream",
348
- "text": [
349
- "\r",
350
- "r3-L6-D2048-E0_1-me 0%[ ] 680.84K 241KB/s "
351
- ]
352
- },
353
- {
354
- "name": "stdout",
355
- "output_type": "stream",
356
- "text": [
357
- "\r",
358
- "3-L6-D2048-E0_1-mem 0%[ ] 763.09K 249KB/s eta 69m 41s"
359
- ]
360
- },
361
- {
362
- "name": "stdout",
363
- "output_type": "stream",
364
- "text": [
365
- "\r",
366
- "-L6-D2048-E0_1-mem- 0%[ ] 841.62K 255KB/s eta 69m 41s"
367
- ]
368
- },
369
- {
370
- "name": "stdout",
371
- "output_type": "stream",
372
- "text": [
373
- "\r",
374
- "L6-D2048-E0_1-mem-c 0%[ ] 920.72K 260KB/s eta 69m 41s"
375
- ]
376
- },
377
- {
378
- "name": "stdout",
379
- "output_type": "stream",
380
- "text": [
381
- "\r",
382
- "6-D2048-E0_1-mem-ct 0%[ ] 1018K 270KB/s eta 69m 41s"
383
- ]
384
- },
385
- {
386
- "name": "stdout",
387
- "output_type": "stream",
388
- "text": [
389
- "\r",
390
- "-D2048-E0_1-mem-ctx 0%[ ] 1.08M 276KB/s eta 69m 41s"
391
- ]
392
- },
393
- {
394
- "name": "stdout",
395
- "output_type": "stream",
396
- "text": [
397
- "\r",
398
- "D2048-E0_1-mem-ctx- 0%[ ] 1.18M 285KB/s eta 60m 45s"
399
- ]
400
- },
401
- {
402
- "name": "stdout",
403
- "output_type": "stream",
404
- "text": [
405
- "\r",
406
- "2048-E0_1-mem-ctx-5 0%[ ] 1.28M 293KB/s eta 60m 45s"
407
- ]
408
- },
409
- {
410
- "name": "stdout",
411
- "output_type": "stream",
412
- "text": [
413
- "\r",
414
- "048-E0_1-mem-ctx-51 0%[ ] 1.40M 304KB/s eta 60m 45s"
415
- ]
416
- },
417
- {
418
- "name": "stdout",
419
- "output_type": "stream",
420
- "text": [
421
- "\r",
422
- "48-E0_1-mem-ctx-512 0%[ ] 1.51M 323KB/s eta 60m 45s"
423
- ]
424
- },
425
- {
426
- "name": "stdout",
427
- "output_type": "stream",
428
- "text": [
429
- "\r",
430
- "8-E0_1-mem-ctx-512. 0%[ ] 1.63M 342KB/s eta 60m 45s"
431
- ]
432
- },
433
- {
434
- "name": "stdout",
435
- "output_type": "stream",
436
- "text": [
437
- "\r",
438
- "-E0_1-mem-ctx-512.p 0%[ ] 1.76M 359KB/s eta 52m 1s "
439
- ]
440
- },
441
- {
442
- "name": "stdout",
443
- "output_type": "stream",
444
- "text": [
445
- "\r",
446
- "E0_1-mem-ctx-512.pt 0%[ ] 1.90M 376KB/s eta 52m 1s "
447
- ]
448
- },
449
- {
450
- "name": "stdout",
451
- "output_type": "stream",
452
- "text": [
453
- "\r",
454
- "0_1-mem-ctx-512.pth 0%[ ] 2.03M 395KB/s eta 52m 1s "
455
- ]
456
- },
457
- {
458
- "name": "stdout",
459
- "output_type": "stream",
460
- "text": [
461
- "\r",
462
- "_1-mem-ctx-512.pth 0%[ ] 2.18M 416KB/s eta 52m 1s "
463
- ]
464
- },
465
- {
466
- "name": "stdout",
467
- "output_type": "stream",
468
- "text": [
469
- "\r",
470
- "1-mem-ctx-512.pth 0%[ ] 2.33M 434KB/s eta 52m 1s "
471
- ]
472
- },
473
- {
474
- "name": "stdout",
475
- "output_type": "stream",
476
- "text": [
477
- "\r",
478
- "-mem-ctx-512.pth 0%[ ] 2.51M 464KB/s eta 44m 28s"
479
- ]
480
- },
481
- {
482
- "name": "stdout",
483
- "output_type": "stream",
484
- "text": [
485
- "\r",
486
- "mem-ctx-512.pth 0%[ ] 2.69M 488KB/s eta 44m 28s"
487
- ]
488
- },
489
- {
490
- "name": "stdout",
491
- "output_type": "stream",
492
- "text": [
493
- "\r",
494
- "em-ctx-512.pth 0%[ ] 2.89M 515KB/s eta 44m 28s"
495
- ]
496
- },
497
- {
498
- "name": "stdout",
499
- "output_type": "stream",
500
- "text": [
501
- "\r",
502
- "m-ctx-512.pth 0%[ ] 3.12M 552KB/s eta 44m 28s"
503
- ]
504
- },
505
- {
506
- "name": "stdout",
507
- "output_type": "stream",
508
- "text": [
509
- "\r",
510
- "-ctx-512.pth 0%[ ] 3.36M 585KB/s eta 44m 28s"
511
- ]
512
- },
513
- {
514
- "name": "stdout",
515
- "output_type": "stream",
516
- "text": [
517
- "\r",
518
- "ctx-512.pth 0%[ ] 3.62M 625KB/s eta 36m 17s"
519
- ]
520
- },
521
- {
522
- "name": "stdout",
523
- "output_type": "stream",
524
- "text": [
525
- "\r",
526
- "tx-512.pth 0%[ ] 3.91M 670KB/s eta 36m 17s"
527
- ]
528
- },
529
- {
530
- "name": "stdout",
531
- "output_type": "stream",
532
- "text": [
533
- "\r",
534
- "x-512.pth 0%[ ] 4.22M 721KB/s eta 36m 17s"
535
- ]
536
- },
537
- {
538
- "name": "stdout",
539
- "output_type": "stream",
540
- "text": [
541
- "\r",
542
- "-512.pth 0%[ ] 4.54M 771KB/s eta 36m 17s"
543
- ]
544
- },
545
- {
546
- "name": "stdout",
547
- "output_type": "stream",
548
- "text": [
549
- "\r",
550
- "512.pth 0%[ ] 4.90M 830KB/s eta 36m 17s"
551
- ]
552
- },
553
- {
554
- "name": "stdout",
555
- "output_type": "stream",
556
- "text": [
557
- "\r",
558
- "12.pth 0%[ ] 5.28M 890KB/s eta 28m 36s"
559
- ]
560
- },
561
- {
562
- "name": "stdout",
563
- "output_type": "stream",
564
- "text": [
565
- "\r",
566
- "2.pth 0%[ ] 5.70M 959KB/s eta 28m 36s"
567
- ]
568
- },
569
- {
570
- "name": "stdout",
571
- "output_type": "stream",
572
- "text": [
573
- "\r",
574
- ".pth 0%[ ] 6.13M 1.00MB/s eta 28m 36s"
575
- ]
576
- },
577
- {
578
- "name": "stdout",
579
- "output_type": "stream",
580
- "text": [
581
- "\r",
582
- "pth 0%[ ] 6.61M 1.08MB/s eta 28m 36s"
583
- ]
584
- },
585
- {
586
- "name": "stdout",
587
- "output_type": "stream",
588
- "text": [
589
- "\r",
590
- "th 0%[ ] 7.11M 1.16MB/s eta 28m 36s"
591
- ]
592
- },
593
- {
594
- "name": "stdout",
595
- "output_type": "stream",
596
- "text": [
597
- "\r",
598
- "h 0%[ ] 7.66M 1.25MB/s eta 22m 17s"
599
- ]
600
- },
601
- {
602
- "name": "stdout",
603
- "output_type": "stream",
604
- "text": [
605
- "\r",
606
- " 0%[ ] 8.23M 1.34MB/s eta 22m 17s"
607
- ]
608
- },
609
- {
610
- "name": "stdout",
611
- "output_type": "stream",
612
- "text": [
613
- "\r",
614
- " v 0%[ ] 8.84M 1.44MB/s eta 22m 17s"
615
- ]
616
- },
617
- {
618
- "name": "stdout",
619
- "output_type": "stream",
620
- "text": [
621
- "\r",
622
- " v5 0%[ ] 9.51M 1.56MB/s eta 22m 17s"
623
- ]
624
- },
625
- {
626
- "name": "stdout",
627
- "output_type": "stream",
628
- "text": [
629
- "\r",
630
- " v5r 1%[ ] 10.22M 1.67MB/s eta 22m 17s"
631
- ]
632
- },
633
- {
634
- "name": "stdout",
635
- "output_type": "stream",
636
- "text": [
637
- "\r",
638
- " v5r3 1%[ ] 10.97M 1.79MB/s eta 17m 18s"
639
- ]
640
- },
641
- {
642
- "name": "stdout",
643
- "output_type": "stream",
644
- "text": [
645
- "\r",
646
- " v5r3- 1%[ ] 11.76M 1.92MB/s eta 17m 18s"
647
- ]
648
- },
649
- {
650
- "name": "stdout",
651
- "output_type": "stream",
652
- "text": [
653
- "\r",
654
- " v5r3-L 1%[ ] 12.61M 2.06MB/s eta 17m 18s"
655
- ]
656
- },
657
- {
658
- "name": "stdout",
659
- "output_type": "stream",
660
- "text": [
661
- "\r",
662
- " v5r3-L6 1%[ ] 13.50M 2.20MB/s eta 17m 18s"
663
- ]
664
- },
665
- {
666
- "name": "stdout",
667
- "output_type": "stream",
668
- "text": [
669
- "\r",
670
- " v5r3-L6- 1%[ ] 14.45M 2.35MB/s eta 17m 18s"
671
- ]
672
- },
673
- {
674
- "name": "stdout",
675
- "output_type": "stream",
676
- "text": [
677
- "\r",
678
- " v5r3-L6-D 1%[ ] 15.45M 2.51MB/s eta 13m 30s"
679
- ]
680
- },
681
- {
682
- "name": "stdout",
683
- "output_type": "stream",
684
- "text": [
685
- "\r",
686
- " v5r3-L6-D2 1%[ ] 16.51M 2.67MB/s eta 13m 30s"
687
- ]
688
- },
689
- {
690
- "name": "stdout",
691
- "output_type": "stream",
692
- "text": [
693
- "\r",
694
- " v5r3-L6-D20 1%[ ] 17.62M 2.84MB/s eta 13m 30s"
695
- ]
696
- },
697
- {
698
- "name": "stdout",
699
- "output_type": "stream",
700
- "text": [
701
- "\r",
702
- " v5r3-L6-D204 1%[ ] 18.81M 3.02MB/s eta 13m 30s"
703
- ]
704
- },
705
- {
706
- "name": "stdout",
707
- "output_type": "stream",
708
- "text": [
709
- "\r",
710
- " v5r3-L6-D2048 1%[ ] 20.06M 3.21MB/s eta 13m 30s"
711
- ]
712
- },
713
- {
714
- "name": "stdout",
715
- "output_type": "stream",
716
- "text": [
717
- "\r",
718
- " v5r3-L6-D2048- 2%[ ] 21.37M 3.41MB/s eta 10m 37s"
719
- ]
720
- },
721
- {
722
- "name": "stdout",
723
- "output_type": "stream",
724
- "text": [
725
- "\r",
726
- " v5r3-L6-D2048-E 2%[ ] 22.76M 3.62MB/s eta 10m 37s"
727
- ]
728
- },
729
- {
730
- "name": "stdout",
731
- "output_type": "stream",
732
- "text": [
733
- "\r",
734
- " v5r3-L6-D2048-E0 2%[ ] 24.22M 3.93MB/s eta 10m 37s"
735
- ]
736
- },
737
- {
738
- "name": "stdout",
739
- "output_type": "stream",
740
- "text": [
741
- "\r",
742
- " v5r3-L6-D2048-E0_ 2%[ ] 25.75M 4.16MB/s eta 10m 37s"
743
- ]
744
- },
745
- {
746
- "name": "stdout",
747
- "output_type": "stream",
748
- "text": [
749
- "\r",
750
- " v5r3-L6-D2048-E0_1 2%[ ] 27.36M 4.40MB/s eta 10m 37s"
751
- ]
752
- },
753
- {
754
- "name": "stdout",
755
- "output_type": "stream",
756
- "text": [
757
- "\r",
758
- "v5r3-L6-D2048-E0_1- 2%[ ] 29.04M 4.76MB/s eta 8m 25s "
759
- ]
760
- },
761
- {
762
- "name": "stdout",
763
- "output_type": "stream",
764
- "text": [
765
- "\r",
766
- "5r3-L6-D2048-E0_1-m 2%[ ] 30.06M 4.83MB/s eta 8m 25s "
767
- ]
768
- },
769
- {
770
- "name": "stdout",
771
- "output_type": "stream",
772
- "text": [
773
- "\r",
774
- "r3-L6-D2048-E0_1-me 3%[ ] 32.53M 5.26MB/s eta 8m 25s "
775
- ]
776
- },
777
- {
778
- "name": "stdout",
779
- "output_type": "stream",
780
- "text": [
781
- "\r",
782
- "3-L6-D2048-E0_1-mem 3%[ ] 33.84M 5.39MB/s eta 8m 25s "
783
- ]
784
- },
785
- {
786
- "name": "stdout",
787
- "output_type": "stream",
788
- "text": [
789
- "\r",
790
- "-L6-D2048-E0_1-mem- 3%[ ] 35.19M 5.53MB/s eta 8m 25s "
791
- ]
792
- },
793
- {
794
- "name": "stdout",
795
- "output_type": "stream",
796
- "text": [
797
- "\r",
798
- "L6-D2048-E0_1-mem-c 3%[ ] 36.58M 5.76MB/s eta 7m 10s "
799
- ]
800
- },
801
- {
802
- "name": "stdout",
803
- "output_type": "stream",
804
- "text": [
805
- "\r",
806
- "6-D2048-E0_1-mem-ct 3%[ ] 38.00M 5.88MB/s eta 7m 10s "
807
- ]
808
- },
809
- {
810
- "name": "stdout",
811
- "output_type": "stream",
812
- "text": [
813
- "\r",
814
- "-D2048-E0_1-mem-ctx 3%[ ] 39.45M 5.99MB/s eta 7m 10s "
815
- ]
816
- },
817
- {
818
- "name": "stdout",
819
- "output_type": "stream",
820
- "text": [
821
- "\r",
822
- "D2048-E0_1-mem-ctx- 4%[ ] 40.94M 6.09MB/s eta 7m 10s "
823
- ]
824
- },
825
- {
826
- "name": "stdout",
827
- "output_type": "stream",
828
- "text": [
829
- "\r",
830
- "2048-E0_1-mem-ctx-5 4%[ ] 42.45M 6.19MB/s eta 7m 10s "
831
- ]
832
- },
833
- {
834
- "name": "stdout",
835
- "output_type": "stream",
836
- "text": [
837
- "\r",
838
- "048-E0_1-mem-ctx-51 4%[ ] 44.00M 6.35MB/s eta 6m 21s "
839
- ]
840
- },
841
- {
842
- "name": "stdout",
843
- "output_type": "stream",
844
- "text": [
845
- "\r",
846
- "48-E0_1-mem-ctx-512 4%[ ] 45.56M 6.27MB/s eta 6m 21s "
847
- ]
848
- },
849
- {
850
- "name": "stdout",
851
- "output_type": "stream",
852
- "text": [
853
- "\r",
854
- "8-E0_1-mem-ctx-512. 4%[ ] 47.15M 6.42MB/s eta 6m 21s "
855
- ]
856
- },
857
- {
858
- "name": "stdout",
859
- "output_type": "stream",
860
- "text": [
861
- "\r",
862
- "-E0_1-mem-ctx-512.p 4%[ ] 48.76M 6.51MB/s eta 6m 21s "
863
- ]
864
- },
865
- {
866
- "name": "stdout",
867
- "output_type": "stream",
868
- "text": [
869
- "\r",
870
- "E0_1-mem-ctx-512.pt 4%[ ] 50.33M 6.29MB/s eta 5m 51s "
871
- ]
872
- },
873
- {
874
- "name": "stdout",
875
- "output_type": "stream",
876
- "text": [
877
- "\r",
878
- "0_1-mem-ctx-512.pth 5%[> ] 53.14M 6.47MB/s eta 5m 51s "
879
- ]
880
- },
881
- {
882
- "name": "stdout",
883
- "output_type": "stream",
884
- "text": [
885
- "\r",
886
- "_1-mem-ctx-512.pth 5%[> ] 54.33M 6.25MB/s eta 5m 51s "
887
- ]
888
- },
889
- {
890
- "name": "stdout",
891
- "output_type": "stream",
892
- "text": [
893
- "\r",
894
- "1-mem-ctx-512.pth 5%[> ] 55.53M 6.40MB/s eta 5m 51s "
895
- ]
896
- },
897
- {
898
- "name": "stdout",
899
- "output_type": "stream",
900
- "text": [
901
- "\r",
902
- "-mem-ctx-512.pth 5%[> ] 56.75M 6.47MB/s eta 5m 51s "
903
- ]
904
- },
905
- {
906
- "name": "stdout",
907
- "output_type": "stream",
908
- "text": [
909
- "\r",
910
- "mem-ctx-512.pth 5%[> ] 57.98M 6.05MB/s eta 5m 22s "
911
- ]
912
- },
913
- {
914
- "name": "stdout",
915
- "output_type": "stream",
916
- "text": [
917
- "\r",
918
- "em-ctx-512.pth 5%[> ] 59.25M 5.96MB/s eta 5m 22s "
919
- ]
920
- },
921
- {
922
- "name": "stdout",
923
- "output_type": "stream",
924
- "text": [
925
- "\r",
926
- "m-ctx-512.pth 5%[> ] 60.51M 6.06MB/s eta 5m 22s "
927
- ]
928
- },
929
- {
930
- "name": "stdout",
931
- "output_type": "stream",
932
- "text": [
933
- "\r",
934
- "-ctx-512.pth 6%[> ] 61.79M 6.09MB/s eta 5m 22s "
935
- ]
936
- },
937
- {
938
- "name": "stdout",
939
- "output_type": "stream",
940
- "text": [
941
- "\r",
942
- "ctx-512.pth 6%[> ] 63.09M 6.06MB/s eta 5m 22s "
943
- ]
944
- },
945
- {
946
- "name": "stdout",
947
- "output_type": "stream",
948
- "text": [
949
- "\r",
950
- "tx-512.pth 6%[> ] 64.40M 5.88MB/s eta 5m 5s "
951
- ]
952
- },
953
- {
954
- "name": "stdout",
955
- "output_type": "stream",
956
- "text": [
957
- "\r",
958
- "x-512.pth 6%[> ] 65.73M 5.76MB/s eta 5m 5s "
959
- ]
960
- },
961
- {
962
- "name": "stdout",
963
- "output_type": "stream",
964
- "text": [
965
- "\r",
966
- "-512.pth 6%[> ] 67.06M 5.90MB/s eta 5m 5s "
967
- ]
968
- },
969
- {
970
- "name": "stdout",
971
- "output_type": "stream",
972
- "text": [
973
- "\r",
974
- "512.pth 6%[> ] 68.40M 5.89MB/s eta 5m 5s "
975
- ]
976
- },
977
- {
978
- "name": "stdout",
979
- "output_type": "stream",
980
- "text": [
981
- "\r",
982
- "12.pth 6%[> ] 69.76M 5.57MB/s eta 5m 5s "
983
- ]
984
- },
985
- {
986
- "name": "stdout",
987
- "output_type": "stream",
988
- "text": [
989
- "\r",
990
- "2.pth 6%[> ] 71.12M 5.78MB/s eta 4m 50s "
991
- ]
992
- },
993
- {
994
- "name": "stdout",
995
- "output_type": "stream",
996
- "text": [
997
- "\r",
998
- ".pth 7%[> ] 72.48M 5.87MB/s eta 4m 50s "
999
- ]
1000
- },
1001
- {
1002
- "name": "stdout",
1003
- "output_type": "stream",
1004
- "text": [
1005
- "\r",
1006
- "pth 7%[> ] 73.86M 5.49MB/s eta 4m 50s "
1007
- ]
1008
- },
1009
- {
1010
- "name": "stdout",
1011
- "output_type": "stream",
1012
- "text": [
1013
- "\r",
1014
- "th 7%[> ] 75.23M 5.67MB/s eta 4m 50s "
1015
- ]
1016
- },
1017
- {
1018
- "name": "stdout",
1019
- "output_type": "stream",
1020
- "text": [
1021
- "\r",
1022
- "h 7%[> ] 76.62M 5.72MB/s eta 4m 50s "
1023
- ]
1024
- },
1025
- {
1026
- "name": "stdout",
1027
- "output_type": "stream",
1028
- "text": [
1029
- "\r",
1030
- " 7%[> ] 78.01M 5.64MB/s eta 4m 37s "
1031
- ]
1032
- },
1033
- {
1034
- "name": "stdout",
1035
- "output_type": "stream",
1036
- "text": [
1037
- "\r",
1038
- " v 7%[> ] 79.40M 5.62MB/s eta 4m 37s "
1039
- ]
1040
- },
1041
- {
1042
- "name": "stdout",
1043
- "output_type": "stream",
1044
- "text": [
1045
- "\r",
1046
- " v5 7%[> ] 80.79M 5.81MB/s eta 4m 37s "
1047
- ]
1048
- },
1049
- {
1050
- "name": "stdout",
1051
- "output_type": "stream",
1052
- "text": [
1053
- "\r",
1054
- " v5r 8%[> ] 82.19M 5.88MB/s eta 4m 37s "
1055
- ]
1056
- },
1057
- {
1058
- "name": "stdout",
1059
- "output_type": "stream",
1060
- "text": [
1061
- "\r",
1062
- " v5r3 8%[> ] 83.59M 5.80MB/s eta 4m 37s "
1063
- ]
1064
- },
1065
- {
1066
- "name": "stdout",
1067
- "output_type": "stream",
1068
- "text": [
1069
- "\r",
1070
- " v5r3- 8%[> ] 84.98M 5.81MB/s eta 4m 25s "
1071
- ]
1072
- },
1073
- {
1074
- "name": "stdout",
1075
- "output_type": "stream",
1076
- "text": [
1077
- "\r",
1078
- " v5r3-L 8%[> ] 86.39M 5.72MB/s eta 4m 25s "
1079
- ]
1080
- },
1081
- {
1082
- "name": "stdout",
1083
- "output_type": "stream",
1084
- "text": [
1085
- "\r",
1086
- " v5r3-L6 8%[> ] 87.79M 6.01MB/s eta 4m 25s "
1087
- ]
1088
- },
1089
- {
1090
- "name": "stdout",
1091
- "output_type": "stream",
1092
- "text": [
1093
- "\r",
1094
- " v5r3-L6- 8%[> ] 89.20M 5.89MB/s eta 4m 25s "
1095
- ]
1096
- },
1097
- {
1098
- "name": "stdout",
1099
- "output_type": "stream",
1100
- "text": [
1101
- "\r",
1102
- " v5r3-L6-D 8%[> ] 90.61M 5.99MB/s eta 4m 25s "
1103
- ]
1104
- },
1105
- {
1106
- "name": "stdout",
1107
- "output_type": "stream",
1108
- "text": [
1109
- "\r",
1110
- " v5r3-L6-D2 9%[> ] 92.01M 5.98MB/s eta 4m 15s "
1111
- ]
1112
- },
1113
- {
1114
- "name": "stdout",
1115
- "output_type": "stream",
1116
- "text": [
1117
- "\r",
1118
- " v5r3-L6-D20 9%[> ] 93.42M 5.91MB/s eta 4m 15s "
1119
- ]
1120
- },
1121
- {
1122
- "name": "stdout",
1123
- "output_type": "stream",
1124
- "text": [
1125
- "\r",
1126
- " v5r3-L6-D204 9%[> ] 94.83M 5.80MB/s eta 4m 15s "
1127
- ]
1128
- },
1129
- {
1130
- "name": "stdout",
1131
- "output_type": "stream",
1132
- "text": [
1133
- "\r",
1134
- " v5r3-L6-D2048 9%[> ] 96.22M 6.05MB/s eta 4m 15s "
1135
- ]
1136
- },
1137
- {
1138
- "name": "stdout",
1139
- "output_type": "stream",
1140
- "text": [
1141
- "\r",
1142
- " v5r3-L6-D2048- 9%[> ] 97.62M 5.93MB/s eta 4m 15s "
1143
- ]
1144
- },
1145
- {
1146
- "name": "stdout",
1147
- "output_type": "stream",
1148
- "text": [
1149
- "\r",
1150
- " v5r3-L6-D2048-E 9%[> ] 99.03M 5.88MB/s eta 4m 6s "
1151
- ]
1152
- },
1153
- {
1154
- "name": "stdout",
1155
- "output_type": "stream",
1156
- "text": [
1157
- "\r",
1158
- " v5r3-L6-D2048-E0 9%[> ] 100.44M 6.01MB/s eta 4m 6s "
1159
- ]
1160
- },
1161
- {
1162
- "name": "stdout",
1163
- "output_type": "stream",
1164
- "text": [
1165
- "\r",
1166
- " v5r3-L6-D2048-E0_ 10%[=> ] 101.84M 5.95MB/s eta 4m 6s "
1167
- ]
1168
- },
1169
- {
1170
- "name": "stdout",
1171
- "output_type": "stream",
1172
- "text": [
1173
- "\r",
1174
- " v5r3-L6-D2048-E0_1 10%[=> ] 103.25M 5.89MB/s eta 4m 6s "
1175
- ]
1176
- },
1177
- {
1178
- "name": "stdout",
1179
- "output_type": "stream",
1180
- "text": [
1181
- "\r",
1182
- "v5r3-L6-D2048-E0_1- 10%[=> ] 104.65M 6.02MB/s eta 4m 6s "
1183
- ]
1184
- },
1185
- {
1186
- "name": "stdout",
1187
- "output_type": "stream",
1188
- "text": [
1189
- "\r",
1190
- "5r3-L6-D2048-E0_1-m 10%[=> ] 106.06M 5.95MB/s eta 3m 58s "
1191
- ]
1192
- },
1193
- {
1194
- "name": "stdout",
1195
- "output_type": "stream",
1196
- "text": [
1197
- "\r",
1198
- "r3-L6-D2048-E0_1-me 10%[=> ] 107.47M 5.96MB/s eta 3m 58s "
1199
- ]
1200
- },
1201
- {
1202
- "name": "stdout",
1203
- "output_type": "stream",
1204
- "text": [
1205
- "\r",
1206
- "3-L6-D2048-E0_1-mem 10%[=> ] 108.87M 6.08MB/s eta 3m 58s "
1207
- ]
1208
- },
1209
- {
1210
- "name": "stdout",
1211
- "output_type": "stream",
1212
- "text": [
1213
- "\r",
1214
- "-L6-D2048-E0_1-mem- 10%[=> ] 110.28M 5.96MB/s eta 3m 58s "
1215
- ]
1216
- },
1217
- {
1218
- "name": "stdout",
1219
- "output_type": "stream",
1220
- "text": [
1221
- "\r",
1222
- "L6-D2048-E0_1-mem-c 10%[=> ] 111.70M 5.85MB/s eta 3m 58s "
1223
- ]
1224
- },
1225
- {
1226
- "name": "stdout",
1227
- "output_type": "stream",
1228
- "text": [
1229
- "\r",
1230
- "6-D2048-E0_1-mem-ct 11%[=> ] 113.11M 6.08MB/s eta 3m 51s "
1231
- ]
1232
- },
1233
- {
1234
- "name": "stdout",
1235
- "output_type": "stream",
1236
- "text": [
1237
- "\r",
1238
- "-D2048-E0_1-mem-ctx 11%[=> ] 114.53M 5.85MB/s eta 3m 51s "
1239
- ]
1240
- },
1241
- {
1242
- "name": "stdout",
1243
- "output_type": "stream",
1244
- "text": [
1245
- "\r",
1246
- "D2048-E0_1-mem-ctx- 11%[=> ] 115.95M 6.10MB/s eta 3m 51s "
1247
- ]
1248
- },
1249
- {
1250
- "name": "stdout",
1251
- "output_type": "stream",
1252
- "text": [
1253
- "\r",
1254
- "2048-E0_1-mem-ctx-5 11%[=> ] 117.39M 6.12MB/s eta 3m 51s "
1255
- ]
1256
- },
1257
- {
1258
- "name": "stdout",
1259
- "output_type": "stream",
1260
- "text": [
1261
- "\r",
1262
- "048-E0_1-mem-ctx-51 11%[=> ] 118.81M 6.13MB/s eta 3m 51s "
1263
- ]
1264
- },
1265
- {
1266
- "name": "stdout",
1267
- "output_type": "stream",
1268
- "text": [
1269
- "\r",
1270
- "48-E0_1-mem-ctx-512 11%[=> ] 120.25M 6.01MB/s eta 3m 44s "
1271
- ]
1272
- },
1273
- {
1274
- "name": "stdout",
1275
- "output_type": "stream",
1276
- "text": [
1277
- "\r",
1278
- "8-E0_1-mem-ctx-512. 11%[=> ] 121.69M 5.94MB/s eta 3m 44s "
1279
- ]
1280
- },
1281
- {
1282
- "name": "stdout",
1283
- "output_type": "stream",
1284
- "text": [
1285
- "\r",
1286
- "-E0_1-mem-ctx-512.p 12%[=> ] 123.14M 6.17MB/s eta 3m 44s "
1287
- ]
1288
- },
1289
- {
1290
- "name": "stdout",
1291
- "output_type": "stream",
1292
- "text": [
1293
- "\r",
1294
- "E0_1-mem-ctx-512.pt 12%[=> ] 124.59M 6.04MB/s eta 3m 44s "
1295
- ]
1296
- },
1297
- {
1298
- "name": "stdout",
1299
- "output_type": "stream",
1300
- "text": [
1301
- "\r",
1302
- "0_1-mem-ctx-512.pth 12%[=> ] 126.06M 5.99MB/s eta 3m 44s "
1303
- ]
1304
- },
1305
- {
1306
- "name": "stdout",
1307
- "output_type": "stream",
1308
- "text": [
1309
- "\r",
1310
- "_1-mem-ctx-512.pth 12%[=> ] 127.54M 6.22MB/s eta 3m 38s "
1311
- ]
1312
- },
1313
- {
1314
- "name": "stdout",
1315
- "output_type": "stream",
1316
- "text": [
1317
- "\r",
1318
- "1-mem-ctx-512.pth 12%[=> ] 129.03M 6.11MB/s eta 3m 38s "
1319
- ]
1320
- },
1321
- {
1322
- "name": "stdout",
1323
- "output_type": "stream",
1324
- "text": [
1325
- "\r",
1326
- "-mem-ctx-512.pth 12%[=> ] 130.53M 6.12MB/s eta 3m 38s "
1327
- ]
1328
- },
1329
- {
1330
- "name": "stdout",
1331
- "output_type": "stream",
1332
- "text": [
1333
- "\r",
1334
- "mem-ctx-512.pth 12%[=> ] 132.04M 6.03MB/s eta 3m 38s "
1335
- ]
1336
- },
1337
- {
1338
- "name": "stdout",
1339
- "output_type": "stream",
1340
- "text": [
1341
- "\r",
1342
- "em-ctx-512.pth 13%[=> ] 133.56M 6.34MB/s eta 3m 38s "
1343
- ]
1344
- },
1345
- {
1346
- "name": "stdout",
1347
- "output_type": "stream",
1348
- "text": [
1349
- "\r",
1350
- "m-ctx-512.pth 13%[=> ] 135.11M 6.23MB/s eta 3m 32s "
1351
- ]
1352
- },
1353
- {
1354
- "name": "stdout",
1355
- "output_type": "stream",
1356
- "text": [
1357
- "\r",
1358
- "-ctx-512.pth 13%[=> ] 136.67M 6.14MB/s eta 3m 32s "
1359
- ]
1360
- },
1361
- {
1362
- "name": "stdout",
1363
- "output_type": "stream",
1364
- "text": [
1365
- "\r",
1366
- "ctx-512.pth 13%[=> ] 138.25M 6.44MB/s eta 3m 32s "
1367
- ]
1368
- },
1369
- {
1370
- "name": "stdout",
1371
- "output_type": "stream",
1372
- "text": [
1373
- "\r",
1374
- "tx-512.pth 13%[=> ] 139.84M 6.35MB/s eta 3m 32s "
1375
- ]
1376
- },
1377
- {
1378
- "name": "stdout",
1379
- "output_type": "stream",
1380
- "text": [
1381
- "\r",
1382
- "x-512.pth 13%[=> ] 141.45M 6.54MB/s eta 3m 32s "
1383
- ]
1384
- },
1385
- {
1386
- "name": "stdout",
1387
- "output_type": "stream",
1388
- "text": [
1389
- "\r",
1390
- "-512.pth 14%[=> ] 143.09M 6.46MB/s eta 3m 25s "
1391
- ]
1392
- },
1393
- {
1394
- "name": "stdout",
1395
- "output_type": "stream",
1396
- "text": [
1397
- "\r",
1398
- "512.pth 14%[=> ] 144.73M 6.38MB/s eta 3m 25s "
1399
- ]
1400
- },
1401
- {
1402
- "name": "stdout",
1403
- "output_type": "stream",
1404
- "text": [
1405
- "\r",
1406
- "12.pth 14%[=> ] 146.42M 6.69MB/s eta 3m 25s "
1407
- ]
1408
- },
1409
- {
1410
- "name": "stdout",
1411
- "output_type": "stream",
1412
- "text": [
1413
- "\r",
1414
- "2.pth 14%[=> ] 148.12M 6.54MB/s eta 3m 25s "
1415
- ]
1416
- },
1417
- {
1418
- "name": "stdout",
1419
- "output_type": "stream",
1420
- "text": [
1421
- "\r",
1422
- ".pth 14%[=> ] 149.86M 6.88MB/s eta 3m 25s "
1423
- ]
1424
- },
1425
- {
1426
- "name": "stdout",
1427
- "output_type": "stream",
1428
- "text": [
1429
- "\r",
1430
- "pth 14%[=> ] 151.62M 6.96MB/s eta 3m 19s "
1431
- ]
1432
- },
1433
- {
1434
- "name": "stdout",
1435
- "output_type": "stream",
1436
- "text": [
1437
- "\r",
1438
- "th 15%[==> ] 153.42M 6.89MB/s eta 3m 19s "
1439
- ]
1440
- },
1441
- {
1442
- "name": "stdout",
1443
- "output_type": "stream",
1444
- "text": [
1445
- "\r",
1446
- "h 15%[==> ] 155.23M 6.93MB/s eta 3m 19s "
1447
- ]
1448
- },
1449
- {
1450
- "name": "stdout",
1451
- "output_type": "stream",
1452
- "text": [
1453
- "\r",
1454
- " 15%[==> ] 157.08M 7.30MB/s eta 3m 19s "
1455
- ]
1456
- },
1457
- {
1458
- "name": "stdout",
1459
- "output_type": "stream",
1460
- "text": [
1461
- "\r",
1462
- " v 15%[==> ] 158.95M 7.33MB/s eta 3m 19s "
1463
- ]
1464
- },
1465
- {
1466
- "name": "stdout",
1467
- "output_type": "stream",
1468
- "text": [
1469
- "\r",
1470
- " v5 15%[==> ] 160.19M 7.17MB/s eta 3m 12s "
1471
- ]
1472
- },
1473
- {
1474
- "name": "stdout",
1475
- "output_type": "stream",
1476
- "text": [
1477
- "\r",
1478
- " v5r 15%[==> ] 161.01M 7.14MB/s eta 3m 12s "
1479
- ]
1480
- },
1481
- {
1482
- "name": "stdout",
1483
- "output_type": "stream",
1484
- "text": [
1485
- "\r",
1486
- " v5r3 16%[==> ] 162.95M 7.14MB/s eta 3m 12s "
1487
- ]
1488
- },
1489
- {
1490
- "name": "stdout",
1491
- "output_type": "stream",
1492
- "text": [
1493
- "\r",
1494
- " v5r3- 16%[==> ] 164.95M 7.63MB/s eta 3m 12s "
1495
- ]
1496
- },
1497
- {
1498
- "name": "stdout",
1499
- "output_type": "stream",
1500
- "text": [
1501
- "\r",
1502
- " v5r3-L 16%[==> ] 167.01M 7.54MB/s eta 3m 12s "
1503
- ]
1504
- },
1505
- {
1506
- "name": "stdout",
1507
- "output_type": "stream",
1508
- "text": [
1509
- "\r",
1510
- " v5r3-L6 16%[==> ] 169.09M 7.62MB/s eta 3m 5s "
1511
- ]
1512
- },
1513
- {
1514
- "name": "stdout",
1515
- "output_type": "stream",
1516
- "text": [
1517
- "\r",
1518
- " v5r3-L6- 16%[==> ] 171.23M 8.09MB/s eta 3m 5s "
1519
- ]
1520
- },
1521
- {
1522
- "name": "stdout",
1523
- "output_type": "stream",
1524
- "text": [
1525
- "\r",
1526
- " v5r3-L6-D 17%[==> ] 173.42M 7.87MB/s eta 3m 5s "
1527
- ]
1528
- },
1529
- {
1530
- "name": "stdout",
1531
- "output_type": "stream",
1532
- "text": [
1533
- "\r",
1534
- " v5r3-L6-D2 17%[==> ] 175.65M 8.43MB/s eta 3m 5s "
1535
- ]
1536
- },
1537
- {
1538
- "name": "stdout",
1539
- "output_type": "stream",
1540
- "text": [
1541
- "\r",
1542
- " v5r3-L6-D20 17%[==> ] 177.94M 8.48MB/s eta 3m 5s "
1543
- ]
1544
- },
1545
- {
1546
- "name": "stdout",
1547
- "output_type": "stream",
1548
- "text": [
1549
- "\r",
1550
- " v5r3-L6-D204 17%[==> ] 180.28M 8.42MB/s eta 2m 57s "
1551
- ]
1552
- },
1553
- {
1554
- "name": "stdout",
1555
- "output_type": "stream",
1556
- "text": [
1557
- "\r",
1558
- " v5r3-L6-D2048 17%[==> ] 182.67M 8.93MB/s eta 2m 57s "
1559
- ]
1560
- },
1561
- {
1562
- "name": "stdout",
1563
- "output_type": "stream",
1564
- "text": [
1565
- "\r",
1566
- " v5r3-L6-D2048- 18%[==> ] 185.12M 8.79MB/s eta 2m 57s "
1567
- ]
1568
- },
1569
- {
1570
- "name": "stdout",
1571
- "output_type": "stream",
1572
- "text": [
1573
- "\r",
1574
- " v5r3-L6-D2048-E 18%[==> ] 187.65M 9.09MB/s eta 2m 57s "
1575
- ]
1576
- },
1577
- {
1578
- "name": "stdout",
1579
- "output_type": "stream",
1580
- "text": [
1581
- "\r",
1582
- " v5r3-L6-D2048-E0 18%[==> ] 190.23M 9.09MB/s eta 2m 57s "
1583
- ]
1584
- },
1585
- {
1586
- "name": "stdout",
1587
- "output_type": "stream",
1588
- "text": [
1589
- "\r",
1590
- " v5r3-L6-D2048-E0_ 18%[==> ] 192.89M 9.54MB/s eta 2m 48s "
1591
- ]
1592
- },
1593
- {
1594
- "name": "stdout",
1595
- "output_type": "stream",
1596
- "text": [
1597
- "\r",
1598
- " v5r3-L6-D2048-E0_1 19%[==> ] 195.59M 9.55MB/s eta 2m 48s "
1599
- ]
1600
- },
1601
- {
1602
- "name": "stdout",
1603
- "output_type": "stream",
1604
- "text": [
1605
- "\r",
1606
- "v5r3-L6-D2048-E0_1- 19%[==> ] 198.34M 10.2MB/s eta 2m 48s "
1607
- ]
1608
- },
1609
- {
1610
- "name": "stdout",
1611
- "output_type": "stream",
1612
- "text": [
1613
- "\r",
1614
- "5r3-L6-D2048-E0_1-m 19%[==> ] 201.22M 10.1MB/s eta 2m 48s "
1615
- ]
1616
- },
1617
- {
1618
- "name": "stdout",
1619
- "output_type": "stream",
1620
- "text": [
1621
- "\r",
1622
- "r3-L6-D2048-E0_1-me 20%[===> ] 204.00M 10.3MB/s eta 2m 48s "
1623
- ]
1624
- },
1625
- {
1626
- "name": "stdout",
1627
- "output_type": "stream",
1628
- "text": [
1629
- "\r",
1630
- "3-L6-D2048-E0_1-mem 20%[===> ] 207.01M 10.3MB/s eta 2m 39s "
1631
- ]
1632
- },
1633
- {
1634
- "name": "stdout",
1635
- "output_type": "stream",
1636
- "text": [
1637
- "\r",
1638
- "-L6-D2048-E0_1-mem- 20%[===> ] 210.11M 11.0MB/s eta 2m 39s "
1639
- ]
1640
- },
1641
- {
1642
- "name": "stdout",
1643
- "output_type": "stream",
1644
- "text": [
1645
- "\r",
1646
- "L6-D2048-E0_1-mem-c 20%[===> ] 213.29M 11.0MB/s eta 2m 39s "
1647
- ]
1648
- },
1649
- {
1650
- "name": "stdout",
1651
- "output_type": "stream",
1652
- "text": [
1653
- "\r",
1654
- "6-D2048-E0_1-mem-ct 21%[===> ] 216.56M 11.1MB/s eta 2m 39s "
1655
- ]
1656
- },
1657
- {
1658
- "name": "stdout",
1659
- "output_type": "stream",
1660
- "text": [
1661
- "\r",
1662
- "-D2048-E0_1-mem-ctx 21%[===> ] 219.92M 11.8MB/s eta 2m 39s "
1663
- ]
1664
- },
1665
- {
1666
- "name": "stdout",
1667
- "output_type": "stream",
1668
- "text": [
1669
- "\r",
1670
- "D2048-E0_1-mem-ctx- 21%[===> ] 222.69M 11.6MB/s eta 2m 29s "
1671
- ]
1672
- },
1673
- {
1674
- "name": "stdout",
1675
- "output_type": "stream",
1676
- "text": [
1677
- "\r",
1678
- "2048-E0_1-mem-ctx-5 22%[===> ] 226.00M 11.6MB/s eta 2m 29s "
1679
- ]
1680
- },
1681
- {
1682
- "name": "stdout",
1683
- "output_type": "stream",
1684
- "text": [
1685
- "\r",
1686
- "048-E0_1-mem-ctx-51 22%[===> ] 229.44M 12.3MB/s eta 2m 29s "
1687
- ]
1688
- },
1689
- {
1690
- "name": "stdout",
1691
- "output_type": "stream",
1692
- "text": [
1693
- "\r",
1694
- "48-E0_1-mem-ctx-512 22%[===> ] 232.97M 12.3MB/s eta 2m 29s "
1695
- ]
1696
- },
1697
- {
1698
- "name": "stdout",
1699
- "output_type": "stream",
1700
- "text": [
1701
- "\r",
1702
- "8-E0_1-mem-ctx-512. 23%[===> ] 236.69M 12.3MB/s eta 2m 29s "
1703
- ]
1704
- },
1705
- {
1706
- "name": "stdout",
1707
- "output_type": "stream",
1708
- "text": [
1709
- "\r",
1710
- "-E0_1-mem-ctx-512.p 23%[===> ] 240.34M 13.0MB/s eta 2m 19s "
1711
- ]
1712
- },
1713
- {
1714
- "name": "stdout",
1715
- "output_type": "stream",
1716
- "text": [
1717
- "\r",
1718
- "E0_1-mem-ctx-512.pt 23%[===> ] 244.11M 13.0MB/s eta 2m 19s "
1719
- ]
1720
- },
1721
- {
1722
- "name": "stdout",
1723
- "output_type": "stream",
1724
- "text": [
1725
- "\r",
1726
- "0_1-mem-ctx-512.pth 24%[===> ] 247.87M 13.1MB/s eta 2m 19s "
1727
- ]
1728
- },
1729
- {
1730
- "name": "stdout",
1731
- "output_type": "stream",
1732
- "text": [
1733
- "\r",
1734
- "_1-mem-ctx-512.pth 24%[===> ] 251.87M 13.7MB/s eta 2m 19s "
1735
- ]
1736
- },
1737
- {
1738
- "name": "stdout",
1739
- "output_type": "stream",
1740
- "text": [
1741
- "\r",
1742
- "1-mem-ctx-512.pth 25%[====> ] 255.64M 13.7MB/s eta 2m 19s "
1743
- ]
1744
- },
1745
- {
1746
- "name": "stdout",
1747
- "output_type": "stream",
1748
- "text": [
1749
- "\r",
1750
- "-mem-ctx-512.pth 25%[====> ] 259.62M 13.7MB/s eta 2m 9s "
1751
- ]
1752
- },
1753
- {
1754
- "name": "stdout",
1755
- "output_type": "stream",
1756
- "text": [
1757
- "\r",
1758
- "mem-ctx-512.pth 25%[====> ] 263.37M 14.3MB/s eta 2m 9s "
1759
- ]
1760
- },
1761
- {
1762
- "name": "stdout",
1763
- "output_type": "stream",
1764
- "text": [
1765
- "\r",
1766
- "em-ctx-512.pth 26%[====> ] 267.14M 14.2MB/s eta 2m 9s "
1767
- ]
1768
- },
1769
- {
1770
- "name": "stdout",
1771
- "output_type": "stream",
1772
- "text": [
1773
- "\r",
1774
- "m-ctx-512.pth 26%[====> ] 271.37M 14.3MB/s eta 2m 9s "
1775
- ]
1776
- },
1777
- {
1778
- "name": "stdout",
1779
- "output_type": "stream",
1780
- "text": [
1781
- "\r",
1782
- "-ctx-512.pth 27%[====> ] 275.03M 14.2MB/s eta 2m 2s "
1783
- ]
1784
- },
1785
- {
1786
- "name": "stdout",
1787
- "output_type": "stream",
1788
- "text": [
1789
- "\r",
1790
- "ctx-512.pth 27%[====> ] 279.12M 14.4MB/s eta 2m 2s "
1791
- ]
1792
- },
1793
- {
1794
- "name": "stdout",
1795
- "output_type": "stream",
1796
- "text": [
1797
- "\r",
1798
- "tx-512.pth 27%[====> ] 282.20M 14.4MB/s eta 2m 2s "
1799
- ]
1800
- },
1801
- {
1802
- "name": "stdout",
1803
- "output_type": "stream",
1804
- "text": [
1805
- "\r",
1806
- "x-512.pth 28%[====> ] 285.34M 14.4MB/s eta 2m 2s "
1807
- ]
1808
- },
1809
- {
1810
- "name": "stdout",
1811
- "output_type": "stream",
1812
- "text": [
1813
- "\r",
1814
- "-512.pth 28%[====> ] 288.56M 14.4MB/s eta 2m 2s "
1815
- ]
1816
- },
1817
- {
1818
- "name": "stdout",
1819
- "output_type": "stream",
1820
- "text": [
1821
- "\r",
1822
- "512.pth 28%[====> ] 291.84M 14.4MB/s eta 1m 56s "
1823
- ]
1824
- },
1825
- {
1826
- "name": "stdout",
1827
- "output_type": "stream",
1828
- "text": [
1829
- "\r",
1830
- "12.pth 29%[====> ] 295.17M 14.5MB/s eta 1m 56s "
1831
- ]
1832
- },
1833
- {
1834
- "name": "stdout",
1835
- "output_type": "stream",
1836
- "text": [
1837
- "\r",
1838
- "2.pth 29%[====> ] 297.29M 14.0MB/s eta 1m 56s "
1839
- ]
1840
- },
1841
- {
1842
- "name": "stdout",
1843
- "output_type": "stream",
1844
- "text": [
1845
- "\r",
1846
- ".pth 29%[====> ] 301.00M 14.1MB/s eta 1m 56s "
1847
- ]
1848
- },
1849
- {
1850
- "name": "stdout",
1851
- "output_type": "stream",
1852
- "text": [
1853
- "\r",
1854
- "pth 29%[====> ] 303.42M 14.0MB/s eta 1m 52s "
1855
- ]
1856
- },
1857
- {
1858
- "name": "stdout",
1859
- "output_type": "stream",
1860
- "text": [
1861
- "\r",
1862
- "th 29%[====> ] 303.50M 13.4MB/s eta 1m 52s "
1863
- ]
1864
- },
1865
- {
1866
- "name": "stdout",
1867
- "output_type": "stream",
1868
- "text": [
1869
- "\r",
1870
- "h 30%[=====> ] 305.98M 13.1MB/s eta 1m 52s "
1871
- ]
1872
- },
1873
- {
1874
- "name": "stdout",
1875
- "output_type": "stream",
1876
- "text": [
1877
- "\r",
1878
- " 30%[=====> ] 308.53M 12.9MB/s eta 1m 52s "
1879
- ]
1880
- },
1881
- {
1882
- "name": "stdout",
1883
- "output_type": "stream",
1884
- "text": [
1885
- "\r",
1886
- " v 30%[=====> ] 311.09M 12.7MB/s eta 1m 52s "
1887
- ]
1888
- },
1889
- {
1890
- "name": "stdout",
1891
- "output_type": "stream",
1892
- "text": [
1893
- "\r",
1894
- " v5 30%[=====> ] 313.67M 12.4MB/s eta 1m 49s "
1895
- ]
1896
- },
1897
- {
1898
- "name": "stdout",
1899
- "output_type": "stream",
1900
- "text": [
1901
- "\r",
1902
- " v5r 31%[=====> ] 316.28M 12.2MB/s eta 1m 49s "
1903
- ]
1904
- },
1905
- {
1906
- "name": "stdout",
1907
- "output_type": "stream",
1908
- "text": [
1909
- "\r",
1910
- " v5r3 31%[=====> ] 318.94M 11.9MB/s eta 1m 49s "
1911
- ]
1912
- },
1913
- {
1914
- "name": "stdout",
1915
- "output_type": "stream",
1916
- "text": [
1917
- "\r",
1918
- " v5r3- 31%[=====> ] 321.59M 11.7MB/s eta 1m 49s "
1919
- ]
1920
- },
1921
- {
1922
- "name": "stdout",
1923
- "output_type": "stream",
1924
- "text": [
1925
- "\r",
1926
- " v5r3-L 31%[=====> ] 324.28M 11.5MB/s eta 1m 49s "
1927
- ]
1928
- },
1929
- {
1930
- "name": "stdout",
1931
- "output_type": "stream",
1932
- "text": [
1933
- "\r",
1934
- " v5r3-L6 32%[=====> ] 326.95M 11.2MB/s eta 1m 45s "
1935
- ]
1936
- },
1937
- {
1938
- "name": "stdout",
1939
- "output_type": "stream",
1940
- "text": [
1941
- "\r",
1942
- " v5r3-L6- 32%[=====> ] 329.69M 11.2MB/s eta 1m 45s "
1943
- ]
1944
- },
1945
- {
1946
- "name": "stdout",
1947
- "output_type": "stream",
1948
- "text": [
1949
- "\r",
1950
- " v5r3-L6-D 32%[=====> ] 332.44M 11.2MB/s eta 1m 45s "
1951
- ]
1952
- },
1953
- {
1954
- "name": "stdout",
1955
- "output_type": "stream",
1956
- "text": [
1957
- "\r",
1958
- " v5r3-L6-D2 32%[=====> ] 335.20M 10.9MB/s eta 1m 45s "
1959
- ]
1960
- },
1961
- {
1962
- "name": "stdout",
1963
- "output_type": "stream",
1964
- "text": [
1965
- "\r",
1966
- " v5r3-L6-D20 33%[=====> ] 337.97M 10.8MB/s eta 1m 45s "
1967
- ]
1968
- },
1969
- {
1970
- "name": "stdout",
1971
- "output_type": "stream",
1972
- "text": [
1973
- "\r",
1974
- " v5r3-L6-D204 33%[=====> ] 340.76M 10.7MB/s eta 1m 41s "
1975
- ]
1976
- },
1977
- {
1978
- "name": "stdout",
1979
- "output_type": "stream",
1980
- "text": [
1981
- "\r",
1982
- " v5r3-L6-D2048 33%[=====> ] 343.59M 11.1MB/s eta 1m 41s "
1983
- ]
1984
- },
1985
- {
1986
- "name": "stdout",
1987
- "output_type": "stream",
1988
- "text": [
1989
- "\r",
1990
- " v5r3-L6-D2048- 34%[=====> ] 346.44M 11.2MB/s eta 1m 41s "
1991
- ]
1992
- },
1993
- {
1994
- "name": "stdout",
1995
- "output_type": "stream",
1996
- "text": [
1997
- "\r",
1998
- " v5r3-L6-D2048-E 34%[=====> ] 349.26M 11.3MB/s eta 1m 41s "
1999
- ]
2000
- },
2001
- {
2002
- "name": "stdout",
2003
- "output_type": "stream",
2004
- "text": [
2005
- "\r",
2006
- " v5r3-L6-D2048-E0 34%[=====> ] 352.09M 11.4MB/s eta 1m 41s "
2007
- ]
2008
- },
2009
- {
2010
- "name": "stdout",
2011
- "output_type": "stream",
2012
- "text": [
2013
- "\r",
2014
- " v5r3-L6-D2048-E0_ 34%[=====> ] 354.86M 11.6MB/s eta 97s "
2015
- ]
2016
- },
2017
- {
2018
- "name": "stdout",
2019
- "output_type": "stream",
2020
- "text": [
2021
- "\r",
2022
- " v5r3-L6-D2048-E0_1 35%[======> ] 357.48M 11.6MB/s eta 97s "
2023
- ]
2024
- },
2025
- {
2026
- "name": "stdout",
2027
- "output_type": "stream",
2028
- "text": [
2029
- "\r",
2030
- "v5r3-L6-D2048-E0_1- 35%[======> ] 360.34M 11.7MB/s eta 97s "
2031
- ]
2032
- },
2033
- {
2034
- "name": "stdout",
2035
- "output_type": "stream",
2036
- "text": [
2037
- "\r",
2038
- "5r3-L6-D2048-E0_1-m 35%[======> ] 363.25M 11.8MB/s eta 97s "
2039
- ]
2040
- },
2041
- {
2042
- "name": "stdout",
2043
- "output_type": "stream",
2044
- "text": [
2045
- "\r",
2046
- "r3-L6-D2048-E0_1-me 35%[======> ] 366.14M 11.9MB/s eta 97s "
2047
- ]
2048
- },
2049
- {
2050
- "name": "stdout",
2051
- "output_type": "stream",
2052
- "text": [
2053
- "\r",
2054
- "3-L6-D2048-E0_1-mem 36%[======> ] 369.04M 11.9MB/s eta 94s "
2055
- ]
2056
- },
2057
- {
2058
- "name": "stdout",
2059
- "output_type": "stream",
2060
- "text": [
2061
- "\r",
2062
- "-L6-D2048-E0_1-mem- 36%[======> ] 371.97M 12.0MB/s eta 94s "
2063
- ]
2064
- },
2065
- {
2066
- "name": "stdout",
2067
- "output_type": "stream",
2068
- "text": [
2069
- "\r",
2070
- "L6-D2048-E0_1-mem-c 36%[======> ] 374.89M 11.9MB/s eta 94s "
2071
- ]
2072
- },
2073
- {
2074
- "name": "stdout",
2075
- "output_type": "stream",
2076
- "text": [
2077
- "\r",
2078
- "6-D2048-E0_1-mem-ct 37%[======> ] 377.72M 11.4MB/s eta 94s "
2079
- ]
2080
- },
2081
- {
2082
- "name": "stdout",
2083
- "output_type": "stream",
2084
- "text": [
2085
- "\r",
2086
- "-D2048-E0_1-mem-ctx 37%[======> ] 381.44M 11.8MB/s eta 91s "
2087
- ]
2088
- },
2089
- {
2090
- "name": "stdout",
2091
- "output_type": "stream",
2092
- "text": [
2093
- "\r",
2094
- "D2048-E0_1-mem-ctx- 37%[======> ] 383.50M 11.6MB/s eta 91s "
2095
- ]
2096
- },
2097
- {
2098
- "name": "stdout",
2099
- "output_type": "stream",
2100
- "text": [
2101
- "\r",
2102
- "2048-E0_1-mem-ctx-5 37%[======> ] 385.61M 11.3MB/s eta 91s "
2103
- ]
2104
- },
2105
- {
2106
- "name": "stdout",
2107
- "output_type": "stream",
2108
- "text": [
2109
- "\r",
2110
- "048-E0_1-mem-ctx-51 38%[======> ] 387.75M 11.4MB/s eta 91s "
2111
- ]
2112
- },
2113
- {
2114
- "name": "stdout",
2115
- "output_type": "stream",
2116
- "text": [
2117
- "\r",
2118
- "48-E0_1-mem-ctx-512 38%[======> ] 389.92M 11.3MB/s eta 91s "
2119
- ]
2120
- },
2121
- {
2122
- "name": "stdout",
2123
- "output_type": "stream",
2124
- "text": [
2125
- "\r",
2126
- "8-E0_1-mem-ctx-512. 38%[======> ] 392.11M 11.2MB/s eta 89s "
2127
- ]
2128
- },
2129
- {
2130
- "name": "stdout",
2131
- "output_type": "stream",
2132
- "text": [
2133
- "\r",
2134
- "-E0_1-mem-ctx-512.p 38%[======> ] 394.33M 11.0MB/s eta 89s "
2135
- ]
2136
- },
2137
- {
2138
- "name": "stdout",
2139
- "output_type": "stream",
2140
- "text": [
2141
- "\r",
2142
- "E0_1-mem-ctx-512.pt 38%[======> ] 396.56M 10.9MB/s eta 89s "
2143
- ]
2144
- },
2145
- {
2146
- "name": "stdout",
2147
- "output_type": "stream",
2148
- "text": [
2149
- "\r",
2150
- "0_1-mem-ctx-512.pth 39%[======> ] 398.83M 10.8MB/s eta 89s "
2151
- ]
2152
- },
2153
- {
2154
- "name": "stdout",
2155
- "output_type": "stream",
2156
- "text": [
2157
- "\r",
2158
- "_1-mem-ctx-512.pth 39%[======> ] 401.11M 10.7MB/s eta 89s "
2159
- ]
2160
- },
2161
- {
2162
- "name": "stdout",
2163
- "output_type": "stream",
2164
- "text": [
2165
- "\r",
2166
- "1-mem-ctx-512.pth 39%[======> ] 403.42M 10.6MB/s eta 86s "
2167
- ]
2168
- },
2169
- {
2170
- "name": "stdout",
2171
- "output_type": "stream",
2172
- "text": [
2173
- "\r",
2174
- "-mem-ctx-512.pth 39%[======> ] 405.75M 10.4MB/s eta 86s "
2175
- ]
2176
- },
2177
- {
2178
- "name": "stdout",
2179
- "output_type": "stream",
2180
- "text": [
2181
- "\r",
2182
- "mem-ctx-512.pth 40%[=======> ] 408.08M 10.2MB/s eta 86s "
2183
- ]
2184
- },
2185
- {
2186
- "name": "stdout",
2187
- "output_type": "stream",
2188
- "text": [
2189
- "\r",
2190
- "em-ctx-512.pth 40%[=======> ] 410.44M 10.1MB/s eta 86s "
2191
- ]
2192
- },
2193
- {
2194
- "name": "stdout",
2195
- "output_type": "stream",
2196
- "text": [
2197
- "\r",
2198
- "m-ctx-512.pth 40%[=======> ] 412.81M 9.85MB/s eta 86s "
2199
- ]
2200
- },
2201
- {
2202
- "name": "stdout",
2203
- "output_type": "stream",
2204
- "text": [
2205
- "\r",
2206
- "-ctx-512.pth 40%[=======> ] 415.19M 9.71MB/s eta 84s "
2207
- ]
2208
- },
2209
- {
2210
- "name": "stdout",
2211
- "output_type": "stream",
2212
- "text": [
2213
- "\r",
2214
- "ctx-512.pth 41%[=======> ] 417.58M 9.94MB/s eta 84s "
2215
- ]
2216
- },
2217
- {
2218
- "name": "stdout",
2219
- "output_type": "stream",
2220
- "text": [
2221
- "\r",
2222
- "tx-512.pth 41%[=======> ] 419.97M 9.82MB/s eta 84s "
2223
- ]
2224
- },
2225
- {
2226
- "name": "stdout",
2227
- "output_type": "stream",
2228
- "text": [
2229
- "\r",
2230
- "x-512.pth 41%[=======> ] 422.39M 9.92MB/s eta 84s "
2231
- ]
2232
- }
2233
- ],
2234
- "source": [
2235
- "# Download the model directly (stop gap till HF sync issues is resolved)\n",
2236
- "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n",
2237
- " wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/{DIR_NAME}/{FILENAME_PREFIX}-mem-ctx-512.pth\"\n",
2238
- "\n",
2239
- "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n",
2240
- " ls -alh ."
2241
- ]
2242
- },
2243
- {
2244
- "cell_type": "markdown",
2245
- "id": "d02fd021",
2246
- "metadata": {
2247
- "papermill": {
2248
- "duration": null,
2249
- "end_time": null,
2250
- "exception": null,
2251
- "start_time": null,
2252
- "status": "pending"
2253
- },
2254
- "tags": []
2255
- },
2256
- "source": [
2257
- "## Tune 3 : Ramping up the ctx size (8192), memory training\n",
2258
- "\n",
2259
- "- Tune 3: Large ctx size (8192), Scaling up!"
2260
- ]
2261
- },
2262
- {
2263
- "cell_type": "code",
2264
- "execution_count": null,
2265
- "id": "535fef69",
2266
- "metadata": {
2267
- "papermill": {
2268
- "duration": null,
2269
- "end_time": null,
2270
- "exception": null,
2271
- "start_time": null,
2272
- "status": "pending"
2273
- },
2274
- "tags": []
2275
- },
2276
- "outputs": [],
2277
- "source": [
2278
- "%%script bash\n",
2279
- "\n",
2280
- "########################################\n",
2281
- "# Generate the required jsonl dataset\n",
2282
- "########################################\n",
2283
- "\n",
2284
- "# Go to config dir\n",
2285
- "cd \"../\"\n",
2286
- "\n",
2287
- "# Reset the dataset dir\n",
2288
- "mkdir -p ../dataset\n",
2289
- "rm -rf ../dataset/*.jsonl\n",
2290
- "\n",
2291
- "# Generate the various datasets\n",
2292
- "echo \"## Generating word reptition dataset ##\"\n",
2293
- "\n",
2294
- "#\n",
2295
- "# We reduce the training set for < 50 words - and shift the focus upwards\n",
2296
- "# (aka 50-100 token * 2 : ~100 - 250 token ctx len)\n",
2297
- "#\n",
2298
- "for i in {5..95..5} \n",
2299
- "do\n",
2300
- " python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 100 & \n",
2301
- " python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 10 & \n",
2302
- "done\n",
2303
- "\n",
2304
- "#\n",
2305
- "# Ramping up the 100+ - 4200 words dataset\n",
2306
- "# \n",
2307
- "for i in {100..8000..100} \n",
2308
- "do\n",
2309
- " python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 2000 & \n",
2310
- " python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 20 & \n",
2311
- "done\n",
2312
- "\n",
2313
- "wait\n",
2314
- "echo \"## Done ##\"\n",
2315
- "\n",
2316
- "ls -lh ../dataset/"
2317
- ]
2318
- },
2319
- {
2320
- "cell_type": "code",
2321
- "execution_count": null,
2322
- "id": "40beda36",
2323
- "metadata": {
2324
- "papermill": {
2325
- "duration": null,
2326
- "end_time": null,
2327
- "exception": null,
2328
- "start_time": null,
2329
- "status": "pending"
2330
- },
2331
- "tags": []
2332
- },
2333
- "outputs": [],
2334
- "source": [
2335
- "# Start the finetune model training\n",
2336
- "!cd \"{TRAINER_DIR}\" && \\\n",
2337
- " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n",
2338
- " python3 lightning_trainer.py fit \\\n",
2339
- " -c \"{CONFIG_DIR}/config-mem-template.yaml\" \\\n",
2340
- " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Tune ctx-8k (train-ctx=4k, {DEEPSPEED_STRAT})\" \\\n",
2341
- " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n",
2342
- " --trainer.devices=\"{GPU_DEVICES}\" \\\n",
2343
- " --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-mem-ctx-8k/\" \\\n",
2344
- " --model.lr_init=4e-4 \\\n",
2345
- " --model.lr_final=2e-4 \\\n",
2346
- " --data.max_token_size=8192 \\\n",
2347
- " --data.sort_by_length=True \\\n",
2348
- " --model.ctx_len=4096 \\\n",
2349
- " --model.bptt_learning_range=2 \\\n",
2350
- " --model.load_model=\"../model/{FILENAME_PREFIX}-mem-ctx-512.pth\""
2351
- ]
2352
- },
2353
- {
2354
- "cell_type": "code",
2355
- "execution_count": null,
2356
- "id": "19eaf20f",
2357
- "metadata": {
2358
- "papermill": {
2359
- "duration": null,
2360
- "end_time": null,
2361
- "exception": null,
2362
- "start_time": null,
2363
- "status": "pending"
2364
- },
2365
- "tags": []
2366
- },
2367
- "outputs": [],
2368
- "source": [
2369
- "# Lets export the model from the checkpoint\n",
2370
- "!cd \"{TRAINER_DIR}\" && \\\n",
2371
- " python3 export_checkpoint.py \\\n",
2372
- " \"../checkpoint/{FILENAME_PREFIX}-mem-ctx-8k/last.ckpt\" \\\n",
2373
- " \"../model/{FILENAME_PREFIX}-mem-ctx-8k.pth\" \"bf16\"\n",
2374
- "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-mem-ctx-8k.pth\""
2375
- ]
2376
- },
2377
- {
2378
- "cell_type": "code",
2379
- "execution_count": null,
2380
- "id": "e9be9d4f",
2381
- "metadata": {
2382
- "papermill": {
2383
- "duration": null,
2384
- "end_time": null,
2385
- "exception": null,
2386
- "start_time": null,
2387
- "status": "pending"
2388
- },
2389
- "tags": []
2390
- },
2391
- "outputs": [],
2392
- "source": [
2393
- "# Lets do a quick memory test\n",
2394
- "!python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-8k.pth\""
2395
- ]
2396
- },
2397
- {
2398
- "cell_type": "code",
2399
- "execution_count": null,
2400
- "id": "1449d8ff",
2401
- "metadata": {
2402
- "papermill": {
2403
- "duration": null,
2404
- "end_time": null,
2405
- "exception": null,
2406
- "start_time": null,
2407
- "status": "pending"
2408
- },
2409
- "tags": []
2410
- },
2411
- "outputs": [],
2412
- "source": [
2413
- "!python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-8k.pth\" \"none\" 1000 4000"
2414
- ]
2415
- }
2416
- ],
2417
- "metadata": {
2418
- "kernelspec": {
2419
- "display_name": "python3 3 (ipykernel)",
2420
- "language": "python",
2421
- "name": "python3"
2422
- },
2423
- "language_info": {
2424
- "codemirror_mode": {
2425
- "name": "ipython",
2426
- "version": 3
2427
- },
2428
- "file_extension": ".py",
2429
- "mimetype": "text/x-python",
2430
- "name": "python",
2431
- "nbconvert_exporter": "python",
2432
- "pygments_lexer": "ipython3",
2433
- "version": "3.11.4"
2434
- },
2435
- "papermill": {
2436
- "default_parameters": {},
2437
- "duration": null,
2438
- "end_time": null,
2439
- "environment_variables": {},
2440
- "exception": null,
2441
- "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb",
2442
- "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage3.ipynb",
2443
- "parameters": {},
2444
- "start_time": "2023-09-14T04:09:41.695596",
2445
- "version": "2.4.0"
2446
- }
2447
- },
2448
- "nbformat": 4,
2449
- "nbformat_minor": 5
2450
- }
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b01a0b009aa135d08ecf6442863da28f6a1854d6c1c4158ff255380ad9452944
3
+ size 40077989