picocreator
commited on
Commit
•
cc54ac1
1
Parent(s):
3ee4ae1
715a8b6edb467dc7d61ce79795e6fc7d3f73b519493e4f2d7c06cfcc7106186f
Browse files
experiment/rwkv-x-exp/v5-headsize2x/hello-world.ipynb
CHANGED
@@ -3,19 +3,19 @@
|
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
"execution_count": 1,
|
6 |
-
"id": "
|
7 |
"metadata": {
|
8 |
"execution": {
|
9 |
-
"iopub.execute_input": "2023-08-
|
10 |
-
"iopub.status.busy": "2023-08-
|
11 |
-
"iopub.status.idle": "2023-08-
|
12 |
-
"shell.execute_reply": "2023-08-
|
13 |
},
|
14 |
"papermill": {
|
15 |
-
"duration": 0.
|
16 |
-
"end_time": "2023-08-
|
17 |
"exception": false,
|
18 |
-
"start_time": "2023-08-
|
19 |
"status": "completed"
|
20 |
},
|
21 |
"tags": []
|
@@ -32,6 +32,123 @@
|
|
32 |
"source": [
|
33 |
"!echo \"Hello world\" # this file is for debugging the runner"
|
34 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
}
|
36 |
],
|
37 |
"metadata": {
|
@@ -49,14 +166,14 @@
|
|
49 |
},
|
50 |
"papermill": {
|
51 |
"default_parameters": {},
|
52 |
-
"duration": 1.
|
53 |
-
"end_time": "2023-08-
|
54 |
"environment_variables": {},
|
55 |
"exception": null,
|
56 |
"input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/hello-world.ipynb",
|
57 |
"output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/v5-headsize2x/hello-world.ipynb",
|
58 |
"parameters": {},
|
59 |
-
"start_time": "2023-08-
|
60 |
"version": "2.4.0"
|
61 |
}
|
62 |
},
|
|
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
"execution_count": 1,
|
6 |
+
"id": "5a2bbd3c",
|
7 |
"metadata": {
|
8 |
"execution": {
|
9 |
+
"iopub.execute_input": "2023-08-24T04:44:25.040059Z",
|
10 |
+
"iopub.status.busy": "2023-08-24T04:44:25.039841Z",
|
11 |
+
"iopub.status.idle": "2023-08-24T04:44:25.284633Z",
|
12 |
+
"shell.execute_reply": "2023-08-24T04:44:25.283834Z"
|
13 |
},
|
14 |
"papermill": {
|
15 |
+
"duration": 0.248685,
|
16 |
+
"end_time": "2023-08-24T04:44:25.286190",
|
17 |
"exception": false,
|
18 |
+
"start_time": "2023-08-24T04:44:25.037505",
|
19 |
"status": "completed"
|
20 |
},
|
21 |
"tags": []
|
|
|
32 |
"source": [
|
33 |
"!echo \"Hello world\" # this file is for debugging the runner"
|
34 |
]
|
35 |
+
},
|
36 |
+
{
|
37 |
+
"cell_type": "code",
|
38 |
+
"execution_count": 2,
|
39 |
+
"id": "5b3eecbc",
|
40 |
+
"metadata": {
|
41 |
+
"execution": {
|
42 |
+
"iopub.execute_input": "2023-08-24T04:44:25.289218Z",
|
43 |
+
"iopub.status.busy": "2023-08-24T04:44:25.289031Z",
|
44 |
+
"iopub.status.idle": "2023-08-24T04:44:25.295282Z",
|
45 |
+
"shell.execute_reply": "2023-08-24T04:44:25.294543Z"
|
46 |
+
},
|
47 |
+
"papermill": {
|
48 |
+
"duration": 0.009124,
|
49 |
+
"end_time": "2023-08-24T04:44:25.296411",
|
50 |
+
"exception": false,
|
51 |
+
"start_time": "2023-08-24T04:44:25.287287",
|
52 |
+
"status": "completed"
|
53 |
+
},
|
54 |
+
"tags": []
|
55 |
+
},
|
56 |
+
"outputs": [
|
57 |
+
{
|
58 |
+
"name": "stdout",
|
59 |
+
"output_type": "stream",
|
60 |
+
"text": [
|
61 |
+
"DEEPSPEED_STRAT: deepspeed_stage_2_offload\n",
|
62 |
+
"ENABLE_WANDB: True\n",
|
63 |
+
"GPU_DEVICES: auto\n",
|
64 |
+
"NOTEBOOK_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x\n",
|
65 |
+
"INFERENCE_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x\n",
|
66 |
+
"TRAINER_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x\n",
|
67 |
+
"PROJECT_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer\n"
|
68 |
+
]
|
69 |
+
}
|
70 |
+
],
|
71 |
+
"source": [
|
72 |
+
"DEEPSPEED_STRAT=\"deepspeed_stage_2_offload\"\n",
|
73 |
+
"GPU_DEVICES=\"auto\"\n",
|
74 |
+
"ENABLE_WANDB=True\n",
|
75 |
+
"\n",
|
76 |
+
"RWKV_WAVENET_LAYERS=1\n",
|
77 |
+
"\n",
|
78 |
+
"EMBED_SCALE=0.1\n",
|
79 |
+
"EMBED_SCALE_LABEL=str(EMBED_SCALE).replace(\".\", \"_\")\n",
|
80 |
+
"\n",
|
81 |
+
"LAYER_COUNT=6\n",
|
82 |
+
"EMBED_DIM=4096\n",
|
83 |
+
"\n",
|
84 |
+
"WANDB_PREFIX=f\"v5-hs2x-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE}\"\n",
|
85 |
+
"FILENAME_PREFIX=f\"v5-hs2x-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE_LABEL}\"\n",
|
86 |
+
"\n",
|
87 |
+
"print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n",
|
88 |
+
"print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n",
|
89 |
+
"print(\"GPU_DEVICES:\", GPU_DEVICES)\n",
|
90 |
+
"\n",
|
91 |
+
"if ENABLE_WANDB:\n",
|
92 |
+
" WANDB_MODE=\"online\"\n",
|
93 |
+
"else:\n",
|
94 |
+
" WANDB_MODE=\"disabled\"\n",
|
95 |
+
"\n",
|
96 |
+
"# Computing the notebook, and various paths\n",
|
97 |
+
"import os\n",
|
98 |
+
"NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n",
|
99 |
+
"PROJECT_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../../../../\"))\n",
|
100 |
+
"TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5headsize2x/\"))\n",
|
101 |
+
"INFERENCE_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5headsize2x/\"))\n",
|
102 |
+
"\n",
|
103 |
+
"print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n",
|
104 |
+
"print(\"INFERENCE_DIR:\", INFERENCE_DIR)\n",
|
105 |
+
"print(\"TRAINER_DIR:\", TRAINER_DIR)\n",
|
106 |
+
"print(\"PROJECT_DIR:\", PROJECT_DIR)"
|
107 |
+
]
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"cell_type": "code",
|
111 |
+
"execution_count": 3,
|
112 |
+
"id": "39c31880",
|
113 |
+
"metadata": {
|
114 |
+
"execution": {
|
115 |
+
"iopub.execute_input": "2023-08-24T04:44:25.299318Z",
|
116 |
+
"iopub.status.busy": "2023-08-24T04:44:25.299143Z",
|
117 |
+
"iopub.status.idle": "2023-08-24T04:44:25.772732Z",
|
118 |
+
"shell.execute_reply": "2023-08-24T04:44:25.771978Z"
|
119 |
+
},
|
120 |
+
"papermill": {
|
121 |
+
"duration": 0.476676,
|
122 |
+
"end_time": "2023-08-24T04:44:25.774132",
|
123 |
+
"exception": false,
|
124 |
+
"start_time": "2023-08-24T04:44:25.297456",
|
125 |
+
"status": "completed"
|
126 |
+
},
|
127 |
+
"tags": []
|
128 |
+
},
|
129 |
+
"outputs": [
|
130 |
+
{
|
131 |
+
"name": "stdout",
|
132 |
+
"output_type": "stream",
|
133 |
+
"text": [
|
134 |
+
"/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5headsize2x\r\n"
|
135 |
+
]
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"name": "stdout",
|
139 |
+
"output_type": "stream",
|
140 |
+
"text": [
|
141 |
+
"total 4.0K\r\n",
|
142 |
+
"drwxr-xr-x 2 root root 10 Aug 24 04:44 .\r\n",
|
143 |
+
"drwxr-xr-x 19 root root 4.0K Aug 24 04:44 ..\r\n"
|
144 |
+
]
|
145 |
+
}
|
146 |
+
],
|
147 |
+
"source": [
|
148 |
+
"# Get the current model listing\n",
|
149 |
+
"!cd \"{TRAINER_DIR}\" && pwd\n",
|
150 |
+
"!cd \"{TRAINER_DIR}\" && ls -alh \"../model/\""
|
151 |
+
]
|
152 |
}
|
153 |
],
|
154 |
"metadata": {
|
|
|
166 |
},
|
167 |
"papermill": {
|
168 |
"default_parameters": {},
|
169 |
+
"duration": 1.911028,
|
170 |
+
"end_time": "2023-08-24T04:44:25.992833",
|
171 |
"environment_variables": {},
|
172 |
"exception": null,
|
173 |
"input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-headsize2x/hello-world.ipynb",
|
174 |
"output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/v5-headsize2x/hello-world.ipynb",
|
175 |
"parameters": {},
|
176 |
+
"start_time": "2023-08-24T04:44:24.081805",
|
177 |
"version": "2.4.0"
|
178 |
}
|
179 |
},
|