"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"96be26e86d7640f39dc7a381f9e8ac87","version_major":2,"version_minor":0},"text/plain":["Map: 0%| | 0/448 [00:00, ? examples/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"f61d0aae58d0414898a1543855aa90b5","version_major":2,"version_minor":0},"text/plain":["Map: 0%| | 0/56 [00:00, ? examples/s]"]},"metadata":{},"output_type":"display_data"},{"name":"stderr","output_type":"stream","text":["`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...\n"]},{"data":{"text/html":["\n"," \n"," \n","
\n"," [100/112 18:15 < 02:14, 0.09 it/s, Epoch 7/8]\n","
\n"," \n"," \n"," \n"," Step | \n"," Training Loss | \n"," Validation Loss | \n","
\n"," \n"," \n"," \n"," 10 | \n"," 2.140800 | \n"," 1.371807 | \n","
\n"," \n"," 20 | \n"," 1.088900 | \n"," 0.822470 | \n","
\n"," \n"," 30 | \n"," 0.782100 | \n"," 0.707285 | \n","
\n"," \n"," 40 | \n"," 0.611200 | \n"," 0.667052 | \n","
\n"," \n"," 50 | \n"," 0.544400 | \n"," 0.625149 | \n","
\n"," \n"," 60 | \n"," 0.487200 | \n"," 0.633841 | \n","
\n"," \n"," 70 | \n"," 0.497100 | \n"," 0.602714 | \n","
\n"," \n"," 80 | \n"," 0.455200 | \n"," 0.605597 | \n","
\n"," \n"," 90 | \n"," 0.440400 | \n"," 0.615718 | \n","
\n"," \n"," 100 | \n"," 0.440000 | \n"," 0.617107 | \n","
\n"," \n","
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n"," \n"," \n","
\n"," [7/7 00:06]\n","
\n"," "],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"","version_major":2,"version_minor":0},"text/plain":["VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\\r'), FloatProgress(value=1.0, max=1.0)))"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n","Run history:
eval/loss | █▃▂▂▁▁▁▁▁▁▁ |
eval/runtime | ▁▅▇▇█▇▇█▇▇▇ |
eval/samples_per_second | █▄▂▂▁▂▂▁▂▂▂ |
eval/steps_per_second | █▅▂▂▁▂▂▁▂▂▃ |
eval_loss | ▁ |
train/epoch | ▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇████ |
train/global_step | ▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇█████ |
train/learning_rate | █▇▆▆▅▄▃▃▂▁ |
train/loss | █▄▂▂▁▁▁▁▁▁ |
train/total_flos | ▁ |
train/train_loss | ▁ |
train/train_runtime | ▁ |
train/train_samples_per_second | ▁ |
train/train_steps_per_second | ▁ |
Run summary:
eval/loss | 0.60271 |
eval/runtime | 7.0512 |
eval/samples_per_second | 7.942 |
eval/steps_per_second | 0.993 |
eval_loss | 0.60271 |
train/epoch | 7.14 |
train/global_step | 100 |
train/learning_rate | 4e-05 |
train/loss | 0.44 |
train/total_flos | 3478708041547776.0 |
train/train_loss | 0.74873 |
train/train_runtime | 1107.7796 |
train/train_samples_per_second | 3.235 |
train/train_steps_per_second | 0.101 |
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View run abundant-moon-22 at: https://wandb.ai/szehanz/Education-Chatbot-Optimization/runs/wdbp5xpl
Synced 6 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Find logs at: ./wandb/run-20240219_184243-wdbp5xpl/logs
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"name":"stderr","output_type":"stream","text":["[I 2024-02-19 19:01:36,909] Trial 0 finished with value: 0.6027135848999023 and parameters: {'learning_rate': 0.0004025111363668074, 'num_train_epochs': 8, 'per_device_train_batch_size': 32, 'warmup_steps': 3}. Best is trial 0 with value: 0.6027135848999023.\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"8d7ed437bb30461db50fd93c2f619150","version_major":2,"version_minor":0},"text/plain":["VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.011112901077528175, max=1.0…"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Tracking run with wandb version 0.16.3"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Run data is saved locally in /home/iot/ITI110/poc-playground/Final project/wandb/run-20240219_190136-zbvi8cl0
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Syncing run thriving-fireworks-23 to Weights & Biases (docs)
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View project at https://wandb.ai/szehanz/Education-Chatbot-Optimization"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View run at https://wandb.ai/szehanz/Education-Chatbot-Optimization/runs/zbvi8cl0"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"76ef17e7b1bc40cfac453df2d5f9979b","version_major":2,"version_minor":0},"text/plain":["Map: 0%| | 0/448 [00:00, ? examples/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"af68d7f99f184edbb84f34b5f8bbe616","version_major":2,"version_minor":0},"text/plain":["Map: 0%| | 0/56 [00:00, ? examples/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n"," \n"," \n","
\n"," [140/224 14:09 < 08:37, 0.16 it/s, Epoch 5/8]\n","
\n"," \n"," \n"," \n"," Step | \n"," Training Loss | \n"," Validation Loss | \n","
\n"," \n"," \n"," \n"," 10 | \n"," 2.350300 | \n"," 1.578861 | \n","
\n"," \n"," 20 | \n"," 1.226600 | \n"," 0.919854 | \n","
\n"," \n"," 30 | \n"," 1.003300 | \n"," 0.858959 | \n","
\n"," \n"," 40 | \n"," 0.736900 | \n"," 0.683791 | \n","
\n"," \n"," 50 | \n"," 0.639200 | \n"," 0.672153 | \n","
\n"," \n"," 60 | \n"," 0.574200 | \n"," 0.652091 | \n","
\n"," \n"," 70 | \n"," 0.548300 | \n"," 0.631915 | \n","
\n"," \n"," 80 | \n"," 0.529600 | \n"," 0.603841 | \n","
\n"," \n"," 90 | \n"," 0.479700 | \n"," 0.605569 | \n","
\n"," \n"," 100 | \n"," 0.497700 | \n"," 0.599202 | \n","
\n"," \n"," 110 | \n"," 0.484900 | \n"," 0.598301 | \n","
\n"," \n"," 120 | \n"," 0.489000 | \n"," 0.638032 | \n","
\n"," \n"," 130 | \n"," 0.435900 | \n"," 0.633226 | \n","
\n"," \n"," 140 | \n"," 0.482900 | \n"," 0.605198 | \n","
\n"," \n","
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n"," \n"," \n","
\n"," [7/7 00:06]\n","
\n"," "],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"","version_major":2,"version_minor":0},"text/plain":["VBox(children=(Label(value='0.006 MB of 0.034 MB uploaded\\r'), FloatProgress(value=0.17041999774800135, max=1.…"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n","Run history:
eval/loss | █▃▃▂▂▁▁▁▁▁▁▁▁▁▁ |
eval/runtime | ▁▄▅▅▆▆█▆▆▇▆▆█▅▅ |
eval/samples_per_second | █▅▄▄▃▃▁▃▃▂▃▃▁▄▄ |
eval/steps_per_second | █▅▄▅▃▄▂▃▄▂▃▃▁▄▄ |
eval_loss | ▁ |
train/epoch | ▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇████ |
train/global_step | ▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇█████ |
train/learning_rate | █▇▇▆▆▅▅▄▄▃▃▂▂▁ |
train/loss | █▄▃▂▂▂▁▁▁▁▁▁▁▁ |
train/total_flos | ▁ |
train/train_loss | ▁ |
train/train_runtime | ▁ |
train/train_samples_per_second | ▁ |
train/train_steps_per_second | ▁ |
Run summary:
eval/loss | 0.5983 |
eval/runtime | 7.0249 |
eval/samples_per_second | 7.972 |
eval/steps_per_second | 0.996 |
eval_loss | 0.5983 |
train/epoch | 5.0 |
train/global_step | 140 |
train/learning_rate | 0.00012 |
train/loss | 0.4829 |
train/total_flos | 2170063307145216.0 |
train/train_loss | 0.74847 |
train/train_runtime | 856.9078 |
train/train_samples_per_second | 4.182 |
train/train_steps_per_second | 0.261 |
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View run thriving-fireworks-23 at: https://wandb.ai/szehanz/Education-Chatbot-Optimization/runs/zbvi8cl0
Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Find logs at: ./wandb/run-20240219_190136-zbvi8cl0/logs
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"name":"stderr","output_type":"stream","text":["[I 2024-02-19 19:16:12,165] Trial 1 finished with value: 0.5983005166053772 and parameters: {'learning_rate': 0.0003028497239265799, 'num_train_epochs': 8, 'per_device_train_batch_size': 16, 'warmup_steps': 5}. Best is trial 1 with value: 0.5983005166053772.\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"532aab0877da4d6b86a40ad31df51c38","version_major":2,"version_minor":0},"text/plain":["VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.011113190833323945, max=1.0…"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Tracking run with wandb version 0.16.3"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Run data is saved locally in /home/iot/ITI110/poc-playground/Final project/wandb/run-20240219_191612-pzfniief
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Syncing run dazzling-dragon-24 to Weights & Biases (docs)
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View project at https://wandb.ai/szehanz/Education-Chatbot-Optimization"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View run at https://wandb.ai/szehanz/Education-Chatbot-Optimization/runs/pzfniief"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n"," \n"," \n","
\n"," [110/168 11:08 < 05:59, 0.16 it/s, Epoch 3/6]\n","
\n"," \n"," \n"," \n"," Step | \n"," Training Loss | \n"," Validation Loss | \n","
\n"," \n"," \n"," \n"," 10 | \n"," 2.219100 | \n"," 1.546782 | \n","
\n"," \n"," 20 | \n"," 1.149900 | \n"," 0.904034 | \n","
\n"," \n"," 30 | \n"," 0.959700 | \n"," 0.810808 | \n","
\n"," \n"," 40 | \n"," 0.705400 | \n"," 0.680841 | \n","
\n"," \n"," 50 | \n"," 0.627000 | \n"," 0.652340 | \n","
\n"," \n"," 60 | \n"," 0.556300 | \n"," 0.626374 | \n","
\n"," \n"," 70 | \n"," 0.532900 | \n"," 0.629949 | \n","
\n"," \n"," 80 | \n"," 0.522900 | \n"," 0.589803 | \n","
\n"," \n"," 90 | \n"," 0.474900 | \n"," 0.596531 | \n","
\n"," \n"," 100 | \n"," 0.486900 | \n"," 0.592761 | \n","
\n"," \n"," 110 | \n"," 0.472700 | \n"," 0.596740 | \n","
\n"," \n","
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n"," \n"," \n","
\n"," [7/7 00:06]\n","
\n"," "],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"","version_major":2,"version_minor":0},"text/plain":["VBox(children=(Label(value='0.034 MB of 0.034 MB uploaded\\r'), FloatProgress(value=1.0, max=1.0)))"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n","Run history:
eval/loss | █▃▃▂▁▁▁▁▁▁▁▁ |
eval/runtime | ▁▆▇█▇▅▇▇▆▄▇▆ |
eval/samples_per_second | █▃▂▁▂▄▂▃▃▅▂▃ |
eval/steps_per_second | █▃▂▁▂▄▂▂▃▅▂▃ |
eval_loss | ▁ |
train/epoch | ▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▇▇▇▇████ |
train/global_step | ▁▁▂▂▂▂▃▃▄▄▅▅▅▅▆▆▇▇▇▇█████ |
train/learning_rate | █▇▇▆▅▄▄▃▂▂▁ |
train/loss | █▄▃▂▂▁▁▁▁▁▁ |
train/total_flos | ▁ |
train/train_loss | ▁ |
train/train_runtime | ▁ |
train/train_samples_per_second | ▁ |
train/train_steps_per_second | ▁ |
Run summary:
eval/loss | 0.5898 |
eval/runtime | 7.0266 |
eval/samples_per_second | 7.97 |
eval/steps_per_second | 0.996 |
eval_loss | 0.5898 |
train/epoch | 3.93 |
train/global_step | 110 |
train/learning_rate | 0.00014 |
train/loss | 0.4727 |
train/total_flos | 1713943443406848.0 |
train/train_loss | 0.79161 |
train/train_runtime | 675.7172 |
train/train_samples_per_second | 3.978 |
train/train_steps_per_second | 0.249 |
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View run dazzling-dragon-24 at: https://wandb.ai/szehanz/Education-Chatbot-Optimization/runs/pzfniief
Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Find logs at: ./wandb/run-20240219_191612-pzfniief/logs
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"name":"stderr","output_type":"stream","text":["[I 2024-02-19 19:27:45,486] Trial 2 finished with value: 0.5898026823997498 and parameters: {'learning_rate': 0.00040925708738231623, 'num_train_epochs': 6, 'per_device_train_batch_size': 16, 'warmup_steps': 4}. Best is trial 2 with value: 0.5898026823997498.\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"edd5a710ce3c41a6a72989df0b9696ee","version_major":2,"version_minor":0},"text/plain":["VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.011113297299986395, max=1.0…"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Tracking run with wandb version 0.16.3"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Run data is saved locally in /home/iot/ITI110/poc-playground/Final project/wandb/run-20240219_192745-srz53111
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Syncing run legendary-firecracker-25 to Weights & Biases (docs)
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View project at https://wandb.ai/szehanz/Education-Chatbot-Optimization"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View run at https://wandb.ai/szehanz/Education-Chatbot-Optimization/runs/srz53111"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n"," \n"," \n","
\n"," [112/112 20:27, Epoch 8/8]\n","
\n"," \n"," \n"," \n"," Step | \n"," Training Loss | \n"," Validation Loss | \n","
\n"," \n"," \n"," \n"," 10 | \n"," 2.328900 | \n"," 1.627960 | \n","
\n"," \n"," 20 | \n"," 1.385900 | \n"," 0.988623 | \n","
\n"," \n"," 30 | \n"," 0.905100 | \n"," 0.797488 | \n","
\n"," \n"," 40 | \n"," 0.736500 | \n"," 0.737958 | \n","
\n"," \n"," 50 | \n"," 0.611200 | \n"," 0.660954 | \n","
\n"," \n"," 60 | \n"," 0.535600 | \n"," 0.659686 | \n","
\n"," \n"," 70 | \n"," 0.545200 | \n"," 0.619212 | \n","
\n"," \n"," 80 | \n"," 0.487100 | \n"," 0.617715 | \n","
\n"," \n"," 90 | \n"," 0.468500 | \n"," 0.613847 | \n","
\n"," \n"," 100 | \n"," 0.459000 | \n"," 0.611387 | \n","
\n"," \n"," 110 | \n"," 0.441400 | \n"," 0.615416 | \n","
\n"," \n","
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n"," \n"," \n","
\n"," [7/7 00:06]\n","
\n"," "],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"","version_major":2,"version_minor":0},"text/plain":["VBox(children=(Label(value='0.006 MB of 0.034 MB uploaded\\r'), FloatProgress(value=0.17041103603603602, max=1.…"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n","Run history:
eval/loss | █▄▂▂▁▁▁▁▁▁▁▁ |
eval/runtime | ▁▅▃▅▇▄▅▄▅█▆▂ |
eval/samples_per_second | █▄▆▃▂▅▃▅▄▁▃▇ |
eval/steps_per_second | █▄▅▄▂▅▄▅▄▁▃▆ |
eval_loss | ▁ |
train/epoch | ▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇████ |
train/global_step | ▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▆▆▇▇█████ |
train/learning_rate | █▇▇▆▅▄▄▃▂▂▁ |
train/loss | █▅▃▂▂▁▁▁▁▁▁ |
train/total_flos | ▁ |
train/train_loss | ▁ |
train/train_runtime | ▁ |
train/train_samples_per_second | ▁ |
train/train_steps_per_second | ▁ |
Run summary:
eval/loss | 0.61139 |
eval/runtime | 7.018 |
eval/samples_per_second | 7.98 |
eval/steps_per_second | 0.997 |
eval_loss | 0.61139 |
train/epoch | 8.0 |
train/global_step | 112 |
train/learning_rate | 0.0 |
train/loss | 0.4414 |
train/total_flos | 3887818337157120.0 |
train/train_loss | 0.80241 |
train/train_runtime | 1240.3127 |
train/train_samples_per_second | 2.89 |
train/train_steps_per_second | 0.09 |
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View run legendary-firecracker-25 at: https://wandb.ai/szehanz/Education-Chatbot-Optimization/runs/srz53111
Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Find logs at: ./wandb/run-20240219_192745-srz53111/logs
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"name":"stderr","output_type":"stream","text":["[I 2024-02-19 19:48:44,337] Trial 3 finished with value: 0.6113868951797485 and parameters: {'learning_rate': 0.00021804269178716187, 'num_train_epochs': 8, 'per_device_train_batch_size': 32, 'warmup_steps': 3}. Best is trial 2 with value: 0.5898026823997498.\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"d19268ffc4064e02bd0a33b8b30bfc2b","version_major":2,"version_minor":0},"text/plain":["VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.011113139222207894, max=1.0…"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Tracking run with wandb version 0.16.3"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Run data is saved locally in /home/iot/ITI110/poc-playground/Final project/wandb/run-20240219_194844-o101oodx
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Syncing run abundant-wonton-26 to Weights & Biases (docs)
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View project at https://wandb.ai/szehanz/Education-Chatbot-Optimization"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View run at https://wandb.ai/szehanz/Education-Chatbot-Optimization/runs/o101oodx"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n"," \n"," \n","
\n"," [140/224 14:08 < 08:36, 0.16 it/s, Epoch 5/8]\n","
\n"," \n"," \n"," \n"," Step | \n"," Training Loss | \n"," Validation Loss | \n","
\n"," \n"," \n"," \n"," 10 | \n"," 2.310500 | \n"," 1.594241 | \n","
\n"," \n"," 20 | \n"," 1.252100 | \n"," 0.910354 | \n","
\n"," \n"," 30 | \n"," 1.011000 | \n"," 0.863987 | \n","
\n"," \n"," 40 | \n"," 0.743100 | \n"," 0.698877 | \n","
\n"," \n"," 50 | \n"," 0.647100 | \n"," 0.673689 | \n","
\n"," \n"," 60 | \n"," 0.574900 | \n"," 0.651653 | \n","
\n"," \n"," 70 | \n"," 0.551900 | \n"," 0.632135 | \n","
\n"," \n"," 80 | \n"," 0.529200 | \n"," 0.602036 | \n","
\n"," \n"," 90 | \n"," 0.482000 | \n"," 0.604638 | \n","
\n"," \n"," 100 | \n"," 0.498900 | \n"," 0.598188 | \n","
\n"," \n"," 110 | \n"," 0.487000 | \n"," 0.596326 | \n","
\n"," \n"," 120 | \n"," 0.489100 | \n"," 0.638259 | \n","
\n"," \n"," 130 | \n"," 0.436100 | \n"," 0.628063 | \n","
\n"," \n"," 140 | \n"," 0.482500 | \n"," 0.601444 | \n","
\n"," \n","
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n"," \n"," \n","
\n"," [7/7 00:06]\n","
\n"," "],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"","version_major":2,"version_minor":0},"text/plain":["VBox(children=(Label(value='0.006 MB of 0.034 MB uploaded\\r'), FloatProgress(value=0.17057929403816924, max=1.…"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n","Run history:
eval/loss | █▃▃▂▂▁▁▁▁▁▁▁▁▁▁ |
eval/runtime | ▁▄▄▅▅▇█▆▅▇▆▅▆▅▂ |
eval/samples_per_second | █▅▅▄▄▂▁▃▄▂▃▄▃▄▇ |
eval/steps_per_second | █▅▅▄▄▂▁▂▄▂▃▄▃▄▇ |
eval_loss | ▁ |
train/epoch | ▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇████ |
train/global_step | ▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇█████ |
train/learning_rate | █▇▇▆▆▅▅▄▄▃▃▂▂▁ |
train/loss | █▄▃▂▂▂▁▁▁▁▁▁▁▁ |
train/total_flos | ▁ |
train/train_loss | ▁ |
train/train_runtime | ▁ |
train/train_samples_per_second | ▁ |
train/train_steps_per_second | ▁ |
Run summary:
eval/loss | 0.59633 |
eval/runtime | 6.9881 |
eval/samples_per_second | 8.014 |
eval/steps_per_second | 1.002 |
eval_loss | 0.59633 |
train/epoch | 5.0 |
train/global_step | 140 |
train/learning_rate | 0.00012 |
train/loss | 0.4825 |
train/total_flos | 2170063307145216.0 |
train/train_loss | 0.74968 |
train/train_runtime | 855.8976 |
train/train_samples_per_second | 4.187 |
train/train_steps_per_second | 0.262 |
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View run abundant-wonton-26 at: https://wandb.ai/szehanz/Education-Chatbot-Optimization/runs/o101oodx
Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Find logs at: ./wandb/run-20240219_194844-o101oodx/logs
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"name":"stderr","output_type":"stream","text":["[I 2024-02-19 20:03:18,557] Trial 4 finished with value: 0.5963263511657715 and parameters: {'learning_rate': 0.0003062471523433562, 'num_train_epochs': 8, 'per_device_train_batch_size': 16, 'warmup_steps': 4}. Best is trial 2 with value: 0.5898026823997498.\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"c6b42332b56944d189e15ca4507bb147","version_major":2,"version_minor":0},"text/plain":["VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.011113268544431777, max=1.0…"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Tracking run with wandb version 0.16.3"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Run data is saved locally in /home/iot/ITI110/poc-playground/Final project/wandb/run-20240219_200318-1kupsvst
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Syncing run bright-pig-27 to Weights & Biases (docs)
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View project at https://wandb.ai/szehanz/Education-Chatbot-Optimization"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View run at https://wandb.ai/szehanz/Education-Chatbot-Optimization/runs/1kupsvst"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n"," \n"," \n","
\n"," [140/168 14:08 < 02:52, 0.16 it/s, Epoch 5/6]\n","
\n"," \n"," \n"," \n"," Step | \n"," Training Loss | \n"," Validation Loss | \n","
\n"," \n"," \n"," \n"," 10 | \n"," 2.310000 | \n"," 1.605123 | \n","
\n"," \n"," 20 | \n"," 1.232900 | \n"," 0.919273 | \n","
\n"," \n"," 30 | \n"," 0.990500 | \n"," 0.866596 | \n","
\n"," \n"," 40 | \n"," 0.724000 | \n"," 0.680403 | \n","
\n"," \n"," 50 | \n"," 0.634400 | \n"," 0.667967 | \n","
\n"," \n"," 60 | \n"," 0.569800 | \n"," 0.646463 | \n","
\n"," \n"," 70 | \n"," 0.544800 | \n"," 0.631393 | \n","
\n"," \n"," 80 | \n"," 0.526900 | \n"," 0.594657 | \n","
\n"," \n"," 90 | \n"," 0.473500 | \n"," 0.601250 | \n","
\n"," \n"," 100 | \n"," 0.490500 | \n"," 0.598757 | \n","
\n"," \n"," 110 | \n"," 0.478800 | \n"," 0.592457 | \n","
\n"," \n"," 120 | \n"," 0.482300 | \n"," 0.626896 | \n","
\n"," \n"," 130 | \n"," 0.426500 | \n"," 0.625595 | \n","
\n"," \n"," 140 | \n"," 0.469700 | \n"," 0.604543 | \n","
\n"," \n","
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n"," \n"," \n","
\n"," [7/7 00:06]\n","
\n"," "],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"","version_major":2,"version_minor":0},"text/plain":["VBox(children=(Label(value='0.006 MB of 0.023 MB uploaded\\r'), FloatProgress(value=0.25586993243243245, max=1.…"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n","Run history:
eval/loss | █▃▃▂▂▁▁▁▁▁▁▁▁▁▁ |
eval/runtime | ▁▅▇▄▅▆▄▇▆██▇█▅▃ |
eval/samples_per_second | █▄▂▄▄▃▅▂▃▁▁▂▁▄▆ |
eval/steps_per_second | █▃▂▄▃▂▅▂▂▁▁▁▁▄▅ |
eval_loss | ▁ |
train/epoch | ▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇████ |
train/global_step | ▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇█████ |
train/learning_rate | █▇▇▆▆▅▅▄▄▃▃▂▂▁ |
train/loss | █▄▃▂▂▂▁▁▁▁▁▁▁▁ |
train/total_flos | ▁ |
train/train_loss | ▁ |
train/train_runtime | ▁ |
train/train_samples_per_second | ▁ |
train/train_steps_per_second | ▁ |
Run summary:
eval/loss | 0.59246 |
eval/runtime | 6.9975 |
eval/samples_per_second | 8.003 |
eval/steps_per_second | 1.0 |
eval_loss | 0.59246 |
train/epoch | 5.0 |
train/global_step | 140 |
train/learning_rate | 6e-05 |
train/loss | 0.4697 |
train/total_flos | 2170063307145216.0 |
train/train_loss | 0.73962 |
train/train_runtime | 855.0423 |
train/train_samples_per_second | 3.144 |
train/train_steps_per_second | 0.196 |
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View run bright-pig-27 at: https://wandb.ai/szehanz/Education-Chatbot-Optimization/runs/1kupsvst
Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Find logs at: ./wandb/run-20240219_200318-1kupsvst/logs
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"name":"stderr","output_type":"stream","text":["[I 2024-02-19 20:17:52,091] Trial 5 finished with value: 0.5924574732780457 and parameters: {'learning_rate': 0.0003549720190405869, 'num_train_epochs': 6, 'per_device_train_batch_size': 16, 'warmup_steps': 5}. Best is trial 2 with value: 0.5898026823997498.\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"63a4222d599e4da4890c36a23ae52a3a","version_major":2,"version_minor":0},"text/plain":["VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.011112871322095291, max=1.0…"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Tracking run with wandb version 0.16.3"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Run data is saved locally in /home/iot/ITI110/poc-playground/Final project/wandb/run-20240219_201752-hvk8kkmo
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Syncing run red-chrysanthemum-28 to Weights & Biases (docs)
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View project at https://wandb.ai/szehanz/Education-Chatbot-Optimization"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View run at https://wandb.ai/szehanz/Education-Chatbot-Optimization/runs/hvk8kkmo"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n"," \n"," \n","
\n"," [84/84 15:16, Epoch 6/6]\n","
\n"," \n"," \n"," \n"," Step | \n"," Training Loss | \n"," Validation Loss | \n","
\n"," \n"," \n"," \n"," 10 | \n"," 2.332000 | \n"," 1.561446 | \n","
\n"," \n"," 20 | \n"," 1.248800 | \n"," 0.888817 | \n","
\n"," \n"," 30 | \n"," 0.848200 | \n"," 0.778326 | \n","
\n"," \n"," 40 | \n"," 0.677900 | \n"," 0.683840 | \n","
\n"," \n"," 50 | \n"," 0.571000 | \n"," 0.657031 | \n","
\n"," \n"," 60 | \n"," 0.510400 | \n"," 0.637265 | \n","
\n"," \n"," 70 | \n"," 0.521900 | \n"," 0.623751 | \n","
\n"," \n"," 80 | \n"," 0.470300 | \n"," 0.618002 | \n","
\n"," \n","
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n"," \n"," \n","
\n"," [7/7 00:06]\n","
\n"," "],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"","version_major":2,"version_minor":0},"text/plain":["VBox(children=(Label(value='0.022 MB of 0.034 MB uploaded\\r'), FloatProgress(value=0.6506108890265188, max=1.0…"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n","Run history:
eval/loss | █▃▂▁▁▁▁▁▁ |
eval/runtime | ▁▂▃▁▇▅█▇▇ |
eval/samples_per_second | █▇▆█▂▄▁▂▂ |
eval/steps_per_second | █▆▆█▃▄▁▃▃ |
eval_loss | ▁ |
train/epoch | ▁▁▂▂▃▃▄▄▅▅▆▆▇▇████ |
train/global_step | ▁▁▂▂▃▃▄▄▅▅▆▆▇▇█████ |
train/learning_rate | █▇▆▅▄▃▂▁ |
train/loss | █▄▂▂▁▁▁▁ |
train/total_flos | ▁ |
train/train_loss | ▁ |
train/train_runtime | ▁ |
train/train_samples_per_second | ▁ |
train/train_steps_per_second | ▁ |
Run summary:
eval/loss | 0.618 |
eval/runtime | 7.029 |
eval/samples_per_second | 7.967 |
eval/steps_per_second | 0.996 |
eval_loss | 0.618 |
train/epoch | 6.0 |
train/global_step | 84 |
train/learning_rate | 2e-05 |
train/loss | 0.4703 |
train/total_flos | 2927298512683008.0 |
train/train_loss | 0.87718 |
train/train_runtime | 928.7795 |
train/train_samples_per_second | 2.894 |
train/train_steps_per_second | 0.09 |
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View run red-chrysanthemum-28 at: https://wandb.ai/szehanz/Education-Chatbot-Optimization/runs/hvk8kkmo
Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Find logs at: ./wandb/run-20240219_201752-hvk8kkmo/logs
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"name":"stderr","output_type":"stream","text":["[I 2024-02-19 20:33:38,930] Trial 6 finished with value: 0.6180019974708557 and parameters: {'learning_rate': 0.00029993812811393003, 'num_train_epochs': 6, 'per_device_train_batch_size': 32, 'warmup_steps': 5}. Best is trial 2 with value: 0.5898026823997498.\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"5c842ce8d6714182920a1750adb3d273","version_major":2,"version_minor":0},"text/plain":["VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.011113082922141379, max=1.0…"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Tracking run with wandb version 0.16.3"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Run data is saved locally in /home/iot/ITI110/poc-playground/Final project/wandb/run-20240219_203338-u9an5iy8
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Syncing run red-wish-29 to Weights & Biases (docs)
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View project at https://wandb.ai/szehanz/Education-Chatbot-Optimization"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View run at https://wandb.ai/szehanz/Education-Chatbot-Optimization/runs/u9an5iy8"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n"," \n"," \n","
\n"," [56/56 10:07, Epoch 4/4]\n","
\n"," \n"," \n"," \n"," Step | \n"," Training Loss | \n"," Validation Loss | \n","
\n"," \n"," \n"," \n"," 10 | \n"," 2.082200 | \n"," 1.251068 | \n","
\n"," \n"," 20 | \n"," 1.007900 | \n"," 0.816748 | \n","
\n"," \n"," 30 | \n"," 0.750100 | \n"," 0.686710 | \n","
\n"," \n"," 40 | \n"," 0.600800 | \n"," 0.673636 | \n","
\n"," \n"," 50 | \n"," 0.539500 | \n"," 0.642782 | \n","
\n"," \n","
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n"," \n"," \n","
\n"," [7/7 00:06]\n","
\n"," "],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"","version_major":2,"version_minor":0},"text/plain":["VBox(children=(Label(value='0.034 MB of 0.034 MB uploaded\\r'), FloatProgress(value=1.0, max=1.0)))"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n","Run history:
eval/loss | █▃▂▁▁▁ |
eval/runtime | ▁▆█▄▅▃ |
eval/samples_per_second | █▃▁▄▄▆ |
eval/steps_per_second | █▃▁▅▅▆ |
eval_loss | ▁ |
train/epoch | ▁▁▃▃▄▄▆▆▇▇██ |
train/global_step | ▁▁▃▃▄▄▆▆▇▇███ |
train/learning_rate | █▆▅▃▁ |
train/loss | █▃▂▁▁ |
train/total_flos | ▁ |
train/train_loss | ▁ |
train/train_runtime | ▁ |
train/train_samples_per_second | ▁ |
train/train_steps_per_second | ▁ |
Run summary:
eval/loss | 0.64278 |
eval/runtime | 7.0018 |
eval/samples_per_second | 7.998 |
eval/steps_per_second | 1.0 |
eval_loss | 0.64278 |
train/epoch | 4.0 |
train/global_step | 56 |
train/learning_rate | 6e-05 |
train/loss | 0.5395 |
train/total_flos | 1969319745945600.0 |
train/train_loss | 0.94465 |
train/train_runtime | 619.7752 |
train/train_samples_per_second | 2.891 |
train/train_steps_per_second | 0.09 |
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View run red-wish-29 at: https://wandb.ai/szehanz/Education-Chatbot-Optimization/runs/u9an5iy8
Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Find logs at: ./wandb/run-20240219_203338-u9an5iy8/logs
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"name":"stderr","output_type":"stream","text":["[I 2024-02-19 20:44:20,345] Trial 7 finished with value: 0.6427821516990662 and parameters: {'learning_rate': 0.0004921796551065912, 'num_train_epochs': 4, 'per_device_train_batch_size': 32, 'warmup_steps': 3}. Best is trial 2 with value: 0.5898026823997498.\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"2cfb2293dad348b985a23b767d53ace8","version_major":2,"version_minor":0},"text/plain":["VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.011113233355637122, max=1.0…"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Tracking run with wandb version 0.16.3"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Run data is saved locally in /home/iot/ITI110/poc-playground/Final project/wandb/run-20240219_204420-k8hrtsr1
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Syncing run sparkling-peony-30 to Weights & Biases (docs)
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View project at https://wandb.ai/szehanz/Education-Chatbot-Optimization"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View run at https://wandb.ai/szehanz/Education-Chatbot-Optimization/runs/k8hrtsr1"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n"," \n"," \n","
\n"," [56/56 10:07, Epoch 4/4]\n","
\n"," \n"," \n"," \n"," Step | \n"," Training Loss | \n"," Validation Loss | \n","
\n"," \n"," \n"," \n"," 10 | \n"," 2.332100 | \n"," 1.553420 | \n","
\n"," \n"," 20 | \n"," 1.247700 | \n"," 0.886406 | \n","
\n"," \n"," 30 | \n"," 0.857700 | \n"," 0.771762 | \n","
\n"," \n"," 40 | \n"," 0.705500 | \n"," 0.712279 | \n","
\n"," \n"," 50 | \n"," 0.595000 | \n"," 0.679550 | \n","
\n"," \n","
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n"," \n"," \n","
\n"," [7/7 00:06]\n","
\n"," "],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"","version_major":2,"version_minor":0},"text/plain":["VBox(children=(Label(value='0.006 MB of 0.034 MB uploaded\\r'), FloatProgress(value=0.170631457447707, max=1.0)…"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n","Run history:
eval/loss | █▃▂▁▁▁ |
eval/runtime | ▁▃▆▅█▄ |
eval/samples_per_second | █▆▃▄▁▅ |
eval/steps_per_second | █▆▃▄▁▅ |
eval_loss | ▁ |
train/epoch | ▁▁▃▃▄▄▆▆▇▇██ |
train/global_step | ▁▁▃▃▄▄▆▆▇▇███ |
train/learning_rate | █▆▅▃▁ |
train/loss | █▄▂▁▁ |
train/total_flos | ▁ |
train/train_loss | ▁ |
train/train_runtime | ▁ |
train/train_samples_per_second | ▁ |
train/train_steps_per_second | ▁ |
Run summary:
eval/loss | 0.67955 |
eval/runtime | 7.0075 |
eval/samples_per_second | 7.991 |
eval/steps_per_second | 0.999 |
eval_loss | 0.67955 |
train/epoch | 4.0 |
train/global_step | 56 |
train/learning_rate | 4e-05 |
train/loss | 0.595 |
train/total_flos | 1969319745945600.0 |
train/train_loss | 1.08562 |
train/train_runtime | 620.2921 |
train/train_samples_per_second | 2.889 |
train/train_steps_per_second | 0.09 |
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View run sparkling-peony-30 at: https://wandb.ai/szehanz/Education-Chatbot-Optimization/runs/k8hrtsr1
Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Find logs at: ./wandb/run-20240219_204420-k8hrtsr1/logs
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"name":"stderr","output_type":"stream","text":["[I 2024-02-19 20:54:59,931] Trial 8 finished with value: 0.679550051689148 and parameters: {'learning_rate': 0.0002997058981392026, 'num_train_epochs': 4, 'per_device_train_batch_size': 32, 'warmup_steps': 5}. Best is trial 2 with value: 0.5898026823997498.\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"15cedcbf21794e1895357c52edcbef03","version_major":2,"version_minor":0},"text/plain":["VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.01111316335575086, max=1.0)…"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Tracking run with wandb version 0.16.3"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Run data is saved locally in /home/iot/ITI110/poc-playground/Final project/wandb/run-20240219_205459-iosmgelq
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Syncing run abundant-chrysanthemum-31 to Weights & Biases (docs)
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View project at https://wandb.ai/szehanz/Education-Chatbot-Optimization"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View run at https://wandb.ai/szehanz/Education-Chatbot-Optimization/runs/iosmgelq"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n"," \n"," \n","
\n"," [110/112 11:07 < 00:12, 0.16 it/s, Epoch 3/4]\n","
\n"," \n"," \n"," \n"," Step | \n"," Training Loss | \n"," Validation Loss | \n","
\n"," \n"," \n"," \n"," 10 | \n"," 2.362300 | \n"," 1.603512 | \n","
\n"," \n"," 20 | \n"," 1.277600 | \n"," 0.896908 | \n","
\n"," \n"," 30 | \n"," 1.028400 | \n"," 0.843322 | \n","
\n"," \n"," 40 | \n"," 0.758800 | \n"," 0.708671 | \n","
\n"," \n"," 50 | \n"," 0.656000 | \n"," 0.693394 | \n","
\n"," \n"," 60 | \n"," 0.584600 | \n"," 0.657635 | \n","
\n"," \n"," 70 | \n"," 0.557400 | \n"," 0.658206 | \n","
\n"," \n"," 80 | \n"," 0.530400 | \n"," 0.600657 | \n","
\n"," \n"," 90 | \n"," 0.469300 | \n"," 0.609772 | \n","
\n"," \n"," 100 | \n"," 0.483300 | \n"," 0.610230 | \n","
\n"," \n"," 110 | \n"," 0.480400 | \n"," 0.604136 | \n","
\n"," \n","
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n"," \n"," \n","
\n"," [7/7 00:06]\n","
\n"," "],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"","version_major":2,"version_minor":0},"text/plain":["VBox(children=(Label(value='0.012 MB of 0.034 MB uploaded\\r'), FloatProgress(value=0.3512258282433079, max=1.0…"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n","Run history:
eval/loss | █▃▃▂▂▁▁▁▁▁▁▁ |
eval/runtime | ▁▆▅▆█▇██▇▇▆▅ |
eval/samples_per_second | █▃▃▃▁▂▁▁▂▂▃▄ |
eval/steps_per_second | █▃▄▃▁▂▂▂▂▂▃▄ |
eval_loss | ▁ |
train/epoch | ▁▁▂▂▂▂▃▃▄▄▄▄▅▅▆▆▇▇▇▇████ |
train/global_step | ▁▁▂▂▂▂▃▃▄▄▅▅▅▅▆▆▇▇▇▇█████ |
train/learning_rate | █▇▇▆▅▅▄▃▂▂▁ |
train/loss | █▄▃▂▂▁▁▁▁▁▁ |
train/total_flos | ▁ |
train/train_loss | ▁ |
train/train_runtime | ▁ |
train/train_samples_per_second | ▁ |
train/train_steps_per_second | ▁ |
Run summary:
eval/loss | 0.60066 |
eval/runtime | 6.9977 |
eval/samples_per_second | 8.003 |
eval/steps_per_second | 1.0 |
eval_loss | 0.60066 |
train/epoch | 3.93 |
train/global_step | 110 |
train/learning_rate | 1e-05 |
train/loss | 0.4804 |
train/total_flos | 1713943443406848.0 |
train/train_loss | 0.83532 |
train/train_runtime | 674.6173 |
train/train_samples_per_second | 2.656 |
train/train_steps_per_second | 0.166 |
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View run abundant-chrysanthemum-31 at: https://wandb.ai/szehanz/Education-Chatbot-Optimization/runs/iosmgelq
Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Find logs at: ./wandb/run-20240219_205459-iosmgelq/logs
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"name":"stderr","output_type":"stream","text":["[I 2024-02-19 21:06:32,382] Trial 9 finished with value: 0.6006569266319275 and parameters: {'learning_rate': 0.0002981933140416747, 'num_train_epochs': 4, 'per_device_train_batch_size': 16, 'warmup_steps': 5}. Best is trial 2 with value: 0.5898026823997498.\n"]}],"source":["def objective(trial):\n","\n"," # Define hyperparameters outside the wandb.init to use them later in the code\n"," learning_rate = trial.suggest_float('learning_rate', 2e-4, 5e-4, log=True)\n"," num_train_epochs = trial.suggest_categorical('num_train_epochs', [4, 6, 8])\n"," per_device_train_batch_size = trial.suggest_categorical('per_device_train_batch_size', [16, 32])\n"," warmup_steps = trial.suggest_int('warmup_steps', 3, 5)\n","\n"," wandb.init(\n"," project=\"Education-Chatbot-Optimization\",\n"," entity=\"szehanz\",\n"," group=\"optuna-optimization\",\n"," job_type=\"hyperparameter_search\",\n"," reinit=True,\n"," config={\n"," \"learning_rate\": learning_rate,\n"," \"num_train_epochs\": num_train_epochs,\n"," \"per_device_train_batch_size\": per_device_train_batch_size,\n"," \"warmup_steps\": warmup_steps\n"," }\n"," )\n","\n"," # Format the current date and time\n"," current_time = datetime.now().strftime(\"%Y%m%d-%H%M%S\")\n"," output_dir = f\"train_out_dir_{current_time}\" # Append the current date and time to the directory name\n","\n"," # Create the output directory\n"," os.makedirs(output_dir, exist_ok=True) # Using exist_ok=True to avoid error if the directory already exists\n","\n","\n"," # Define TrainingArguments with the suggested hyperparameters\n"," training_args = TrainingArguments(\n"," output_dir=output_dir, # Directory for saving output models and checkpoints.\n"," save_strategy=\"steps\", # Save model checkpoints at regular step intervals.\n"," save_steps=10, # Save model checkpoints every 10 steps.\n"," learning_rate=learning_rate, # Initial learning rate 
for the optimizer.\n","    per_device_train_batch_size=per_device_train_batch_size,  # Batch size per device during training.\n","    per_device_eval_batch_size=8,  # Batch size per device during evaluation.\n","    num_train_epochs=num_train_epochs,  # Total number of training epochs.\n","    warmup_steps=warmup_steps,  # Number of warmup steps for the learning rate scheduler.\n","    evaluation_strategy='steps',  # Perform evaluation at regular step intervals.\n","    eval_steps=10,  # Perform evaluation every 10 steps.\n","    logging_steps=10,\n","    optim='paged_adamw_8bit',  # Specifies the optimizer to use.\n","    lr_scheduler_type='linear',  # Type of learning rate scheduler.\n","    gradient_accumulation_steps=1,  # Number of steps to accumulate gradients before performing an update.\n","    load_best_model_at_end=True,  # Load the best model based on evaluation metric at the end of training.\n","    report_to='wandb',  # Report training/eval metrics to Weights & Biases.\n","  )\n","\n","\n","  # Initialize the Trainer with early stopping callback inside the objective function\n","  trainer = SFTTrainer(\n","    model=model,  # Ensure a function or a mechanism to initialize your model\n","    train_dataset=train_dataset,\n","    eval_dataset=val_dataset,\n","    peft_config=peft_config,\n","    dataset_text_field=\"instruction\",\n","    tokenizer=tokenizer,\n","    args=training_args,\n","    max_seq_length=4096,\n","    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],\n","  )\n","\n","  # Train the model and evaluate within the objective function\n","  trainer.train()\n","  eval_result = trainer.evaluate()\n","\n","  # Log the primary metric to WandB\n","  wandb.log({\"eval_loss\": eval_result[\"eval_loss\"]})\n","\n","  # Finish the WandB run for this trial\n","  wandb.finish()\n","\n","  # Return the metric to be optimized\n","  return eval_result[\"eval_loss\"]\n","\n","\n","# Run the optimization\n","study = optuna.create_study(direction='minimize')\n","study.optimize(objective, 
n_trials=10)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"fmdlQTVSHT8e","outputId":"a2935a56-5cad-4dbc-c55c-53b3b5ad1368"},"outputs":[{"name":"stdout","output_type":"stream","text":["Best trial:\n"," Value: 0.5898026823997498\n"," Params: \n"," learning_rate: 0.00040925708738231623\n"," num_train_epochs: 6\n"," per_device_train_batch_size: 16\n"," warmup_steps: 4\n"]}],"source":["# Best trial results\n","print(\"Best trial:\")\n","print(f\" Value: {study.best_trial.value}\")\n","print(\" Params: \")\n","for key, value in study.best_trial.params.items():\n"," print(f\" {key}: {value}\")"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"mKlA_ahVHT8e","outputId":"6365a674-b011-48bb-94ea-7aa9d657d323","colab":{"referenced_widgets":[""]}},"outputs":[{"data":{"text/html":["Tracking run with wandb version 0.16.3"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Run data is saved locally in /home/iot/ITI110/poc-playground/Final project/wandb/run-20240219_210737-q16rpssr
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Syncing run fortuitous-fish-3 to Weights & Biases (docs)
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View project at https://wandb.ai/szehanz/huggingface"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View run at https://wandb.ai/szehanz/huggingface/runs/q16rpssr"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n"," \n"," \n","
\n"," [168/168 16:51, Epoch 6/6]\n","
\n"," \n"," \n"," \n"," Step | \n"," Training Loss | \n"," Validation Loss | \n","
\n"," \n"," \n"," \n"," 10 | \n"," 2.218400 | \n"," 1.534913 | \n","
\n"," \n"," 20 | \n"," 1.136500 | \n"," 0.895845 | \n","
\n"," \n"," 30 | \n"," 0.955600 | \n"," 0.814537 | \n","
\n"," \n"," 40 | \n"," 0.707400 | \n"," 0.680818 | \n","
\n"," \n"," 50 | \n"," 0.629700 | \n"," 0.656619 | \n","
\n"," \n"," 60 | \n"," 0.556900 | \n"," 0.624320 | \n","
\n"," \n"," 70 | \n"," 0.532100 | \n"," 0.632881 | \n","
\n"," \n"," 80 | \n"," 0.524000 | \n"," 0.591038 | \n","
\n"," \n"," 90 | \n"," 0.476200 | \n"," 0.599967 | \n","
\n"," \n"," 100 | \n"," 0.487800 | \n"," 0.594422 | \n","
\n"," \n"," 110 | \n"," 0.475600 | \n"," 0.595004 | \n","
\n"," \n"," 120 | \n"," 0.478700 | \n"," 0.627798 | \n","
\n"," \n"," 130 | \n"," 0.426700 | \n"," 0.623343 | \n","
\n"," \n"," 140 | \n"," 0.471200 | \n"," 0.604500 | \n","
\n"," \n"," 150 | \n"," 0.420800 | \n"," 0.611532 | \n","
\n"," \n"," 160 | \n"," 0.440900 | \n"," 0.603333 | \n","
\n"," \n","
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"","version_major":2,"version_minor":0},"text/plain":["VBox(children=(Label(value='0.012 MB of 0.034 MB uploaded\\r'), FloatProgress(value=0.35244443189199876, max=1.…"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["\n","Run history:
eval/loss | █▃▃▂▁▁▁▁▁▁▁▁▁▁▁▁ |
eval/runtime | ▁▇▅▆▆▇▇█▇██▇█▇▇█ |
eval/samples_per_second | █▂▄▃▃▂▂▁▂▁▁▂▁▂▂▁ |
eval/steps_per_second | █▂▄▃▃▂▂▁▂▁▁▂▁▂▂▁ |
train/epoch | ▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇███ |
train/global_step | ▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇███ |
train/learning_rate | ██▇▇▆▆▅▅▄▄▃▃▂▂▁▁ |
train/loss | █▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁ |
train/total_flos | ▁ |
train/train_loss | ▁ |
train/train_runtime | ▁ |
train/train_samples_per_second | ▁ |
train/train_steps_per_second | ▁ |
Run summary:
eval/loss | 0.60333 |
eval/runtime | 7.0342 |
eval/samples_per_second | 7.961 |
eval/steps_per_second | 0.995 |
train/epoch | 6.0 |
train/global_step | 168 |
train/learning_rate | 2e-05 |
train/loss | 0.4409 |
train/total_flos | 2600137329082368.0 |
train/train_loss | 0.67181 |
train/train_runtime | 1023.9774 |
train/train_samples_per_second | 2.625 |
train/train_steps_per_second | 0.164 |
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":[" View run fortuitous-fish-3 at: https://wandb.ai/szehanz/huggingface/runs/q16rpssr
Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)"],"text/plain":[""]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["Find logs at: ./wandb/run-20240219_210737-q16rpssr/logs
"],"text/plain":[""]},"metadata":{},"output_type":"display_data"}],"source":["# Use best hyperparameters from the study\n","best_trial = study.best_trial\n","\n","best_learning_rate = best_trial.params['learning_rate']\n","best_num_train_epochs = best_trial.params['num_train_epochs']\n","best_per_device_train_batch_size = best_trial.params['per_device_train_batch_size']\n","best_warmup_steps = best_trial.params['warmup_steps']\n","\n","\n","# Define TrainingArguments with the best hyperparameters for retraining\n","best_training_args = TrainingArguments(\n"," output_dir=\"best_train_out_dir\",\n"," save_strategy=\"steps\",\n"," save_steps=10,\n"," learning_rate=best_learning_rate,\n"," per_device_train_batch_size=best_per_device_train_batch_size,\n"," per_device_eval_batch_size=8,\n"," num_train_epochs=best_num_train_epochs,\n"," warmup_steps=best_warmup_steps,\n"," evaluation_strategy='steps',\n"," eval_steps=10,\n"," logging_steps=10,\n"," optim='paged_adamw_8bit',\n"," lr_scheduler_type='linear',\n"," gradient_accumulation_steps=1,\n"," load_best_model_at_end=True,\n"," report_to='wandb',\n",")\n","\n","# Reinitialize the Trainer with the best hyperparameters\n","best_trainer = SFTTrainer(\n"," model=model,\n"," train_dataset=train_dataset,\n"," eval_dataset=val_dataset,\n"," peft_config=peft_config,\n"," dataset_text_field=\"instruction\",\n"," tokenizer=tokenizer,\n"," args=best_training_args,\n"," max_seq_length=4096,\n",")\n","\n","# Retrain the model with the best hyperparameters\n","best_trainer.train()\n","\n","\n","# Save trained model\n","best_trainer.model.save_pretrained(new_model)\n","\n","# Finish the WandB run for this trial\n","wandb.finish()"]},{"cell_type":"markdown","metadata":{"id":"_g0fB7P9s0ol"},"source":["Merging the base model with the trained 
adapter."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"referenced_widgets":["aafec7a64d034e05b1aaf17bb153136b","3a01a0a298124d83a1e4fc74bcae457f"]},"id":"QQn30cRtAZ-P","outputId":"6508be7b-0a96-494e-bd33-d35c5c331f52"},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"3a01a0a298124d83a1e4fc74bcae457f","version_major":2,"version_minor":0},"text/plain":["Loading checkpoint shards: 0%| | 0/3 [00:00, ?it/s]"]},"metadata":{},"output_type":"display_data"}],"source":["# Reload model in FP16 and merge it with LoRA weights\n","model = AutoModelForCausalLM.from_pretrained(\n"," base_model,\n"," low_cpu_mem_usage=True,\n"," return_dict=True,\n"," torch_dtype=torch.float16,\n"," # device_map={\"\": 0},\n",")\n","model = PeftModel.from_pretrained(model, new_model)\n","model = model.merge_and_unload()\n","\n","\n","# Reload tokenizer to save it\n","tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)\n","tokenizer.pad_token = tokenizer.eos_token\n","tokenizer.padding_side = \"right\""]},{"cell_type":"markdown","metadata":{"id":"n4_wCHy_s--5"},"source":["Push the model and tokenizer to the Hugging Face Hub."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"referenced_widgets":["3202bad4cfeb4061b79b1899b34c72fe","adb2ab52a0b349a6acf355e8e2f86195","4757fe0198c8457691b53c53968b2c57","2f57f07de87446f582fcd4a95a31664a","2d123d7e3900443a808dbf6a7952b726","1bc2e343ca5f467b90153d0d38e89394","081336c65b904925ba18418b85c4d704","4937e797c5ad4014b4ba3dc70f7ee82f","04e88468316c42b3b931149b04261a11","9ed2ef69f2ac48088295ab5a0cbbdbff","b6a5dfa4ace74ea7b3697aa8c45b092f","f290633a52784cb48737e3819bcc649d"]},"id":"x-xPb-_qB0dz","outputId":"c6eb10d5-6b16-46f8-d147-12355811ec32"},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"081336c65b904925ba18418b85c4d704","version_major":2,"version_minor":0},"text/plain":["model-00002-of-00003.safetensors: 0%| | 0.00/4.95G [00:00, 
?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"4937e797c5ad4014b4ba3dc70f7ee82f","version_major":2,"version_minor":0},"text/plain":["Upload 3 LFS files: 0%| | 0/3 [00:00, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"04e88468316c42b3b931149b04261a11","version_major":2,"version_minor":0},"text/plain":["model-00001-of-00003.safetensors: 0%| | 0.00/4.94G [00:00, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"9ed2ef69f2ac48088295ab5a0cbbdbff","version_major":2,"version_minor":0},"text/plain":["model-00003-of-00003.safetensors: 0%| | 0.00/3.59G [00:00, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"b6a5dfa4ace74ea7b3697aa8c45b092f","version_major":2,"version_minor":0},"text/plain":["README.md: 0%| | 0.00/5.18k [00:00, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"f290633a52784cb48737e3819bcc649d","version_major":2,"version_minor":0},"text/plain":["tokenizer.model: 0%| | 0.00/500k [00:00, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"text/plain":["CommitInfo(commit_url='https://huggingface.co/ssoh/llama-2-7b-combined_datasets/commit/59281478fddef1962456031d2a400819882b0d46', commit_message='Upload tokenizer', commit_description='', oid='59281478fddef1962456031d2a400819882b0d46', pr_url=None, pr_revision=None, pr_num=None)"]},"execution_count":14,"metadata":{},"output_type":"execute_result"}],"source":["model.push_to_hub(new_model, use_temp_dir=False)\n","tokenizer.push_to_hub(new_model, use_temp_dir=False)"]},{"cell_type":"markdown","metadata":{"id":"hAS2DuJuC7xR"},"source":["---"]},{"cell_type":"markdown","metadata":{"id":"8y_Rk94LzG7I"},"source":["# Quantize Llama 2 models using GGUF and 
llama.cpp\n","\n","\n","## Usage\n","\n","* `MODEL_ID`: The ID of the model to quantize (e.g., `ssoh/llama-2-7b-combined_datasets`).\n","* `QUANTIZATION_METHOD`: The quantization method to use.\n","\n","## Quantization methods\n","\n","The names of the quantization methods follow the naming convention: \"q\" + the number of bits + the variant used.\n","\n","We will be using **Q5_K_M** as it preserves most of the model's performance."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":145,"referenced_widgets":["565b3eac349740cfb4916db8d1256473","d380e9abee2644b1ab1b2e4cdcf3e13f","824a18a3d40d4c34bcb9c37d2c377a79","22f854128d294447ac56fb4f4df7725b","cde8f66d29a5422b84de4e1863046c07","e19b264b02994dfb99c991e27a4a0a6d","6b54fd1f0c2c466395bd5104c3561935","a55670ac1776402bbe039e3dd003517a","490c48e8d09346e1a3d969498148a307","a3da4e46290c4f0b9077e2c2b0fa27b0","1e5096366c47471b8cdb6c4664a92496","a0a1bb1b676c491b9532b7d354b71411","5dc8c40d5a1c417c8fcf0a774ae1d623","f9b325c484cc4c3f9d66372fcf07c6c2","5ea490610878452699d623a468dad12f","c09bd5b327334e1eb3443ede501d8a5f","41244430849a4e569ab79d91937ac02d","3a6b8e559529473fbe0dbef9217e2cd0","d96d95f84ba34af0ba22f7912bd2584f","0361960758384c3cafb2f718c9d4132b","51ad39fd3f32417397c7f0b9d7d2db8f","236c3bfec0d945c6a904edbd95a4fd73","0289cb086a404616aeb40dff47663acb","9beb39e1f1c244f79f06dc27efa9b61d","ae530f75d3ea4ce0ad2d3c130a810311","42d4ae9e7ac4457fbf4c202f5072b547","c835df4cbea04e04ad7a01f61bb7d4f8","805d7e2011be49a4b261c3da7510e85e","d4047a007ebc49d1a45be48e495967f7","775e0d54755842b4ba92454209e2a478","c4ddb468de17447a9f420762d5b0018e","1409ec73b4764cfea08e76b95a6e6752","f13a661f0e6b447e978152be38815949"]},"executionInfo":{"elapsed":562,"status":"ok","timestamp":1707923665837,"user":{"displayName":"szehanz","userId":"16137883221268059572"},"user_tz":-480},"id":"zbCYFOmU7ANP","outputId":"f3ed43e5-78d0-42c0-a4ba-23e83904f0a6"},"outputs":[{"data":{"application/vnd.jupyter
.widget-view+json":{"model_id":"f13a661f0e6b447e978152be38815949","version_major":2,"version_minor":0},"text/plain":["VBox(children=(HTML(value=' =4.35.2 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from -r llama.cpp/./requirements/requirements-convert.txt (line 3)) (4.37.2)\n","Requirement already satisfied: gguf>=0.1.0 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from -r llama.cpp/./requirements/requirements-convert.txt (line 4)) (0.6.0)\n","Requirement already satisfied: protobuf<5.0.0,>=4.21.0 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from -r llama.cpp/./requirements/requirements-convert.txt (line 5)) (4.25.2)\n","Requirement already satisfied: torch~=2.1.1 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from -r llama.cpp/./requirements/requirements-convert-hf-to-gguf.txt (line 2)) (2.1.2)\n","Requirement already satisfied: filelock in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from transformers<5.0.0,>=4.35.2->-r llama.cpp/./requirements/requirements-convert.txt (line 3)) (3.13.1)\n","Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from transformers<5.0.0,>=4.35.2->-r llama.cpp/./requirements/requirements-convert.txt (line 3)) (0.19.4)\n","Requirement already satisfied: packaging>=20.0 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from transformers<5.0.0,>=4.35.2->-r llama.cpp/./requirements/requirements-convert.txt (line 3)) (23.2)\n","Requirement already satisfied: pyyaml>=5.1 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from transformers<5.0.0,>=4.35.2->-r llama.cpp/./requirements/requirements-convert.txt (line 3)) (6.0.1)\n","Requirement already satisfied: regex!=2019.12.17 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from transformers<5.0.0,>=4.35.2->-r 
llama.cpp/./requirements/requirements-convert.txt (line 3)) (2023.10.3)\n","Requirement already satisfied: requests in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from transformers<5.0.0,>=4.35.2->-r llama.cpp/./requirements/requirements-convert.txt (line 3)) (2.31.0)\n","Requirement already satisfied: tokenizers<0.19,>=0.14 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from transformers<5.0.0,>=4.35.2->-r llama.cpp/./requirements/requirements-convert.txt (line 3)) (0.15.1)\n","Requirement already satisfied: safetensors>=0.4.1 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from transformers<5.0.0,>=4.35.2->-r llama.cpp/./requirements/requirements-convert.txt (line 3)) (0.4.2)\n","Requirement already satisfied: tqdm>=4.27 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from transformers<5.0.0,>=4.35.2->-r llama.cpp/./requirements/requirements-convert.txt (line 3)) (4.66.1)\n","Requirement already satisfied: typing-extensions in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from torch~=2.1.1->-r llama.cpp/./requirements/requirements-convert-hf-to-gguf.txt (line 2)) (4.9.0)\n","Requirement already satisfied: sympy in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from torch~=2.1.1->-r llama.cpp/./requirements/requirements-convert-hf-to-gguf.txt (line 2)) (1.12)\n","Requirement already satisfied: networkx in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from torch~=2.1.1->-r llama.cpp/./requirements/requirements-convert-hf-to-gguf.txt (line 2)) (3.2.1)\n","Requirement already satisfied: jinja2 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from torch~=2.1.1->-r llama.cpp/./requirements/requirements-convert-hf-to-gguf.txt (line 2)) (3.1.3)\n","Requirement already satisfied: fsspec in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from torch~=2.1.1->-r 
llama.cpp/./requirements/requirements-convert-hf-to-gguf.txt (line 2)) (2023.10.0)\n","Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from torch~=2.1.1->-r llama.cpp/./requirements/requirements-convert-hf-to-gguf.txt (line 2)) (12.1.105)\n","Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from torch~=2.1.1->-r llama.cpp/./requirements/requirements-convert-hf-to-gguf.txt (line 2)) (12.1.105)\n","Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from torch~=2.1.1->-r llama.cpp/./requirements/requirements-convert-hf-to-gguf.txt (line 2)) (12.1.105)\n","Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from torch~=2.1.1->-r llama.cpp/./requirements/requirements-convert-hf-to-gguf.txt (line 2)) (8.9.2.26)\n","Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from torch~=2.1.1->-r llama.cpp/./requirements/requirements-convert-hf-to-gguf.txt (line 2)) (12.1.3.1)\n","Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from torch~=2.1.1->-r llama.cpp/./requirements/requirements-convert-hf-to-gguf.txt (line 2)) (11.0.2.54)\n","Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from torch~=2.1.1->-r llama.cpp/./requirements/requirements-convert-hf-to-gguf.txt (line 2)) (10.3.2.106)\n","Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from torch~=2.1.1->-r llama.cpp/./requirements/requirements-convert-hf-to-gguf.txt 
(line 2)) (11.4.5.107)\n","Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from torch~=2.1.1->-r llama.cpp/./requirements/requirements-convert-hf-to-gguf.txt (line 2)) (12.1.0.106)\n","Requirement already satisfied: nvidia-nccl-cu12==2.18.1 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from torch~=2.1.1->-r llama.cpp/./requirements/requirements-convert-hf-to-gguf.txt (line 2)) (2.18.1)\n","Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from torch~=2.1.1->-r llama.cpp/./requirements/requirements-convert-hf-to-gguf.txt (line 2)) (12.1.105)\n","Requirement already satisfied: triton==2.1.0 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from torch~=2.1.1->-r llama.cpp/./requirements/requirements-convert-hf-to-gguf.txt (line 2)) (2.1.0)\n","Requirement already satisfied: nvidia-nvjitlink-cu12 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from nvidia-cusolver-cu12==11.4.5.107->torch~=2.1.1->-r llama.cpp/./requirements/requirements-convert-hf-to-gguf.txt (line 2)) (12.3.101)\n","Requirement already satisfied: MarkupSafe>=2.0 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from jinja2->torch~=2.1.1->-r llama.cpp/./requirements/requirements-convert-hf-to-gguf.txt (line 2)) (2.1.5)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from requests->transformers<5.0.0,>=4.35.2->-r llama.cpp/./requirements/requirements-convert.txt (line 3)) (3.3.2)\n","Requirement already satisfied: idna<4,>=2.5 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from requests->transformers<5.0.0,>=4.35.2->-r llama.cpp/./requirements/requirements-convert.txt (line 3)) (3.6)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in 
/home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from requests->transformers<5.0.0,>=4.35.2->-r llama.cpp/./requirements/requirements-convert.txt (line 3)) (2.2.0)\n","Requirement already satisfied: certifi>=2017.4.17 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from requests->transformers<5.0.0,>=4.35.2->-r llama.cpp/./requirements/requirements-convert.txt (line 3)) (2024.2.2)\n","Requirement already satisfied: mpmath>=0.19 in /home/iot/miniconda3/envs/tensorflow2/lib/python3.11/site-packages (from sympy->torch~=2.1.1->-r llama.cpp/./requirements/requirements-convert-hf-to-gguf.txt (line 2)) (1.3.0)\n","Git LFS initialized.\n","Cloning into 'llama-2-7b-combined_datasets'...\n","remote: Enumerating objects: 18, done.\u001b[K\n","remote: Counting objects: 100% (15/15), done.\u001b[K\n","remote: Compressing objects: 100% (15/15), done.\u001b[K\n","remote: Total 18 (delta 1), reused 0 (delta 0), pack-reused 3\u001b[K\n","Unpacking objects: 100% (18/18), 483.97 KiB | 891.00 KiB/s, done.\n","Filtering content: 100% (4/4), 4.55 GiB | 5.10 MiB/s, done.\n","Encountered 2 file(s) that may not have been copied correctly on Windows:\n","\tmodel-00001-of-00003.safetensors\n","\tmodel-00002-of-00003.safetensors\n","\n","See: `git lfs help smudge` for more details.\n"]}],"source":["# Set up environment and download model for llama.cpp inference.\n","# 1. Define model ID and quantization methods.\n","# 2. Parse model name from MODEL_ID.\n","# 3. Install and build the llama.cpp library with GPU support.\n","# 4. Install Python dependencies from llama.cpp's requirements.\n","# 5. Initialize Git Large File Storage (LFS) for handling large files.\n","# 6. 
Clone the specified model repository from Hugging Face.\n","\n","\n","MODEL_ID = \"ssoh/llama-2-7b-combined_datasets\"\n","QUANTIZATION_METHODS = [\"q5_k_m\"]\n","MODEL_NAME = MODEL_ID.split('/')[-1]\n","\n","\n","# Install and prepare llama.cpp\n","!git clone https://github.com/ggerganov/llama.cpp\n","!cd llama.cpp && git pull && make clean && LLAMA_CUBLAS=1 make\n","!pip install -r llama.cpp/requirements.txt\n","\n","\n","# Initialize Git LFS for large models\n","!git lfs install\n","\n","\n","# Download the model from Hugging Face\n","!git clone https://huggingface.co/{MODEL_ID}"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"UkMoXHX2DOrO"},"outputs":[],"source":["# # Specify the model ID from which to load the tokenizer\n","# model_id = \"meta-llama/Llama-2-7b-chat-hf\"\n","\n","# # Load the tokenizer associated with the specified model ID\n","# tokenizer = AutoTokenizer.from_pretrained(model_id)\n","\n","# # Create a temporary directory to store all downloaded tokenizer files\n","# temp_save_directory = \"temp_tokenizer_files\"\n","# tokenizer.save_pretrained(temp_save_directory)\n","\n","# # Specify the directory where the tokenizer.model file will be saved permanently\n","# MODEL_NAME = \"llama-2-7b-mini-ibased\"\n","# save_directory = MODEL_NAME\n","\n","# # Create the save directory if it does not exist\n","# os.makedirs(save_directory, exist_ok=True)\n","\n","# # Define the specific filename of the tokenizer we want to retain\n","# tokenizer_filename = \"tokenizer.model\"\n","\n","# # Check for the existence of tokenizer.model in the temporary directory\n","# source_file = os.path.join(temp_save_directory, tokenizer_filename)\n","# destination_file = os.path.join(save_directory, tokenizer_filename)\n","\n","# # Copy the tokenizer.model file to the final directory, if it exists\n","# if os.path.exists(source_file):\n","# shutil.copy(source_file, destination_file)\n","# print(f\"tokenizer.model has been saved in {save_directory}\")\n","# 
else:\n","# print(\"No tokenizer.model file found in the downloaded tokenizer files.\")\n","\n","# # Remove the temporary directory to clean up unnecessary files\n","# shutil.rmtree(temp_save_directory)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":159906,"status":"ok","timestamp":1707924525074,"user":{"displayName":"szehanz","userId":"16137883221268059572"},"user_tz":-480},"id":"fD24jJxq7t3k","outputId":"9f3bd35b-2232-4baa-be8a-3a8d9205570a"},"outputs":[{"name":"stdout","output_type":"stream","text":["Loading model file llama-2-7b-combined_datasets/model-00001-of-00003.safetensors\n","Loading model file llama-2-7b-combined_datasets/model-00001-of-00003.safetensors\n","Loading model file llama-2-7b-combined_datasets/model-00002-of-00003.safetensors\n","Loading model file llama-2-7b-combined_datasets/model-00003-of-00003.safetensors\n","params = Params(n_vocab=32000, n_embd=4096, n_layer=32, n_ctx=4096, n_ff=11008, n_head=32, n_head_kv=32, n_experts=None, n_experts_used=None, f_norm_eps=1e-05, rope_scaling_type=None, f_rope_freq_base=10000.0, f_rope_scale=None, n_orig_ctx=None, rope_finetuned=None, ftype=, path_model=PosixPath('llama-2-7b-combined_datasets'))\n","Found vocab files: {'tokenizer.model': PosixPath('llama-2-7b-combined_datasets/tokenizer.model'), 'vocab.json': None, 'tokenizer.json': PosixPath('llama-2-7b-combined_datasets/tokenizer.json')}\n","Loading vocab file 'llama-2-7b-combined_datasets/tokenizer.model', type 'spm'\n","Vocab info: \n","Special vocab info: \n","Permuting layer 0\n","Permuting layer 1\n","Permuting layer 2\n","Permuting layer 3\n","Permuting layer 4\n","Permuting layer 5\n","Permuting layer 6\n","Permuting layer 7\n","Permuting layer 8\n","Permuting layer 9\n","Permuting layer 10\n","Permuting layer 11\n","Permuting layer 12\n","Permuting layer 13\n","Permuting layer 14\n","Permuting layer 15\n","Permuting layer 16\n","Permuting layer 
17\n","Permuting layer 18\n","Permuting layer 19\n","Permuting layer 20\n","Permuting layer 21\n","Permuting layer 22\n","Permuting layer 23\n","Permuting layer 24\n","Permuting layer 25\n","Permuting layer 26\n","Permuting layer 27\n","Permuting layer 28\n","Permuting layer 29\n","Permuting layer 30\n","Permuting layer 31\n","model.embed_tokens.weight -> token_embd.weight | F16 | [32000, 4096]\n","model.layers.0.input_layernorm.weight -> blk.0.attn_norm.weight | F16 | [4096]\n","model.layers.0.mlp.down_proj.weight -> blk.0.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.0.mlp.gate_proj.weight -> blk.0.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.0.mlp.up_proj.weight -> blk.0.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.0.post_attention_layernorm.weight -> blk.0.ffn_norm.weight | F16 | [4096]\n","model.layers.0.self_attn.k_proj.weight -> blk.0.attn_k.weight | F16 | [4096, 4096]\n","model.layers.0.self_attn.o_proj.weight -> blk.0.attn_output.weight | F16 | [4096, 4096]\n","model.layers.0.self_attn.q_proj.weight -> blk.0.attn_q.weight | F16 | [4096, 4096]\n","model.layers.0.self_attn.v_proj.weight -> blk.0.attn_v.weight | F16 | [4096, 4096]\n","model.layers.1.input_layernorm.weight -> blk.1.attn_norm.weight | F16 | [4096]\n","model.layers.1.mlp.down_proj.weight -> blk.1.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.1.mlp.gate_proj.weight -> blk.1.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.1.mlp.up_proj.weight -> blk.1.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.1.post_attention_layernorm.weight -> blk.1.ffn_norm.weight | F16 | [4096]\n","model.layers.1.self_attn.k_proj.weight -> blk.1.attn_k.weight | F16 | [4096, 4096]\n","model.layers.1.self_attn.o_proj.weight -> blk.1.attn_output.weight | F16 | [4096, 4096]\n","model.layers.1.self_attn.q_proj.weight -> blk.1.attn_q.weight | F16 | [4096, 4096]\n","model.layers.1.self_attn.v_proj.weight -> blk.1.attn_v.weight | F16 | [4096, 
4096]\n","model.layers.10.input_layernorm.weight -> blk.10.attn_norm.weight | F16 | [4096]\n","model.layers.10.mlp.down_proj.weight -> blk.10.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.10.mlp.gate_proj.weight -> blk.10.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.10.mlp.up_proj.weight -> blk.10.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.10.post_attention_layernorm.weight -> blk.10.ffn_norm.weight | F16 | [4096]\n","model.layers.10.self_attn.k_proj.weight -> blk.10.attn_k.weight | F16 | [4096, 4096]\n","model.layers.10.self_attn.o_proj.weight -> blk.10.attn_output.weight | F16 | [4096, 4096]\n","model.layers.10.self_attn.q_proj.weight -> blk.10.attn_q.weight | F16 | [4096, 4096]\n","model.layers.10.self_attn.v_proj.weight -> blk.10.attn_v.weight | F16 | [4096, 4096]\n","model.layers.11.mlp.gate_proj.weight -> blk.11.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.11.self_attn.k_proj.weight -> blk.11.attn_k.weight | F16 | [4096, 4096]\n","model.layers.11.self_attn.o_proj.weight -> blk.11.attn_output.weight | F16 | [4096, 4096]\n","model.layers.11.self_attn.q_proj.weight -> blk.11.attn_q.weight | F16 | [4096, 4096]\n","model.layers.11.self_attn.v_proj.weight -> blk.11.attn_v.weight | F16 | [4096, 4096]\n","model.layers.2.input_layernorm.weight -> blk.2.attn_norm.weight | F16 | [4096]\n","model.layers.2.mlp.down_proj.weight -> blk.2.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.2.mlp.gate_proj.weight -> blk.2.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.2.mlp.up_proj.weight -> blk.2.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.2.post_attention_layernorm.weight -> blk.2.ffn_norm.weight | F16 | [4096]\n","model.layers.2.self_attn.k_proj.weight -> blk.2.attn_k.weight | F16 | [4096, 4096]\n","model.layers.2.self_attn.o_proj.weight -> blk.2.attn_output.weight | F16 | [4096, 4096]\n","model.layers.2.self_attn.q_proj.weight -> blk.2.attn_q.weight | F16 | [4096, 4096]\n","model.layers.2.self_attn.v_proj.weight 
-> blk.2.attn_v.weight | F16 | [4096, 4096]\n","model.layers.3.input_layernorm.weight -> blk.3.attn_norm.weight | F16 | [4096]\n","model.layers.3.mlp.down_proj.weight -> blk.3.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.3.mlp.gate_proj.weight -> blk.3.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.3.mlp.up_proj.weight -> blk.3.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.3.post_attention_layernorm.weight -> blk.3.ffn_norm.weight | F16 | [4096]\n","model.layers.3.self_attn.k_proj.weight -> blk.3.attn_k.weight | F16 | [4096, 4096]\n","model.layers.3.self_attn.o_proj.weight -> blk.3.attn_output.weight | F16 | [4096, 4096]\n","model.layers.3.self_attn.q_proj.weight -> blk.3.attn_q.weight | F16 | [4096, 4096]\n","model.layers.3.self_attn.v_proj.weight -> blk.3.attn_v.weight | F16 | [4096, 4096]\n","model.layers.4.input_layernorm.weight -> blk.4.attn_norm.weight | F16 | [4096]\n","model.layers.4.mlp.down_proj.weight -> blk.4.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.4.mlp.gate_proj.weight -> blk.4.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.4.mlp.up_proj.weight -> blk.4.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.4.post_attention_layernorm.weight -> blk.4.ffn_norm.weight | F16 | [4096]\n","model.layers.4.self_attn.k_proj.weight -> blk.4.attn_k.weight | F16 | [4096, 4096]\n","model.layers.4.self_attn.o_proj.weight -> blk.4.attn_output.weight | F16 | [4096, 4096]\n","model.layers.4.self_attn.q_proj.weight -> blk.4.attn_q.weight | F16 | [4096, 4096]\n","model.layers.4.self_attn.v_proj.weight -> blk.4.attn_v.weight | F16 | [4096, 4096]\n","model.layers.5.input_layernorm.weight -> blk.5.attn_norm.weight | F16 | [4096]\n","model.layers.5.mlp.down_proj.weight -> blk.5.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.5.mlp.gate_proj.weight -> blk.5.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.5.mlp.up_proj.weight -> blk.5.ffn_up.weight | F16 | [11008, 
4096]\n","model.layers.5.post_attention_layernorm.weight -> blk.5.ffn_norm.weight | F16 | [4096]\n","model.layers.5.self_attn.k_proj.weight -> blk.5.attn_k.weight | F16 | [4096, 4096]\n","model.layers.5.self_attn.o_proj.weight -> blk.5.attn_output.weight | F16 | [4096, 4096]\n","model.layers.5.self_attn.q_proj.weight -> blk.5.attn_q.weight | F16 | [4096, 4096]\n","model.layers.5.self_attn.v_proj.weight -> blk.5.attn_v.weight | F16 | [4096, 4096]\n","model.layers.6.input_layernorm.weight -> blk.6.attn_norm.weight | F16 | [4096]\n","model.layers.6.mlp.down_proj.weight -> blk.6.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.6.mlp.gate_proj.weight -> blk.6.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.6.mlp.up_proj.weight -> blk.6.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.6.post_attention_layernorm.weight -> blk.6.ffn_norm.weight | F16 | [4096]\n","model.layers.6.self_attn.k_proj.weight -> blk.6.attn_k.weight | F16 | [4096, 4096]\n","model.layers.6.self_attn.o_proj.weight -> blk.6.attn_output.weight | F16 | [4096, 4096]\n","model.layers.6.self_attn.q_proj.weight -> blk.6.attn_q.weight | F16 | [4096, 4096]\n","model.layers.6.self_attn.v_proj.weight -> blk.6.attn_v.weight | F16 | [4096, 4096]\n","model.layers.7.input_layernorm.weight -> blk.7.attn_norm.weight | F16 | [4096]\n","model.layers.7.mlp.down_proj.weight -> blk.7.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.7.mlp.gate_proj.weight -> blk.7.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.7.mlp.up_proj.weight -> blk.7.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.7.post_attention_layernorm.weight -> blk.7.ffn_norm.weight | F16 | [4096]\n","model.layers.7.self_attn.k_proj.weight -> blk.7.attn_k.weight | F16 | [4096, 4096]\n","model.layers.7.self_attn.o_proj.weight -> blk.7.attn_output.weight | F16 | [4096, 4096]\n","model.layers.7.self_attn.q_proj.weight -> blk.7.attn_q.weight | F16 | [4096, 4096]\n","model.layers.7.self_attn.v_proj.weight -> blk.7.attn_v.weight | 
F16 | [4096, 4096]\n","model.layers.8.input_layernorm.weight -> blk.8.attn_norm.weight | F16 | [4096]\n","model.layers.8.mlp.down_proj.weight -> blk.8.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.8.mlp.gate_proj.weight -> blk.8.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.8.mlp.up_proj.weight -> blk.8.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.8.post_attention_layernorm.weight -> blk.8.ffn_norm.weight | F16 | [4096]\n","model.layers.8.self_attn.k_proj.weight -> blk.8.attn_k.weight | F16 | [4096, 4096]\n","model.layers.8.self_attn.o_proj.weight -> blk.8.attn_output.weight | F16 | [4096, 4096]\n","model.layers.8.self_attn.q_proj.weight -> blk.8.attn_q.weight | F16 | [4096, 4096]\n","model.layers.8.self_attn.v_proj.weight -> blk.8.attn_v.weight | F16 | [4096, 4096]\n","model.layers.9.input_layernorm.weight -> blk.9.attn_norm.weight | F16 | [4096]\n","model.layers.9.mlp.down_proj.weight -> blk.9.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.9.mlp.gate_proj.weight -> blk.9.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.9.mlp.up_proj.weight -> blk.9.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.9.post_attention_layernorm.weight -> blk.9.ffn_norm.weight | F16 | [4096]\n","model.layers.9.self_attn.k_proj.weight -> blk.9.attn_k.weight | F16 | [4096, 4096]\n","model.layers.9.self_attn.o_proj.weight -> blk.9.attn_output.weight | F16 | [4096, 4096]\n","model.layers.9.self_attn.q_proj.weight -> blk.9.attn_q.weight | F16 | [4096, 4096]\n","model.layers.9.self_attn.v_proj.weight -> blk.9.attn_v.weight | F16 | [4096, 4096]\n","model.layers.11.input_layernorm.weight -> blk.11.attn_norm.weight | F16 | [4096]\n","model.layers.11.mlp.down_proj.weight -> blk.11.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.11.mlp.up_proj.weight -> blk.11.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.11.post_attention_layernorm.weight -> blk.11.ffn_norm.weight | F16 | [4096]\n","model.layers.12.input_layernorm.weight -> 
blk.12.attn_norm.weight | F16 | [4096]\n","model.layers.12.mlp.down_proj.weight -> blk.12.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.12.mlp.gate_proj.weight -> blk.12.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.12.mlp.up_proj.weight -> blk.12.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.12.post_attention_layernorm.weight -> blk.12.ffn_norm.weight | F16 | [4096]\n","model.layers.12.self_attn.k_proj.weight -> blk.12.attn_k.weight | F16 | [4096, 4096]\n","model.layers.12.self_attn.o_proj.weight -> blk.12.attn_output.weight | F16 | [4096, 4096]\n","model.layers.12.self_attn.q_proj.weight -> blk.12.attn_q.weight | F16 | [4096, 4096]\n","model.layers.12.self_attn.v_proj.weight -> blk.12.attn_v.weight | F16 | [4096, 4096]\n","model.layers.13.input_layernorm.weight -> blk.13.attn_norm.weight | F16 | [4096]\n","model.layers.13.mlp.down_proj.weight -> blk.13.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.13.mlp.gate_proj.weight -> blk.13.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.13.mlp.up_proj.weight -> blk.13.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.13.post_attention_layernorm.weight -> blk.13.ffn_norm.weight | F16 | [4096]\n","model.layers.13.self_attn.k_proj.weight -> blk.13.attn_k.weight | F16 | [4096, 4096]\n","model.layers.13.self_attn.o_proj.weight -> blk.13.attn_output.weight | F16 | [4096, 4096]\n","model.layers.13.self_attn.q_proj.weight -> blk.13.attn_q.weight | F16 | [4096, 4096]\n","model.layers.13.self_attn.v_proj.weight -> blk.13.attn_v.weight | F16 | [4096, 4096]\n","model.layers.14.input_layernorm.weight -> blk.14.attn_norm.weight | F16 | [4096]\n","model.layers.14.mlp.down_proj.weight -> blk.14.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.14.mlp.gate_proj.weight -> blk.14.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.14.mlp.up_proj.weight -> blk.14.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.14.post_attention_layernorm.weight -> blk.14.ffn_norm.weight | F16 | 
[4096]\n","model.layers.14.self_attn.k_proj.weight -> blk.14.attn_k.weight | F16 | [4096, 4096]\n","model.layers.14.self_attn.o_proj.weight -> blk.14.attn_output.weight | F16 | [4096, 4096]\n","model.layers.14.self_attn.q_proj.weight -> blk.14.attn_q.weight | F16 | [4096, 4096]\n","model.layers.14.self_attn.v_proj.weight -> blk.14.attn_v.weight | F16 | [4096, 4096]\n","model.layers.15.input_layernorm.weight -> blk.15.attn_norm.weight | F16 | [4096]\n","model.layers.15.mlp.down_proj.weight -> blk.15.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.15.mlp.gate_proj.weight -> blk.15.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.15.mlp.up_proj.weight -> blk.15.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.15.post_attention_layernorm.weight -> blk.15.ffn_norm.weight | F16 | [4096]\n","model.layers.15.self_attn.k_proj.weight -> blk.15.attn_k.weight | F16 | [4096, 4096]\n","model.layers.15.self_attn.o_proj.weight -> blk.15.attn_output.weight | F16 | [4096, 4096]\n","model.layers.15.self_attn.q_proj.weight -> blk.15.attn_q.weight | F16 | [4096, 4096]\n","model.layers.15.self_attn.v_proj.weight -> blk.15.attn_v.weight | F16 | [4096, 4096]\n","model.layers.16.input_layernorm.weight -> blk.16.attn_norm.weight | F16 | [4096]\n","model.layers.16.mlp.down_proj.weight -> blk.16.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.16.mlp.gate_proj.weight -> blk.16.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.16.mlp.up_proj.weight -> blk.16.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.16.post_attention_layernorm.weight -> blk.16.ffn_norm.weight | F16 | [4096]\n","model.layers.16.self_attn.k_proj.weight -> blk.16.attn_k.weight | F16 | [4096, 4096]\n","model.layers.16.self_attn.o_proj.weight -> blk.16.attn_output.weight | F16 | [4096, 4096]\n","model.layers.16.self_attn.q_proj.weight -> blk.16.attn_q.weight | F16 | [4096, 4096]\n","model.layers.16.self_attn.v_proj.weight -> blk.16.attn_v.weight | F16 | [4096, 
4096]\n","model.layers.17.input_layernorm.weight -> blk.17.attn_norm.weight | F16 | [4096]\n","model.layers.17.mlp.down_proj.weight -> blk.17.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.17.mlp.gate_proj.weight -> blk.17.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.17.mlp.up_proj.weight -> blk.17.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.17.post_attention_layernorm.weight -> blk.17.ffn_norm.weight | F16 | [4096]\n","model.layers.17.self_attn.k_proj.weight -> blk.17.attn_k.weight | F16 | [4096, 4096]\n","model.layers.17.self_attn.o_proj.weight -> blk.17.attn_output.weight | F16 | [4096, 4096]\n","model.layers.17.self_attn.q_proj.weight -> blk.17.attn_q.weight | F16 | [4096, 4096]\n","model.layers.17.self_attn.v_proj.weight -> blk.17.attn_v.weight | F16 | [4096, 4096]\n","model.layers.18.input_layernorm.weight -> blk.18.attn_norm.weight | F16 | [4096]\n","model.layers.18.mlp.down_proj.weight -> blk.18.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.18.mlp.gate_proj.weight -> blk.18.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.18.mlp.up_proj.weight -> blk.18.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.18.post_attention_layernorm.weight -> blk.18.ffn_norm.weight | F16 | [4096]\n","model.layers.18.self_attn.k_proj.weight -> blk.18.attn_k.weight | F16 | [4096, 4096]\n","model.layers.18.self_attn.o_proj.weight -> blk.18.attn_output.weight | F16 | [4096, 4096]\n","model.layers.18.self_attn.q_proj.weight -> blk.18.attn_q.weight | F16 | [4096, 4096]\n","model.layers.18.self_attn.v_proj.weight -> blk.18.attn_v.weight | F16 | [4096, 4096]\n","model.layers.19.input_layernorm.weight -> blk.19.attn_norm.weight | F16 | [4096]\n","model.layers.19.mlp.down_proj.weight -> blk.19.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.19.mlp.gate_proj.weight -> blk.19.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.19.mlp.up_proj.weight -> blk.19.ffn_up.weight | F16 | [11008, 
4096]\n","model.layers.19.post_attention_layernorm.weight -> blk.19.ffn_norm.weight | F16 | [4096]\n","model.layers.19.self_attn.k_proj.weight -> blk.19.attn_k.weight | F16 | [4096, 4096]\n","model.layers.19.self_attn.o_proj.weight -> blk.19.attn_output.weight | F16 | [4096, 4096]\n","model.layers.19.self_attn.q_proj.weight -> blk.19.attn_q.weight | F16 | [4096, 4096]\n","model.layers.19.self_attn.v_proj.weight -> blk.19.attn_v.weight | F16 | [4096, 4096]\n","model.layers.20.input_layernorm.weight -> blk.20.attn_norm.weight | F16 | [4096]\n","model.layers.20.mlp.down_proj.weight -> blk.20.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.20.mlp.gate_proj.weight -> blk.20.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.20.mlp.up_proj.weight -> blk.20.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.20.post_attention_layernorm.weight -> blk.20.ffn_norm.weight | F16 | [4096]\n","model.layers.20.self_attn.k_proj.weight -> blk.20.attn_k.weight | F16 | [4096, 4096]\n","model.layers.20.self_attn.o_proj.weight -> blk.20.attn_output.weight | F16 | [4096, 4096]\n","model.layers.20.self_attn.q_proj.weight -> blk.20.attn_q.weight | F16 | [4096, 4096]\n","model.layers.20.self_attn.v_proj.weight -> blk.20.attn_v.weight | F16 | [4096, 4096]\n","model.layers.21.input_layernorm.weight -> blk.21.attn_norm.weight | F16 | [4096]\n","model.layers.21.mlp.down_proj.weight -> blk.21.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.21.mlp.gate_proj.weight -> blk.21.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.21.mlp.up_proj.weight -> blk.21.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.21.post_attention_layernorm.weight -> blk.21.ffn_norm.weight | F16 | [4096]\n","model.layers.21.self_attn.k_proj.weight -> blk.21.attn_k.weight | F16 | [4096, 4096]\n","model.layers.21.self_attn.o_proj.weight -> blk.21.attn_output.weight | F16 | [4096, 4096]\n","model.layers.21.self_attn.q_proj.weight -> blk.21.attn_q.weight | F16 | [4096, 
4096]\n","model.layers.21.self_attn.v_proj.weight -> blk.21.attn_v.weight | F16 | [4096, 4096]\n","model.layers.22.input_layernorm.weight -> blk.22.attn_norm.weight | F16 | [4096]\n","model.layers.22.mlp.down_proj.weight -> blk.22.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.22.mlp.gate_proj.weight -> blk.22.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.22.mlp.up_proj.weight -> blk.22.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.22.post_attention_layernorm.weight -> blk.22.ffn_norm.weight | F16 | [4096]\n","model.layers.22.self_attn.k_proj.weight -> blk.22.attn_k.weight | F16 | [4096, 4096]\n","model.layers.22.self_attn.o_proj.weight -> blk.22.attn_output.weight | F16 | [4096, 4096]\n","model.layers.22.self_attn.q_proj.weight -> blk.22.attn_q.weight | F16 | [4096, 4096]\n","model.layers.22.self_attn.v_proj.weight -> blk.22.attn_v.weight | F16 | [4096, 4096]\n","model.layers.23.mlp.gate_proj.weight -> blk.23.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.23.mlp.up_proj.weight -> blk.23.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.23.self_attn.k_proj.weight -> blk.23.attn_k.weight | F16 | [4096, 4096]\n","model.layers.23.self_attn.o_proj.weight -> blk.23.attn_output.weight | F16 | [4096, 4096]\n","model.layers.23.self_attn.q_proj.weight -> blk.23.attn_q.weight | F16 | [4096, 4096]\n","model.layers.23.self_attn.v_proj.weight -> blk.23.attn_v.weight | F16 | [4096, 4096]\n","lm_head.weight -> output.weight | F16 | [32000, 4096]\n","model.layers.23.input_layernorm.weight -> blk.23.attn_norm.weight | F16 | [4096]\n","model.layers.23.mlp.down_proj.weight -> blk.23.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.23.post_attention_layernorm.weight -> blk.23.ffn_norm.weight | F16 | [4096]\n","model.layers.24.input_layernorm.weight -> blk.24.attn_norm.weight | F16 | [4096]\n","model.layers.24.mlp.down_proj.weight -> blk.24.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.24.mlp.gate_proj.weight -> blk.24.ffn_gate.weight | 
F16 | [11008, 4096]\n","model.layers.24.mlp.up_proj.weight -> blk.24.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.24.post_attention_layernorm.weight -> blk.24.ffn_norm.weight | F16 | [4096]\n","model.layers.24.self_attn.k_proj.weight -> blk.24.attn_k.weight | F16 | [4096, 4096]\n","model.layers.24.self_attn.o_proj.weight -> blk.24.attn_output.weight | F16 | [4096, 4096]\n","model.layers.24.self_attn.q_proj.weight -> blk.24.attn_q.weight | F16 | [4096, 4096]\n","model.layers.24.self_attn.v_proj.weight -> blk.24.attn_v.weight | F16 | [4096, 4096]\n","model.layers.25.input_layernorm.weight -> blk.25.attn_norm.weight | F16 | [4096]\n","model.layers.25.mlp.down_proj.weight -> blk.25.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.25.mlp.gate_proj.weight -> blk.25.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.25.mlp.up_proj.weight -> blk.25.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.25.post_attention_layernorm.weight -> blk.25.ffn_norm.weight | F16 | [4096]\n","model.layers.25.self_attn.k_proj.weight -> blk.25.attn_k.weight | F16 | [4096, 4096]\n","model.layers.25.self_attn.o_proj.weight -> blk.25.attn_output.weight | F16 | [4096, 4096]\n","model.layers.25.self_attn.q_proj.weight -> blk.25.attn_q.weight | F16 | [4096, 4096]\n","model.layers.25.self_attn.v_proj.weight -> blk.25.attn_v.weight | F16 | [4096, 4096]\n","model.layers.26.input_layernorm.weight -> blk.26.attn_norm.weight | F16 | [4096]\n","model.layers.26.mlp.down_proj.weight -> blk.26.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.26.mlp.gate_proj.weight -> blk.26.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.26.mlp.up_proj.weight -> blk.26.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.26.post_attention_layernorm.weight -> blk.26.ffn_norm.weight | F16 | [4096]\n","model.layers.26.self_attn.k_proj.weight -> blk.26.attn_k.weight | F16 | [4096, 4096]\n","model.layers.26.self_attn.o_proj.weight -> blk.26.attn_output.weight | F16 | [4096, 
4096]\n","model.layers.26.self_attn.q_proj.weight -> blk.26.attn_q.weight | F16 | [4096, 4096]\n","model.layers.26.self_attn.v_proj.weight -> blk.26.attn_v.weight | F16 | [4096, 4096]\n","model.layers.27.input_layernorm.weight -> blk.27.attn_norm.weight | F16 | [4096]\n","model.layers.27.mlp.down_proj.weight -> blk.27.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.27.mlp.gate_proj.weight -> blk.27.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.27.mlp.up_proj.weight -> blk.27.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.27.post_attention_layernorm.weight -> blk.27.ffn_norm.weight | F16 | [4096]\n","model.layers.27.self_attn.k_proj.weight -> blk.27.attn_k.weight | F16 | [4096, 4096]\n","model.layers.27.self_attn.o_proj.weight -> blk.27.attn_output.weight | F16 | [4096, 4096]\n","model.layers.27.self_attn.q_proj.weight -> blk.27.attn_q.weight | F16 | [4096, 4096]\n","model.layers.27.self_attn.v_proj.weight -> blk.27.attn_v.weight | F16 | [4096, 4096]\n","model.layers.28.input_layernorm.weight -> blk.28.attn_norm.weight | F16 | [4096]\n","model.layers.28.mlp.down_proj.weight -> blk.28.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.28.mlp.gate_proj.weight -> blk.28.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.28.mlp.up_proj.weight -> blk.28.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.28.post_attention_layernorm.weight -> blk.28.ffn_norm.weight | F16 | [4096]\n","model.layers.28.self_attn.k_proj.weight -> blk.28.attn_k.weight | F16 | [4096, 4096]\n","model.layers.28.self_attn.o_proj.weight -> blk.28.attn_output.weight | F16 | [4096, 4096]\n","model.layers.28.self_attn.q_proj.weight -> blk.28.attn_q.weight | F16 | [4096, 4096]\n","model.layers.28.self_attn.v_proj.weight -> blk.28.attn_v.weight | F16 | [4096, 4096]\n","model.layers.29.input_layernorm.weight -> blk.29.attn_norm.weight | F16 | [4096]\n","model.layers.29.mlp.down_proj.weight -> blk.29.ffn_down.weight | F16 | [4096, 
11008]\n","model.layers.29.mlp.gate_proj.weight -> blk.29.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.29.mlp.up_proj.weight -> blk.29.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.29.post_attention_layernorm.weight -> blk.29.ffn_norm.weight | F16 | [4096]\n","model.layers.29.self_attn.k_proj.weight -> blk.29.attn_k.weight | F16 | [4096, 4096]\n","model.layers.29.self_attn.o_proj.weight -> blk.29.attn_output.weight | F16 | [4096, 4096]\n","model.layers.29.self_attn.q_proj.weight -> blk.29.attn_q.weight | F16 | [4096, 4096]\n","model.layers.29.self_attn.v_proj.weight -> blk.29.attn_v.weight | F16 | [4096, 4096]\n","model.layers.30.input_layernorm.weight -> blk.30.attn_norm.weight | F16 | [4096]\n","model.layers.30.mlp.down_proj.weight -> blk.30.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.30.mlp.gate_proj.weight -> blk.30.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.30.mlp.up_proj.weight -> blk.30.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.30.post_attention_layernorm.weight -> blk.30.ffn_norm.weight | F16 | [4096]\n","model.layers.30.self_attn.k_proj.weight -> blk.30.attn_k.weight | F16 | [4096, 4096]\n","model.layers.30.self_attn.o_proj.weight -> blk.30.attn_output.weight | F16 | [4096, 4096]\n","model.layers.30.self_attn.q_proj.weight -> blk.30.attn_q.weight | F16 | [4096, 4096]\n","model.layers.30.self_attn.v_proj.weight -> blk.30.attn_v.weight | F16 | [4096, 4096]\n","model.layers.31.input_layernorm.weight -> blk.31.attn_norm.weight | F16 | [4096]\n","model.layers.31.mlp.down_proj.weight -> blk.31.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.31.mlp.gate_proj.weight -> blk.31.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.31.mlp.up_proj.weight -> blk.31.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.31.post_attention_layernorm.weight -> blk.31.ffn_norm.weight | F16 | [4096]\n","model.layers.31.self_attn.k_proj.weight -> blk.31.attn_k.weight | F16 | [4096, 
4096]\n","model.layers.31.self_attn.o_proj.weight -> blk.31.attn_output.weight | F16 | [4096, 4096]\n","model.layers.31.self_attn.q_proj.weight -> blk.31.attn_q.weight | F16 | [4096, 4096]\n","model.layers.31.self_attn.v_proj.weight -> blk.31.attn_v.weight | F16 | [4096, 4096]\n","model.norm.weight -> output_norm.weight | F16 | [4096]\n","Writing llama-2-7b-combined_datasets/llama-2-7b-combined_datasets.fp16.bin, format 1\n","Ignoring added_tokens.json since model matches vocab size without it.\n","gguf: This GGUF file is for Little Endian only\n","gguf: Setting special token type bos to 1\n","gguf: Setting special token type eos to 2\n","gguf: Setting special token type unk to 0\n","gguf: Setting special token type pad to 2\n","gguf: Setting add_bos_token to True\n","gguf: Setting add_eos_token to False\n","gguf: Setting chat_template to {% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}\n","[ 1/291] Writing tensor token_embd.weight | size 32000 x 4096 | type F16 | T+ 0\n","[ 2/291] Writing tensor blk.0.attn_norm.weight | size 4096 | type F32 | T+ 0\n","[ 3/291] Writing tensor blk.0.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 0\n","[ 4/291] Writing tensor blk.0.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 
0\n","[ 5/291] Writing tensor blk.0.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 0\n","[ 6/291] Writing tensor blk.0.ffn_norm.weight | size 4096 | type F32 | T+ 0\n","[ 7/291] Writing tensor blk.0.attn_k.weight | size 4096 x 4096 | type F16 | T+ 0\n","[ 8/291] Writing tensor blk.0.attn_output.weight | size 4096 x 4096 | type F16 | T+ 0\n","[ 9/291] Writing tensor blk.0.attn_q.weight | size 4096 x 4096 | type F16 | T+ 0\n","[ 10/291] Writing tensor blk.0.attn_v.weight | size 4096 x 4096 | type F16 | T+ 0\n","[ 11/291] Writing tensor blk.1.attn_norm.weight | size 4096 | type F32 | T+ 0\n","[ 12/291] Writing tensor blk.1.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 0\n","[ 13/291] Writing tensor blk.1.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 0\n","[ 14/291] Writing tensor blk.1.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 0\n","[ 15/291] Writing tensor blk.1.ffn_norm.weight | size 4096 | type F32 | T+ 0\n","[ 16/291] Writing tensor blk.1.attn_k.weight | size 4096 x 4096 | type F16 | T+ 0\n","[ 17/291] Writing tensor blk.1.attn_output.weight | size 4096 x 4096 | type F16 | T+ 0\n","[ 18/291] Writing tensor blk.1.attn_q.weight | size 4096 x 4096 | type F16 | T+ 0\n","[ 19/291] Writing tensor blk.1.attn_v.weight | size 4096 x 4096 | type F16 | T+ 0\n","[ 20/291] Writing tensor blk.10.attn_norm.weight | size 4096 | type F32 | T+ 0\n","[ 21/291] Writing tensor blk.10.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 0\n","[ 22/291] Writing tensor blk.10.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 0\n","[ 23/291] Writing tensor blk.10.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 0\n","[ 24/291] Writing tensor blk.10.ffn_norm.weight | size 4096 | type F32 | T+ 0\n","[ 25/291] Writing tensor blk.10.attn_k.weight | size 4096 x 4096 | type F16 | T+ 0\n","[ 26/291] Writing tensor blk.10.attn_output.weight | size 4096 x 4096 | type F16 | T+ 0\n","[ 27/291] Writing tensor blk.10.attn_q.weight | size 4096 x 4096 | type F16 | T+ 0\n","[ 
28/291] Writing tensor blk.10.attn_v.weight | size 4096 x 4096 | type F16 | T+ 0\n","[ 29/291] Writing tensor blk.11.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 0\n","[ 30/291] Writing tensor blk.11.attn_k.weight | size 4096 x 4096 | type F16 | T+ 0\n","[ 31/291] Writing tensor blk.11.attn_output.weight | size 4096 x 4096 | type F16 | T+ 0\n","[ 32/291] Writing tensor blk.11.attn_q.weight | size 4096 x 4096 | type F16 | T+ 0\n","[ 33/291] Writing tensor blk.11.attn_v.weight | size 4096 x 4096 | type F16 | T+ 0\n","[ 34/291] Writing tensor blk.2.attn_norm.weight | size 4096 | type F32 | T+ 0\n","[ 35/291] Writing tensor blk.2.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 0\n","[ 36/291] Writing tensor blk.2.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 0\n","[ 37/291] Writing tensor blk.2.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 1\n","[ 38/291] Writing tensor blk.2.ffn_norm.weight | size 4096 | type F32 | T+ 1\n","[ 39/291] Writing tensor blk.2.attn_k.weight | size 4096 x 4096 | type F16 | T+ 1\n","[ 40/291] Writing tensor blk.2.attn_output.weight | size 4096 x 4096 | type F16 | T+ 1\n","[ 41/291] Writing tensor blk.2.attn_q.weight | size 4096 x 4096 | type F16 | T+ 1\n","[ 42/291] Writing tensor blk.2.attn_v.weight | size 4096 x 4096 | type F16 | T+ 1\n","[ 43/291] Writing tensor blk.3.attn_norm.weight | size 4096 | type F32 | T+ 1\n","[ 44/291] Writing tensor blk.3.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 1\n","[ 45/291] Writing tensor blk.3.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 1\n","[ 46/291] Writing tensor blk.3.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 1\n","[ 47/291] Writing tensor blk.3.ffn_norm.weight | size 4096 | type F32 | T+ 1\n","[ 48/291] Writing tensor blk.3.attn_k.weight | size 4096 x 4096 | type F16 | T+ 1\n","[ 49/291] Writing tensor blk.3.attn_output.weight | size 4096 x 4096 | type F16 | T+ 1\n","[ 50/291] Writing tensor blk.3.attn_q.weight | size 4096 x 4096 | type F16 | T+ 1\n","[ 
51/291] Writing tensor blk.3.attn_v.weight | size 4096 x 4096 | type F16 | T+ 1\n","[ 52/291] Writing tensor blk.4.attn_norm.weight | size 4096 | type F32 | T+ 1\n","[ 53/291] Writing tensor blk.4.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 1\n","[ 54/291] Writing tensor blk.4.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 1\n","[ 55/291] Writing tensor blk.4.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 1\n","[ 56/291] Writing tensor blk.4.ffn_norm.weight | size 4096 | type F32 | T+ 1\n","[ 57/291] Writing tensor blk.4.attn_k.weight | size 4096 x 4096 | type F16 | T+ 1\n","[ 58/291] Writing tensor blk.4.attn_output.weight | size 4096 x 4096 | type F16 | T+ 1\n","[ 59/291] Writing tensor blk.4.attn_q.weight | size 4096 x 4096 | type F16 | T+ 1\n","[ 60/291] Writing tensor blk.4.attn_v.weight | size 4096 x 4096 | type F16 | T+ 1\n","[ 61/291] Writing tensor blk.5.attn_norm.weight | size 4096 | type F32 | T+ 1\n","[ 62/291] Writing tensor blk.5.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 1\n","[ 63/291] Writing tensor blk.5.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 1\n","[ 64/291] Writing tensor blk.5.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 1\n","[ 65/291] Writing tensor blk.5.ffn_norm.weight | size 4096 | type F32 | T+ 1\n","[ 66/291] Writing tensor blk.5.attn_k.weight | size 4096 x 4096 | type F16 | T+ 1\n","[ 67/291] Writing tensor blk.5.attn_output.weight | size 4096 x 4096 | type F16 | T+ 1\n","[ 68/291] Writing tensor blk.5.attn_q.weight | size 4096 x 4096 | type F16 | T+ 1\n","[ 69/291] Writing tensor blk.5.attn_v.weight | size 4096 x 4096 | type F16 | T+ 1\n","[ 70/291] Writing tensor blk.6.attn_norm.weight | size 4096 | type F32 | T+ 1\n","[ 71/291] Writing tensor blk.6.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 1\n","[ 72/291] Writing tensor blk.6.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 1\n","[ 73/291] Writing tensor blk.6.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 1\n","[ 74/291] 
Writing tensor blk.6.ffn_norm.weight | size 4096 | type F32 | T+ 1\n","[ 75/291] Writing tensor blk.6.attn_k.weight | size 4096 x 4096 | type F16 | T+ 1\n","[ 76/291] Writing tensor blk.6.attn_output.weight | size 4096 x 4096 | type F16 | T+ 1\n","[ 77/291] Writing tensor blk.6.attn_q.weight | size 4096 x 4096 | type F16 | T+ 1\n","[ 78/291] Writing tensor blk.6.attn_v.weight | size 4096 x 4096 | type F16 | T+ 1\n","[ 79/291] Writing tensor blk.7.attn_norm.weight | size 4096 | type F32 | T+ 1\n","[ 80/291] Writing tensor blk.7.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 1\n","[ 81/291] Writing tensor blk.7.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 2\n","[ 82/291] Writing tensor blk.7.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 2\n","[ 83/291] Writing tensor blk.7.ffn_norm.weight | size 4096 | type F32 | T+ 2\n","[ 84/291] Writing tensor blk.7.attn_k.weight | size 4096 x 4096 | type F16 | T+ 2\n","[ 85/291] Writing tensor blk.7.attn_output.weight | size 4096 x 4096 | type F16 | T+ 2\n","[ 86/291] Writing tensor blk.7.attn_q.weight | size 4096 x 4096 | type F16 | T+ 2\n","[ 87/291] Writing tensor blk.7.attn_v.weight | size 4096 x 4096 | type F16 | T+ 2\n","[ 88/291] Writing tensor blk.8.attn_norm.weight | size 4096 | type F32 | T+ 2\n","[ 89/291] Writing tensor blk.8.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 2\n","[ 90/291] Writing tensor blk.8.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 2\n","[ 91/291] Writing tensor blk.8.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 2\n","[ 92/291] Writing tensor blk.8.ffn_norm.weight | size 4096 | type F32 | T+ 2\n","[ 93/291] Writing tensor blk.8.attn_k.weight | size 4096 x 4096 | type F16 | T+ 2\n","[ 94/291] Writing tensor blk.8.attn_output.weight | size 4096 x 4096 | type F16 | T+ 2\n","[ 95/291] Writing tensor blk.8.attn_q.weight | size 4096 x 4096 | type F16 | T+ 2\n","[ 96/291] Writing tensor blk.8.attn_v.weight | size 4096 x 4096 | type F16 | T+ 2\n","[ 97/291] Writing tensor 
blk.9.attn_norm.weight | size 4096 | type F32 | T+ 2\n","[ 98/291] Writing tensor blk.9.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 2\n","[ 99/291] Writing tensor blk.9.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 2\n","[100/291] Writing tensor blk.9.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 2\n","[101/291] Writing tensor blk.9.ffn_norm.weight | size 4096 | type F32 | T+ 2\n","[102/291] Writing tensor blk.9.attn_k.weight | size 4096 x 4096 | type F16 | T+ 2\n","[103/291] Writing tensor blk.9.attn_output.weight | size 4096 x 4096 | type F16 | T+ 2\n","[104/291] Writing tensor blk.9.attn_q.weight | size 4096 x 4096 | type F16 | T+ 2\n","[105/291] Writing tensor blk.9.attn_v.weight | size 4096 x 4096 | type F16 | T+ 2\n","[106/291] Writing tensor blk.11.attn_norm.weight | size 4096 | type F32 | T+ 2\n","[107/291] Writing tensor blk.11.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 2\n","[108/291] Writing tensor blk.11.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 2\n","[109/291] Writing tensor blk.11.ffn_norm.weight | size 4096 | type F32 | T+ 2\n","[110/291] Writing tensor blk.12.attn_norm.weight | size 4096 | type F32 | T+ 2\n","[111/291] Writing tensor blk.12.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 2\n","[112/291] Writing tensor blk.12.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 2\n","[113/291] Writing tensor blk.12.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 2\n","[114/291] Writing tensor blk.12.ffn_norm.weight | size 4096 | type F32 | T+ 2\n","[115/291] Writing tensor blk.12.attn_k.weight | size 4096 x 4096 | type F16 | T+ 2\n","[116/291] Writing tensor blk.12.attn_output.weight | size 4096 x 4096 | type F16 | T+ 2\n","[117/291] Writing tensor blk.12.attn_q.weight | size 4096 x 4096 | type F16 | T+ 2\n","[118/291] Writing tensor blk.12.attn_v.weight | size 4096 x 4096 | type F16 | T+ 3\n","[119/291] Writing tensor blk.13.attn_norm.weight | size 4096 | type F32 | T+ 3\n","[120/291] Writing tensor 
blk.13.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 3\n","[121/291] Writing tensor blk.13.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 3\n","[122/291] Writing tensor blk.13.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 3\n","[123/291] Writing tensor blk.13.ffn_norm.weight | size 4096 | type F32 | T+ 3\n","[124/291] Writing tensor blk.13.attn_k.weight | size 4096 x 4096 | type F16 | T+ 3\n","[125/291] Writing tensor blk.13.attn_output.weight | size 4096 x 4096 | type F16 | T+ 3\n","[126/291] Writing tensor blk.13.attn_q.weight | size 4096 x 4096 | type F16 | T+ 3\n","[127/291] Writing tensor blk.13.attn_v.weight | size 4096 x 4096 | type F16 | T+ 3\n","[128/291] Writing tensor blk.14.attn_norm.weight | size 4096 | type F32 | T+ 3\n","[129/291] Writing tensor blk.14.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 3\n","[130/291] Writing tensor blk.14.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 3\n","[131/291] Writing tensor blk.14.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 3\n","[132/291] Writing tensor blk.14.ffn_norm.weight | size 4096 | type F32 | T+ 3\n","[133/291] Writing tensor blk.14.attn_k.weight | size 4096 x 4096 | type F16 | T+ 3\n","[134/291] Writing tensor blk.14.attn_output.weight | size 4096 x 4096 | type F16 | T+ 3\n","[135/291] Writing tensor blk.14.attn_q.weight | size 4096 x 4096 | type F16 | T+ 3\n","[136/291] Writing tensor blk.14.attn_v.weight | size 4096 x 4096 | type F16 | T+ 3\n","[137/291] Writing tensor blk.15.attn_norm.weight | size 4096 | type F32 | T+ 3\n","[138/291] Writing tensor blk.15.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 3\n","[139/291] Writing tensor blk.15.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 3\n","[140/291] Writing tensor blk.15.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 3\n","[141/291] Writing tensor blk.15.ffn_norm.weight | size 4096 | type F32 | T+ 3\n","[142/291] Writing tensor blk.15.attn_k.weight | size 4096 x 4096 | type F16 | T+ 3\n","[143/291] 
Writing tensor blk.15.attn_output.weight | size 4096 x 4096 | type F16 | T+ 3\n","[144/291] Writing tensor blk.15.attn_q.weight | size 4096 x 4096 | type F16 | T+ 3\n","[145/291] Writing tensor blk.15.attn_v.weight | size 4096 x 4096 | type F16 | T+ 3\n","[146/291] Writing tensor blk.16.attn_norm.weight | size 4096 | type F32 | T+ 3\n","[147/291] Writing tensor blk.16.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 3\n","[148/291] Writing tensor blk.16.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 3\n","[149/291] Writing tensor blk.16.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 3\n","[150/291] Writing tensor blk.16.ffn_norm.weight | size 4096 | type F32 | T+ 3\n","[151/291] Writing tensor blk.16.attn_k.weight | size 4096 x 4096 | type F16 | T+ 3\n","[152/291] Writing tensor blk.16.attn_output.weight | size 4096 x 4096 | type F16 | T+ 3\n","[153/291] Writing tensor blk.16.attn_q.weight | size 4096 x 4096 | type F16 | T+ 3\n","[154/291] Writing tensor blk.16.attn_v.weight | size 4096 x 4096 | type F16 | T+ 3\n","[155/291] Writing tensor blk.17.attn_norm.weight | size 4096 | type F32 | T+ 3\n","[156/291] Writing tensor blk.17.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 3\n","[157/291] Writing tensor blk.17.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 4\n","[158/291] Writing tensor blk.17.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 4\n","[159/291] Writing tensor blk.17.ffn_norm.weight | size 4096 | type F32 | T+ 4\n","[160/291] Writing tensor blk.17.attn_k.weight | size 4096 x 4096 | type F16 | T+ 4\n","[161/291] Writing tensor blk.17.attn_output.weight | size 4096 x 4096 | type F16 | T+ 4\n","[162/291] Writing tensor blk.17.attn_q.weight | size 4096 x 4096 | type F16 | T+ 4\n","[163/291] Writing tensor blk.17.attn_v.weight | size 4096 x 4096 | type F16 | T+ 4\n","[164/291] Writing tensor blk.18.attn_norm.weight | size 4096 | type F32 | T+ 4\n","[165/291] Writing tensor blk.18.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 
4\n","[166/291] Writing tensor blk.18.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 4\n","[167/291] Writing tensor blk.18.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 4\n","[168/291] Writing tensor blk.18.ffn_norm.weight | size 4096 | type F32 | T+ 4\n","[169/291] Writing tensor blk.18.attn_k.weight | size 4096 x 4096 | type F16 | T+ 4\n","[170/291] Writing tensor blk.18.attn_output.weight | size 4096 x 4096 | type F16 | T+ 4\n","[171/291] Writing tensor blk.18.attn_q.weight | size 4096 x 4096 | type F16 | T+ 4\n","[172/291] Writing tensor blk.18.attn_v.weight | size 4096 x 4096 | type F16 | T+ 4\n","[173/291] Writing tensor blk.19.attn_norm.weight | size 4096 | type F32 | T+ 4\n","[174/291] Writing tensor blk.19.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 4\n","[175/291] Writing tensor blk.19.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 4\n","[176/291] Writing tensor blk.19.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 4\n","[177/291] Writing tensor blk.19.ffn_norm.weight | size 4096 | type F32 | T+ 4\n","[178/291] Writing tensor blk.19.attn_k.weight | size 4096 x 4096 | type F16 | T+ 4\n","[179/291] Writing tensor blk.19.attn_output.weight | size 4096 x 4096 | type F16 | T+ 4\n","[180/291] Writing tensor blk.19.attn_q.weight | size 4096 x 4096 | type F16 | T+ 4\n","[181/291] Writing tensor blk.19.attn_v.weight | size 4096 x 4096 | type F16 | T+ 4\n","[182/291] Writing tensor blk.20.attn_norm.weight | size 4096 | type F32 | T+ 4\n","[183/291] Writing tensor blk.20.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 4\n","[184/291] Writing tensor blk.20.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 4\n","[185/291] Writing tensor blk.20.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 4\n","[186/291] Writing tensor blk.20.ffn_norm.weight | size 4096 | type F32 | T+ 4\n","[187/291] Writing tensor blk.20.attn_k.weight | size 4096 x 4096 | type F16 | T+ 4\n","[188/291] Writing tensor blk.20.attn_output.weight | size 4096 x 4096 | 
type F16 | T+ 4\n","[189/291] Writing tensor blk.20.attn_q.weight | size 4096 x 4096 | type F16 | T+ 4\n","[190/291] Writing tensor blk.20.attn_v.weight | size 4096 x 4096 | type F16 | T+ 4\n","[191/291] Writing tensor blk.21.attn_norm.weight | size 4096 | type F32 | T+ 4\n","[192/291] Writing tensor blk.21.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 4\n","[193/291] Writing tensor blk.21.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 4\n","[194/291] Writing tensor blk.21.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 5\n","[195/291] Writing tensor blk.21.ffn_norm.weight | size 4096 | type F32 | T+ 5\n","[196/291] Writing tensor blk.21.attn_k.weight | size 4096 x 4096 | type F16 | T+ 5\n","[197/291] Writing tensor blk.21.attn_output.weight | size 4096 x 4096 | type F16 | T+ 5\n","[198/291] Writing tensor blk.21.attn_q.weight | size 4096 x 4096 | type F16 | T+ 5\n","[199/291] Writing tensor blk.21.attn_v.weight | size 4096 x 4096 | type F16 | T+ 5\n","[200/291] Writing tensor blk.22.attn_norm.weight | size 4096 | type F32 | T+ 5\n","[201/291] Writing tensor blk.22.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 5\n","[202/291] Writing tensor blk.22.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 5\n","[203/291] Writing tensor blk.22.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 5\n","[204/291] Writing tensor blk.22.ffn_norm.weight | size 4096 | type F32 | T+ 5\n","[205/291] Writing tensor blk.22.attn_k.weight | size 4096 x 4096 | type F16 | T+ 5\n","[206/291] Writing tensor blk.22.attn_output.weight | size 4096 x 4096 | type F16 | T+ 5\n","[207/291] Writing tensor blk.22.attn_q.weight | size 4096 x 4096 | type F16 | T+ 5\n","[208/291] Writing tensor blk.22.attn_v.weight | size 4096 x 4096 | type F16 | T+ 5\n","[209/291] Writing tensor blk.23.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 5\n","[210/291] Writing tensor blk.23.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 5\n","[211/291] Writing tensor blk.23.attn_k.weight | size 
4096 x 4096 | type F16 | T+ 5\n","[212/291] Writing tensor blk.23.attn_output.weight | size 4096 x 4096 | type F16 | T+ 5\n","[213/291] Writing tensor blk.23.attn_q.weight | size 4096 x 4096 | type F16 | T+ 5\n","[214/291] Writing tensor blk.23.attn_v.weight | size 4096 x 4096 | type F16 | T+ 5\n","[215/291] Writing tensor output.weight | size 32000 x 4096 | type F16 | T+ 5\n","[216/291] Writing tensor blk.23.attn_norm.weight | size 4096 | type F32 | T+ 5\n","[217/291] Writing tensor blk.23.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 5\n","[218/291] Writing tensor blk.23.ffn_norm.weight | size 4096 | type F32 | T+ 5\n","[219/291] Writing tensor blk.24.attn_norm.weight | size 4096 | type F32 | T+ 5\n","[220/291] Writing tensor blk.24.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 5\n","[221/291] Writing tensor blk.24.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 5\n","[222/291] Writing tensor blk.24.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 5\n","[223/291] Writing tensor blk.24.ffn_norm.weight | size 4096 | type F32 | T+ 6\n","[224/291] Writing tensor blk.24.attn_k.weight | size 4096 x 4096 | type F16 | T+ 6\n","[225/291] Writing tensor blk.24.attn_output.weight | size 4096 x 4096 | type F16 | T+ 6\n","[226/291] Writing tensor blk.24.attn_q.weight | size 4096 x 4096 | type F16 | T+ 6\n","[227/291] Writing tensor blk.24.attn_v.weight | size 4096 x 4096 | type F16 | T+ 6\n","[228/291] Writing tensor blk.25.attn_norm.weight | size 4096 | type F32 | T+ 6\n","[229/291] Writing tensor blk.25.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 6\n","[230/291] Writing tensor blk.25.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 6\n","[231/291] Writing tensor blk.25.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 6\n","[232/291] Writing tensor blk.25.ffn_norm.weight | size 4096 | type F32 | T+ 6\n","[233/291] Writing tensor blk.25.attn_k.weight | size 4096 x 4096 | type F16 | T+ 6\n","[234/291] Writing tensor blk.25.attn_output.weight | size 
4096 x 4096 | type F16 | T+ 6\n","[235/291] Writing tensor blk.25.attn_q.weight | size 4096 x 4096 | type F16 | T+ 6\n","[236/291] Writing tensor blk.25.attn_v.weight | size 4096 x 4096 | type F16 | T+ 6\n","[237/291] Writing tensor blk.26.attn_norm.weight | size 4096 | type F32 | T+ 6\n","[238/291] Writing tensor blk.26.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 6\n","[239/291] Writing tensor blk.26.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 6\n","[240/291] Writing tensor blk.26.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 6\n","[241/291] Writing tensor blk.26.ffn_norm.weight | size 4096 | type F32 | T+ 6\n","[242/291] Writing tensor blk.26.attn_k.weight | size 4096 x 4096 | type F16 | T+ 6\n","[243/291] Writing tensor blk.26.attn_output.weight | size 4096 x 4096 | type F16 | T+ 6\n","[244/291] Writing tensor blk.26.attn_q.weight | size 4096 x 4096 | type F16 | T+ 6\n","[245/291] Writing tensor blk.26.attn_v.weight | size 4096 x 4096 | type F16 | T+ 6\n","[246/291] Writing tensor blk.27.attn_norm.weight | size 4096 | type F32 | T+ 6\n","[247/291] Writing tensor blk.27.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 6\n","[248/291] Writing tensor blk.27.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 6\n","[249/291] Writing tensor blk.27.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 6\n","[250/291] Writing tensor blk.27.ffn_norm.weight | size 4096 | type F32 | T+ 6\n","[251/291] Writing tensor blk.27.attn_k.weight | size 4096 x 4096 | type F16 | T+ 6\n","[252/291] Writing tensor blk.27.attn_output.weight | size 4096 x 4096 | type F16 | T+ 6\n","[253/291] Writing tensor blk.27.attn_q.weight | size 4096 x 4096 | type F16 | T+ 6\n","[254/291] Writing tensor blk.27.attn_v.weight | size 4096 x 4096 | type F16 | T+ 6\n","[255/291] Writing tensor blk.28.attn_norm.weight | size 4096 | type F32 | T+ 6\n","[256/291] Writing tensor blk.28.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 6\n","[257/291] Writing tensor 
blk.28.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 6\n","[258/291] Writing tensor blk.28.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 6\n","[259/291] Writing tensor blk.28.ffn_norm.weight | size 4096 | type F32 | T+ 6\n","[260/291] Writing tensor blk.28.attn_k.weight | size 4096 x 4096 | type F16 | T+ 6\n","[261/291] Writing tensor blk.28.attn_output.weight | size 4096 x 4096 | type F16 | T+ 6\n","[262/291] Writing tensor blk.28.attn_q.weight | size 4096 x 4096 | type F16 | T+ 6\n","[263/291] Writing tensor blk.28.attn_v.weight | size 4096 x 4096 | type F16 | T+ 6\n","[264/291] Writing tensor blk.29.attn_norm.weight | size 4096 | type F32 | T+ 7\n","[265/291] Writing tensor blk.29.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 7\n","[266/291] Writing tensor blk.29.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 7\n","[267/291] Writing tensor blk.29.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 7\n","[268/291] Writing tensor blk.29.ffn_norm.weight | size 4096 | type F32 | T+ 7\n","[269/291] Writing tensor blk.29.attn_k.weight | size 4096 x 4096 | type F16 | T+ 7\n","[270/291] Writing tensor blk.29.attn_output.weight | size 4096 x 4096 | type F16 | T+ 7\n","[271/291] Writing tensor blk.29.attn_q.weight | size 4096 x 4096 | type F16 | T+ 7\n","[272/291] Writing tensor blk.29.attn_v.weight | size 4096 x 4096 | type F16 | T+ 7\n","[273/291] Writing tensor blk.30.attn_norm.weight | size 4096 | type F32 | T+ 7\n","[274/291] Writing tensor blk.30.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 7\n","[275/291] Writing tensor blk.30.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 7\n","[276/291] Writing tensor blk.30.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 7\n","[277/291] Writing tensor blk.30.ffn_norm.weight | size 4096 | type F32 | T+ 7\n","[278/291] Writing tensor blk.30.attn_k.weight | size 4096 x 4096 | type F16 | T+ 7\n","[279/291] Writing tensor blk.30.attn_output.weight | size 4096 x 4096 | type F16 | T+ 7\n","[280/291] 
Writing tensor blk.30.attn_q.weight | size 4096 x 4096 | type F16 | T+ 7\n","[281/291] Writing tensor blk.30.attn_v.weight | size 4096 x 4096 | type F16 | T+ 7\n","[282/291] Writing tensor blk.31.attn_norm.weight | size 4096 | type F32 | T+ 7\n","[283/291] Writing tensor blk.31.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 7\n","[284/291] Writing tensor blk.31.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 7\n","[285/291] Writing tensor blk.31.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 7\n","[286/291] Writing tensor blk.31.ffn_norm.weight | size 4096 | type F32 | T+ 7\n","[287/291] Writing tensor blk.31.attn_k.weight | size 4096 x 4096 | type F16 | T+ 7\n","[288/291] Writing tensor blk.31.attn_output.weight | size 4096 x 4096 | type F16 | T+ 7\n","[289/291] Writing tensor blk.31.attn_q.weight | size 4096 x 4096 | type F16 | T+ 7\n","[290/291] Writing tensor blk.31.attn_v.weight | size 4096 x 4096 | type F16 | T+ 7\n","[291/291] Writing tensor output_norm.weight | size 4096 | type F32 | T+ 7\n","Wrote llama-2-7b-combined_datasets/llama-2-7b-combined_datasets.fp16.bin\n"]}],"source":["# Convert to fp16\n","fp16 = f\"{MODEL_NAME}/{MODEL_NAME.lower()}.fp16.bin\"\n","!python llama.cpp/convert.py {MODEL_NAME} --outtype f16 --outfile {fp16}"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":752625,"status":"ok","timestamp":1707925277694,"user":{"displayName":"szehanz","userId":"16137883221268059572"},"user_tz":-480},"id":"mw5y-GWdkbx6","outputId":"4b95a5a4-b44c-413b-ca05-621d0f935d0f"},"outputs":[{"name":"stdout","output_type":"stream","text":["FP16 file created successfully: llama-2-7b-combined_datasets/llama-2-7b-combined_datasets.fp16.bin\n","ggml_init_cublas: GGML_CUDA_FORCE_MMQ: no\n","ggml_init_cublas: CUDA_USE_TENSOR_CORES: yes\n","ggml_init_cublas: found 1 CUDA devices:\n"," Device 0: Tesla T4, compute capability 7.5, VMM: yes\n","main: build = 2203 
(9d679f0f)\n","main: built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu\n","main: quantizing 'llama-2-7b-combined_datasets/llama-2-7b-combined_datasets.fp16.bin' to 'llama-2-7b-combined_datasets/llama-2-7b-combined_datasets.Q5_K_M.gguf' as Q5_K_M\n","llama_model_loader: loaded meta data with 23 key-value pairs and 291 tensors from llama-2-7b-combined_datasets/llama-2-7b-combined_datasets.fp16.bin (version GGUF V3 (latest))\n","llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n","llama_model_loader: - kv 0: general.architecture str = llama\n","llama_model_loader: - kv 1: general.name str = LLaMA v2\n","llama_model_loader: - kv 2: llama.context_length u32 = 4096\n","llama_model_loader: - kv 3: llama.embedding_length u32 = 4096\n","llama_model_loader: - kv 4: llama.block_count u32 = 32\n","llama_model_loader: - kv 5: llama.feed_forward_length u32 = 11008\n","llama_model_loader: - kv 6: llama.rope.dimension_count u32 = 128\n","llama_model_loader: - kv 7: llama.attention.head_count u32 = 32\n","llama_model_loader: - kv 8: llama.attention.head_count_kv u32 = 32\n","llama_model_loader: - kv 9: llama.attention.layer_norm_rms_epsilon f32 = 0.000010\n","llama_model_loader: - kv 10: llama.rope.freq_base f32 = 10000.000000\n","llama_model_loader: - kv 11: general.file_type u32 = 1\n","llama_model_loader: - kv 12: tokenizer.ggml.model str = llama\n","llama_model_loader: - kv 13: tokenizer.ggml.tokens arr[str,32000] = [\"\", \"\", \"\", \"<0x00>\", \"<...\n","llama_model_loader: - kv 14: tokenizer.ggml.scores arr[f32,32000] = [0.000000, 0.000000, 0.000000, 0.0000...\n","llama_model_loader: - kv 15: tokenizer.ggml.token_type arr[i32,32000] = [2, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...\n","llama_model_loader: - kv 16: tokenizer.ggml.bos_token_id u32 = 1\n","llama_model_loader: - kv 17: tokenizer.ggml.eos_token_id u32 = 2\n","llama_model_loader: - kv 18: tokenizer.ggml.unknown_token_id u32 = 
0\n","llama_model_loader: - kv 19: tokenizer.ggml.padding_token_id u32 = 2\n","llama_model_loader: - kv 20: tokenizer.ggml.add_bos_token bool = true\n","llama_model_loader: - kv 21: tokenizer.ggml.add_eos_token bool = false\n","llama_model_loader: - kv 22: tokenizer.chat_template str = {% if messages[0]['role'] == 'system'...\n","llama_model_loader: - type f32: 65 tensors\n","llama_model_loader: - type f16: 226 tensors\n","llama_model_quantize_internal: meta size = 742080 bytes\n","[ 1/ 291] token_embd.weight - [ 4096, 32000, 1, 1], type = f16, quantizing to q5_K .. size = 250.00 MiB -> 85.94 MiB\n","[ 2/ 291] blk.0.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 3/ 291] blk.0.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 4/ 291] blk.0.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 5/ 291] blk.0.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 6/ 291] blk.0.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 7/ 291] blk.0.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 8/ 291] blk.0.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 9/ 291] blk.0.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 10/ 291] blk.0.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 11/ 291] blk.1.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 12/ 291] blk.1.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 13/ 291] blk.1.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. 
size = 86.00 MiB -> 29.56 MiB\n","[ 14/ 291] blk.1.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 15/ 291] blk.1.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 16/ 291] blk.1.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 17/ 291] blk.1.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 18/ 291] blk.1.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 19/ 291] blk.1.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 20/ 291] blk.10.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 21/ 291] blk.10.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 22/ 291] blk.10.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 23/ 291] blk.10.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 24/ 291] blk.10.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 25/ 291] blk.10.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 26/ 291] blk.10.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 27/ 291] blk.10.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 28/ 291] blk.10.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 29/ 291] blk.11.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 30/ 291] blk.11.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. 
size = 32.00 MiB -> 11.00 MiB\n","[ 31/ 291] blk.11.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 32/ 291] blk.11.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 33/ 291] blk.11.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 34/ 291] blk.2.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 35/ 291] blk.2.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 36/ 291] blk.2.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 37/ 291] blk.2.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 38/ 291] blk.2.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 39/ 291] blk.2.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 40/ 291] blk.2.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 41/ 291] blk.2.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 42/ 291] blk.2.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 43/ 291] blk.3.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 44/ 291] blk.3.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 45/ 291] blk.3.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 46/ 291] blk.3.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. 
size = 86.00 MiB -> 29.56 MiB\n","[ 47/ 291] blk.3.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 48/ 291] blk.3.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 49/ 291] blk.3.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 50/ 291] blk.3.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 51/ 291] blk.3.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 52/ 291] blk.4.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 53/ 291] blk.4.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 54/ 291] blk.4.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 55/ 291] blk.4.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 56/ 291] blk.4.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 57/ 291] blk.4.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 58/ 291] blk.4.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 59/ 291] blk.4.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 60/ 291] blk.4.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 61/ 291] blk.5.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 62/ 291] blk.5.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 63/ 291] blk.5.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. 
size = 86.00 MiB -> 29.56 MiB\n","[ 64/ 291] blk.5.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 65/ 291] blk.5.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 66/ 291] blk.5.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 67/ 291] blk.5.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 68/ 291] blk.5.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 69/ 291] blk.5.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 70/ 291] blk.6.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 71/ 291] blk.6.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 72/ 291] blk.6.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 73/ 291] blk.6.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 74/ 291] blk.6.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 75/ 291] blk.6.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 76/ 291] blk.6.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 77/ 291] blk.6.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 78/ 291] blk.6.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 79/ 291] blk.7.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 80/ 291] blk.7.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. 
size = 86.00 MiB -> 29.56 MiB\n","[ 81/ 291] blk.7.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 82/ 291] blk.7.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 83/ 291] blk.7.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 84/ 291] blk.7.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 85/ 291] blk.7.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 86/ 291] blk.7.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 87/ 291] blk.7.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 88/ 291] blk.8.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 89/ 291] blk.8.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 90/ 291] blk.8.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 91/ 291] blk.8.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 92/ 291] blk.8.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 93/ 291] blk.8.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 94/ 291] blk.8.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 95/ 291] blk.8.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 96/ 291] blk.8.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. 
size = 32.00 MiB -> 11.00 MiB\n","[ 97/ 291] blk.9.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 98/ 291] blk.9.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 99/ 291] blk.9.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 100/ 291] blk.9.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 101/ 291] blk.9.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 102/ 291] blk.9.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 103/ 291] blk.9.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 104/ 291] blk.9.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 105/ 291] blk.9.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 106/ 291] blk.11.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 107/ 291] blk.11.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 108/ 291] blk.11.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 109/ 291] blk.11.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 110/ 291] blk.12.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 111/ 291] blk.12.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 112/ 291] blk.12.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 113/ 291] blk.12.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. 
size = 86.00 MiB -> 29.56 MiB\n","[ 114/ 291] blk.12.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 115/ 291] blk.12.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 116/ 291] blk.12.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 117/ 291] blk.12.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 118/ 291] blk.12.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 119/ 291] blk.13.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 120/ 291] blk.13.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 121/ 291] blk.13.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 122/ 291] blk.13.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 123/ 291] blk.13.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 124/ 291] blk.13.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 125/ 291] blk.13.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 126/ 291] blk.13.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 127/ 291] blk.13.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 128/ 291] blk.14.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 129/ 291] blk.14.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 130/ 291] blk.14.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. 
size = 86.00 MiB -> 29.56 MiB\n","[ 131/ 291] blk.14.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 132/ 291] blk.14.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 133/ 291] blk.14.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 134/ 291] blk.14.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 135/ 291] blk.14.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 136/ 291] blk.14.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 137/ 291] blk.15.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 138/ 291] blk.15.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 139/ 291] blk.15.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 140/ 291] blk.15.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 141/ 291] blk.15.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 142/ 291] blk.15.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 143/ 291] blk.15.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 144/ 291] blk.15.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 145/ 291] blk.15.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 146/ 291] blk.16.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 147/ 291] blk.16.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. 
size = 86.00 MiB -> 29.56 MiB\n","[ 148/ 291] blk.16.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 149/ 291] blk.16.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 150/ 291] blk.16.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 151/ 291] blk.16.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 152/ 291] blk.16.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 153/ 291] blk.16.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 154/ 291] blk.16.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 155/ 291] blk.17.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 156/ 291] blk.17.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 157/ 291] blk.17.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 158/ 291] blk.17.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 159/ 291] blk.17.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 160/ 291] blk.17.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 161/ 291] blk.17.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 162/ 291] blk.17.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 163/ 291] blk.17.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. 
size = 32.00 MiB -> 11.00 MiB\n","[ 164/ 291] blk.18.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 165/ 291] blk.18.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 166/ 291] blk.18.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 167/ 291] blk.18.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 168/ 291] blk.18.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 169/ 291] blk.18.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 170/ 291] blk.18.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 171/ 291] blk.18.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 172/ 291] blk.18.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 173/ 291] blk.19.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 174/ 291] blk.19.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 175/ 291] blk.19.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 176/ 291] blk.19.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 177/ 291] blk.19.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 178/ 291] blk.19.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 179/ 291] blk.19.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 180/ 291] blk.19.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. 
size = 32.00 MiB -> 11.00 MiB\n","[ 181/ 291] blk.19.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 182/ 291] blk.20.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 183/ 291] blk.20.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 184/ 291] blk.20.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 185/ 291] blk.20.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 186/ 291] blk.20.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 187/ 291] blk.20.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 188/ 291] blk.20.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 189/ 291] blk.20.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 190/ 291] blk.20.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 191/ 291] blk.21.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 192/ 291] blk.21.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 193/ 291] blk.21.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 194/ 291] blk.21.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 195/ 291] blk.21.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 196/ 291] blk.21.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 197/ 291] blk.21.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. 
size = 32.00 MiB -> 11.00 MiB\n","[ 198/ 291] blk.21.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 199/ 291] blk.21.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 200/ 291] blk.22.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 201/ 291] blk.22.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 202/ 291] blk.22.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 203/ 291] blk.22.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 204/ 291] blk.22.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 205/ 291] blk.22.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 206/ 291] blk.22.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 207/ 291] blk.22.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 208/ 291] blk.22.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 209/ 291] blk.23.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 210/ 291] blk.23.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 211/ 291] blk.23.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 212/ 291] blk.23.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 213/ 291] blk.23.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. 
size = 32.00 MiB -> 11.00 MiB\n","[ 214/ 291] blk.23.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 215/ 291] output.weight - [ 4096, 32000, 1, 1], type = f16, quantizing to q6_K .. size = 250.00 MiB -> 102.54 MiB\n","[ 216/ 291] blk.23.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 217/ 291] blk.23.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 218/ 291] blk.23.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 219/ 291] blk.24.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 220/ 291] blk.24.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 221/ 291] blk.24.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 222/ 291] blk.24.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 223/ 291] blk.24.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 224/ 291] blk.24.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 225/ 291] blk.24.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 226/ 291] blk.24.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 227/ 291] blk.24.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 228/ 291] blk.25.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 229/ 291] blk.25.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 230/ 291] blk.25.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. 
size = 86.00 MiB -> 29.56 MiB\n","[ 231/ 291] blk.25.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 232/ 291] blk.25.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 233/ 291] blk.25.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 234/ 291] blk.25.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 235/ 291] blk.25.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 236/ 291] blk.25.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 237/ 291] blk.26.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 238/ 291] blk.26.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 239/ 291] blk.26.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 240/ 291] blk.26.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 241/ 291] blk.26.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 242/ 291] blk.26.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 243/ 291] blk.26.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 244/ 291] blk.26.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 245/ 291] blk.26.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 246/ 291] blk.27.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 247/ 291] blk.27.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. 
size = 86.00 MiB -> 35.27 MiB\n","[ 248/ 291] blk.27.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 249/ 291] blk.27.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 250/ 291] blk.27.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 251/ 291] blk.27.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 252/ 291] blk.27.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 253/ 291] blk.27.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 254/ 291] blk.27.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 255/ 291] blk.28.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 256/ 291] blk.28.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 257/ 291] blk.28.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 258/ 291] blk.28.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 259/ 291] blk.28.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 260/ 291] blk.28.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 261/ 291] blk.28.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 262/ 291] blk.28.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 263/ 291] blk.28.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. 
size = 32.00 MiB -> 13.12 MiB\n","[ 264/ 291] blk.29.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 265/ 291] blk.29.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 266/ 291] blk.29.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 267/ 291] blk.29.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 268/ 291] blk.29.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 269/ 291] blk.29.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 270/ 291] blk.29.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 271/ 291] blk.29.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 272/ 291] blk.29.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 273/ 291] blk.30.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 274/ 291] blk.30.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 275/ 291] blk.30.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 276/ 291] blk.30.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 277/ 291] blk.30.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 278/ 291] blk.30.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 279/ 291] blk.30.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 280/ 291] blk.30.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. 
size = 32.00 MiB -> 11.00 MiB\n","[ 281/ 291] blk.30.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 282/ 291] blk.31.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 283/ 291] blk.31.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 284/ 291] blk.31.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 285/ 291] blk.31.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 286/ 291] blk.31.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 287/ 291] blk.31.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 288/ 291] blk.31.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 289/ 291] blk.31.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 290/ 291] blk.31.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. 
size = 32.00 MiB -> 13.12 MiB\n","[ 291/ 291] output_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","llama_model_quantize_internal: model size = 12853.02 MB\n","llama_model_quantize_internal: quant size = 4560.87 MB\n","\n","main: quantize time = 27944.02 ms\n","main: total time = 27944.02 ms\n"]}],"source":["# Verify creation of FP16 file and quantize the model for specified methods.\n","# First, check if the FP16 model file exists, indicating successful conversion.\n","# If the file does not exist, terminate the script to prevent further errors.\n","# Then, for each quantization method listed, perform model quantization,\n","# generating a quantized model file for each method.\n","\n","\n","if os.path.exists(fp16):\n"," print(f\"FP16 file created successfully: {fp16}\")\n","else:\n"," print(f\"Failed to create FP16 file at: {fp16}\")\n"," import sys\n"," sys.exit(\"Stopping script due to missing FP16 file.\")\n","\n","\n","# Quantize the model using specified methods\n","for method in QUANTIZATION_METHODS:\n"," qtype = f\"{MODEL_NAME}/{MODEL_NAME.lower()}.{method.upper()}.gguf\"\n"," !./llama.cpp/quantize {fp16} {qtype} {method}"]},{"cell_type":"markdown","metadata":{"id":"WqI1CPiXI4dP"},"source":["## Run inference\n","\n","Below is a script to run our quantized model. 
We are offloading every layer to the GPU (33 for a 7b parameter model) to speed up inference."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":64968,"status":"ok","timestamp":1707925342654,"user":{"displayName":"szehanz","userId":"16137883221268059572"},"user_tz":-480},"id":"vNPL9WYg78l-","outputId":"16ef6d44-0eda-4b35-e4eb-d4eb9ac083a3"},"outputs":[{"name":"stdin","output_type":"stream","text":["Enter your prompt: what is deep learning\n"]},{"name":"stdout","output_type":"stream","text":["Log start\n","main: build = 2203 (9d679f0f)\n","main: built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu\n","main: seed = 1708358764\n","ggml_init_cublas: GGML_CUDA_FORCE_MMQ: no\n","ggml_init_cublas: CUDA_USE_TENSOR_CORES: yes\n","ggml_init_cublas: found 1 CUDA devices:\n"," Device 0: Tesla T4, compute capability 7.5, VMM: yes\n","llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from llama-2-7b-combined_datasets/llama-2-7b-combined_datasets.Q5_K_M.gguf (version GGUF V3 (latest))\n","llama_model_loader: Dumping metadata keys/values. 
Note: KV overrides do not apply in this output.\n","llama_model_loader: - kv 0: general.architecture str = llama\n","llama_model_loader: - kv 1: general.name str = LLaMA v2\n","llama_model_loader: - kv 2: llama.context_length u32 = 4096\n","llama_model_loader: - kv 3: llama.embedding_length u32 = 4096\n","llama_model_loader: - kv 4: llama.block_count u32 = 32\n","llama_model_loader: - kv 5: llama.feed_forward_length u32 = 11008\n","llama_model_loader: - kv 6: llama.rope.dimension_count u32 = 128\n","llama_model_loader: - kv 7: llama.attention.head_count u32 = 32\n","llama_model_loader: - kv 8: llama.attention.head_count_kv u32 = 32\n","llama_model_loader: - kv 9: llama.attention.layer_norm_rms_epsilon f32 = 0.000010\n","llama_model_loader: - kv 10: llama.rope.freq_base f32 = 10000.000000\n","llama_model_loader: - kv 11: general.file_type u32 = 17\n","llama_model_loader: - kv 12: tokenizer.ggml.model str = llama\n","llama_model_loader: - kv 13: tokenizer.ggml.tokens arr[str,32000] = [\"\", \"\", \"\", \"<0x00>\", \"<...\n","llama_model_loader: - kv 14: tokenizer.ggml.scores arr[f32,32000] = [0.000000, 0.000000, 0.000000, 0.0000...\n","llama_model_loader: - kv 15: tokenizer.ggml.token_type arr[i32,32000] = [2, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...\n","llama_model_loader: - kv 16: tokenizer.ggml.bos_token_id u32 = 1\n","llama_model_loader: - kv 17: tokenizer.ggml.eos_token_id u32 = 2\n","llama_model_loader: - kv 18: tokenizer.ggml.unknown_token_id u32 = 0\n","llama_model_loader: - kv 19: tokenizer.ggml.padding_token_id u32 = 2\n","llama_model_loader: - kv 20: tokenizer.ggml.add_bos_token bool = true\n","llama_model_loader: - kv 21: tokenizer.ggml.add_eos_token bool = false\n","llama_model_loader: - kv 22: tokenizer.chat_template str = {% if messages[0]['role'] == 'system'...\n","llama_model_loader: - kv 23: general.quantization_version u32 = 2\n","llama_model_loader: - type f32: 65 tensors\n","llama_model_loader: - type q5_K: 193 tensors\n","llama_model_loader: - type 
q6_K: 33 tensors\n","llm_load_vocab: special tokens definition check successful ( 259/32000 ).\n","llm_load_print_meta: format = GGUF V3 (latest)\n","llm_load_print_meta: arch = llama\n","llm_load_print_meta: vocab type = SPM\n","llm_load_print_meta: n_vocab = 32000\n","llm_load_print_meta: n_merges = 0\n","llm_load_print_meta: n_ctx_train = 4096\n","llm_load_print_meta: n_embd = 4096\n","llm_load_print_meta: n_head = 32\n","llm_load_print_meta: n_head_kv = 32\n","llm_load_print_meta: n_layer = 32\n","llm_load_print_meta: n_rot = 128\n","llm_load_print_meta: n_embd_head_k = 128\n","llm_load_print_meta: n_embd_head_v = 128\n","llm_load_print_meta: n_gqa = 1\n","llm_load_print_meta: n_embd_k_gqa = 4096\n","llm_load_print_meta: n_embd_v_gqa = 4096\n","llm_load_print_meta: f_norm_eps = 0.0e+00\n","llm_load_print_meta: f_norm_rms_eps = 1.0e-05\n","llm_load_print_meta: f_clamp_kqv = 0.0e+00\n","llm_load_print_meta: f_max_alibi_bias = 0.0e+00\n","llm_load_print_meta: n_ff = 11008\n","llm_load_print_meta: n_expert = 0\n","llm_load_print_meta: n_expert_used = 0\n","llm_load_print_meta: rope scaling = linear\n","llm_load_print_meta: freq_base_train = 10000.0\n","llm_load_print_meta: freq_scale_train = 1\n","llm_load_print_meta: n_yarn_orig_ctx = 4096\n","llm_load_print_meta: rope_finetuned = unknown\n","llm_load_print_meta: model type = 7B\n","llm_load_print_meta: model ftype = Q5_K - Medium\n","llm_load_print_meta: model params = 6.74 B\n","llm_load_print_meta: model size = 4.45 GiB (5.68 BPW) \n","llm_load_print_meta: general.name = LLaMA v2\n","llm_load_print_meta: BOS token = 1 ''\n","llm_load_print_meta: EOS token = 2 ''\n","llm_load_print_meta: UNK token = 0 ''\n","llm_load_print_meta: PAD token = 2 ''\n","llm_load_print_meta: LF token = 13 '<0x0A>'\n","llm_load_tensors: ggml ctx size = 0.22 MiB\n","llm_load_tensors: offloading 32 repeating layers to GPU\n","llm_load_tensors: offloading non-repeating layers to GPU\n","llm_load_tensors: offloaded 33/33 layers to 
GPU\n","llm_load_tensors: CPU buffer size = 85.94 MiB\n","llm_load_tensors: CUDA0 buffer size = 4474.94 MiB\n","..................................................................................................\n","llama_new_context_with_model: n_ctx = 512\n","llama_new_context_with_model: freq_base = 10000.0\n","llama_new_context_with_model: freq_scale = 1\n","llama_kv_cache_init: CUDA0 KV buffer size = 256.00 MiB\n","llama_new_context_with_model: KV self size = 256.00 MiB, K (f16): 128.00 MiB, V (f16): 128.00 MiB\n","llama_new_context_with_model: CUDA_Host input buffer size = 10.01 MiB\n","llama_new_context_with_model: CUDA0 compute buffer size = 70.50 MiB\n","llama_new_context_with_model: CUDA_Host compute buffer size = 8.00 MiB\n","llama_new_context_with_model: graph splits (measure): 3\n","\n","system_info: n_threads = 24 / 48 | AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 1 | AVX512_VBMI = 0 | AVX512_VNNI = 1 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | MATMUL_INT8 = 0 | \n","sampling: \n","\trepeat_last_n = 64, repeat_penalty = 1.100, frequency_penalty = 0.000, presence_penalty = 0.000\n","\ttop_k = 40, tfs_z = 1.000, top_p = 0.950, min_p = 0.050, typical_p = 1.000, temp = 0.800\n","\tmirostat = 0, mirostat_lr = 0.100, mirostat_ent = 5.000\n","sampling order: \n","CFG -> Penalties -> top_k -> tfs_z -> typical_p -> top_p -> min_p -> temperature \n","generate: n_ctx = 512, n_batch = 512, n_predict = 128, n_keep = 0\n","\n","\n","\u001b[33m what is deep learning\u001b[0m in machine learning?\n"," Summarize the key characteristics and applications of deep learning.\n","\n","Deep learning is a subset of machine learning that involves the use of artificial neural networks to model and analyze complex data sets. It is based on the idea that a neural network with multiple layers can learn and represent more abstract and sophisticated patterns in data than a single-layer network. 
Deep learning has been successful in solving many challenging tasks in computer vision, natural language processing, speech recognition, and other areas of machine learning.\n","\n","Key characteristics of deep learning:\n","\n","1. Multi-layered neural networks: Deep learning\n","llama_print_timings: load time = 895.86 ms\n","llama_print_timings: sample time = 51.51 ms / 128 runs ( 0.40 ms per token, 2485.20 tokens per second)\n","llama_print_timings: prompt eval time = 38.97 ms / 5 tokens ( 7.79 ms per token, 128.32 tokens per second)\n","llama_print_timings: eval time = 3130.83 ms / 127 runs ( 24.65 ms per token, 40.56 tokens per second)\n","llama_print_timings: total time = 3251.49 ms / 132 tokens\n","Log end\n"]}],"source":["# Run text generation using a specific quantized model in llama.cpp.\n","# 1. Prompt the user to enter text for the model to process.\n","# 2. Construct the model file path ('qtype') using MODEL_NAME and a specified quantization method.\n","# 3. Execute the llama.cpp main program with the constructed model path,\n","# setting the number of tokens to generate, enabling color, limiting the number of generated lines,\n","# and using the user-provided prompt.\n","\n","prompt = input(\"Enter your prompt: \")\n","\n","# Construct the path to the model file with the quantization method 'Q5_K_M'\n","qtype = f\"{MODEL_NAME}/{MODEL_NAME.lower()}.Q5_K_M.gguf\"\n","\n","# Execute the llama.cpp main program with specified parameters\n","!./llama.cpp/main -m {qtype} -n 128 --color -ngl 35 -p \"{prompt}\""]},{"cell_type":"markdown","metadata":{"id":"Ar8pO7bb80US"},"source":["## Push to 
hub"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":188,"referenced_widgets":["042396eff4ce41bf9729880c8dd9144f","88e20b09bb994e0dbd092e3c625d5a1a","0e8715a0c48c4e0883fe45ac9794dc42","67965e7b79f64ec288167f9affb45d81","bfdd34dd4a294d3f9c8bb05b27e108f2","32a0ec8495224b92ad1c48613bc03891","77c4b22e3ed3478487e04edf011475ce","8b949ab00d9c41729973b0c9cc689493","37ea5d15761f44fdbcd1ddb8f582936a","eeee199faf7b4464997f3317f3973a19","306d81e5c3414519b7ccbe2329a1c3be","f79521693d2d42d180534ed94994f6fe"]},"executionInfo":{"elapsed":141900,"status":"ok","timestamp":1707925523299,"user":{"displayName":"szehanz","userId":"16137883221268059572"},"user_tz":-480},"id":"UOyKfUD-8jmh","outputId":"4093844f-7f46-484f-922a-f902fc089242"},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"f79521693d2d42d180534ed94994f6fe","version_major":2,"version_minor":0},"text/plain":["llama-2-7b-combined_datasets.Q5_K_M.gguf: 0%| | 0.00/4.78G [00:00, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"text/plain":["'https://huggingface.co/ssoh/llama-2-7b-combined_datasets-GGUF/tree/main/'"]},"execution_count":10,"metadata":{},"output_type":"execute_result"}],"source":["# Create a new model repository on Hugging Face and upload gguf files.\n","# 1. Initialize the HfApi object to interact with Hugging Face's API.\n","# 2. Define the username associated with the Hugging Face account.\n","# 3. Use create_repo to create an empty repository for the model,\n","# allowing for the repository to exist already with exist_ok=True.\n","# 4. 
Upload all gguf files from the local MODEL_NAME directory to the newly\n","# created repository on Hugging Face, using upload_folder with a filter\n","# to only include files with a .gguf extension.\n","\n","\n","api = HfApi()\n","username = \"ssoh\"\n","\n","\n","# Create an empty repository on Hugging Face\n","create_repo(\n"," repo_id=f\"{username}/{MODEL_NAME}-GGUF\",\n"," repo_type=\"model\",\n"," exist_ok=True,\n",")\n","\n","\n","# Upload gguf model files to the repository\n","api.upload_folder(\n"," folder_path=MODEL_NAME,\n"," repo_id=f\"{username}/{MODEL_NAME}-GGUF\",\n"," allow_patterns=\"*.gguf\",\n",")"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"yE7yfvzvwaEj"},"outputs":[],"source":[]}],"metadata":{"accelerator":"GPU","colab":{"gpuType":"T4","provenance":[{"file_id":"1p68M5E5fZ7kSa7nA-e-20489nuFSXVp2","timestamp":1706370554565},{"file_id":"119-Y6eV94vuFqWLh8t8NBB-Uf54eCl3i","timestamp":1700232595435},{"file_id":"1PEQyJO1-f6j0S_XJ8DV50NkpzasXkrzd","timestamp":1699194238655}]},"kernelspec":{"display_name":"Python 3 
(ipykernel)","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.11.0"},"widgets":{"application/vnd.jupyter.widget-state+json":{"01ff6ffe93d64b2f8d5dd55b9e3cfcc6":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0786fc506a7640a287016991f57d3768":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_0ba6b1d30c4642648fcaa57af3c52f49","placeholder":"","style":"IPY_MODEL_23176fa0130348c5bc1b0deb591709f3","value":"model.safetensors.index.json: 
100%"}},"08400a144d3c497a94ae4d84e72a1067":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0af30e36f2524c1084c1237948f3b18e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_963973c1e970410cb2ba97e9fb4ef511","placeholder":"","style":"IPY_MODEL_ca8d07b9b454471e883d43e03b475a9f","value":" 26.8k/26.8k [00:00<00:00, 
389kB/s]"}},"0ba4f20507af472a8b1504de483f7800":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_187745df1f154e7da09a0c646a22cebb","IPY_MODEL_20cc750b1bde4efdb3c3a5b5321222b4","IPY_MODEL_77c0d63f6cd6494daf7e94cfe0397e45"],"layout":"IPY_MODEL_01ff6ffe93d64b2f8d5dd55b9e3cfcc6"}},"0ba6b1d30c4642648fcaa57af3c52f49":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0cda6495ef4b4c9ab3eeff2392d89163":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"Layou
tView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"11b88a389a4042b5ad5ba06f51ac22e0":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_2cb9cfbde1e0483c97a2c531e0034adf","placeholder":"","style":"IPY_MODEL_de756c426cf0492bb122a45b94d4bbe7","value":"
Copy a token from your Hugging Face\ntokens page and paste it below.
Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. "}},"14d0ff6e07a148c0b65224af86af6b95":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"187745df1f154e7da09a0c646a22cebb":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_638517f3c94445c7b610b84744859ef8","placeholder":"","style":"IPY_MODEL_5206f9fc25234586b735fc9c65e6b19a","value":"generation_config.json: 
100%"}},"19b8b7692cfd46eca5abb50ac2854262":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"19d3adedf1a245f69a2498b1838f6415":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_31ffba7793b944cd80bec0fb6ac2fbbe","IPY_MODEL_1ae848be349a41239a7ba6efadc82edf","IPY_MODEL_9a33d92ad0604eff9ac56944586facbb"],"layout":"IPY_MODEL_293d425206684064b0572e990a815cee"}},"1ae848be349a41239a7ba6efadc82edf":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_mod
ule_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_8ae8a9d03adc476a8dd64f09e194907d","max":2,"min":0,"orientation":"horizontal","style":"IPY_MODEL_aeb94dbf3f8a4b43bb07c833af3b98c9","value":2}},"1c6e5dd4ca3446ec8cf8a03b85ad57dc":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_0786fc506a7640a287016991f57d3768","IPY_MODEL_3af05a188f40469dbd52fda55c7a9e22","IPY_MODEL_0af30e36f2524c1084c1237948f3b18e"],"layout":"IPY_MODEL_dfe1c2648a564676bfe1e09bda61d439"}},"1ea12323b1244c768a17a6ed5420f854":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1f9e8ddb08814b2db7761478eb9069f5":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_7982d1d315964d138f31643db445b48a","max":614,"min":0,"orientation":"horizontal","style":"IPY_MODEL_67346bc716384552a69d1afdc844db00","value":614}},"20cc750b1bde4efdb3c3a5b5321222b4":{"model_module":"@jupyter-widgets/controls",
"model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_a199f20edde447829b17804b17af2fe5","max":188,"min":0,"orientation":"horizontal","style":"IPY_MODEL_447f608108364159ab7ed546ad02559a","value":188}},"21556be54ed34b15b909bf8e7b8fd93a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_b8113970ea7245e9890221d4e4cf5e8e","placeholder":"","style":"IPY_MODEL_d3224d16458249a3bfd29253c2d6a86f","value":"\nPro Tip: If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. 
"}},"2254fe28315f43a99c3579195c0c0008":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"22e8bae9ff6744bca3e90d46d220106f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"23176fa0130348c5bc1b0deb591709f3":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"236b3417f0e94039a1a449e7ab96738b":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutMod
el","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"25543b118c8e423aa7fd8c898d5e1256":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"2924e96aa10346efb39684e5369e2170":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"LabelModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_bce8d1501218410ba8b042aeb3f0fc26","placeholder":"","style":"IPY_MODEL_405603de026d484ab283f053f4b17c6d","value":"Your token has been saved to 
/root/.cache/huggingface/token"}},"293d425206684064b0572e990a815cee":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"29f303aa6ac8464aa91124c3fe659379":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":nul
l,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2cb9cfbde1e0483c97a2c531e0034adf":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2dce1978d19e4de3a6a1b1cef6ed518f":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_widt
h":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"3076e4abb7fe427fa4fccb43e9f3371e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"30d47a9da70a4cf5a7e597f13d28d526":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"31ffba7793b944cd80bec0fb6ac2fbbe":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_7ee17b5879ec4909b7b616b79d5eab36","placeholder":"","style":"IPY_MODEL_d8b847adb46d4049a4b43bba2a0e06da","value":"Downloading shards: 
100%"}},"32a57f9df45b41da91e1d638f531505d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"3af05a188f40469dbd52fda55c7a9e22":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_82829039182948efb800703e3e21d96d","max":26788,"min":0,"orientation":"horizontal","style":"IPY_MODEL_32a57f9df45b41da91e1d638f531505d","value":26788}},"3e4a06b9b13444e3b82e0c3c26e17b8f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"CheckboxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"CheckboxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"CheckboxView","description":"Add token as git 
credential?","description_tooltip":null,"disabled":false,"indent":true,"layout":"IPY_MODEL_f91ebc43c1344e8688e2eeb2771c7b65","style":"IPY_MODEL_ea00aa1eb73949fc94083f1d31372915","value":true}},"405603de026d484ab283f053f4b17c6d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"42262413163d49fd8902c275c13da1e6":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_cf370d3015b04b6caa9cbfb8f21dd5e7","placeholder":"","style":"IPY_MODEL_1ea12323b1244c768a17a6ed5420f854","value":" 9.98G/9.98G [01:29<00:00, 
126MB/s]"}},"441c37014e6b4c8995e7a5305f96fa38":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"447f608108364159ab7ed546ad02559a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"46e41557d0bc4cb4a03c6894fcc57d0e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"4cee3e23bcbd4643ba542d94b82dbfd2":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"L
ayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5206f9fc25234586b735fc9c65e6b19a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"52f2b6adb9904cbeb014387429c0d1ad":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_b61ac64b2a0c4beb8ea209782b9dd09b","IPY_MODEL_9e9c2d712ae84d3aa87fbe433ae0701b","IPY_MODEL_fe4d028a78e64a4d8b47f8bc8b8ec15d"],"layout":"IPY_MODEL_88a7aeaf3b2547ea91cf20ed2f1a3dc0"}},"537d173a4313491f8f89f799f1dc7b2a":{"model_module":"@jupyter-widgets/base"
,"model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"541ef20ab6f34337a2d6d20098f6fef5":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"LabelModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_b0f0ac261e364edd99d7b75e747e2c47","placeholder":"","style":"IPY_MODEL_ff9f726db3434e3184e723d5da884d0a","value":"Token is valid (permission: 
write)."}},"55e506bd03d24dd7b24129dc64da9c01":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"5e5b95c9801443cdbce9c8e629c33589":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"PasswordModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"PasswordModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"PasswordView","continuous_update":true,"description":"Token:","description_tooltip":null,"disabled":false,"layout":"IPY_MODEL_29f303aa6ac8464aa91124c3fe659379","placeholder":"","style":"IPY_MODEL_3076e4abb7fe427fa4fccb43e9f3371e","value":""}},"62f1cf19fe204aa4a424248e807ce061":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"638517f3c94445c7b610b84744859ef8":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":nul
l,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6430379d01874ec3a7cf9fea59c42914":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ButtonModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ButtonView","button_style":"","description":"Login","disabled":false,"icon":"","layout":"IPY_MODEL_2dce1978d19e4de3a6a1b1cef6ed518f","style":"IPY_MODEL_e81c501824f94e7d839684fafbc65b31","tooltip":""}},"67346bc716384552a69d1afdc844db00":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"6edf40f558f54d8b82d949f83557d609":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"LabelModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_08400a144d3c497a94ae4d84e72a1067","placeholder":"","style":"IPY_MOD
EL_976a3440d2c3423c8be835b0d6f56492","value":"Connecting..."}},"77c0d63f6cd6494daf7e94cfe0397e45":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_4cee3e23bcbd4643ba542d94b82dbfd2","placeholder":"","style":"IPY_MODEL_25543b118c8e423aa7fd8c898d5e1256","value":" 188/188 [00:00<00:00, 12.3kB/s]"}},"77daad9def9f4254933afdeb7202e5d2":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"7982d1d315964d138f31643db445b48a":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"ove
rflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"7c701d53772344f6a057cd43742d47f5":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_b9e4e59acd4d468393f42857d5635e25","placeholder":"","style":"IPY_MODEL_30d47a9da70a4cf5a7e597f13d28d526","value":"Loading checkpoint shards: 100%"}},"7ee17b5879ec4909b7b616b79d5eab36":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"8080eb1edf8f44f69b5f218046715f36":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"Flo
atProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_e2df35aaf7e0420b9524d533cb122521","max":2,"min":0,"orientation":"horizontal","style":"IPY_MODEL_d90c3a424bac4bb09e99339c28df2eda","value":2}},"82829039182948efb800703e3e21d96d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"83127281047a429a846b9bb3b3209760":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"gri
d_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"86014f569ee34895badef4acfe958051":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"86616a0d2f9d4da1a75e00d092ca9d4d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_a7a17a5883784ed9961437d0102fd9a4","IPY_MODEL_9934540648a444a09f0783843ab2d901","IPY_MODEL_42262413163
d49fd8902c275c13da1e6"],"layout":"IPY_MODEL_83127281047a429a846b9bb3b3209760"}},"88a7aeaf3b2547ea91cf20ed2f1a3dc0":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"8ae8a9d03adc476a8dd64f09e194907d":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"obj
ect_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"963973c1e970410cb2ba97e9fb4ef511":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"976a3440d2c3423c8be835b0d6f56492":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"9934540648a444a09f0783843ab2d901":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"Progress
View","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_f702ae2d46834e9b934a40abca49a990","max":9976576152,"min":0,"orientation":"horizontal","style":"IPY_MODEL_14d0ff6e07a148c0b65224af86af6b95","value":9976576152}},"9a33d92ad0604eff9ac56944586facbb":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_537d173a4313491f8f89f799f1dc7b2a","placeholder":"","style":"IPY_MODEL_55e506bd03d24dd7b24129dc64da9c01","value":" 2/2 [01:56<00:00, 52.47s/it]"}},"9c7550ec29c34ef2b674a3667ebd740e":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_236b3417f0e94039a1a449e7ab96738b","placeholder":"","style":"IPY_MODEL_77daad9def9f4254933afdeb7202e5d2","value":"config.json: 
100%"}},"9e4d26b87e1243f3ae744ddbbe2c5b51":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"9e9c2d712ae84d3aa87fbe433ae0701b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"FloatProgressModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_86014f569ee34895badef4acfe958051","max":3500296424,"min":0,"orientation":"horizontal","style":"IPY_MODEL_22e8bae9ff6744bca3e90d46d220106f","value":3500296424}},"a066a28914bc46cc85ebcb7d90d27fbe":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"ove
rflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a199f20edde447829b17804b17af2fe5":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a39b2a49ff814181aa0df84c57e9245b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"a7a17a5883784ed9961437d0102fd9a4":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_ae17
3a50f4374c7fa55d8e360d30d4d1","placeholder":"","style":"IPY_MODEL_b93a21bcaf6344c49a0ab8e2a887922c","value":"model-00001-of-00002.safetensors: 100%"}},"a7ef8ff133144d4b9817800e5b4739a4":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a9628200c0a34cccacdb0ea66e200b87":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_0cda6495ef4b4c9ab3eeff2392d89163","placeholder":"","style":"IPY_MODEL_9e4d26b87e1243f3ae744ddbbe2c5b51","value":" 2/2 [00:58<00:00, 
26.87s/it]"}},"a96de5c1f80f43d8b0b689d5db0fa248":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ae173a50f4374c7fa55d8e360d30d4d1":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"aeb94dbf3f8a4b43bb07c833af3b98c9":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"b0f0ac261e364edd99d7b75e747e2c47":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":
"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b4ed4d5e266a4cd5b9011436b4e92951":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_7c701d53772344f6a057cd43742d47f5","IPY_MODEL_8080eb1edf8f44f69b5f218046715f36","IPY_MODEL_a9628200c0a34cccacdb0ea66e200b87"],"layout":"IPY_MODEL_2254fe28315f43a99c3579195c0c0008"}},"b577aeffe358446ea41e4b3c58b80641":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_441c37014e6b4c8995e7a5305f96fa38","p
laceholder":"","style":"IPY_MODEL_a39b2a49ff814181aa0df84c57e9245b","value":" 614/614 [00:00<00:00, 15.7kB/s]"}},"b61ac64b2a0c4beb8ea209782b9dd09b":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_a066a28914bc46cc85ebcb7d90d27fbe","placeholder":"","style":"IPY_MODEL_a96de5c1f80f43d8b0b689d5db0fa248","value":"model-00002-of-00002.safetensors: 100%"}},"b634ae14caf84bc99dcafd3a70b7a383":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b64f26ac024c46eabfc4728586369130":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"LabelModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_na
me":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_a7ef8ff133144d4b9817800e5b4739a4","placeholder":"","style":"IPY_MODEL_ba4cf32b2f71428282721e7818b34a5a","value":"Login successful"}},"b8113970ea7245e9890221d4e4cf5e8e":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b93a21bcaf6344c49a0ab8e2a887922c":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"b9e4e59acd4d468393f42857d5635e25":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model
_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ba4cf32b2f71428282721e7818b34a5a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"bce8d1501218410ba8b042aeb3f0fc26":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":nul
l,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c3d08bc595a74c3180a7a83afc569584":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"VBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"VBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"VBoxView","box_style":"","children":["IPY_MODEL_541ef20ab6f34337a2d6d20098f6fef5","IPY_MODEL_fe46e1cf697f4b1fab764104be32da95","IPY_MODEL_2924e96aa10346efb39684e5369e2170","IPY_MODEL_b64f26ac024c46eabfc4728586369130"],"layout":"IPY_MODEL_cad60b6f14f249c187d573dd3a4428e0"}},"ca8d07b9b454471e883d43e03b475a9f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"cad60b6f14f249c187d573dd3a4428e0":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":"center","align_self":null,"border":null,"bottom":null,"display":"flex","flex":null,"flex_flow":"column","grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_column
s":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"50%"}},"cf370d3015b04b6caa9cbfb8f21dd5e7":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d3224d16458249a3bfd29253c2d6a86f":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d8b847adb46d4049a4b43bba2a0e06da":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widg
ets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d90c3a424bac4bb09e99339c28df2eda":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ProgressStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"de756c426cf0492bb122a45b94d4bbe7":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"df0d8231953b44f1bf8e6d2cd39b2f66":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HBoxModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_9c7550ec29c34ef2b674a3667ebd740e","IPY_MODEL_1f9e8ddb08814b2db7761478eb9069f5","IPY_MODEL_b577aeffe358446ea41e4b3c58b80641"],"layout":"IPY_MODEL_19b8b7692cfd46eca5abb50ac2854262"}},"dfe1c2648a564676bfe1e09bda61d439":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"Lay
outView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e2df35aaf7e0420b9524d533cb122521":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e81c501824f94e7d839684fafbc65b31":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"ButtonStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonStyleModel","_view_count
":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","button_color":null,"font_weight":""}},"ea00aa1eb73949fc94083f1d31372915":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"f702ae2d46834e9b934a40abca49a990":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f81ada25e7ff4f5da6b3f6c6e73590e4":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":nu
ll,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f91ebc43c1344e8688e2eeb2771c7b65":{"model_module":"@jupyter-widgets/base","model_module_version":"1.2.0","model_name":"LayoutModel","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"fe46e1cf697f4b1fab764104be32da95":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"LabelModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_m
odule_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_f81ada25e7ff4f5da6b3f6c6e73590e4","placeholder":"","style":"IPY_MODEL_62f1cf19fe204aa4a424248e807ce061","value":"Your token has been saved in your configured git credential helpers (store)."}},"fe4d028a78e64a4d8b47f8bc8b8ec15d":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"HTMLModel","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_b634ae14caf84bc99dcafd3a70b7a383","placeholder":"","style":"IPY_MODEL_46e41557d0bc4cb4a03c6894fcc57d0e","value":" 3.50G/3.50G [00:26<00:00, 176MB/s]"}},"ff9f726db3434e3184e723d5da884d0a":{"model_module":"@jupyter-widgets/controls","model_module_version":"1.5.0","model_name":"DescriptionStyleModel","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"nbformat":4,"nbformat_minor":0}