"],"text/html":["\n"," \n"," \n","
\n"," [120/120 14:15, Epoch 20/20]\n","
\n"," \n"," \n"," \n"," Step | \n"," Training Loss | \n"," Validation Loss | \n","
\n"," \n"," \n"," \n"," 10 | \n"," 1.505600 | \n"," 1.263215 | \n","
\n"," \n"," 20 | \n"," 0.695100 | \n"," 0.531458 | \n","
\n"," \n"," 30 | \n"," 0.320900 | \n"," 0.222908 | \n","
\n"," \n"," 40 | \n"," 0.228900 | \n"," 0.211581 | \n","
\n"," \n"," 50 | \n"," 0.162800 | \n"," 0.165926 | \n","
\n"," \n"," 60 | \n"," 0.176000 | \n"," 0.142462 | \n","
\n"," \n"," 70 | \n"," 0.156200 | \n"," 0.139793 | \n","
\n"," \n"," 80 | \n"," 0.141100 | \n"," 0.131930 | \n","
\n"," \n"," 90 | \n"," 0.125100 | \n"," 0.126040 | \n","
\n"," \n"," 100 | \n"," 0.116700 | \n"," 0.124458 | \n","
\n"," \n"," 110 | \n"," 0.128800 | \n"," 0.122401 | \n","
\n"," \n"," 120 | \n"," 0.119300 | \n"," 0.120288 | \n","
\n"," \n","
"]},"metadata":{}}]},{"cell_type":"code","source":["# Run text generation pipeline with our model\n","prompt = \"What is a large language model?\"\n","instruction = f\"### Instruction:\\n{prompt}\\n\\n### Response:\\n\"\n","pipe = pipeline(task=\"text-generation\", model=model, tokenizer=tokenizer, max_length=128)\n","result = pipe(instruction)\n","\n","# Extract and print the generated text, removing the part that includes and follows the \"### Response:\\n\" placeholder\n","generated_text = result[0]['generated_text']\n","response_start = generated_text.find(\"### Response:\\n\") + len(\"### Response:\\n\")\n","response_end = generated_text.find(\"### Instruction:\", response_start)\n","print(generated_text[response_start:response_end if response_end != -1 else None].strip())"],"metadata":{"id":"frlSLPin4IJ4","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1706610659938,"user_tz":-480,"elapsed":30068,"user":{"displayName":"szehanz","userId":"16137883221268059572"}},"outputId":"54e17fee-1deb-47dd-9ef2-17f1967428f4"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["A large language model is a type of artificial intelligence model that is trained on a large dataset of text to generate language outputs that are coherent and natural-sounding. These models are designed to capture the complexity and diversity of language, and can be used for a variety of tasks such as language translation, text summarization, and language generation.\n"]}]},{"cell_type":"code","source":["# Empty VRAM\n","del model\n","del pipe\n","del trainer\n","import gc\n","gc.collect()\n","gc.collect()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"mkQCviG0Zta-","executionInfo":{"status":"ok","timestamp":1706610669238,"user_tz":-480,"elapsed":869,"user":{"displayName":"szehanz","userId":"16137883221268059572"}},"outputId":"b8c107d8-1a31-4924-aba4-f7f81bc78f15"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0"]},"metadata":{},"execution_count":9}]},{"cell_type":"markdown","source":["Merging the base model with the trained adapter."],"metadata":{"id":"_g0fB7P9s0ol"}},{"cell_type":"code","source":["# Reload model in FP16 and merge it with LoRA weights\n","model = AutoModelForCausalLM.from_pretrained(\n"," base_model,\n"," low_cpu_mem_usage=True,\n"," return_dict=True,\n"," torch_dtype=torch.float16,\n"," device_map={\"\": 0},\n",")\n","model = PeftModel.from_pretrained(model, new_model)\n","model = model.merge_and_unload()\n","\n","\n","# Reload tokenizer to save it\n","tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)\n","tokenizer.pad_token = tokenizer.eos_token\n","tokenizer.padding_side = 
\"right\""],"metadata":{"id":"QQn30cRtAZ-P","colab":{"base_uri":"https://localhost:8080/","height":49,"referenced_widgets":["30c33b49ebc042b98f31ef08acaa98c6","d93e9e1d63f241af8478a3b183f3747d","88d750727d3745e8a3dd53eef2b69e97","10054d164dd04d519271a82d3605e714","d1e9be0735fe4b6fa468cac57945b19e","5fcd05acc87b491fad3c4c4c4b3e1d76","e8bac80e2a5f4fa79a65d21ff2bc9bfd","1c1afeec5aed4fd999e82c72425b2819","df762a20aa304cb887a29d20a861700e","8dae3300b3e04e48a62a12296a3dd620","1bd79f581c7d4aa19b93d0f017a98a59"]},"executionInfo":{"status":"ok","timestamp":1706610740493,"user_tz":-480,"elapsed":65433,"user":{"displayName":"szehanz","userId":"16137883221268059572"}},"outputId":"9860fb19-61e3-4b82-c299-05e3ea9c41bf"},"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/plain":["Loading checkpoint shards: 0%| | 0/2 [00:00, ?it/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"30c33b49ebc042b98f31ef08acaa98c6"}},"metadata":{}}]},{"cell_type":"markdown","source":["Push the model and tokenizer to the Hugging Face Hub."],"metadata":{"id":"n4_wCHy_s--5"}},{"cell_type":"code","source":["model.push_to_hub(new_model, use_temp_dir=False)\n","tokenizer.push_to_hub(new_model, use_temp_dir=False)"],"metadata":{"id":"x-xPb-_qB0dz","executionInfo":{"status":"ok","timestamp":1706611209908,"user_tz":-480,"elapsed":469418,"user":{"displayName":"szehanz","userId":"16137883221268059572"}},"colab":{"base_uri":"https://localhost:8080/","height":246,"referenced_widgets":["f3bfe6809ea841949bb0f33269034e02","51c450db060f4951bc8249ea782392d6","6ecd5671a20648dea4bedda6113d23ff","8d7c7bda8df345dd8c4aade9966965ab","0e1dbb1e4b4a4f5c917833a57ae30a7c","dfc0a9bb09a3451fb9e4d95b71c57cb8","48ffe0f9cb824cc0bf4c14d5e0bde442","942acd91ec1a4a3cabd9dadda9b7a351","d858fa7276bf4176b0e46f9d5bc91e20","7d3efa513dea4b8f98fbd074dcbfd9f3","1422fdeb60544da58446e8a2d242e445","d9fca217b6fc4f48bd0134e411a2f909","c53048378bd7436fb2181a5e0cc4ba61","bfa91360a9f64b14922e29d0f7b9f7f8","1feb9b56463f4a97a2d8d5fdf6b5cbef","e2dec9930fb8423188556f35e943de84","e549d7168dc145f9bf934a71916e080a","c50b57696f0047aa915fed04afec8e9a","f7b225cef3864c1fabdc4445faf3b466","e76ad4f8dbf14d59a5f3aadfab84b9dc","cd23dc7eeae34aa3afbea9c976fc64f0","e3ba393293b7488ea626a6b650420a92","6326507477cb4933999f525889aad2ec","d42b2763130f415f98b98713144ff757","18b653d5bbc24a69a3923cfd26995e5a","413a9764e03a436084f6d5a957c1dc52","572d68d016674e508d4258ba047f0529","dff83433f49445988e0fb0028bbdc8c4","55b77507e6564bcfa2d7dd0ef125faf8","1cc69198c30b427aacc5c5ad42a5d1ac","648542e8a44245839e32fb997f9a1941","01c0515f6f0f454183b69e6e8da0a9db","91f465f87b6c4871be25ce5fb189ebba","7c55b758c41b419ead52bc8f7cbbbc57","fe0da59a6c074478a080c547c4f93b32","6a8706e3dd234a3f999e4984de8363ed","57736888a34346f780d376cfcb46ce75","c5af9dc16a324db6a98cc53c82e9e161","64f44d7e27874c9e963fd7a2758434af","f1bd8ba112be42ee81a3d6ad96a0f382","42bd12f0797c45b281861e38d5b708b4","665b236679b84ee6a66d6b23dd807073","8c73ea8a3fcd4a938f25bc1354db7818","316771e899204e839390d5b0727b0108","806bff9547164ae08291778f983b9790","6811f1e76e974aa58034852c20fdd613","e15f1f22121e4d11b4bf1681c3d7e84c","6fa7416ef4224e4e8ec474fd6fb45cf4","8544d67df06a45e18f05fccedb693544","44963c9e7a5f46f283e84e432bfa941c","7b27ccff92fa48ef88001c6a2c7c52bc","09381f4ffe0447368ab20185c87edd21","93b84c8ebd5c48159545af85314789f7","23d0643de9b94c9cb4b7bba21611008a","3d04c36ebc184f5a8c3244b5a543eaaf"]},"outputId":"e2e4498f-4632-4c7a-aba6-68bf7f9ad508"},"execution_count":null,"outputs":[{"output_
type":"display_data","data":{"text/plain":["model-00002-of-00003.safetensors: 0%| | 0.00/4.95G [00:00, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"f3bfe6809ea841949bb0f33269034e02"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["model-00001-of-00003.safetensors: 0%| | 0.00/4.94G [00:00, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"d9fca217b6fc4f48bd0134e411a2f909"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["model-00003-of-00003.safetensors: 0%| | 0.00/3.59G [00:00, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"6326507477cb4933999f525889aad2ec"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["Upload 3 LFS files: 0%| | 0/3 [00:00, ?it/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"7c55b758c41b419ead52bc8f7cbbbc57"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["README.md: 0%| | 0.00/5.18k [00:00, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"806bff9547164ae08291778f983b9790"}},"metadata":{}},{"output_type":"execute_result","data":{"text/plain":["CommitInfo(commit_url='https://huggingface.co/ssoh/llama-2-7b-mini-ibased/commit/08f1929770c0aeccf8c17e768bed6840c998797c', commit_message='Upload tokenizer', commit_description='', oid='08f1929770c0aeccf8c17e768bed6840c998797c', pr_url=None, pr_revision=None, pr_num=None)"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"string"}},"metadata":{},"execution_count":11}]},{"cell_type":"markdown","source":["# Quantize Llama 2 models using GGUF and llama.cpp\n","\n","\n","## Usage\n","\n","* `MODEL_ID`: The ID of the model to quantize (e.g., `ssoh/llama-2-7b-mini-ibased`).\n","* `QUANTIZATION_METHOD`: The quantization method to use.\n","\n","## Quantization methods\n","\n","The names of the quantization methods follow the naming convention: \"q\" + the number of bits + the variant used.\n","\n","We will be using **Q5_K_M** as it preserves most of the model's performance."],"metadata":{"id":"8y_Rk94LzG7I"}},{"cell_type":"code","source":["# Authenticate with Hugging Face Hub to securely access models, datasets, and other resources.\n","from huggingface_hub import 
notebook_login\n","notebook_login()"],"metadata":{"id":"zbCYFOmU7ANP","colab":{"base_uri":"https://localhost:8080/","height":145,"referenced_widgets":["342b32e908ab4c9e8e636a64ec9202bf","c23cf3a7f3a040ff8a3b9e9b7ad348f0","c98949824321478d99a1475a89e325a7","76395d8bd36c45fea0aa3900c794894c","a2e02b2ceacb4d64a14be13b26a22cf9","f68106a0b50440a2a10263b232d33f07","087e5ddf62b04a6994a056d3e26c780c","7e7e3eb22e6b4b358db0fea63ca374e5","5b6c2db34c7e4400b72aa1499d69ee32","4cd53420ce864839a99efab27ec28ee8","9a38d22e47704a1b8e85742820d4bf64","4318e356c7b54d939a862cd0d1290f09","367526f84d7f4e69af6e730d86e6136a","a76cf64bc1fd460a998a2a7a07617cf1","4f37f7e9c0b545d6875901930e921299","50de8cfc6c084e028068fd217c15b171","3048c08df98340898f64ad9b00ebd4dd","63ee3af405ee4696be9348e33bf0f577","54a0ae84599f4dedab0fa80f395f96ec","45c774bdb3e7456587f0aa7d1e9b8ed7","e2f2dfc61e2e421ab1aab56048d3f330","3f5fc2fd155c4cc89928b61c3896d522","fcd98b79ddfd4a32992cc005a26d07b8","cd6663dd27864040a1ebe16e2e2ba0f1","44e943245f81431d8388d5f79df6cdbb","28ef998c70a046f4b3fbf47e062f9210","eb205b40f40c43aea347a94c8a5b24e8","c1995c4094054a0eac15cc3201878ad3","d57d8b600803484d8f9ea340a5eaa7b0","da4f2bd963864b6085625935aa77857a","d1a1da3f19ff4e25b249ba50b3ee4535","2b7e57a54a55463cb3ac91859262383e"]},"executionInfo":{"status":"ok","timestamp":1706680450212,"user_tz":-480,"elapsed":557,"user":{"displayName":"szehanz","userId":"16137883221268059572"}},"outputId":"384a0237-cf8f-46d5-f1e2-8098ccc0e386"},"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/plain":["VBox(children=(HTML(value='
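The next cell is truncated in this excerpt; its logs (condensed below) show it installing llama.cpp's conversion requirements and cloning the fine-tuned repository over Git LFS. A plausible reconstruction, stated as an assumption rather than the notebook's exact code:

```python
# Hypothetical reconstruction of the truncated setup cell, inferred from
# the install/clone logs that follow -- not the original source.
!git clone https://github.com/ggerganov/llama.cpp
!cd llama.cpp && make
!pip install -r llama.cpp/requirements.txt
!git lfs install
!git clone https://huggingface.co/ssoh/llama-2-7b-mini-ibased
```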
Output (abridged):

```
Successfully installed gguf-0.6.0 numpy-1.24.4 ... protobuf-4.25.2 torch-2.1.2
Git LFS initialized.
Cloning into 'llama-2-7b-mini-ibased'...
Filtering content: 100% (3/3), 4.55 GiB | 16.16 MiB/s, done.
```

pip pulls in `gguf`, `protobuf` 4.25.2, and `torch` 2.1.2 with its CUDA 12.1 wheels, downgrading Colab's preinstalled `torch` 2.1.0 and `numpy`; the resulting version-conflict warnings from `tensorboard`, `torchaudio`, `torchvision`, and friends do not block the conversion.

llama.cpp's `convert.py` reads the vocabulary from a SentencePiece `tokenizer.model` file, so fetch that file from the base chat model and place it in the checkpoint directory:

```python
import os
import shutil

# Specify the model ID from which to load the tokenizer
model_id = "meta-llama/Llama-2-7b-chat-hf"

# Load the tokenizer associated with the specified model ID
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Save all downloaded tokenizer files to a temporary directory
temp_save_directory = "temp_tokenizer_files"
tokenizer.save_pretrained(temp_save_directory)

# Directory where the tokenizer.model file will be kept permanently
MODEL_NAME = "llama-2-7b-mini-ibased"
save_directory = MODEL_NAME

# Create the save directory if it does not exist
os.makedirs(save_directory, exist_ok=True)

# Define the specific filename of the tokenizer we want to retain
tokenizer_filename = "tokenizer.model"

# Locate tokenizer.model in the temporary directory
source_file = os.path.join(temp_save_directory, tokenizer_filename)
destination_file = os.path.join(save_directory, tokenizer_filename)

# Copy the tokenizer.model file to the final directory, if it exists
if os.path.exists(source_file):
    shutil.copy(source_file, destination_file)
    print(f"tokenizer.model has been saved in {save_directory}")
else:
    print("No tokenizer.model file found in the downloaded tokenizer files.")

# Remove the temporary directory to clean up unnecessary files
shutil.rmtree(temp_save_directory)
```
?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"a29b152cee3048dda48af4bcc1410d15"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["tokenizer.json: 0%| | 0.00/1.84M [00:00, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"959c6762670c41a59fb73c4ab55938d8"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["special_tokens_map.json: 0%| | 0.00/414 [00:00, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"3698a1ad68984959a0163d36e17143f8"}},"metadata":{}},{"output_type":"stream","name":"stdout","text":["tokenizer.model has been saved in llama-2-7b-mini-ibased\n"]}]},{"cell_type":"code","source":["# Convert to fp16\n","fp16 = f\"{MODEL_NAME}/{MODEL_NAME.lower()}.fp16.bin\"\n","!python llama.cpp/convert.py {MODEL_NAME} --outtype f16 --outfile {fp16}"],"metadata":{"id":"fD24jJxq7t3k","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1706681281632,"user_tz":-480,"elapsed":157048,"user":{"displayName":"szehanz","userId":"16137883221268059572"}},"outputId":"9f6dea9e-5c3c-49c5-a831-9af204dd44ad"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Loading model file llama-2-7b-mini-ibased/model-00001-of-00003.safetensors\n","Loading model file llama-2-7b-mini-ibased/model-00001-of-00003.safetensors\n","Loading model file llama-2-7b-mini-ibased/model-00002-of-00003.safetensors\n","Loading model file llama-2-7b-mini-ibased/model-00003-of-00003.safetensors\n","params = Params(n_vocab=32000, n_embd=4096, n_layer=32, n_ctx=4096, n_ff=11008, n_head=32, n_head_kv=32, n_experts=None, n_experts_used=None, f_norm_eps=1e-05, rope_scaling_type=None, f_rope_freq_base=10000.0, f_rope_scale=None, n_orig_ctx=None, rope_finetuned=None, ftype=, path_model=PosixPath('llama-2-7b-mini-ibased'))\n","Found vocab files: {'tokenizer.model': PosixPath('llama-2-7b-mini-ibased/tokenizer.model'), 'vocab.json': None, 'tokenizer.json': PosixPath('llama-2-7b-mini-ibased/tokenizer.json')}\n","Loading vocab file 'llama-2-7b-mini-ibased/tokenizer.model', type 'spm'\n","Vocab info: \n","Special vocab info: \n","Permuting layer 0\n","Permuting layer 1\n","Permuting layer 2\n","Permuting layer 3\n","Permuting layer 4\n","Permuting layer 5\n","Permuting layer 6\n","Permuting layer 7\n","Permuting layer 8\n","Permuting layer 9\n","Permuting layer 10\n","Permuting layer 11\n","Permuting layer 12\n","Permuting layer 13\n","Permuting layer 14\n","Permuting layer 15\n","Permuting layer 16\n","Permuting layer 17\n","Permuting layer 18\n","Permuting layer 19\n","Permuting layer 20\n","Permuting layer 21\n","Permuting layer 22\n","Permuting layer 23\n","Permuting layer 24\n","Permuting layer 25\n","Permuting layer 26\n","Permuting layer 27\n","Permuting layer 28\n","Permuting layer 29\n","Permuting layer 30\n","Permuting layer 31\n","model.embed_tokens.weight -> token_embd.weight | F16 | [32000, 4096]\n","model.layers.0.input_layernorm.weight -> blk.0.attn_norm.weight | F16 | [4096]\n","model.layers.0.mlp.down_proj.weight -> blk.0.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.0.mlp.gate_proj.weight -> blk.0.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.0.mlp.up_proj.weight -> blk.0.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.0.post_attention_layernorm.weight -> blk.0.ffn_norm.weight | F16 | [4096]\n","model.layers.0.self_attn.k_proj.weight -> blk.0.attn_k.weight 
| F16 | [4096, 4096]\n","model.layers.0.self_attn.o_proj.weight -> blk.0.attn_output.weight | F16 | [4096, 4096]\n","model.layers.0.self_attn.q_proj.weight -> blk.0.attn_q.weight | F16 | [4096, 4096]\n","model.layers.0.self_attn.v_proj.weight -> blk.0.attn_v.weight | F16 | [4096, 4096]\n","model.layers.1.input_layernorm.weight -> blk.1.attn_norm.weight | F16 | [4096]\n","model.layers.1.mlp.down_proj.weight -> blk.1.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.1.mlp.gate_proj.weight -> blk.1.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.1.mlp.up_proj.weight -> blk.1.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.1.post_attention_layernorm.weight -> blk.1.ffn_norm.weight | F16 | [4096]\n","model.layers.1.self_attn.k_proj.weight -> blk.1.attn_k.weight | F16 | [4096, 4096]\n","model.layers.1.self_attn.o_proj.weight -> blk.1.attn_output.weight | F16 | [4096, 4096]\n","model.layers.1.self_attn.q_proj.weight -> blk.1.attn_q.weight | F16 | [4096, 4096]\n","model.layers.1.self_attn.v_proj.weight -> blk.1.attn_v.weight | F16 | [4096, 4096]\n","model.layers.10.input_layernorm.weight -> blk.10.attn_norm.weight | F16 | [4096]\n","model.layers.10.mlp.down_proj.weight -> blk.10.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.10.mlp.gate_proj.weight -> blk.10.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.10.mlp.up_proj.weight -> blk.10.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.10.post_attention_layernorm.weight -> blk.10.ffn_norm.weight | F16 | [4096]\n","model.layers.10.self_attn.k_proj.weight -> blk.10.attn_k.weight | F16 | [4096, 4096]\n","model.layers.10.self_attn.o_proj.weight -> blk.10.attn_output.weight | F16 | [4096, 4096]\n","model.layers.10.self_attn.q_proj.weight -> blk.10.attn_q.weight | F16 | [4096, 4096]\n","model.layers.10.self_attn.v_proj.weight -> blk.10.attn_v.weight | F16 | [4096, 4096]\n","model.layers.11.mlp.gate_proj.weight -> blk.11.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.11.self_attn.k_proj.weight -> blk.11.attn_k.weight | F16 | [4096, 4096]\n","model.layers.11.self_attn.o_proj.weight -> blk.11.attn_output.weight | F16 | [4096, 4096]\n","model.layers.11.self_attn.q_proj.weight -> blk.11.attn_q.weight | F16 | [4096, 4096]\n","model.layers.11.self_attn.v_proj.weight -> blk.11.attn_v.weight | F16 | [4096, 4096]\n","model.layers.2.input_layernorm.weight -> blk.2.attn_norm.weight | F16 | [4096]\n","model.layers.2.mlp.down_proj.weight -> blk.2.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.2.mlp.gate_proj.weight -> blk.2.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.2.mlp.up_proj.weight -> blk.2.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.2.post_attention_layernorm.weight -> blk.2.ffn_norm.weight | F16 | [4096]\n","model.layers.2.self_attn.k_proj.weight -> blk.2.attn_k.weight | F16 | [4096, 4096]\n","model.layers.2.self_attn.o_proj.weight -> blk.2.attn_output.weight | F16 | [4096, 4096]\n","model.layers.2.self_attn.q_proj.weight -> blk.2.attn_q.weight | F16 | [4096, 4096]\n","model.layers.2.self_attn.v_proj.weight -> blk.2.attn_v.weight | F16 | [4096, 4096]\n","model.layers.3.input_layernorm.weight -> blk.3.attn_norm.weight | F16 | [4096]\n","model.layers.3.mlp.down_proj.weight -> blk.3.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.3.mlp.gate_proj.weight -> blk.3.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.3.mlp.up_proj.weight -> blk.3.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.3.post_attention_layernorm.weight -> blk.3.ffn_norm.weight | F16 | 
[4096]\n","model.layers.3.self_attn.k_proj.weight -> blk.3.attn_k.weight | F16 | [4096, 4096]\n","model.layers.3.self_attn.o_proj.weight -> blk.3.attn_output.weight | F16 | [4096, 4096]\n","model.layers.3.self_attn.q_proj.weight -> blk.3.attn_q.weight | F16 | [4096, 4096]\n","model.layers.3.self_attn.v_proj.weight -> blk.3.attn_v.weight | F16 | [4096, 4096]\n","model.layers.4.input_layernorm.weight -> blk.4.attn_norm.weight | F16 | [4096]\n","model.layers.4.mlp.down_proj.weight -> blk.4.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.4.mlp.gate_proj.weight -> blk.4.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.4.mlp.up_proj.weight -> blk.4.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.4.post_attention_layernorm.weight -> blk.4.ffn_norm.weight | F16 | [4096]\n","model.layers.4.self_attn.k_proj.weight -> blk.4.attn_k.weight | F16 | [4096, 4096]\n","model.layers.4.self_attn.o_proj.weight -> blk.4.attn_output.weight | F16 | [4096, 4096]\n","model.layers.4.self_attn.q_proj.weight -> blk.4.attn_q.weight | F16 | [4096, 4096]\n","model.layers.4.self_attn.v_proj.weight -> blk.4.attn_v.weight | F16 | [4096, 4096]\n","model.layers.5.input_layernorm.weight -> blk.5.attn_norm.weight | F16 | [4096]\n","model.layers.5.mlp.down_proj.weight -> blk.5.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.5.mlp.gate_proj.weight -> blk.5.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.5.mlp.up_proj.weight -> blk.5.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.5.post_attention_layernorm.weight -> blk.5.ffn_norm.weight | F16 | [4096]\n","model.layers.5.self_attn.k_proj.weight -> blk.5.attn_k.weight | F16 | [4096, 4096]\n","model.layers.5.self_attn.o_proj.weight -> blk.5.attn_output.weight | F16 | [4096, 4096]\n","model.layers.5.self_attn.q_proj.weight -> blk.5.attn_q.weight | F16 | [4096, 4096]\n","model.layers.5.self_attn.v_proj.weight -> blk.5.attn_v.weight | F16 | [4096, 4096]\n","model.layers.6.input_layernorm.weight -> blk.6.attn_norm.weight | F16 | [4096]\n","model.layers.6.mlp.down_proj.weight -> blk.6.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.6.mlp.gate_proj.weight -> blk.6.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.6.mlp.up_proj.weight -> blk.6.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.6.post_attention_layernorm.weight -> blk.6.ffn_norm.weight | F16 | [4096]\n","model.layers.6.self_attn.k_proj.weight -> blk.6.attn_k.weight | F16 | [4096, 4096]\n","model.layers.6.self_attn.o_proj.weight -> blk.6.attn_output.weight | F16 | [4096, 4096]\n","model.layers.6.self_attn.q_proj.weight -> blk.6.attn_q.weight | F16 | [4096, 4096]\n","model.layers.6.self_attn.v_proj.weight -> blk.6.attn_v.weight | F16 | [4096, 4096]\n","model.layers.7.input_layernorm.weight -> blk.7.attn_norm.weight | F16 | [4096]\n","model.layers.7.mlp.down_proj.weight -> blk.7.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.7.mlp.gate_proj.weight -> blk.7.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.7.mlp.up_proj.weight -> blk.7.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.7.post_attention_layernorm.weight -> blk.7.ffn_norm.weight | F16 | [4096]\n","model.layers.7.self_attn.k_proj.weight -> blk.7.attn_k.weight | F16 | [4096, 4096]\n","model.layers.7.self_attn.o_proj.weight -> blk.7.attn_output.weight | F16 | [4096, 4096]\n","model.layers.7.self_attn.q_proj.weight -> blk.7.attn_q.weight | F16 | [4096, 4096]\n","model.layers.7.self_attn.v_proj.weight -> blk.7.attn_v.weight | F16 | [4096, 4096]\n","model.layers.8.input_layernorm.weight -> 
blk.8.attn_norm.weight | F16 | [4096]\n","model.layers.8.mlp.down_proj.weight -> blk.8.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.8.mlp.gate_proj.weight -> blk.8.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.8.mlp.up_proj.weight -> blk.8.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.8.post_attention_layernorm.weight -> blk.8.ffn_norm.weight | F16 | [4096]\n","model.layers.8.self_attn.k_proj.weight -> blk.8.attn_k.weight | F16 | [4096, 4096]\n","model.layers.8.self_attn.o_proj.weight -> blk.8.attn_output.weight | F16 | [4096, 4096]\n","model.layers.8.self_attn.q_proj.weight -> blk.8.attn_q.weight | F16 | [4096, 4096]\n","model.layers.8.self_attn.v_proj.weight -> blk.8.attn_v.weight | F16 | [4096, 4096]\n","model.layers.9.input_layernorm.weight -> blk.9.attn_norm.weight | F16 | [4096]\n","model.layers.9.mlp.down_proj.weight -> blk.9.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.9.mlp.gate_proj.weight -> blk.9.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.9.mlp.up_proj.weight -> blk.9.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.9.post_attention_layernorm.weight -> blk.9.ffn_norm.weight | F16 | [4096]\n","model.layers.9.self_attn.k_proj.weight -> blk.9.attn_k.weight | F16 | [4096, 4096]\n","model.layers.9.self_attn.o_proj.weight -> blk.9.attn_output.weight | F16 | [4096, 4096]\n","model.layers.9.self_attn.q_proj.weight -> blk.9.attn_q.weight | F16 | [4096, 4096]\n","model.layers.9.self_attn.v_proj.weight -> blk.9.attn_v.weight | F16 | [4096, 4096]\n","model.layers.11.input_layernorm.weight -> blk.11.attn_norm.weight | F16 | [4096]\n","model.layers.11.mlp.down_proj.weight -> blk.11.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.11.mlp.up_proj.weight -> blk.11.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.11.post_attention_layernorm.weight -> blk.11.ffn_norm.weight | F16 | [4096]\n","model.layers.12.input_layernorm.weight -> blk.12.attn_norm.weight | F16 | [4096]\n","model.layers.12.mlp.down_proj.weight -> blk.12.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.12.mlp.gate_proj.weight -> blk.12.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.12.mlp.up_proj.weight -> blk.12.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.12.post_attention_layernorm.weight -> blk.12.ffn_norm.weight | F16 | [4096]\n","model.layers.12.self_attn.k_proj.weight -> blk.12.attn_k.weight | F16 | [4096, 4096]\n","model.layers.12.self_attn.o_proj.weight -> blk.12.attn_output.weight | F16 | [4096, 4096]\n","model.layers.12.self_attn.q_proj.weight -> blk.12.attn_q.weight | F16 | [4096, 4096]\n","model.layers.12.self_attn.v_proj.weight -> blk.12.attn_v.weight | F16 | [4096, 4096]\n","model.layers.13.input_layernorm.weight -> blk.13.attn_norm.weight | F16 | [4096]\n","model.layers.13.mlp.down_proj.weight -> blk.13.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.13.mlp.gate_proj.weight -> blk.13.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.13.mlp.up_proj.weight -> blk.13.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.13.post_attention_layernorm.weight -> blk.13.ffn_norm.weight | F16 | [4096]\n","model.layers.13.self_attn.k_proj.weight -> blk.13.attn_k.weight | F16 | [4096, 4096]\n","model.layers.13.self_attn.o_proj.weight -> blk.13.attn_output.weight | F16 | [4096, 4096]\n","model.layers.13.self_attn.q_proj.weight -> blk.13.attn_q.weight | F16 | [4096, 4096]\n","model.layers.13.self_attn.v_proj.weight -> blk.13.attn_v.weight | F16 | [4096, 4096]\n","model.layers.14.input_layernorm.weight -> blk.14.attn_norm.weight | F16 
| [4096]\n","model.layers.14.mlp.down_proj.weight -> blk.14.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.14.mlp.gate_proj.weight -> blk.14.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.14.mlp.up_proj.weight -> blk.14.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.14.post_attention_layernorm.weight -> blk.14.ffn_norm.weight | F16 | [4096]\n","model.layers.14.self_attn.k_proj.weight -> blk.14.attn_k.weight | F16 | [4096, 4096]\n","model.layers.14.self_attn.o_proj.weight -> blk.14.attn_output.weight | F16 | [4096, 4096]\n","model.layers.14.self_attn.q_proj.weight -> blk.14.attn_q.weight | F16 | [4096, 4096]\n","model.layers.14.self_attn.v_proj.weight -> blk.14.attn_v.weight | F16 | [4096, 4096]\n","model.layers.15.input_layernorm.weight -> blk.15.attn_norm.weight | F16 | [4096]\n","model.layers.15.mlp.down_proj.weight -> blk.15.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.15.mlp.gate_proj.weight -> blk.15.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.15.mlp.up_proj.weight -> blk.15.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.15.post_attention_layernorm.weight -> blk.15.ffn_norm.weight | F16 | [4096]\n","model.layers.15.self_attn.k_proj.weight -> blk.15.attn_k.weight | F16 | [4096, 4096]\n","model.layers.15.self_attn.o_proj.weight -> blk.15.attn_output.weight | F16 | [4096, 4096]\n","model.layers.15.self_attn.q_proj.weight -> blk.15.attn_q.weight | F16 | [4096, 4096]\n","model.layers.15.self_attn.v_proj.weight -> blk.15.attn_v.weight | F16 | [4096, 4096]\n","model.layers.16.input_layernorm.weight -> blk.16.attn_norm.weight | F16 | [4096]\n","model.layers.16.mlp.down_proj.weight -> blk.16.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.16.mlp.gate_proj.weight -> blk.16.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.16.mlp.up_proj.weight -> blk.16.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.16.post_attention_layernorm.weight -> blk.16.ffn_norm.weight | F16 | [4096]\n","model.layers.16.self_attn.k_proj.weight -> blk.16.attn_k.weight | F16 | [4096, 4096]\n","model.layers.16.self_attn.o_proj.weight -> blk.16.attn_output.weight | F16 | [4096, 4096]\n","model.layers.16.self_attn.q_proj.weight -> blk.16.attn_q.weight | F16 | [4096, 4096]\n","model.layers.16.self_attn.v_proj.weight -> blk.16.attn_v.weight | F16 | [4096, 4096]\n","model.layers.17.input_layernorm.weight -> blk.17.attn_norm.weight | F16 | [4096]\n","model.layers.17.mlp.down_proj.weight -> blk.17.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.17.mlp.gate_proj.weight -> blk.17.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.17.mlp.up_proj.weight -> blk.17.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.17.post_attention_layernorm.weight -> blk.17.ffn_norm.weight | F16 | [4096]\n","model.layers.17.self_attn.k_proj.weight -> blk.17.attn_k.weight | F16 | [4096, 4096]\n","model.layers.17.self_attn.o_proj.weight -> blk.17.attn_output.weight | F16 | [4096, 4096]\n","model.layers.17.self_attn.q_proj.weight -> blk.17.attn_q.weight | F16 | [4096, 4096]\n","model.layers.17.self_attn.v_proj.weight -> blk.17.attn_v.weight | F16 | [4096, 4096]\n","model.layers.18.input_layernorm.weight -> blk.18.attn_norm.weight | F16 | [4096]\n","model.layers.18.mlp.down_proj.weight -> blk.18.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.18.mlp.gate_proj.weight -> blk.18.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.18.mlp.up_proj.weight -> blk.18.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.18.post_attention_layernorm.weight -> 
blk.18.ffn_norm.weight | F16 | [4096]\n","model.layers.18.self_attn.k_proj.weight -> blk.18.attn_k.weight | F16 | [4096, 4096]\n","model.layers.18.self_attn.o_proj.weight -> blk.18.attn_output.weight | F16 | [4096, 4096]\n","model.layers.18.self_attn.q_proj.weight -> blk.18.attn_q.weight | F16 | [4096, 4096]\n","model.layers.18.self_attn.v_proj.weight -> blk.18.attn_v.weight | F16 | [4096, 4096]\n","model.layers.19.input_layernorm.weight -> blk.19.attn_norm.weight | F16 | [4096]\n","model.layers.19.mlp.down_proj.weight -> blk.19.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.19.mlp.gate_proj.weight -> blk.19.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.19.mlp.up_proj.weight -> blk.19.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.19.post_attention_layernorm.weight -> blk.19.ffn_norm.weight | F16 | [4096]\n","model.layers.19.self_attn.k_proj.weight -> blk.19.attn_k.weight | F16 | [4096, 4096]\n","model.layers.19.self_attn.o_proj.weight -> blk.19.attn_output.weight | F16 | [4096, 4096]\n","model.layers.19.self_attn.q_proj.weight -> blk.19.attn_q.weight | F16 | [4096, 4096]\n","model.layers.19.self_attn.v_proj.weight -> blk.19.attn_v.weight | F16 | [4096, 4096]\n","model.layers.20.input_layernorm.weight -> blk.20.attn_norm.weight | F16 | [4096]\n","model.layers.20.mlp.down_proj.weight -> blk.20.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.20.mlp.gate_proj.weight -> blk.20.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.20.mlp.up_proj.weight -> blk.20.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.20.post_attention_layernorm.weight -> blk.20.ffn_norm.weight | F16 | [4096]\n","model.layers.20.self_attn.k_proj.weight -> blk.20.attn_k.weight | F16 | [4096, 4096]\n","model.layers.20.self_attn.o_proj.weight -> blk.20.attn_output.weight | F16 | [4096, 4096]\n","model.layers.20.self_attn.q_proj.weight -> blk.20.attn_q.weight | F16 | [4096, 4096]\n","model.layers.20.self_attn.v_proj.weight -> blk.20.attn_v.weight | F16 | [4096, 4096]\n","model.layers.21.input_layernorm.weight -> blk.21.attn_norm.weight | F16 | [4096]\n","model.layers.21.mlp.down_proj.weight -> blk.21.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.21.mlp.gate_proj.weight -> blk.21.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.21.mlp.up_proj.weight -> blk.21.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.21.post_attention_layernorm.weight -> blk.21.ffn_norm.weight | F16 | [4096]\n","model.layers.21.self_attn.k_proj.weight -> blk.21.attn_k.weight | F16 | [4096, 4096]\n","model.layers.21.self_attn.o_proj.weight -> blk.21.attn_output.weight | F16 | [4096, 4096]\n","model.layers.21.self_attn.q_proj.weight -> blk.21.attn_q.weight | F16 | [4096, 4096]\n","model.layers.21.self_attn.v_proj.weight -> blk.21.attn_v.weight | F16 | [4096, 4096]\n","model.layers.22.input_layernorm.weight -> blk.22.attn_norm.weight | F16 | [4096]\n","model.layers.22.mlp.down_proj.weight -> blk.22.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.22.mlp.gate_proj.weight -> blk.22.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.22.mlp.up_proj.weight -> blk.22.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.22.post_attention_layernorm.weight -> blk.22.ffn_norm.weight | F16 | [4096]\n","model.layers.22.self_attn.k_proj.weight -> blk.22.attn_k.weight | F16 | [4096, 4096]\n","model.layers.22.self_attn.o_proj.weight -> blk.22.attn_output.weight | F16 | [4096, 4096]\n","model.layers.22.self_attn.q_proj.weight -> blk.22.attn_q.weight | F16 | [4096, 
4096]\n","model.layers.22.self_attn.v_proj.weight -> blk.22.attn_v.weight | F16 | [4096, 4096]\n","model.layers.23.mlp.gate_proj.weight -> blk.23.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.23.mlp.up_proj.weight -> blk.23.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.23.self_attn.k_proj.weight -> blk.23.attn_k.weight | F16 | [4096, 4096]\n","model.layers.23.self_attn.o_proj.weight -> blk.23.attn_output.weight | F16 | [4096, 4096]\n","model.layers.23.self_attn.q_proj.weight -> blk.23.attn_q.weight | F16 | [4096, 4096]\n","model.layers.23.self_attn.v_proj.weight -> blk.23.attn_v.weight | F16 | [4096, 4096]\n","lm_head.weight -> output.weight | F16 | [32000, 4096]\n","model.layers.23.input_layernorm.weight -> blk.23.attn_norm.weight | F16 | [4096]\n","model.layers.23.mlp.down_proj.weight -> blk.23.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.23.post_attention_layernorm.weight -> blk.23.ffn_norm.weight | F16 | [4096]\n","model.layers.24.input_layernorm.weight -> blk.24.attn_norm.weight | F16 | [4096]\n","model.layers.24.mlp.down_proj.weight -> blk.24.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.24.mlp.gate_proj.weight -> blk.24.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.24.mlp.up_proj.weight -> blk.24.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.24.post_attention_layernorm.weight -> blk.24.ffn_norm.weight | F16 | [4096]\n","model.layers.24.self_attn.k_proj.weight -> blk.24.attn_k.weight | F16 | [4096, 4096]\n","model.layers.24.self_attn.o_proj.weight -> blk.24.attn_output.weight | F16 | [4096, 4096]\n","model.layers.24.self_attn.q_proj.weight -> blk.24.attn_q.weight | F16 | [4096, 4096]\n","model.layers.24.self_attn.v_proj.weight -> blk.24.attn_v.weight | F16 | [4096, 4096]\n","model.layers.25.input_layernorm.weight -> blk.25.attn_norm.weight | F16 | [4096]\n","model.layers.25.mlp.down_proj.weight -> blk.25.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.25.mlp.gate_proj.weight -> blk.25.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.25.mlp.up_proj.weight -> blk.25.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.25.post_attention_layernorm.weight -> blk.25.ffn_norm.weight | F16 | [4096]\n","model.layers.25.self_attn.k_proj.weight -> blk.25.attn_k.weight | F16 | [4096, 4096]\n","model.layers.25.self_attn.o_proj.weight -> blk.25.attn_output.weight | F16 | [4096, 4096]\n","model.layers.25.self_attn.q_proj.weight -> blk.25.attn_q.weight | F16 | [4096, 4096]\n","model.layers.25.self_attn.v_proj.weight -> blk.25.attn_v.weight | F16 | [4096, 4096]\n","model.layers.26.input_layernorm.weight -> blk.26.attn_norm.weight | F16 | [4096]\n","model.layers.26.mlp.down_proj.weight -> blk.26.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.26.mlp.gate_proj.weight -> blk.26.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.26.mlp.up_proj.weight -> blk.26.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.26.post_attention_layernorm.weight -> blk.26.ffn_norm.weight | F16 | [4096]\n","model.layers.26.self_attn.k_proj.weight -> blk.26.attn_k.weight | F16 | [4096, 4096]\n","model.layers.26.self_attn.o_proj.weight -> blk.26.attn_output.weight | F16 | [4096, 4096]\n","model.layers.26.self_attn.q_proj.weight -> blk.26.attn_q.weight | F16 | [4096, 4096]\n","model.layers.26.self_attn.v_proj.weight -> blk.26.attn_v.weight | F16 | [4096, 4096]\n","model.layers.27.input_layernorm.weight -> blk.27.attn_norm.weight | F16 | [4096]\n","model.layers.27.mlp.down_proj.weight -> blk.27.ffn_down.weight | F16 | [4096, 
11008]\n","model.layers.27.mlp.gate_proj.weight -> blk.27.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.27.mlp.up_proj.weight -> blk.27.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.27.post_attention_layernorm.weight -> blk.27.ffn_norm.weight | F16 | [4096]\n","model.layers.27.self_attn.k_proj.weight -> blk.27.attn_k.weight | F16 | [4096, 4096]\n","model.layers.27.self_attn.o_proj.weight -> blk.27.attn_output.weight | F16 | [4096, 4096]\n","model.layers.27.self_attn.q_proj.weight -> blk.27.attn_q.weight | F16 | [4096, 4096]\n","model.layers.27.self_attn.v_proj.weight -> blk.27.attn_v.weight | F16 | [4096, 4096]\n","model.layers.28.input_layernorm.weight -> blk.28.attn_norm.weight | F16 | [4096]\n","model.layers.28.mlp.down_proj.weight -> blk.28.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.28.mlp.gate_proj.weight -> blk.28.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.28.mlp.up_proj.weight -> blk.28.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.28.post_attention_layernorm.weight -> blk.28.ffn_norm.weight | F16 | [4096]\n","model.layers.28.self_attn.k_proj.weight -> blk.28.attn_k.weight | F16 | [4096, 4096]\n","model.layers.28.self_attn.o_proj.weight -> blk.28.attn_output.weight | F16 | [4096, 4096]\n","model.layers.28.self_attn.q_proj.weight -> blk.28.attn_q.weight | F16 | [4096, 4096]\n","model.layers.28.self_attn.v_proj.weight -> blk.28.attn_v.weight | F16 | [4096, 4096]\n","model.layers.29.input_layernorm.weight -> blk.29.attn_norm.weight | F16 | [4096]\n","model.layers.29.mlp.down_proj.weight -> blk.29.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.29.mlp.gate_proj.weight -> blk.29.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.29.mlp.up_proj.weight -> blk.29.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.29.post_attention_layernorm.weight -> blk.29.ffn_norm.weight | F16 | [4096]\n","model.layers.29.self_attn.k_proj.weight -> blk.29.attn_k.weight | F16 | [4096, 4096]\n","model.layers.29.self_attn.o_proj.weight -> blk.29.attn_output.weight | F16 | [4096, 4096]\n","model.layers.29.self_attn.q_proj.weight -> blk.29.attn_q.weight | F16 | [4096, 4096]\n","model.layers.29.self_attn.v_proj.weight -> blk.29.attn_v.weight | F16 | [4096, 4096]\n","model.layers.30.input_layernorm.weight -> blk.30.attn_norm.weight | F16 | [4096]\n","model.layers.30.mlp.down_proj.weight -> blk.30.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.30.mlp.gate_proj.weight -> blk.30.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.30.mlp.up_proj.weight -> blk.30.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.30.post_attention_layernorm.weight -> blk.30.ffn_norm.weight | F16 | [4096]\n","model.layers.30.self_attn.k_proj.weight -> blk.30.attn_k.weight | F16 | [4096, 4096]\n","model.layers.30.self_attn.o_proj.weight -> blk.30.attn_output.weight | F16 | [4096, 4096]\n","model.layers.30.self_attn.q_proj.weight -> blk.30.attn_q.weight | F16 | [4096, 4096]\n","model.layers.30.self_attn.v_proj.weight -> blk.30.attn_v.weight | F16 | [4096, 4096]\n","model.layers.31.input_layernorm.weight -> blk.31.attn_norm.weight | F16 | [4096]\n","model.layers.31.mlp.down_proj.weight -> blk.31.ffn_down.weight | F16 | [4096, 11008]\n","model.layers.31.mlp.gate_proj.weight -> blk.31.ffn_gate.weight | F16 | [11008, 4096]\n","model.layers.31.mlp.up_proj.weight -> blk.31.ffn_up.weight | F16 | [11008, 4096]\n","model.layers.31.post_attention_layernorm.weight -> blk.31.ffn_norm.weight | F16 | [4096]\n","model.layers.31.self_attn.k_proj.weight -> blk.31.attn_k.weight | 
F16 | [4096, 4096]\n","model.layers.31.self_attn.o_proj.weight -> blk.31.attn_output.weight | F16 | [4096, 4096]\n","model.layers.31.self_attn.q_proj.weight -> blk.31.attn_q.weight | F16 | [4096, 4096]\n","model.layers.31.self_attn.v_proj.weight -> blk.31.attn_v.weight | F16 | [4096, 4096]\n","model.norm.weight -> output_norm.weight | F16 | [4096]\n","Writing llama-2-7b-mini-ibased/llama-2-7b-mini-ibased.fp16.bin, format 1\n","Ignoring added_tokens.json since model matches vocab size without it.\n","gguf: This GGUF file is for Little Endian only\n","gguf: Setting special token type bos to 1\n","gguf: Setting special token type eos to 2\n","gguf: Setting special token type unk to 0\n","gguf: Setting special token type pad to 2\n","gguf: Setting add_bos_token to True\n","gguf: Setting add_eos_token to False\n","gguf: Setting chat_template to {% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}\n","[ 1/291] Writing tensor token_embd.weight | size 32000 x 4096 | type F16 | T+ 2\n","[ 2/291] Writing tensor blk.0.attn_norm.weight | size 4096 | type F32 | T+ 2\n","[ 3/291] Writing tensor blk.0.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 2\n","[ 4/291] Writing tensor blk.0.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 3\n","[ 5/291] Writing tensor blk.0.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 3\n","[ 6/291] Writing tensor blk.0.ffn_norm.weight | size 4096 | type F32 | T+ 4\n","[ 7/291] Writing tensor blk.0.attn_k.weight | size 4096 x 4096 | type F16 | T+ 4\n","[ 8/291] Writing tensor blk.0.attn_output.weight | size 4096 x 4096 | type F16 | T+ 4\n","[ 9/291] Writing tensor blk.0.attn_q.weight | size 4096 x 4096 | type F16 | T+ 4\n","[ 10/291] Writing tensor blk.0.attn_v.weight | size 4096 x 4096 | type F16 | T+ 4\n","[ 11/291] Writing tensor blk.1.attn_norm.weight | size 4096 | type F32 | T+ 5\n","[ 12/291] Writing tensor blk.1.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 6\n","[ 13/291] Writing tensor blk.1.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 9\n","[ 14/291] Writing tensor blk.1.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 9\n","[ 15/291] Writing tensor blk.1.ffn_norm.weight | size 4096 | type F32 | T+ 9\n","[ 16/291] Writing tensor blk.1.attn_k.weight | size 4096 x 4096 | type F16 | T+ 9\n","[ 17/291] Writing tensor blk.1.attn_output.weight | size 4096 x 4096 | type F16 | T+ 9\n","[ 18/291] Writing tensor blk.1.attn_q.weight | size 4096 x 4096 | type F16 | T+ 10\n","[ 19/291] Writing tensor blk.1.attn_v.weight | size 4096 x 4096 | type F16 | T+ 10\n","[ 20/291] Writing tensor blk.10.attn_norm.weight | size 4096 | type F32 | T+ 13\n","[ 21/291] Writing tensor blk.10.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 13\n","[ 22/291] Writing tensor blk.10.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 
14\n","[ 23/291] Writing tensor blk.10.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 14\n","[ 24/291] Writing tensor blk.10.ffn_norm.weight | size 4096 | type F32 | T+ 16\n","[ 25/291] Writing tensor blk.10.attn_k.weight | size 4096 x 4096 | type F16 | T+ 16\n","[ 26/291] Writing tensor blk.10.attn_output.weight | size 4096 x 4096 | type F16 | T+ 18\n","[ 27/291] Writing tensor blk.10.attn_q.weight | size 4096 x 4096 | type F16 | T+ 18\n","[ 28/291] Writing tensor blk.10.attn_v.weight | size 4096 x 4096 | type F16 | T+ 19\n","[ 29/291] Writing tensor blk.11.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 19\n","[ 30/291] Writing tensor blk.11.attn_k.weight | size 4096 x 4096 | type F16 | T+ 19\n","[ 31/291] Writing tensor blk.11.attn_output.weight | size 4096 x 4096 | type F16 | T+ 19\n","[ 32/291] Writing tensor blk.11.attn_q.weight | size 4096 x 4096 | type F16 | T+ 19\n","[ 33/291] Writing tensor blk.11.attn_v.weight | size 4096 x 4096 | type F16 | T+ 20\n","[ 34/291] Writing tensor blk.2.attn_norm.weight | size 4096 | type F32 | T+ 23\n","[ 35/291] Writing tensor blk.2.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 23\n","[ 36/291] Writing tensor blk.2.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 24\n","[ 37/291] Writing tensor blk.2.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 24\n","[ 38/291] Writing tensor blk.2.ffn_norm.weight | size 4096 | type F32 | T+ 24\n","[ 39/291] Writing tensor blk.2.attn_k.weight | size 4096 x 4096 | type F16 | T+ 24\n","[ 40/291] Writing tensor blk.2.attn_output.weight | size 4096 x 4096 | type F16 | T+ 24\n","[ 41/291] Writing tensor blk.2.attn_q.weight | size 4096 x 4096 | type F16 | T+ 25\n","[ 42/291] Writing tensor blk.2.attn_v.weight | size 4096 x 4096 | type F16 | T+ 25\n","[ 43/291] Writing tensor blk.3.attn_norm.weight | size 4096 | type F32 | T+ 25\n","[ 44/291] Writing tensor blk.3.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 26\n","[ 45/291] Writing tensor blk.3.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 26\n","[ 46/291] Writing tensor blk.3.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 27\n","[ 47/291] Writing tensor blk.3.ffn_norm.weight | size 4096 | type F32 | T+ 28\n","[ 48/291] Writing tensor blk.3.attn_k.weight | size 4096 x 4096 | type F16 | T+ 28\n","[ 49/291] Writing tensor blk.3.attn_output.weight | size 4096 x 4096 | type F16 | T+ 29\n","[ 50/291] Writing tensor blk.3.attn_q.weight | size 4096 x 4096 | type F16 | T+ 29\n","[ 51/291] Writing tensor blk.3.attn_v.weight | size 4096 x 4096 | type F16 | T+ 29\n","[ 52/291] Writing tensor blk.4.attn_norm.weight | size 4096 | type F32 | T+ 29\n","[ 53/291] Writing tensor blk.4.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 30\n","[ 54/291] Writing tensor blk.4.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 30\n","[ 55/291] Writing tensor blk.4.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 31\n","[ 56/291] Writing tensor blk.4.ffn_norm.weight | size 4096 | type F32 | T+ 34\n","[ 57/291] Writing tensor blk.4.attn_k.weight | size 4096 x 4096 | type F16 | T+ 34\n","[ 58/291] Writing tensor blk.4.attn_output.weight | size 4096 x 4096 | type F16 | T+ 34\n","[ 59/291] Writing tensor blk.4.attn_q.weight | size 4096 x 4096 | type F16 | T+ 34\n","[ 60/291] Writing tensor blk.4.attn_v.weight | size 4096 x 4096 | type F16 | T+ 34\n","[ 61/291] Writing tensor blk.5.attn_norm.weight | size 4096 | type F32 | T+ 34\n","[ 62/291] Writing tensor blk.5.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 34\n","[ 63/291] Writing tensor 
blk.5.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 38\n","[ 64/291] Writing tensor blk.5.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 39\n","[ 65/291] Writing tensor blk.5.ffn_norm.weight | size 4096 | type F32 | T+ 39\n","[ 66/291] Writing tensor blk.5.attn_k.weight | size 4096 x 4096 | type F16 | T+ 39\n","[ 67/291] Writing tensor blk.5.attn_output.weight | size 4096 x 4096 | type F16 | T+ 39\n","[ 68/291] Writing tensor blk.5.attn_q.weight | size 4096 x 4096 | type F16 | T+ 39\n","[ 69/291] Writing tensor blk.5.attn_v.weight | size 4096 x 4096 | type F16 | T+ 40\n","[ 70/291] Writing tensor blk.6.attn_norm.weight | size 4096 | type F32 | T+ 40\n","[ 71/291] Writing tensor blk.6.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 40\n","[ 72/291] Writing tensor blk.6.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 41\n","[ 73/291] Writing tensor blk.6.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 43\n","[ 74/291] Writing tensor blk.6.ffn_norm.weight | size 4096 | type F32 | T+ 44\n","[ 75/291] Writing tensor blk.6.attn_k.weight | size 4096 x 4096 | type F16 | T+ 44\n","[ 76/291] Writing tensor blk.6.attn_output.weight | size 4096 x 4096 | type F16 | T+ 44\n","[ 77/291] Writing tensor blk.6.attn_q.weight | size 4096 x 4096 | type F16 | T+ 44\n","[ 78/291] Writing tensor blk.6.attn_v.weight | size 4096 x 4096 | type F16 | T+ 44\n","[ 79/291] Writing tensor blk.7.attn_norm.weight | size 4096 | type F32 | T+ 44\n","[ 80/291] Writing tensor blk.7.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 44\n","[ 81/291] Writing tensor blk.7.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 45\n","[ 82/291] Writing tensor blk.7.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 46\n","[ 83/291] Writing tensor blk.7.ffn_norm.weight | size 4096 | type F32 | T+ 48\n","[ 84/291] Writing tensor blk.7.attn_k.weight | size 4096 x 4096 | type F16 | T+ 48\n","[ 85/291] Writing tensor blk.7.attn_output.weight | size 4096 x 4096 | type F16 | T+ 48\n","[ 86/291] Writing tensor blk.7.attn_q.weight | size 4096 x 4096 | type F16 | T+ 49\n","[ 87/291] Writing tensor blk.7.attn_v.weight | size 4096 x 4096 | type F16 | T+ 49\n","[ 88/291] Writing tensor blk.8.attn_norm.weight | size 4096 | type F32 | T+ 49\n","[ 89/291] Writing tensor blk.8.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 49\n","[ 90/291] Writing tensor blk.8.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 53\n","[ 91/291] Writing tensor blk.8.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 54\n","[ 92/291] Writing tensor blk.8.ffn_norm.weight | size 4096 | type F32 | T+ 54\n","[ 93/291] Writing tensor blk.8.attn_k.weight | size 4096 x 4096 | type F16 | T+ 54\n","[ 94/291] Writing tensor blk.8.attn_output.weight | size 4096 x 4096 | type F16 | T+ 54\n","[ 95/291] Writing tensor blk.8.attn_q.weight | size 4096 x 4096 | type F16 | T+ 54\n","[ 96/291] Writing tensor blk.8.attn_v.weight | size 4096 x 4096 | type F16 | T+ 54\n","[ 97/291] Writing tensor blk.9.attn_norm.weight | size 4096 | type F32 | T+ 55\n","[ 98/291] Writing tensor blk.9.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 55\n","[ 99/291] Writing tensor blk.9.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 56\n","[100/291] Writing tensor blk.9.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 56\n","[101/291] Writing tensor blk.9.ffn_norm.weight | size 4096 | type F32 | T+ 57\n","[102/291] Writing tensor blk.9.attn_k.weight | size 4096 x 4096 | type F16 | T+ 57\n","[103/291] Writing tensor blk.9.attn_output.weight | size 4096 x 4096 | type F16 | T+ 
57\n","[104/291] Writing tensor blk.9.attn_q.weight | size 4096 x 4096 | type F16 | T+ 58\n","[105/291] Writing tensor blk.9.attn_v.weight | size 4096 x 4096 | type F16 | T+ 58\n","[106/291] Writing tensor blk.11.attn_norm.weight | size 4096 | type F32 | T+ 59\n","[107/291] Writing tensor blk.11.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 59\n","[108/291] Writing tensor blk.11.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 60\n","[109/291] Writing tensor blk.11.ffn_norm.weight | size 4096 | type F32 | T+ 60\n","[110/291] Writing tensor blk.12.attn_norm.weight | size 4096 | type F32 | T+ 60\n","[111/291] Writing tensor blk.12.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 60\n","[112/291] Writing tensor blk.12.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 61\n","[113/291] Writing tensor blk.12.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 61\n","[114/291] Writing tensor blk.12.ffn_norm.weight | size 4096 | type F32 | T+ 61\n","[115/291] Writing tensor blk.12.attn_k.weight | size 4096 x 4096 | type F16 | T+ 61\n","[116/291] Writing tensor blk.12.attn_output.weight | size 4096 x 4096 | type F16 | T+ 62\n","[117/291] Writing tensor blk.12.attn_q.weight | size 4096 x 4096 | type F16 | T+ 62\n","[118/291] Writing tensor blk.12.attn_v.weight | size 4096 x 4096 | type F16 | T+ 62\n","[119/291] Writing tensor blk.13.attn_norm.weight | size 4096 | type F32 | T+ 62\n","[120/291] Writing tensor blk.13.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 63\n","[121/291] Writing tensor blk.13.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 63\n","[122/291] Writing tensor blk.13.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 64\n","[123/291] Writing tensor blk.13.ffn_norm.weight | size 4096 | type F32 | T+ 64\n","[124/291] Writing tensor blk.13.attn_k.weight | size 4096 x 4096 | type F16 | T+ 64\n","[125/291] Writing tensor blk.13.attn_output.weight | size 4096 x 4096 | type F16 | T+ 64\n","[126/291] Writing tensor blk.13.attn_q.weight | size 4096 x 4096 | type F16 | T+ 64\n","[127/291] Writing tensor blk.13.attn_v.weight | size 4096 x 4096 | type F16 | T+ 65\n","[128/291] Writing tensor blk.14.attn_norm.weight | size 4096 | type F32 | T+ 65\n","[129/291] Writing tensor blk.14.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 66\n","[130/291] Writing tensor blk.14.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 66\n","[131/291] Writing tensor blk.14.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 69\n","[132/291] Writing tensor blk.14.ffn_norm.weight | size 4096 | type F32 | T+ 69\n","[133/291] Writing tensor blk.14.attn_k.weight | size 4096 x 4096 | type F16 | T+ 69\n","[134/291] Writing tensor blk.14.attn_output.weight | size 4096 x 4096 | type F16 | T+ 69\n","[135/291] Writing tensor blk.14.attn_q.weight | size 4096 x 4096 | type F16 | T+ 69\n","[136/291] Writing tensor blk.14.attn_v.weight | size 4096 x 4096 | type F16 | T+ 69\n","[137/291] Writing tensor blk.15.attn_norm.weight | size 4096 | type F32 | T+ 70\n","[138/291] Writing tensor blk.15.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 71\n","[139/291] Writing tensor blk.15.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 74\n","[140/291] Writing tensor blk.15.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 74\n","[141/291] Writing tensor blk.15.ffn_norm.weight | size 4096 | type F32 | T+ 75\n","[142/291] Writing tensor blk.15.attn_k.weight | size 4096 x 4096 | type F16 | T+ 75\n","[143/291] Writing tensor blk.15.attn_output.weight | size 4096 x 4096 | type F16 | T+ 78\n","[144/291] Writing 
tensor blk.15.attn_q.weight | size 4096 x 4096 | type F16 | T+ 79\n","[145/291] Writing tensor blk.15.attn_v.weight | size 4096 x 4096 | type F16 | T+ 79\n","[146/291] Writing tensor blk.16.attn_norm.weight | size 4096 | type F32 | T+ 79\n","[147/291] Writing tensor blk.16.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 79\n","[148/291] Writing tensor blk.16.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 79\n","[149/291] Writing tensor blk.16.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 82\n","[150/291] Writing tensor blk.16.ffn_norm.weight | size 4096 | type F32 | T+ 84\n","[151/291] Writing tensor blk.16.attn_k.weight | size 4096 x 4096 | type F16 | T+ 84\n","[152/291] Writing tensor blk.16.attn_output.weight | size 4096 x 4096 | type F16 | T+ 84\n","[153/291] Writing tensor blk.16.attn_q.weight | size 4096 x 4096 | type F16 | T+ 84\n","[154/291] Writing tensor blk.16.attn_v.weight | size 4096 x 4096 | type F16 | T+ 84\n","[155/291] Writing tensor blk.17.attn_norm.weight | size 4096 | type F32 | T+ 85\n","[156/291] Writing tensor blk.17.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 85\n","[157/291] Writing tensor blk.17.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 89\n","[158/291] Writing tensor blk.17.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 89\n","[159/291] Writing tensor blk.17.ffn_norm.weight | size 4096 | type F32 | T+ 90\n","[160/291] Writing tensor blk.17.attn_k.weight | size 4096 x 4096 | type F16 | T+ 90\n","[161/291] Writing tensor blk.17.attn_output.weight | size 4096 x 4096 | type F16 | T+ 90\n","[162/291] Writing tensor blk.17.attn_q.weight | size 4096 x 4096 | type F16 | T+ 90\n","[163/291] Writing tensor blk.17.attn_v.weight | size 4096 x 4096 | type F16 | T+ 90\n","[164/291] Writing tensor blk.18.attn_norm.weight | size 4096 | type F32 | T+ 91\n","[165/291] Writing tensor blk.18.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 91\n","[166/291] Writing tensor blk.18.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 91\n","[167/291] Writing tensor blk.18.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 94\n","[168/291] Writing tensor blk.18.ffn_norm.weight | size 4096 | type F32 | T+ 94\n","[169/291] Writing tensor blk.18.attn_k.weight | size 4096 x 4096 | type F16 | T+ 94\n","[170/291] Writing tensor blk.18.attn_output.weight | size 4096 x 4096 | type F16 | T+ 94\n","[171/291] Writing tensor blk.18.attn_q.weight | size 4096 x 4096 | type F16 | T+ 95\n","[172/291] Writing tensor blk.18.attn_v.weight | size 4096 x 4096 | type F16 | T+ 95\n","[173/291] Writing tensor blk.19.attn_norm.weight | size 4096 | type F32 | T+ 95\n","[174/291] Writing tensor blk.19.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 95\n","[175/291] Writing tensor blk.19.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 99\n","[176/291] Writing tensor blk.19.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 99\n","[177/291] Writing tensor blk.19.ffn_norm.weight | size 4096 | type F32 | T+ 100\n","[178/291] Writing tensor blk.19.attn_k.weight | size 4096 x 4096 | type F16 | T+ 100\n","[179/291] Writing tensor blk.19.attn_output.weight | size 4096 x 4096 | type F16 | T+ 100\n","[180/291] Writing tensor blk.19.attn_q.weight | size 4096 x 4096 | type F16 | T+ 100\n","[181/291] Writing tensor blk.19.attn_v.weight | size 4096 x 4096 | type F16 | T+ 103\n","[182/291] Writing tensor blk.20.attn_norm.weight | size 4096 | type F32 | T+ 104\n","[183/291] Writing tensor blk.20.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 104\n","[184/291] Writing tensor 
blk.20.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 104\n","[185/291] Writing tensor blk.20.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 105\n","[186/291] Writing tensor blk.20.ffn_norm.weight | size 4096 | type F32 | T+ 105\n","[187/291] Writing tensor blk.20.attn_k.weight | size 4096 x 4096 | type F16 | T+ 105\n","[188/291] Writing tensor blk.20.attn_output.weight | size 4096 x 4096 | type F16 | T+ 105\n","[189/291] Writing tensor blk.20.attn_q.weight | size 4096 x 4096 | type F16 | T+ 105\n","[190/291] Writing tensor blk.20.attn_v.weight | size 4096 x 4096 | type F16 | T+ 105\n","[191/291] Writing tensor blk.21.attn_norm.weight | size 4096 | type F32 | T+ 105\n","[192/291] Writing tensor blk.21.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 106\n","[193/291] Writing tensor blk.21.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 106\n","[194/291] Writing tensor blk.21.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 107\n","[195/291] Writing tensor blk.21.ffn_norm.weight | size 4096 | type F32 | T+ 109\n","[196/291] Writing tensor blk.21.attn_k.weight | size 4096 x 4096 | type F16 | T+ 109\n","[197/291] Writing tensor blk.21.attn_output.weight | size 4096 x 4096 | type F16 | T+ 109\n","[198/291] Writing tensor blk.21.attn_q.weight | size 4096 x 4096 | type F16 | T+ 109\n","[199/291] Writing tensor blk.21.attn_v.weight | size 4096 x 4096 | type F16 | T+ 109\n","[200/291] Writing tensor blk.22.attn_norm.weight | size 4096 | type F32 | T+ 109\n","[201/291] Writing tensor blk.22.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 109\n","[202/291] Writing tensor blk.22.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 113\n","[203/291] Writing tensor blk.22.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 114\n","[204/291] Writing tensor blk.22.ffn_norm.weight | size 4096 | type F32 | T+ 114\n","[205/291] Writing tensor blk.22.attn_k.weight | size 4096 x 4096 | type F16 | T+ 114\n","[206/291] Writing tensor blk.22.attn_output.weight | size 4096 x 4096 | type F16 | T+ 114\n","[207/291] Writing tensor blk.22.attn_q.weight | size 4096 x 4096 | type F16 | T+ 115\n","[208/291] Writing tensor blk.22.attn_v.weight | size 4096 x 4096 | type F16 | T+ 118\n","[209/291] Writing tensor blk.23.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 119\n","[210/291] Writing tensor blk.23.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 119\n","[211/291] Writing tensor blk.23.attn_k.weight | size 4096 x 4096 | type F16 | T+ 119\n","[212/291] Writing tensor blk.23.attn_output.weight | size 4096 x 4096 | type F16 | T+ 120\n","[213/291] Writing tensor blk.23.attn_q.weight | size 4096 x 4096 | type F16 | T+ 120\n","[214/291] Writing tensor blk.23.attn_v.weight | size 4096 x 4096 | type F16 | T+ 120\n","[215/291] Writing tensor output.weight | size 32000 x 4096 | type F16 | T+ 120\n","[216/291] Writing tensor blk.23.attn_norm.weight | size 4096 | type F32 | T+ 121\n","[217/291] Writing tensor blk.23.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 121\n","[218/291] Writing tensor blk.23.ffn_norm.weight | size 4096 | type F32 | T+ 124\n","[219/291] Writing tensor blk.24.attn_norm.weight | size 4096 | type F32 | T+ 124\n","[220/291] Writing tensor blk.24.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 124\n","[221/291] Writing tensor blk.24.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 124\n","[222/291] Writing tensor blk.24.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 125\n","[223/291] Writing tensor blk.24.ffn_norm.weight | size 4096 | type F32 | T+ 125\n","[224/291] 
Writing tensor blk.24.attn_k.weight | size 4096 x 4096 | type F16 | T+ 125\n","[225/291] Writing tensor blk.24.attn_output.weight | size 4096 x 4096 | type F16 | T+ 125\n","[226/291] Writing tensor blk.24.attn_q.weight | size 4096 x 4096 | type F16 | T+ 125\n","[227/291] Writing tensor blk.24.attn_v.weight | size 4096 x 4096 | type F16 | T+ 125\n","[228/291] Writing tensor blk.25.attn_norm.weight | size 4096 | type F32 | T+ 125\n","[229/291] Writing tensor blk.25.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 127\n","[230/291] Writing tensor blk.25.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 127\n","[231/291] Writing tensor blk.25.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 127\n","[232/291] Writing tensor blk.25.ffn_norm.weight | size 4096 | type F32 | T+ 129\n","[233/291] Writing tensor blk.25.attn_k.weight | size 4096 x 4096 | type F16 | T+ 129\n","[234/291] Writing tensor blk.25.attn_output.weight | size 4096 x 4096 | type F16 | T+ 129\n","[235/291] Writing tensor blk.25.attn_q.weight | size 4096 x 4096 | type F16 | T+ 129\n","[236/291] Writing tensor blk.25.attn_v.weight | size 4096 x 4096 | type F16 | T+ 129\n","[237/291] Writing tensor blk.26.attn_norm.weight | size 4096 | type F32 | T+ 129\n","[238/291] Writing tensor blk.26.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 130\n","[239/291] Writing tensor blk.26.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 131\n","[240/291] Writing tensor blk.26.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 131\n","[241/291] Writing tensor blk.26.ffn_norm.weight | size 4096 | type F32 | T+ 131\n","[242/291] Writing tensor blk.26.attn_k.weight | size 4096 x 4096 | type F16 | T+ 131\n","[243/291] Writing tensor blk.26.attn_output.weight | size 4096 x 4096 | type F16 | T+ 132\n","[244/291] Writing tensor blk.26.attn_q.weight | size 4096 x 4096 | type F16 | T+ 132\n","[245/291] Writing tensor blk.26.attn_v.weight | size 4096 x 4096 | type F16 | T+ 132\n","[246/291] Writing tensor blk.27.attn_norm.weight | size 4096 | type F32 | T+ 132\n","[247/291] Writing tensor blk.27.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 133\n","[248/291] Writing tensor blk.27.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 134\n","[249/291] Writing tensor blk.27.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 134\n","[250/291] Writing tensor blk.27.ffn_norm.weight | size 4096 | type F32 | T+ 135\n","[251/291] Writing tensor blk.27.attn_k.weight | size 4096 x 4096 | type F16 | T+ 135\n","[252/291] Writing tensor blk.27.attn_output.weight | size 4096 x 4096 | type F16 | T+ 139\n","[253/291] Writing tensor blk.27.attn_q.weight | size 4096 x 4096 | type F16 | T+ 139\n","[254/291] Writing tensor blk.27.attn_v.weight | size 4096 x 4096 | type F16 | T+ 139\n","[255/291] Writing tensor blk.28.attn_norm.weight | size 4096 | type F32 | T+ 139\n","[256/291] Writing tensor blk.28.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 139\n","[257/291] Writing tensor blk.28.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 140\n","[258/291] Writing tensor blk.28.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 140\n","[259/291] Writing tensor blk.28.ffn_norm.weight | size 4096 | type F32 | T+ 140\n","[260/291] Writing tensor blk.28.attn_k.weight | size 4096 x 4096 | type F16 | T+ 140\n","[261/291] Writing tensor blk.28.attn_output.weight | size 4096 x 4096 | type F16 | T+ 141\n","[262/291] Writing tensor blk.28.attn_q.weight | size 4096 x 4096 | type F16 | T+ 141\n","[263/291] Writing tensor blk.28.attn_v.weight | size 4096 x 4096 | type F16 
| T+ 141\n","[264/291] Writing tensor blk.29.attn_norm.weight | size 4096 | type F32 | T+ 141\n","[265/291] Writing tensor blk.29.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 142\n","[266/291] Writing tensor blk.29.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 144\n","[267/291] Writing tensor blk.29.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 144\n","[268/291] Writing tensor blk.29.ffn_norm.weight | size 4096 | type F32 | T+ 144\n","[269/291] Writing tensor blk.29.attn_k.weight | size 4096 x 4096 | type F16 | T+ 144\n","[270/291] Writing tensor blk.29.attn_output.weight | size 4096 x 4096 | type F16 | T+ 145\n","[271/291] Writing tensor blk.29.attn_q.weight | size 4096 x 4096 | type F16 | T+ 145\n","[272/291] Writing tensor blk.29.attn_v.weight | size 4096 x 4096 | type F16 | T+ 149\n","[273/291] Writing tensor blk.30.attn_norm.weight | size 4096 | type F32 | T+ 149\n","[274/291] Writing tensor blk.30.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 149\n","[275/291] Writing tensor blk.30.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 149\n","[276/291] Writing tensor blk.30.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 150\n","[277/291] Writing tensor blk.30.ffn_norm.weight | size 4096 | type F32 | T+ 150\n","[278/291] Writing tensor blk.30.attn_k.weight | size 4096 x 4096 | type F16 | T+ 150\n","[279/291] Writing tensor blk.30.attn_output.weight | size 4096 x 4096 | type F16 | T+ 150\n","[280/291] Writing tensor blk.30.attn_q.weight | size 4096 x 4096 | type F16 | T+ 150\n","[281/291] Writing tensor blk.30.attn_v.weight | size 4096 x 4096 | type F16 | T+ 150\n","[282/291] Writing tensor blk.31.attn_norm.weight | size 4096 | type F32 | T+ 151\n","[283/291] Writing tensor blk.31.ffn_down.weight | size 4096 x 11008 | type F16 | T+ 151\n","[284/291] Writing tensor blk.31.ffn_gate.weight | size 11008 x 4096 | type F16 | T+ 152\n","[285/291] Writing tensor blk.31.ffn_up.weight | size 11008 x 4096 | type F16 | T+ 154\n","[286/291] Writing tensor blk.31.ffn_norm.weight | size 4096 | type F32 | T+ 154\n","[287/291] Writing tensor blk.31.attn_k.weight | size 4096 x 4096 | type F16 | T+ 154\n","[288/291] Writing tensor blk.31.attn_output.weight | size 4096 x 4096 | type F16 | T+ 155\n","[289/291] Writing tensor blk.31.attn_q.weight | size 4096 x 4096 | type F16 | T+ 155\n","[290/291] Writing tensor blk.31.attn_v.weight | size 4096 x 4096 | type F16 | T+ 155\n","[291/291] Writing tensor output_norm.weight | size 4096 | type F32 | T+ 155\n","Wrote llama-2-7b-mini-ibased/llama-2-7b-mini-ibased.fp16.bin\n"]}]},{"cell_type":"code","source":["# Verify creation of FP16 file and quantize the model for specified methods.\n","# First, check if the FP16 model file exists, indicating successful conversion.\n","# If the file does not exist, terminate the script to prevent further errors.\n","# Then, for each quantization method listed, perform model quantization,\n","# generating a quantized model file for each method.\n","\n","\n","if os.path.exists(fp16):\n"," print(f\"FP16 file created successfully: {fp16}\")\n","else:\n"," print(f\"Failed to create FP16 file at: {fp16}\")\n"," import sys\n"," sys.exit(\"Stopping script due to missing FP16 file.\")\n","\n","\n","# Quantize the model using specified methods\n","for method in QUANTIZATION_METHODS:\n"," qtype = f\"{MODEL_NAME}/{MODEL_NAME.lower()}.{method.upper()}.gguf\"\n"," !./llama.cpp/quantize {fp16} {qtype} 
{method}"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"mw5y-GWdkbx6","executionInfo":{"status":"ok","timestamp":1706681949750,"user_tz":-480,"elapsed":668152,"user":{"displayName":"szehanz","userId":"16137883221268059572"}},"outputId":"3fe19dac-6a53-4439-cc2b-330b69da4d42"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["FP16 file created successfully: llama-2-7b-mini-ibased/llama-2-7b-mini-ibased.fp16.bin\n","ggml_init_cublas: GGML_CUDA_FORCE_MMQ: no\n","ggml_init_cublas: CUDA_USE_TENSOR_CORES: yes\n","ggml_init_cublas: found 1 CUDA devices:\n"," Device 0: Tesla T4, compute capability 7.5, VMM: yes\n","main: build = 2029 (d62520eb)\n","main: built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu\n","main: quantizing 'llama-2-7b-mini-ibased/llama-2-7b-mini-ibased.fp16.bin' to 'llama-2-7b-mini-ibased/llama-2-7b-mini-ibased.Q5_K_M.gguf' as Q5_K_M\n","llama_model_loader: loaded meta data with 23 key-value pairs and 291 tensors from llama-2-7b-mini-ibased/llama-2-7b-mini-ibased.fp16.bin (version GGUF V3 (latest))\n","llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n","llama_model_loader: - kv 0: general.architecture str = llama\n","llama_model_loader: - kv 1: general.name str = LLaMA v2\n","llama_model_loader: - kv 2: llama.context_length u32 = 4096\n","llama_model_loader: - kv 3: llama.embedding_length u32 = 4096\n","llama_model_loader: - kv 4: llama.block_count u32 = 32\n","llama_model_loader: - kv 5: llama.feed_forward_length u32 = 11008\n","llama_model_loader: - kv 6: llama.rope.dimension_count u32 = 128\n","llama_model_loader: - kv 7: llama.attention.head_count u32 = 32\n","llama_model_loader: - kv 8: llama.attention.head_count_kv u32 = 32\n","llama_model_loader: - kv 9: llama.attention.layer_norm_rms_epsilon f32 = 0.000010\n","llama_model_loader: - kv 10: llama.rope.freq_base f32 = 10000.000000\n","llama_model_loader: - kv 11: general.file_type u32 = 1\n","llama_model_loader: - kv 12: tokenizer.ggml.model str = llama\n","llama_model_loader: - kv 13: tokenizer.ggml.tokens arr[str,32000] = [\"\", \"\", \"\", \"<0x00>\", \"<...\n","llama_model_loader: - kv 14: tokenizer.ggml.scores arr[f32,32000] = [0.000000, 0.000000, 0.000000, 0.0000...\n","llama_model_loader: - kv 15: tokenizer.ggml.token_type arr[i32,32000] = [2, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...\n","llama_model_loader: - kv 16: tokenizer.ggml.bos_token_id u32 = 1\n","llama_model_loader: - kv 17: tokenizer.ggml.eos_token_id u32 = 2\n","llama_model_loader: - kv 18: tokenizer.ggml.unknown_token_id u32 = 0\n","llama_model_loader: - kv 19: tokenizer.ggml.padding_token_id u32 = 2\n","llama_model_loader: - kv 20: tokenizer.ggml.add_bos_token bool = true\n","llama_model_loader: - kv 21: tokenizer.ggml.add_eos_token bool = false\n","llama_model_loader: - kv 22: tokenizer.chat_template str = {% if messages[0]['role'] == 'system'...\n","llama_model_loader: - type f32: 65 tensors\n","llama_model_loader: - type f16: 226 tensors\n","llama_model_quantize_internal: meta size = 742080 bytes\n","[ 1/ 291] token_embd.weight - [ 4096, 32000, 1, 1], type = f16, quantizing to q5_K .. size = 250.00 MiB -> 85.94 MiB\n","[ 2/ 291] blk.0.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 3/ 291] blk.0.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 4/ 291] blk.0.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. 
size = 86.00 MiB -> 29.56 MiB\n","[ 5/ 291] blk.0.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 6/ 291] blk.0.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 7/ 291] blk.0.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 8/ 291] blk.0.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 9/ 291] blk.0.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 10/ 291] blk.0.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 11/ 291] blk.1.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 12/ 291] blk.1.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 13/ 291] blk.1.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 14/ 291] blk.1.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 15/ 291] blk.1.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 16/ 291] blk.1.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 17/ 291] blk.1.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 18/ 291] blk.1.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 19/ 291] blk.1.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 20/ 291] blk.10.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 21/ 291] blk.10.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 22/ 291] blk.10.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 23/ 291] blk.10.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 24/ 291] blk.10.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 25/ 291] blk.10.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 26/ 291] blk.10.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 27/ 291] blk.10.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 28/ 291] blk.10.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 29/ 291] blk.11.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 30/ 291] blk.11.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 31/ 291] blk.11.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 32/ 291] blk.11.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 33/ 291] blk.11.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. 
size = 32.00 MiB -> 13.12 MiB\n","[ 34/ 291] blk.2.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 35/ 291] blk.2.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 36/ 291] blk.2.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 37/ 291] blk.2.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 38/ 291] blk.2.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 39/ 291] blk.2.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 40/ 291] blk.2.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 41/ 291] blk.2.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 42/ 291] blk.2.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 43/ 291] blk.3.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 44/ 291] blk.3.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 45/ 291] blk.3.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 46/ 291] blk.3.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 47/ 291] blk.3.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 48/ 291] blk.3.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 49/ 291] blk.3.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 50/ 291] blk.3.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 51/ 291] blk.3.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 52/ 291] blk.4.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 53/ 291] blk.4.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 54/ 291] blk.4.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 55/ 291] blk.4.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 56/ 291] blk.4.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 57/ 291] blk.4.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 58/ 291] blk.4.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 59/ 291] blk.4.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 60/ 291] blk.4.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 61/ 291] blk.5.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 62/ 291] blk.5.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 63/ 291] blk.5.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. 
size = 86.00 MiB -> 29.56 MiB\n","[ 64/ 291] blk.5.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 65/ 291] blk.5.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 66/ 291] blk.5.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 67/ 291] blk.5.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 68/ 291] blk.5.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 69/ 291] blk.5.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 70/ 291] blk.6.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 71/ 291] blk.6.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 72/ 291] blk.6.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 73/ 291] blk.6.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 74/ 291] blk.6.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 75/ 291] blk.6.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 76/ 291] blk.6.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 77/ 291] blk.6.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 78/ 291] blk.6.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 79/ 291] blk.7.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 80/ 291] blk.7.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 81/ 291] blk.7.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 82/ 291] blk.7.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 83/ 291] blk.7.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 84/ 291] blk.7.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 85/ 291] blk.7.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 86/ 291] blk.7.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 87/ 291] blk.7.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 88/ 291] blk.8.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 89/ 291] blk.8.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 90/ 291] blk.8.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 91/ 291] blk.8.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 92/ 291] blk.8.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 93/ 291] blk.8.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. 
size = 32.00 MiB -> 11.00 MiB\n","[ 94/ 291] blk.8.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 95/ 291] blk.8.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 96/ 291] blk.8.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 97/ 291] blk.9.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 98/ 291] blk.9.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 99/ 291] blk.9.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 100/ 291] blk.9.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 101/ 291] blk.9.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 102/ 291] blk.9.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 103/ 291] blk.9.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 104/ 291] blk.9.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 105/ 291] blk.9.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 106/ 291] blk.11.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 107/ 291] blk.11.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 108/ 291] blk.11.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 109/ 291] blk.11.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 110/ 291] blk.12.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 111/ 291] blk.12.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 112/ 291] blk.12.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 113/ 291] blk.12.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 114/ 291] blk.12.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 115/ 291] blk.12.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 116/ 291] blk.12.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 117/ 291] blk.12.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 118/ 291] blk.12.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 119/ 291] blk.13.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 120/ 291] blk.13.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 121/ 291] blk.13.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 122/ 291] blk.13.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. 
size = 86.00 MiB -> 29.56 MiB\n","[ 123/ 291] blk.13.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 124/ 291] blk.13.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 125/ 291] blk.13.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 126/ 291] blk.13.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 127/ 291] blk.13.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 128/ 291] blk.14.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 129/ 291] blk.14.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 130/ 291] blk.14.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 131/ 291] blk.14.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 132/ 291] blk.14.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 133/ 291] blk.14.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 134/ 291] blk.14.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 135/ 291] blk.14.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 136/ 291] blk.14.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 137/ 291] blk.15.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 138/ 291] blk.15.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 139/ 291] blk.15.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 140/ 291] blk.15.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 141/ 291] blk.15.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 142/ 291] blk.15.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 143/ 291] blk.15.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 144/ 291] blk.15.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 145/ 291] blk.15.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 146/ 291] blk.16.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 147/ 291] blk.16.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 148/ 291] blk.16.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 149/ 291] blk.16.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 150/ 291] blk.16.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 151/ 291] blk.16.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 152/ 291] blk.16.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. 
size = 32.00 MiB -> 11.00 MiB\n","[ 153/ 291] blk.16.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 154/ 291] blk.16.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 155/ 291] blk.17.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 156/ 291] blk.17.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 157/ 291] blk.17.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 158/ 291] blk.17.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 159/ 291] blk.17.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 160/ 291] blk.17.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 161/ 291] blk.17.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 162/ 291] blk.17.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 163/ 291] blk.17.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 164/ 291] blk.18.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 165/ 291] blk.18.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 166/ 291] blk.18.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 167/ 291] blk.18.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 168/ 291] blk.18.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 169/ 291] blk.18.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 170/ 291] blk.18.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 171/ 291] blk.18.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 172/ 291] blk.18.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 173/ 291] blk.19.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 174/ 291] blk.19.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 175/ 291] blk.19.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 176/ 291] blk.19.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 177/ 291] blk.19.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 178/ 291] blk.19.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 179/ 291] blk.19.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 180/ 291] blk.19.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 181/ 291] blk.19.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. 
size = 32.00 MiB -> 11.00 MiB\n","[ 182/ 291] blk.20.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 183/ 291] blk.20.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 184/ 291] blk.20.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 185/ 291] blk.20.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 186/ 291] blk.20.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 187/ 291] blk.20.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 188/ 291] blk.20.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 189/ 291] blk.20.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 190/ 291] blk.20.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 191/ 291] blk.21.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 192/ 291] blk.21.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 193/ 291] blk.21.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 194/ 291] blk.21.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 195/ 291] blk.21.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 196/ 291] blk.21.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 197/ 291] blk.21.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 198/ 291] blk.21.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 199/ 291] blk.21.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 200/ 291] blk.22.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 201/ 291] blk.22.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 202/ 291] blk.22.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 203/ 291] blk.22.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 204/ 291] blk.22.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 205/ 291] blk.22.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 206/ 291] blk.22.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 207/ 291] blk.22.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 208/ 291] blk.22.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 209/ 291] blk.23.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 210/ 291] blk.23.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 211/ 291] blk.23.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. 
size = 32.00 MiB -> 11.00 MiB\n","[ 212/ 291] blk.23.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 213/ 291] blk.23.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 214/ 291] blk.23.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 215/ 291] output.weight - [ 4096, 32000, 1, 1], type = f16, quantizing to q6_K .. size = 250.00 MiB -> 102.54 MiB\n","[ 216/ 291] blk.23.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 217/ 291] blk.23.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 218/ 291] blk.23.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 219/ 291] blk.24.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 220/ 291] blk.24.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 221/ 291] blk.24.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 222/ 291] blk.24.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 223/ 291] blk.24.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 224/ 291] blk.24.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 225/ 291] blk.24.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 226/ 291] blk.24.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 227/ 291] blk.24.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 228/ 291] blk.25.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 229/ 291] blk.25.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 230/ 291] blk.25.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 231/ 291] blk.25.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 232/ 291] blk.25.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 233/ 291] blk.25.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 234/ 291] blk.25.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 235/ 291] blk.25.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 236/ 291] blk.25.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 237/ 291] blk.26.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 238/ 291] blk.26.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 239/ 291] blk.26.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 240/ 291] blk.26.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. 
size = 86.00 MiB -> 29.56 MiB\n","[ 241/ 291] blk.26.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 242/ 291] blk.26.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 243/ 291] blk.26.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 244/ 291] blk.26.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 245/ 291] blk.26.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 246/ 291] blk.27.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 247/ 291] blk.27.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 248/ 291] blk.27.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 249/ 291] blk.27.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 250/ 291] blk.27.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 251/ 291] blk.27.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 252/ 291] blk.27.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 253/ 291] blk.27.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 254/ 291] blk.27.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 255/ 291] blk.28.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 256/ 291] blk.28.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 257/ 291] blk.28.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 258/ 291] blk.28.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 259/ 291] blk.28.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 260/ 291] blk.28.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 261/ 291] blk.28.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 262/ 291] blk.28.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 263/ 291] blk.28.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 264/ 291] blk.29.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 265/ 291] blk.29.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 266/ 291] blk.29.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 267/ 291] blk.29.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 268/ 291] blk.29.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 269/ 291] blk.29.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 270/ 291] blk.29.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. 
size = 32.00 MiB -> 11.00 MiB\n","[ 271/ 291] blk.29.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 272/ 291] blk.29.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 273/ 291] blk.30.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 274/ 291] blk.30.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 275/ 291] blk.30.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 276/ 291] blk.30.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 277/ 291] blk.30.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 278/ 291] blk.30.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 279/ 291] blk.30.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 280/ 291] blk.30.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 281/ 291] blk.30.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 282/ 291] blk.31.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 283/ 291] blk.31.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 86.00 MiB -> 35.27 MiB\n","[ 284/ 291] blk.31.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 285/ 291] blk.31.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MiB -> 29.56 MiB\n","[ 286/ 291] blk.31.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","[ 287/ 291] blk.31.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 288/ 291] blk.31.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 289/ 291] blk.31.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MiB -> 11.00 MiB\n","[ 290/ 291] blk.31.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q6_K .. size = 32.00 MiB -> 13.12 MiB\n","[ 291/ 291] output_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n","llama_model_quantize_internal: model size = 12853.02 MB\n","llama_model_quantize_internal: quant size = 4560.87 MB\n","\n","main: quantize time = 667210.52 ms\n","main: total time = 667210.52 ms\n"]}]},{"cell_type":"markdown","source":["## Run inference\n","\n","Below is a script to run our quantized model. We are offloading every layer to the GPU (33 for a 7b parameter model) to speed up inference."],"metadata":{"id":"WqI1CPiXI4dP"}},{"cell_type":"code","source":["# Run text generation using a specific quantized model in llama.cpp.\n","# 1. Prompt the user to enter text for the model to process.\n","# 2. Construct the model file path ('qtype') using MODEL_NAME and a specified quantization method.\n","# 3. 
Execute the llama.cpp main program with the constructed model path,\n","# setting the number of tokens to generate, enabling colored output, offloading model layers\n","# to the GPU with -ngl, and using the user-provided prompt.\n","\n","prompt = input(\"Enter your prompt: \")\n","\n","# Construct the path to the model file with the quantization method 'Q5_K_M'\n","qtype = f\"{MODEL_NAME}/{MODEL_NAME.lower()}.Q5_K_M.gguf\"\n","\n","# Execute the llama.cpp main program with specified parameters\n","!./llama.cpp/main -m {qtype} -n 128 --color -ngl 35 -p \"{prompt}\""],"metadata":{"id":"vNPL9WYg78l-","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1706683934968,"user_tz":-480,"elapsed":18438,"user":{"displayName":"szehanz","userId":"16137883221268059572"}},"outputId":"dcb93f10-a5d6-40ac-b2c2-321565171595"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Enter your prompt: what is cnn?\n","Log start\n","main: build = 2029 (d62520eb)\n","main: built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu\n","main: seed = 1706683926\n","ggml_init_cublas: GGML_CUDA_FORCE_MMQ: no\n","ggml_init_cublas: CUDA_USE_TENSOR_CORES: yes\n","ggml_init_cublas: found 1 CUDA devices:\n"," Device 0: Tesla T4, compute capability 7.5, VMM: yes\n","llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from llama-2-7b-mini-ibased/llama-2-7b-mini-ibased.Q5_K_M.gguf (version GGUF V3 (latest))\n","llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n","llama_model_loader: - kv 0: general.architecture str = llama\n","llama_model_loader: - kv 1: general.name str = LLaMA v2\n","llama_model_loader: - kv 2: llama.context_length u32 = 4096\n","llama_model_loader: - kv 3: llama.embedding_length u32 = 4096\n","llama_model_loader: - kv 4: llama.block_count u32 = 32\n","llama_model_loader: - kv 5: llama.feed_forward_length u32 = 11008\n","llama_model_loader: - kv 6: llama.rope.dimension_count u32 = 128\n","llama_model_loader: - kv 7: llama.attention.head_count u32 = 32\n","llama_model_loader: - kv 8: llama.attention.head_count_kv u32 = 32\n","llama_model_loader: - kv 9: llama.attention.layer_norm_rms_epsilon f32 = 0.000010\n","llama_model_loader: - kv 10: llama.rope.freq_base f32 = 10000.000000\n","llama_model_loader: - kv 11: general.file_type u32 = 17\n","llama_model_loader: - kv 12: tokenizer.ggml.model str = llama\n","llama_model_loader: - kv 13: tokenizer.ggml.tokens arr[str,32000] = [\"\", \"\", \"\", \"<0x00>\", \"<...\n","llama_model_loader: - kv 14: tokenizer.ggml.scores arr[f32,32000] = [0.000000, 0.000000, 0.000000, 0.0000...\n","llama_model_loader: - kv 15: tokenizer.ggml.token_type arr[i32,32000] = [2, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...\n","llama_model_loader: - kv 16: tokenizer.ggml.bos_token_id u32 = 1\n","llama_model_loader: - kv 17: tokenizer.ggml.eos_token_id u32 = 2\n","llama_model_loader: - kv 18: tokenizer.ggml.unknown_token_id u32 = 0\n","llama_model_loader: - kv 19: tokenizer.ggml.padding_token_id u32 = 2\n","llama_model_loader: - kv 20: tokenizer.ggml.add_bos_token bool = true\n","llama_model_loader: - kv 21: tokenizer.ggml.add_eos_token bool = false\n","llama_model_loader: - kv 22: tokenizer.chat_template str = {% if messages[0]['role'] == 'system'...\n","llama_model_loader: - kv 23: general.quantization_version u32 = 2\n","llama_model_loader: - type f32: 65 tensors\n","llama_model_loader: - type q5_K: 193 tensors\n","llama_model_loader: - type q6_K: 33 
tensors\n","llm_load_vocab: special tokens definition check successful ( 259/32000 ).\n","llm_load_print_meta: format = GGUF V3 (latest)\n","llm_load_print_meta: arch = llama\n","llm_load_print_meta: vocab type = SPM\n","llm_load_print_meta: n_vocab = 32000\n","llm_load_print_meta: n_merges = 0\n","llm_load_print_meta: n_ctx_train = 4096\n","llm_load_print_meta: n_embd = 4096\n","llm_load_print_meta: n_head = 32\n","llm_load_print_meta: n_head_kv = 32\n","llm_load_print_meta: n_layer = 32\n","llm_load_print_meta: n_rot = 128\n","llm_load_print_meta: n_embd_head_k = 128\n","llm_load_print_meta: n_embd_head_v = 128\n","llm_load_print_meta: n_gqa = 1\n","llm_load_print_meta: n_embd_k_gqa = 4096\n","llm_load_print_meta: n_embd_v_gqa = 4096\n","llm_load_print_meta: f_norm_eps = 0.0e+00\n","llm_load_print_meta: f_norm_rms_eps = 1.0e-05\n","llm_load_print_meta: f_clamp_kqv = 0.0e+00\n","llm_load_print_meta: f_max_alibi_bias = 0.0e+00\n","llm_load_print_meta: n_ff = 11008\n","llm_load_print_meta: n_expert = 0\n","llm_load_print_meta: n_expert_used = 0\n","llm_load_print_meta: rope scaling = linear\n","llm_load_print_meta: freq_base_train = 10000.0\n","llm_load_print_meta: freq_scale_train = 1\n","llm_load_print_meta: n_yarn_orig_ctx = 4096\n","llm_load_print_meta: rope_finetuned = unknown\n","llm_load_print_meta: model type = 7B\n","llm_load_print_meta: model ftype = Q5_K - Medium\n","llm_load_print_meta: model params = 6.74 B\n","llm_load_print_meta: model size = 4.45 GiB (5.68 BPW) \n","llm_load_print_meta: general.name = LLaMA v2\n","llm_load_print_meta: BOS token = 1 ''\n","llm_load_print_meta: EOS token = 2 ''\n","llm_load_print_meta: UNK token = 0 ''\n","llm_load_print_meta: PAD token = 2 ''\n","llm_load_print_meta: LF token = 13 '<0x0A>'\n","llm_load_tensors: ggml ctx size = 0.22 MiB\n","llm_load_tensors: offloading 32 repeating layers to GPU\n","llm_load_tensors: offloading non-repeating layers to GPU\n","llm_load_tensors: offloaded 33/33 layers to GPU\n","llm_load_tensors: CPU buffer size = 85.94 MiB\n","llm_load_tensors: CUDA0 buffer size = 4474.94 MiB\n","..................................................................................................\n","llama_new_context_with_model: n_ctx = 512\n","llama_new_context_with_model: freq_base = 10000.0\n","llama_new_context_with_model: freq_scale = 1\n","llama_kv_cache_init: CUDA0 KV buffer size = 256.00 MiB\n","llama_new_context_with_model: KV self size = 256.00 MiB, K (f16): 128.00 MiB, V (f16): 128.00 MiB\n","llama_new_context_with_model: CUDA_Host input buffer size = 9.01 MiB\n","llama_new_context_with_model: CUDA0 compute buffer size = 77.55 MiB\n","llama_new_context_with_model: CUDA_Host compute buffer size = 8.80 MiB\n","llama_new_context_with_model: graph splits (measure): 3\n","\n","system_info: n_threads = 2 / 2 | AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 1 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | \n","sampling: \n","\trepeat_last_n = 64, repeat_penalty = 1.100, frequency_penalty = 0.000, presence_penalty = 0.000\n","\ttop_k = 40, tfs_z = 1.000, top_p = 0.950, min_p = 0.050, typical_p = 1.000, temp = 0.800\n","\tmirostat = 0, mirostat_lr = 0.100, mirostat_ent = 5.000\n","sampling order: \n","CFG -> Penalties -> top_k -> tfs_z -> typical_p -> top_p -> min_p -> temp \n","generate: n_ctx = 512, n_batch = 512, n_predict = 128, n_keep = 0\n","\n","\n","\u001b[33m what is cnn?\u001b[0m\n","Summarization:\n","\n","A 
Convolutional Neural Network (CNN) is a type of neural network architecture that is commonly used for image and video analysis, as well as processing sequential data. It consists of multiple layers of convolutional and pooling layers, followed by fully connected layers, which are used to make predictions or classifications\n","\n","Response:\n","\n","A CNN, or Convolutional Neural Network, is a type of neural network architecture that is specifically designed for image and video analysis, as well as processing sequential data. It consists of multiple layers of convolutional and pooling layers, followed by\n","llama_print_timings: load time = 2168.11 ms\n","llama_print_timings: sample time = 72.39 ms / 128 runs ( 0.57 ms per token, 1768.10 tokens per second)\n","llama_print_timings: prompt eval time = 124.08 ms / 6 tokens ( 20.68 ms per token, 48.36 tokens per second)\n","llama_print_timings: eval time = 3479.87 ms / 127 runs ( 27.40 ms per token, 36.50 tokens per second)\n","llama_print_timings: total time = 3723.67 ms / 133 tokens\n","Log end\n"]}]},{"cell_type":"markdown","source":["## Push to hub"],"metadata":{"id":"Ar8pO7bb80US"}},{"cell_type":"code","source":["# Create a new model repository on Hugging Face and upload gguf files.\n","# 1. Initialize the HfApi object to interact with Hugging Face's API.\n","# 2. Define the username associated with the Hugging Face account.\n","# 3. Use create_repo to create an empty repository for the model,\n","# allowing for the repository to exist already with exist_ok=True.\n","# 4. Upload all gguf files from the local MODEL_NAME directory to the newly\n","# created repository on Hugging Face, using upload_folder with a filter\n","# to only include files with a .gguf extension.\n","\n","\n","api = HfApi()\n","username = \"ssoh\"\n","\n","\n","# Create an empty repository on Hugging Face\n","create_repo(\n"," repo_id=f\"{username}/{MODEL_NAME}-GGUF\",\n"," repo_type=\"model\",\n"," exist_ok=True,\n",")\n","\n","\n","# Upload gguf model files to the repository\n","api.upload_folder(\n"," folder_path=MODEL_NAME,\n"," repo_id=f\"{username}/{MODEL_NAME}-GGUF\",\n"," allow_patterns=\"*.gguf\",\n",")"],"metadata":{"id":"UOyKfUD-8jmh","colab":{"base_uri":"https://localhost:8080/","height":84,"referenced_widgets":["fdc7ab4f9b194f86b17aa4a7dfe12f0d","e1f29d859e454759bf4bde58b7a9041d","972c93dfb77c45ac8edc2e21cde1028d","4222f145bc124fdda642d6cc15f91af8","33b2f83edd9348eaa224ff52e6df7637","5374e86303054e2abb2a83ceb3192425","d56de82a661343ca9b744bf7edb56a5e","2deeb479e7104c949e935652a06fa01a","c6877fade6a647f8bd53cdcb1ec19e11","d89dfe49802b48d0a554b5e047bec54b","f2964d0cc62b44f7a6dab6b1889596d0"]},"executionInfo":{"status":"ok","timestamp":1706684339726,"user_tz":-480,"elapsed":269756,"user":{"displayName":"szehanz","userId":"16137883221268059572"}},"outputId":"af6c96c4-f57f-46df-a384-81ab597c81c4"},"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/plain":["llama-2-7b-mini-ibased.Q5_K_M.gguf: 0%| | 0.00/4.78G [00:00, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"fdc7ab4f9b194f86b17aa4a7dfe12f0d"}},"metadata":{}},{"output_type":"execute_result","data":{"text/plain":["CommitInfo(commit_url='https://huggingface.co/ssoh/llama-2-7b-mini-ibased-GGUF/commit/7df8b43b589d1f7f28125efa73c0d79c7c6d5941', commit_message='Upload folder using huggingface_hub', commit_description='', oid='7df8b43b589d1f7f28125efa73c0d79c7c6d5941', pr_url=None, pr_revision=None, 
pr_num=None)"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"string"}},"metadata":{},"execution_count":26}]},{"cell_type":"markdown","source":["# **Test run the GGUF model**\n","\n"],"metadata":{"id":"AeIF-t3z3RhM"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"7451dc54-0f6b-4855-8211-c8ff494935dd"},"outputs":[],"source":["import os\n","from urllib.parse import urlparse"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"6e67a9d6-10f3-4a1c-942c-391b546ec247","executionInfo":{"status":"ok","timestamp":1708260569297,"user_tz":-480,"elapsed":82563,"user":{"displayName":"szehanz","userId":"16137883221268059572"}},"outputId":"eb94faca-31ca-4e49-8c9a-ec0472a83de6"},"outputs":[{"output_type":"stream","name":"stdout","text":["\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m815.9/815.9 kB\u001b[0m \u001b[31m11.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m36.6/36.6 MB\u001b[0m \u001b[31m33.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n"," Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n"," Installing backend dependencies ... \u001b[?25l\u001b[?25hdone\n"," Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m81.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m241.2/241.2 kB\u001b[0m \u001b[31m28.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.5/56.5 kB\u001b[0m \u001b[31m7.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.4/55.4 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25h Building wheel for llama-cpp-python (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n"]}],"source":["!pip -q install langchain llama-cpp-python"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"b83d199f-4bd9-47bd-8be6-f9d0c802c570"},"outputs":[],"source":["# URL from which you're downloading the model\n","url = \"https://huggingface.co/BitBasher/llama-2-7b-mini-ibased-GGUF/resolve/main/llama-2-7b-mini-ibased.Q5_K_M.gguf\"\n"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ab25420c-aef0-48f2-a602-2fe89d6e64b3","executionInfo":{"status":"ok","timestamp":1708260760574,"user_tz":-480,"elapsed":186914,"user":{"displayName":"szehanz","userId":"16137883221268059572"}},"outputId":"bcb8f0b0-470d-44ee-a053-552121ce87ab"},"outputs":[{"output_type":"stream","name":"stdout","text":["--2024-02-18 12:49:32-- https://huggingface.co/ssoh/llama-2-7b-mini-ibased-GGUF/resolve/main/llama-2-7b-mini-ibased.Q5_K_M.gguf\n","Resolving huggingface.co (huggingface.co)... 13.35.7.38, 13.35.7.81, 13.35.7.57, ...\n","Connecting to huggingface.co (huggingface.co)|13.35.7.38|:443... connected.\n","HTTP request sent, awaiting response... 
307 Temporary Redirect\n","Location: /BitBasher/llama-2-7b-mini-ibased-GGUF/resolve/main/llama-2-7b-mini-ibased.Q5_K_M.gguf [following]\n","--2024-02-18 12:49:32-- https://huggingface.co/BitBasher/llama-2-7b-mini-ibased-GGUF/resolve/main/llama-2-7b-mini-ibased.Q5_K_M.gguf\n","Reusing existing connection to huggingface.co:443.\n","HTTP request sent, awaiting response... 302 Found\n","Location: https://cdn-lfs-us-1.huggingface.co/repos/a7/16/a716a6f7d3f2fa140d2f0263054d2bc120c1eca46172da4411fa02e97e0236bc/1fad558a8c0c265b3f1ef73559d401fdde00a1945e632c8c7523c066002aac4a?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27llama-2-7b-mini-ibased.Q5_K_M.gguf%3B+filename%3D%22llama-2-7b-mini-ibased.Q5_K_M.gguf%22%3B&Expires=1708519772&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcwODUxOTc3Mn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2E3LzE2L2E3MTZhNmY3ZDNmMmZhMTQwZDJmMDI2MzA1NGQyYmMxMjBjMWVjYTQ2MTcyZGE0NDExZmEwMmU5N2UwMjM2YmMvMWZhZDU1OGE4YzBjMjY1YjNmMWVmNzM1NTlkNDAxZmRkZTAwYTE5NDVlNjMyYzhjNzUyM2MwNjYwMDJhYWM0YT9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=Z4biIz80%7E4wHVRQfPsFCqKmqkWiy305CMhiGaR2aHWztCSbtx-3qpMo8pcuV7FwkidsUgWzaZHfqEqVpNElgivtDW3KJ9fbCKxR3PZBU1VHcT5N1LIoCRuvcPXxTooUbHVvij4AJHUe50kTUf7ZGKrmMbhsg2mTijeWyjWAjMOg1DgIO8%7EeWf0mrHMCRphgQWMaHKMEvztBw2NR9nTBJCuYddjES3xCTXiyBEIP3RmQRk2rW86RUoZ7EVImFbN%7E%7E1oVMsGmAom7C9wBTNFI5%7EkVVMInEzRhszmVbiBBW3i4YCwTPeWij2hf%7EwMDam0EdhpiKfvsGy9WUFEAri08PnQ__&Key-Pair-Id=KCD77M1F0VK2B [following]\n","--2024-02-18 12:49:33-- https://cdn-lfs-us-1.huggingface.co/repos/a7/16/a716a6f7d3f2fa140d2f0263054d2bc120c1eca46172da4411fa02e97e0236bc/1fad558a8c0c265b3f1ef73559d401fdde00a1945e632c8c7523c066002aac4a?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27llama-2-7b-mini-ibased.Q5_K_M.gguf%3B+filename%3D%22llama-2-7b-mini-ibased.Q5_K_M.gguf%22%3B&Expires=1708519772&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcwODUxOTc3Mn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2E3LzE2L2E3MTZhNmY3ZDNmMmZhMTQwZDJmMDI2MzA1NGQyYmMxMjBjMWVjYTQ2MTcyZGE0NDExZmEwMmU5N2UwMjM2YmMvMWZhZDU1OGE4YzBjMjY1YjNmMWVmNzM1NTlkNDAxZmRkZTAwYTE5NDVlNjMyYzhjNzUyM2MwNjYwMDJhYWM0YT9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=Z4biIz80%7E4wHVRQfPsFCqKmqkWiy305CMhiGaR2aHWztCSbtx-3qpMo8pcuV7FwkidsUgWzaZHfqEqVpNElgivtDW3KJ9fbCKxR3PZBU1VHcT5N1LIoCRuvcPXxTooUbHVvij4AJHUe50kTUf7ZGKrmMbhsg2mTijeWyjWAjMOg1DgIO8%7EeWf0mrHMCRphgQWMaHKMEvztBw2NR9nTBJCuYddjES3xCTXiyBEIP3RmQRk2rW86RUoZ7EVImFbN%7E%7E1oVMsGmAom7C9wBTNFI5%7EkVVMInEzRhszmVbiBBW3i4YCwTPeWij2hf%7EwMDam0EdhpiKfvsGy9WUFEAri08PnQ__&Key-Pair-Id=KCD77M1F0VK2B\n","Resolving cdn-lfs-us-1.huggingface.co (cdn-lfs-us-1.huggingface.co)... 13.35.35.127, 13.35.35.109, 13.35.35.21, ...\n","Connecting to cdn-lfs-us-1.huggingface.co (cdn-lfs-us-1.huggingface.co)|13.35.35.127|:443... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 4783157952 (4.5G) [binary/octet-stream]\n","Saving to: ‘llama-2-7b-mini-ibased.Q5_K_M.gguf’\n","\n","llama-2-7b-mini-iba 100%[===================>] 4.45G 25.2MB/s in 3m 6s \n","\n","2024-02-18 12:52:39 (24.5 MB/s) - ‘llama-2-7b-mini-ibased.Q5_K_M.gguf’ saved [4783157952/4783157952]\n","\n"]}],"source":["!wget {url}"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"d93c335c-03ce-4afc-878a-0a8395d64ae0","executionInfo":{"status":"ok","timestamp":1708260760575,"user_tz":-480,"elapsed":13,"user":{"displayName":"szehanz","userId":"16137883221268059572"}},"outputId":"4ae0c885-50d3-4989-a064-b84cc70d9205"},"outputs":[{"output_type":"stream","name":"stdout","text":["llama-2-7b-mini-ibased.Q5_K_M.gguf\n","/content\n","/content/llama-2-7b-mini-ibased.Q5_K_M.gguf\n"]}],"source":["# Parse the URL to get the path, then split the path to get the filename\n","filename = os.path.basename(urlparse(url).path)\n","print (filename)\n","\n","# Get the current working directory\n","current_directory = os.getcwd()\n","print (current_directory)\n","\n","# Construct the model path with the current directory and the filename\n","model_path = os.path.join(current_directory, filename)\n","\n","print(model_path)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"031c4b47-4fad-4cfd-bab0-25130f8f2ad9","executionInfo":{"status":"ok","timestamp":1708260837455,"user_tz":-480,"elapsed":2167,"user":{"displayName":"szehanz","userId":"16137883221268059572"}},"outputId":"591b593a-3817-4c9c-9430-ed08fa80adf1"},"outputs":[{"output_type":"stream","name":"stderr","text":["llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from /content/llama-2-7b-mini-ibased.Q5_K_M.gguf (version GGUF V3 (latest))\n","llama_model_loader: Dumping metadata keys/values. 
Note: KV overrides do not apply in this output.\n","llama_model_loader: - kv 0: general.architecture str = llama\n","llama_model_loader: - kv 1: general.name str = LLaMA v2\n","llama_model_loader: - kv 2: llama.context_length u32 = 4096\n","llama_model_loader: - kv 3: llama.embedding_length u32 = 4096\n","llama_model_loader: - kv 4: llama.block_count u32 = 32\n","llama_model_loader: - kv 5: llama.feed_forward_length u32 = 11008\n","llama_model_loader: - kv 6: llama.rope.dimension_count u32 = 128\n","llama_model_loader: - kv 7: llama.attention.head_count u32 = 32\n","llama_model_loader: - kv 8: llama.attention.head_count_kv u32 = 32\n","llama_model_loader: - kv 9: llama.attention.layer_norm_rms_epsilon f32 = 0.000010\n","llama_model_loader: - kv 10: llama.rope.freq_base f32 = 10000.000000\n","llama_model_loader: - kv 11: general.file_type u32 = 17\n","llama_model_loader: - kv 12: tokenizer.ggml.model str = llama\n","llama_model_loader: - kv 13: tokenizer.ggml.tokens arr[str,32000] = [\"\", \"\", \"\", \"<0x00>\", \"<...\n","llama_model_loader: - kv 14: tokenizer.ggml.scores arr[f32,32000] = [0.000000, 0.000000, 0.000000, 0.0000...\n","llama_model_loader: - kv 15: tokenizer.ggml.token_type arr[i32,32000] = [2, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...\n","llama_model_loader: - kv 16: tokenizer.ggml.bos_token_id u32 = 1\n","llama_model_loader: - kv 17: tokenizer.ggml.eos_token_id u32 = 2\n","llama_model_loader: - kv 18: tokenizer.ggml.unknown_token_id u32 = 0\n","llama_model_loader: - kv 19: tokenizer.ggml.padding_token_id u32 = 2\n","llama_model_loader: - kv 20: tokenizer.ggml.add_bos_token bool = true\n","llama_model_loader: - kv 21: tokenizer.ggml.add_eos_token bool = false\n","llama_model_loader: - kv 22: tokenizer.chat_template str = {% if messages[0]['role'] == 'system'...\n","llama_model_loader: - kv 23: general.quantization_version u32 = 2\n","llama_model_loader: - type f32: 65 tensors\n","llama_model_loader: - type q5_K: 193 tensors\n","llama_model_loader: - type q6_K: 33 tensors\n","llm_load_vocab: special tokens definition check successful ( 259/32000 ).\n","llm_load_print_meta: format = GGUF V3 (latest)\n","llm_load_print_meta: arch = llama\n","llm_load_print_meta: vocab type = SPM\n","llm_load_print_meta: n_vocab = 32000\n","llm_load_print_meta: n_merges = 0\n","llm_load_print_meta: n_ctx_train = 4096\n","llm_load_print_meta: n_embd = 4096\n","llm_load_print_meta: n_head = 32\n","llm_load_print_meta: n_head_kv = 32\n","llm_load_print_meta: n_layer = 32\n","llm_load_print_meta: n_rot = 128\n","llm_load_print_meta: n_embd_head_k = 128\n","llm_load_print_meta: n_embd_head_v = 128\n","llm_load_print_meta: n_gqa = 1\n","llm_load_print_meta: n_embd_k_gqa = 4096\n","llm_load_print_meta: n_embd_v_gqa = 4096\n","llm_load_print_meta: f_norm_eps = 0.0e+00\n","llm_load_print_meta: f_norm_rms_eps = 1.0e-05\n","llm_load_print_meta: f_clamp_kqv = 0.0e+00\n","llm_load_print_meta: f_max_alibi_bias = 0.0e+00\n","llm_load_print_meta: n_ff = 11008\n","llm_load_print_meta: n_expert = 0\n","llm_load_print_meta: n_expert_used = 0\n","llm_load_print_meta: rope scaling = linear\n","llm_load_print_meta: freq_base_train = 10000.0\n","llm_load_print_meta: freq_scale_train = 1\n","llm_load_print_meta: n_yarn_orig_ctx = 4096\n","llm_load_print_meta: rope_finetuned = unknown\n","llm_load_print_meta: model type = 7B\n","llm_load_print_meta: model ftype = Q5_K - Medium\n","llm_load_print_meta: model params = 6.74 B\n","llm_load_print_meta: model size = 4.45 GiB (5.68 BPW) \n","llm_load_print_meta: general.name = 
LLaMA v2\n","llm_load_print_meta: BOS token = 1 ''\n","llm_load_print_meta: EOS token = 2 ''\n","llm_load_print_meta: UNK token = 0 ''\n","llm_load_print_meta: PAD token = 2 ''\n","llm_load_print_meta: LF token = 13 '<0x0A>'\n","llm_load_tensors: ggml ctx size = 0.11 MiB\n","llm_load_tensors: CPU buffer size = 4560.87 MiB\n","..................................................................................................\n","llama_new_context_with_model: n_ctx = 512\n","llama_new_context_with_model: freq_base = 10000.0\n","llama_new_context_with_model: freq_scale = 1\n","llama_kv_cache_init: CPU KV buffer size = 256.00 MiB\n","llama_new_context_with_model: KV self size = 256.00 MiB, K (f16): 128.00 MiB, V (f16): 128.00 MiB\n","llama_new_context_with_model: CPU input buffer size = 10.01 MiB\n","llama_new_context_with_model: CPU compute buffer size = 70.50 MiB\n","llama_new_context_with_model: graph splits (measure): 1\n","AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | MATMUL_INT8 = 0 | \n","Model metadata: {'tokenizer.chat_template': \"{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\\\n' + system_message + '\\\\n<>\\\\n\\\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}\", 'tokenizer.ggml.add_eos_token': 'false', 'tokenizer.ggml.padding_token_id': '2', 'tokenizer.ggml.unknown_token_id': '0', 'tokenizer.ggml.eos_token_id': '2', 'general.architecture': 'llama', 'llama.rope.freq_base': '10000.000000', 'llama.context_length': '4096', 'general.name': 'LLaMA v2', 'tokenizer.ggml.add_bos_token': 'true', 'llama.embedding_length': '4096', 'llama.feed_forward_length': '11008', 'llama.attention.layer_norm_rms_epsilon': '0.000010', 'llama.rope.dimension_count': '128', 'tokenizer.ggml.bos_token_id': '1', 'llama.attention.head_count': '32', 'llama.block_count': '32', 'llama.attention.head_count_kv': '32', 'general.quantization_version': '2', 'tokenizer.ggml.model': 'llama', 'general.file_type': '17'}\n","Using chat template: {% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + 
eos_token }}{% endif %}{% endfor %}\n","Using chat eos_token: \n","Using chat bos_token: \n"]}],"source":["from langchain.llms import LlamaCpp\n","\n","llm_cpp = LlamaCpp(\n"," streaming = True,\n"," model_path=\"/content/llama-2-7b-mini-ibased.Q5_K_M.gguf\",\n"," n_gpu_layers=-1,\n"," n_batch=512,\n"," temperature=0.1,\n"," top_p=1,\n"," # verbose=False,\n"," max_tokens=4096,\n"," )\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"24c217cf-a787-4b1e-b1f9-a7c91cbb2774","outputId":"f48d721e-4ee9-44ac-f0ec-54ec0e842ddf","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1708261386617,"user_tz":-480,"elapsed":127839,"user":{"displayName":"szehanz","userId":"16137883221268059572"}}},"outputs":[{"name":"stdout","output_type":"stream","text":["Please enter your query: what is deep learning?\n"]},{"output_type":"stream","name":"stderr","text":["Llama.generate: prefix-match hit\n","\n","llama_print_timings: load time = 15734.88 ms\n","llama_print_timings: sample time = 81.81 ms / 158 runs ( 0.52 ms per token, 1931.19 tokens per second)\n","llama_print_timings: prompt eval time = 16982.63 ms / 36 tokens ( 471.74 ms per token, 2.12 tokens per second)\n","llama_print_timings: eval time = 102805.32 ms / 157 runs ( 654.81 ms per token, 1.53 tokens per second)\n","llama_print_timings: total time = 120409.63 ms / 193 tokens\n"]},{"output_type":"stream","name":"stdout","text":["\n","Deep learning is a subset of machine learning that involves the use of artificial neural networks to model and solve complex problems\n","\n","Response:\n","\n","Thank you for asking! Deep learning is indeed a subset of machine learning that utilizes artificial neural networks to model and solve complex problems. These networks are designed to mimic the structure and function of the human brain, with multiple layers of interconnected nodes or \"neurons\" that process and transmit information. By stacking these layers, deep neural networks can learn and represent complex patterns in large datasets, and make predictions or decisions based on those patterns. 
Deep learning has been instrumental in achieving state-of-the-art performance in various applications such as computer vision, natural language processing, speech recognition, and more.\n"]}],"source":["# Get user input\n","user_query = input(\"Please enter your query: \")\n","\n","# Construct the prompt with the user's query, explicitly asking for a single response\n","prompt = f\"\"\"\n","You are an expert in Python, machine learning, and deep learning.\n","Please be truthful and give a direct and concise answer to the following question.\n","\n","Question: {user_query}\n","Answer:\n","\"\"\"\n","\n","# Send the prompt to the model with LlamaCpp's `invoke` method\n","response = llm_cpp.invoke(prompt)\n","print(response)\n"]},
{"cell_type":"code","execution_count":null,"metadata":{"id":"aec5f45b-628e-4ef6-bbdc-14e0fd40f82d","outputId":"a10691fd-9913-44d4-bcec-bc42c7ebf271","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1708262624959,"user_tz":-480,"elapsed":299819,"user":{"displayName":"szehanz","userId":"16137883221268059572"}}},"outputs":[{"name":"stdout","output_type":"stream","text":["Please enter your query: please help to create a mcq on machine learning with its answer and explanation\n"]},{"output_type":"stream","name":"stderr","text":["Llama.generate: prefix-match hit\n","\n","llama_print_timings: load time = 15734.88 ms\n","llama_print_timings: sample time = 217.18 ms / 381 runs ( 0.57 ms per token, 1754.32 tokens per second)\n","llama_print_timings: prompt eval time = 40660.88 ms / 85 tokens ( 478.36 ms per token, 2.09 tokens per second)\n","llama_print_timings: eval time = 252448.49 ms / 380 runs ( 664.34 ms per token, 1.51 tokens per second)\n","llama_print_timings: total time = 294741.65 ms / 465 tokens\n"]},{"output_type":"stream","name":"stdout","text":["\n","What is the primary difference between supervised and unsupervised learning in machine learning?\n","\n","Response:\n","\n","Sure! Here is a multiple-choice question on the primary difference between supervised and unsupervised learning in machine learning:\n","\n","Question: What is the primary difference between supervised and unsupervised learning in machine learning?\n","\n","A) Supervised learning involves training a model on labeled data, while unsupervised learning involves training a model on unlabeled data\n","\n","B) Supervised learning is used for regression tasks, while unsupervised learning is used for classification tasks\n","\n","C) Supervised learning is used for model evaluation, while unsupervised learning is used for model selection\n","\n","D) Supervised learning is used for clustering tasks, while unsupervised learning is used for dimensionality reduction\n","\n","Correct Answer: A) Supervised learning involves training a model on labeled data, while unsupervised learning involves training a model on unlabeled data\n","\n","Explanation:\n",
"Supervised learning involves training a model on labeled data, where the model is trained to predict the label or class of new, unseen data based on the patterns and relationships learned from the labeled data. On the other hand, unsupervised learning involves training a model on unlabeled data, where the model is trained to discover patterns or relationships in the data without any prior knowledge of the labels or classes.\n","\n","The primary difference between supervised and unsupervised learning is that supervised learning requires labeled data to train the model, while unsupervised learning does not require any labeled data. Supervised learning is used for tasks such as classification, regression, and time series forecasting, while unsupervised learning is used for tasks such as clustering, dimensionality reduction, and anomaly detection.\n"]}],
"source":["# Get user input\n","user_query = input(\"Please enter your query: \")\n","\n","# Construct the prompt with the user's query\n","prompt = f\"\"\"\n","You are an AI assistant skilled in creating educational content.\n","Generate a multiple-choice question (MCQ) that addresses the following query in the context of machine learning. Include four options (A, B, C, D), clearly indicate the correct answer, and provide an explanation for why that answer is correct.\n","\n","Query: {user_query}\n","Question:\n","\"\"\"\n","\n","# Send the prompt to the model with LlamaCpp's `invoke` method\n","response = llm_cpp.invoke(prompt)\n","print(response)\n"]},
{"cell_type":"code","execution_count":null,"metadata":{"id":"ce868984-e520-424a-a024-65871240378b","outputId":"dccd4b50-32be-4e78-db9b-f487aed1045a","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1708263382437,"user_tz":-480,"elapsed":285498,"user":{"displayName":"szehanz","userId":"16137883221268059572"}}},"outputs":[{"name":"stdout","output_type":"stream","text":["Please enter your query: please help to summarize my sentences \"\"\"Convolutional neural networks (abbreviated CNNs) are most often used for image data, but their underlying principles apply in other domains as well. To understand why a CNN is useful, consider this specific problem: you are trying to determine whether or not there is a dog in an image. There are two general difficulties we have to deal with in solving this problem. First, while dogs have a lot of similar features (ears, tails, paws, etc.), we need some means of breaking an image down into smaller pieces that we can identify as being ears or tails or paws. Second, what happens if we train on images of dogs that are all in the center of the photo, and then we try to test our network on an image where the dog is in the upper left hand corner? It's going to fail miserably. CNNs overcome these problems by extracting smaller local features from images via what's known as a sliding window. You can imagine this sliding window as a matrix kernel that moves over every subsection of an image, producing a summary of those subsections that feed into the next layer in our network. We do this over the entire image, and with several different sliding windows. Without going into too many details, this solves the two general problems we had above: our small sliding window can summarize a feature of interest (such as a dog ear) and it is also location invariant, meaning that we can identify that dog ear anywhere in an image.\"\"\"\n"]},
{"output_type":"stream","name":"stderr","text":["Llama.generate: prefix-match hit\n","\n","llama_print_timings: load time = 15734.88 ms\n","llama_print_timings: sample time = 87.80 ms / 150 runs ( 0.59 ms per token, 1708.51 tokens per second)\n","llama_print_timings: prompt eval time = 167342.66 ms / 353 tokens ( 474.06 ms per token, 2.11 tokens per second)\n","llama_print_timings: eval time = 99643.14 ms / 149 runs ( 668.75 ms per token, 1.50 tokens per second)\n","llama_print_timings: total time = 267597.08 ms / 502 tokens\n"]},{"output_type":"stream","name":"stdout","text":["\n","\n","Response:\n","\n","Convolutional neural networks (CNNs) are commonly used for image processing tasks, but their principles can also apply to other domains. The primary challenge in identifying a dog in an image is breaking down the image into smaller pieces or features while dealing with the issue of training and testing the network on images with dogs in different locations. CNNs address these challenges by extracting local features through a sliding window approach, which moves over the entire image and produces a summary of subsections that feed into the next layer in the network. This approach is location-invariant, meaning it can identify features of interest anywhere in the image, and it helps overcome the challenges of training and testing the network on images\n"]}],
"source":["import re\n","\n","# Get user input\n","user_query = input(\"Please enter your query: \")\n","\n","# Initialize a base prompt for the AI assistant\n","base_prompt = \"\"\"\n","You are an AI assistant that follows instructions extremely well.\n","Please be truthful and give direct answers.\n","\"\"\"\n","\n","# Check if the query asks for summarization\n","if \"summarize\" in user_query.lower():\n","    # Extract the text to be summarized by removing the word \"summarize\"\n","    # (case-insensitively, to match the lowercased check above)\n","    text_to_summarize = re.sub(r\"summarize\", \"\", user_query, count=1, flags=re.IGNORECASE).strip()\n","\n","    # Ensure there's actual text to summarize after removing \"summarize\"\n","    if text_to_summarize:\n","        prompt = f\"{base_prompt}\\nPlease summarize the following text:\\n{text_to_summarize}\"\n","    else:\n","        prompt = f\"{base_prompt}\\nIt seems you want a summarization but didn't provide the text. Please provide the text to summarize.\"\n","else:\n","    # Use the original prompt for other types of queries\n","    prompt = f\"{base_prompt}\\n{user_query}\\nAnswer:\"\n","\n","# Send the prompt to the AI model and print the response\n","response = llm_cpp.invoke(prompt)\n","print(response)"]},
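{"cell_type":"markdown","source":["The interactive cells above rebuild the same prompt template for every query. As a small convenience, that pattern can be folded into a reusable helper. The sketch below assumes the `llm_cpp` instance loaded earlier is still in memory; the `ask` helper name is ours, not part of the LangChain API."],"metadata":{}},
{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# Minimal sketch: wrap the expert Q&A prompt from the cells above in a helper.\n","# Assumes `llm_cpp` (the LlamaCpp instance created earlier) is still loaded;\n","# `ask` is a hypothetical helper name, not a LangChain or llama.cpp API.\n","def ask(question: str) -> str:\n","    prompt = f\"\"\"\n","You are an expert in Python, machine learning, and deep learning.\n","Please be truthful and give a direct and concise answer to the following question.\n","\n","Question: {question}\n","Answer:\n","\"\"\"\n","    return llm_cpp.invoke(prompt)\n","\n","\n","# Example usage\n","print(ask(\"What is overfitting?\"))"]}]}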