Spaces:

MJ3128
/

CS-GY-6613-Project

Runtime error

App Files Files

MJ committed on Apr 27, 2023

Commit

83e19cd

•

1 Parent(s): 54d8a35

Added milestone-3 files

Browse files

Files changed (2) hide show

Milestone_3.ipynb +1251 -0
app.py +90 -22

Milestone_3.ipynb ADDED Viewed

	@@ -0,0 +1,1251 @@

+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU",
+    "gpuClass": "standard",
+    "widgets": {
+      "application/vnd.jupyter.widget-state+json": {
+        "44bae0dd4d024583a4942516604af83a": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_dc7945ddd9844c9286f5d7aeb0a87e2c",
+              "IPY_MODEL_b215c40be67f4e3c9556343a5e8b6a8f",
+              "IPY_MODEL_ae9d91d81a414ea5a48d4a0374ee7cc5"
+            ],
+            "layout": "IPY_MODEL_5111fe0aaaa54f329374a1c3dedc6981"
+          }
+        },
+        "dc7945ddd9844c9286f5d7aeb0a87e2c": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_17b0e3d10405412dab8a0c5e99d78c5f",
+            "placeholder": "",
+            "style": "IPY_MODEL_d88273c0d390433cad8e2ebb810fdb6d",
+            "value": "100%"
+          }
+        },
+        "b215c40be67f4e3c9556343a5e8b6a8f": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_f1375d9369b9429fa5ee83101f225bac",
+            "max": 2,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_31cea131c8714b4883daceee0b3a4414",
+            "value": 2
+          }
+        },
+        "ae9d91d81a414ea5a48d4a0374ee7cc5": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_32fd51563a5149a8b20e463a85f5ca0a",
+            "placeholder": "",
+            "style": "IPY_MODEL_516719f733c6440fb9bbd15ca3dc037a",
+            "value": " 2/2 [00:00&lt;00:00, 67.36it/s]"
+          }
+        },
+        "5111fe0aaaa54f329374a1c3dedc6981": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "17b0e3d10405412dab8a0c5e99d78c5f": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "d88273c0d390433cad8e2ebb810fdb6d": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "f1375d9369b9429fa5ee83101f225bac": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "31cea131c8714b4883daceee0b3a4414": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "32fd51563a5149a8b20e463a85f5ca0a": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "516719f733c6440fb9bbd15ca3dc037a": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "7876733281784505a7cce1549c4d4002": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_54ad98acf60a429b8689f2f39b83d679",
+              "IPY_MODEL_8c0b927c2cc945f9bc796d574dbe734b",
+              "IPY_MODEL_b28bb08b3f2a4529815182714df85d24"
+            ],
+            "layout": "IPY_MODEL_83005ceb9c614856994e3a3973b5b211"
+          }
+        },
+        "54ad98acf60a429b8689f2f39b83d679": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_bbd9ff305af5489fbea28e37695fd86d",
+            "placeholder": "",
+            "style": "IPY_MODEL_a22736b7c4bd48c081b2d7696708a6e8",
+            "value": "Map: 100%"
+          }
+        },
+        "8c0b927c2cc945f9bc796d574dbe734b": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_282755013feb4326827a731c1cbf2da1",
+            "max": 9094,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_c66db78a54004b719e7c576406ac261b",
+            "value": 9094
+          }
+        },
+        "b28bb08b3f2a4529815182714df85d24": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_97424e4bbbdb4536ae10f65c5352ee27",
+            "placeholder": "",
+            "style": "IPY_MODEL_0ee137e1367546bd8738a52935ee9b95",
+            "value": " 9094/9094 [00:44&lt;00:00, 262.05 examples/s]"
+          }
+        },
+        "83005ceb9c614856994e3a3973b5b211": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": "hidden",
+            "width": null
+          }
+        },
+        "bbd9ff305af5489fbea28e37695fd86d": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "a22736b7c4bd48c081b2d7696708a6e8": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "282755013feb4326827a731c1cbf2da1": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "c66db78a54004b719e7c576406ac261b": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "97424e4bbbdb4536ae10f65c5352ee27": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "0ee137e1367546bd8738a52935ee9b95": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        }
+      }
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "source": [
+        "! pip install datasets -q\n",
+        "! pip install transformers -q \n",
+        "! pip install evaluate -q\n",
+        "! pip install accelerate -q"
+      ],
+      "metadata": {
+        "id": "4a1gg5YCc3NA"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from google.colab import drive\n",
+        "drive.mount('/content/drive')\n",
+        "%cd \"/content/drive/MyDrive/Colab Notebooks/Project\""
+      ],
+      "metadata": {
+        "id": "WzMdj31Ktf_I",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "4214aef0-e79c-4075-f745-134c4978b291"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n",
+            "/content/drive/MyDrive/Colab Notebooks/Project\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import transformers\n",
+        "from datasets import load_dataset, ClassLabel\n",
+        "from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments\n",
+        "import numpy as np\n",
+        "import evaluate"
+      ],
+      "metadata": {
+        "id": "rhBp0k13AqRC"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Loading the Dataset"
+      ],
+      "metadata": {
+        "id": "8X7Or5qactFF"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "dataset_dict = load_dataset('HUPD/hupd',\n",
+        "    name='sample',\n",
+        "    data_files=\"https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather\", \n",
+        "    icpr_label=None,\n",
+        "    train_filing_start_date='2016-01-01',\n",
+        "    train_filing_end_date='2016-01-21',\n",
+        "    val_filing_start_date='2016-01-22',\n",
+        "    val_filing_end_date='2016-01-31',\n",
+        ")"
+      ],
+      "metadata": {
+        "id": "vm7-_ncug7I6",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 86,
+          "referenced_widgets": [
+            "44bae0dd4d024583a4942516604af83a",
+            "dc7945ddd9844c9286f5d7aeb0a87e2c",
+            "b215c40be67f4e3c9556343a5e8b6a8f",
+            "ae9d91d81a414ea5a48d4a0374ee7cc5",
+            "5111fe0aaaa54f329374a1c3dedc6981",
+            "17b0e3d10405412dab8a0c5e99d78c5f",
+            "d88273c0d390433cad8e2ebb810fdb6d",
+            "f1375d9369b9429fa5ee83101f225bac",
+            "31cea131c8714b4883daceee0b3a4414",
+            "32fd51563a5149a8b20e463a85f5ca0a",
+            "516719f733c6440fb9bbd15ca3dc037a"
+          ]
+        },
+        "outputId": "4a7ca506-e35f-4b1e-d33c-550edb540dc1"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "WARNING:datasets.builder:Found cached dataset hupd (/root/.cache/huggingface/datasets/HUPD___hupd/sample-85e70a41d39c65dd/0.0.0/6920d2def8fd7767046c0470603357f76866e5a09c97e19571896bfdca521142)\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "  0%|          | 0/2 [00:00<?, ?it/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "44bae0dd4d024583a4942516604af83a"
+            }
+          },
+          "metadata": {}
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "raw_training_data = dataset_dict[\"train\"]\n",
+        "validation_data = dataset_dict[\"validation\"]"
+      ],
+      "metadata": {
+        "id": "FNPuthOVhJFg"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Filtering the dataset to include only the relevant columns"
+      ],
+      "metadata": {
+        "id": "Wo-cOQEYmfbF"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "features_to_remove = ['patent_number', 'title', 'background', 'summary', 'description', 'cpc_label', \n",
+        "                      'ipc_label', 'filing_date', 'patent_issue_date', 'date_published', 'examiner_id']\n",
+        "# Removing irrelevant columns\n",
+        "raw_training_data = raw_training_data.remove_columns(features_to_remove)\n",
+        "validation_data = validation_data.remove_columns(features_to_remove)\n",
+        "\n",
+        "# Renaming Column names to match expected input\n",
+        "raw_training_data = raw_training_data.rename_column('decision', 'labels')\n",
+        "validation_data = validation_data.rename_column('decision', 'labels')"
+      ],
+      "metadata": {
+        "id": "8p0aweR7jwHF"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Encoding the dataset's string labels as integer class IDs"
+      ],
+      "metadata": {
+        "id": "pKay62q50mAQ"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "features = raw_training_data.features.copy()\n",
+        "features[\"labels\"] = ClassLabel(names = [\"REJECTED\", \"PENDING\", \"ACCEPTED\"])\n",
+        "raw_training_data = raw_training_data.cast(features)\n",
+        "\n",
+        "features = validation_data.features.copy()\n",
+        "features[\"labels\"] = ClassLabel(names = [\"REJECTED\", \"PENDING\", \"ACCEPTED\"])\n",
+        "validation_data = validation_data.cast(features)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "ece-OlYxyJ7e",
+        "outputId": "8f42ef75-9bef-41fa-cf4c-1f9a1f9c88f9"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "WARNING:datasets.arrow_dataset:Loading cached processed dataset at /root/.cache/huggingface/datasets/HUPD___hupd/sample-85e70a41d39c65dd/0.0.0/6920d2def8fd7767046c0470603357f76866e5a09c97e19571896bfdca521142/cache-e851499ec526ea46.arrow\n",
+            "WARNING:datasets.arrow_dataset:Loading cached processed dataset at /root/.cache/huggingface/datasets/HUPD___hupd/sample-85e70a41d39c65dd/0.0.0/6920d2def8fd7767046c0470603357f76866e5a09c97e19571896bfdca521142/cache-1c918e033c2ee87e.arrow\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Getting a Pre-Trained Model"
+      ],
+      "metadata": {
+        "id": "OQnpksYyh8KZ"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "model_name = 'distilbert-base-cased'\n",
+        "\n",
+        "label2id = {\n",
+        "    \"REJECTED\" : 0,\n",
+        "    \"PENDING\" : 1,\n",
+        "    \"ACCEPTED\": 2\n",
+        "}\n",
+        "\n",
+        "id2label = {\n",
+        "    0 : \"REJECTED\",\n",
+        "    1 : \"PENDING\",\n",
+        "    2 : \"ACCEPTED\"\n",
+        "}\n",
+        "\n",
+        "model = AutoModelForSequenceClassification.from_pretrained(\n",
+        "    model_name, \n",
+        "    num_labels = 3,\n",
+        "    id2label=id2label,\n",
+        "    label2id=label2id\n",
+        ")\n",
+        "\n",
+        "tokenizer = AutoTokenizer.from_pretrained(model_name)"
+      ],
+      "metadata": {
+        "id": "2a6MmqVai9EL",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "d7020e61-fca6-49a6-bd7d-ceaae253bfb8"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Some weights of the model checkpoint at distilbert-base-cased were not used when initializing DistilBertForSequenceClassification: ['vocab_projector.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_projector.bias']\n",
+            "- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+            "- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+            "Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-cased and are newly initialized: ['classifier.bias', 'pre_classifier.bias', 'pre_classifier.weight', 'classifier.weight']\n",
+            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def tokenize_function(data):\n",
+        "  # Only the claims text is encoded. The abstract was previously tokenized\n",
+        "  # first and its result immediately overwritten, so it never reached the\n",
+        "  # model; dropping that dead call leaves the returned encoding unchanged.\n",
+        "  # NOTE(review): pass data[\"abstract\"] and data[\"claims\"] together as a\n",
+        "  # text pair if the abstract is meant to contribute to the prediction.\n",
+        "  return tokenizer(data[\"claims\"], padding = \"max_length\", truncation = True)"
+      ],
+      "metadata": {
+        "id": "WMkiB9nF8Q6Z"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "tokenized_training_data = raw_training_data.map(tokenize_function, batched = True)\n",
+        "tokenized_validation_data = validation_data.map(tokenize_function, batched = True)"
+      ],
+      "metadata": {
+        "id": "OzMvG9xd9Fct",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 54,
+          "referenced_widgets": [
+            "7876733281784505a7cce1549c4d4002",
+            "54ad98acf60a429b8689f2f39b83d679",
+            "8c0b927c2cc945f9bc796d574dbe734b",
+            "b28bb08b3f2a4529815182714df85d24",
+            "83005ceb9c614856994e3a3973b5b211",
+            "bbd9ff305af5489fbea28e37695fd86d",
+            "a22736b7c4bd48c081b2d7696708a6e8",
+            "282755013feb4326827a731c1cbf2da1",
+            "c66db78a54004b719e7c576406ac261b",
+            "97424e4bbbdb4536ae10f65c5352ee27",
+            "0ee137e1367546bd8738a52935ee9b95"
+          ]
+        },
+        "outputId": "50fa0346-1191-47c1-b0de-6bef83fe0597"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "WARNING:datasets.arrow_dataset:Loading cached processed dataset at /root/.cache/huggingface/datasets/HUPD___hupd/sample-85e70a41d39c65dd/0.0.0/6920d2def8fd7767046c0470603357f76866e5a09c97e19571896bfdca521142/cache-76692ae19051dcfe.arrow\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "Map:   0%|          | 0/9094 [00:00<?, ? examples/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "7876733281784505a7cce1549c4d4002"
+            }
+          },
+          "metadata": {}
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Removing Text Columns\n",
+        "training_data = tokenized_training_data\n",
+        "training_data = training_data.remove_columns([\"abstract\", \"claims\"])\n",
+        "validation_data = tokenized_validation_data\n",
+        "validation_data = validation_data.remove_columns([\"abstract\", \"claims\"])\n",
+        "# Setting to return tensors\n",
+        "training_data.set_format(\"torch\")\n",
+        "validation_data.set_format(\"torch\")"
+      ],
+      "metadata": {
+        "id": "gVEzcKUMq6ch"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# smaller_training_data = training_data.shuffle(seed = 129).select(range(1000))\n",
+        "# smaller_validation_data = validation_data.shuffle(seed = 129).select(range(750))"
+      ],
+      "metadata": {
+        "id": "9-g0Q76A9TXj"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "accuracy = evaluate.load(\"accuracy\")\n",
+        "\n",
+        "def compute_metrics(eval_pred):\n",
+        "  logits, labels = eval_pred\n",
+        "  predictions = np.argmax(logits, axis=1)\n",
+        "  return accuracy.compute(predictions=predictions, references=labels)"
+      ],
+      "metadata": {
+        "id": "UjSGNyMP5KZo"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "training_args = TrainingArguments(\n",
+        "    output_dir=\"Bert-Patent-Model-2\",\n",
+        "    per_device_train_batch_size=4,\n",
+        "    per_device_eval_batch_size=4,\n",
+        "    num_train_epochs=12,\n",
+        "    weight_decay=0.01,\n",
+        "    evaluation_strategy=\"epoch\",\n",
+        "    save_strategy=\"epoch\",\n",
+        "    load_best_model_at_end=True,\n",
+        "    fp16=True,\n",
+        "    gradient_accumulation_steps=16,\n",
+        "    optim=\"adafactor\",\n",
+        "    resume_from_checkpoint=\"./Bert-Patent-Model/checkpoint-504\"\n",
+        ")\n",
+        "\n",
+        "trainer = Trainer(\n",
+        "    model = model,\n",
+        "    args=training_args,\n",
+        "    train_dataset=training_data,\n",
+        "    eval_dataset=validation_data,\n",
+        "    tokenizer=tokenizer,\n",
+        "    compute_metrics=compute_metrics\n",
+        ")"
+      ],
+      "metadata": {
+        "id": "1wUBYokkBPmp"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "transformers.logging.set_verbosity_info()"
+      ],
+      "metadata": {
+        "id": "MSlmjffDEb4c"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "trainer.train()"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 486
+        },
+        "id": "E1QAvQEeCBrR",
+        "outputId": "2a64ed66-7c5e-4dab-818e-06fabc1a70cf"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              "\n",
+              "    <div>\n",
+              "      \n",
+              "      <progress value='3024' max='3024' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+              "      [3024/3024 1:17:47, Epoch 11/12]\n",
+              "    </div>\n",
+              "    <table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              " <tr style=\"text-align: left;\">\n",
+              "      <th>Epoch</th>\n",
+              "      <th>Training Loss</th>\n",
+              "      <th>Validation Loss</th>\n",
+              "      <th>Accuracy</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <td>0</td>\n",
+              "      <td>No log</td>\n",
+              "      <td>0.932718</td>\n",
+              "      <td>0.556081</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>1</td>\n",
+              "      <td>0.713200</td>\n",
+              "      <td>1.062583</td>\n",
+              "      <td>0.537387</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>2</td>\n",
+              "      <td>0.713200</td>\n",
+              "      <td>1.149405</td>\n",
+              "      <td>0.545854</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>3</td>\n",
+              "      <td>0.484300</td>\n",
+              "      <td>1.394087</td>\n",
+              "      <td>0.518474</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>4</td>\n",
+              "      <td>0.484300</td>\n",
+              "      <td>1.625637</td>\n",
+              "      <td>0.520013</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>5</td>\n",
+              "      <td>0.234500</td>\n",
+              "      <td>1.928906</td>\n",
+              "      <td>0.534638</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>6</td>\n",
+              "      <td>0.234500</td>\n",
+              "      <td>2.101890</td>\n",
+              "      <td>0.535188</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>7</td>\n",
+              "      <td>0.113600</td>\n",
+              "      <td>2.447903</td>\n",
+              "      <td>0.521553</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>8</td>\n",
+              "      <td>0.113600</td>\n",
+              "      <td>2.633792</td>\n",
+              "      <td>0.512756</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>9</td>\n",
+              "      <td>0.052100</td>\n",
+              "      <td>3.018095</td>\n",
+              "      <td>0.529250</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>10</td>\n",
+              "      <td>0.052100</td>\n",
+              "      <td>3.211678</td>\n",
+              "      <td>0.522542</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>11</td>\n",
+              "      <td>0.022200</td>\n",
+              "      <td>3.319586</td>\n",
+              "      <td>0.523532</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table><p>"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "TrainOutput(global_step=3024, training_loss=0.26791910230916327, metrics={'train_runtime': 4668.6932, 'train_samples_per_second': 41.518, 'train_steps_per_second': 0.648, 'total_flos': 2.563329616742707e+16, 'train_loss': 0.26791910230916327, 'epoch': 11.98})"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 18
+        }
+      ]
+    }
+  ]
+}

app.py CHANGED Viewed

@@ -1,35 +1,103 @@
 import streamlit as st
-from transformers import pipeline
-if "sentiment" not in st.session_state:
-    st.session_state.sentiment = ""
 if "score" not in st.session_state:
     st.session_state.score = ""
-def run_sentiment_model(text_in, model_in):
-    classifier = pipeline(task="sentiment-analysis",
-                          model=model_in)
-    analysis = classifier(text_in)
-    st.session_state.sentiment = analysis[0]["label"]
-    st.session_state.score = "{:.2f}".format(analysis[0]["score"] * 100)
-models_available = {"Roberta Large English": "siebert/sentiment-roberta-large-english",
-                    "Generic": "Seethal/sentiment_analysis_generic_dataset",
-                    "Twitter Roberta": "cardiffnlp/twitter-roberta-base-sentiment"}
-st.title("Sentiment Analysis Section (Milestone-2)")
-text_input = st.text_area(
-    label="Enter the text to analyze", value="I Love Pizza")
-model_picked = st.selectbox(
-    "Choose a model to run on", options=models_available.keys())
-st.button("Submit", on_click=run_sentiment_model, args=(
-    text_input, models_available[model_picked]))
-st.markdown(body="Sentiment: {}, Confidence Score: {} %".format(
-    st.session_state.sentiment, st.session_state.score))
-st.title("Patentability Score Section (Milestone-3)")

 import streamlit as st
+from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
+from datasets import load_dataset
+# Milestone-3
+# Seed the two session-state slots read by the result line so the first
+# render, before any button click, has values to format.
+for _slot in ("viability", "score"):
+    if _slot not in st.session_state:
+        st.session_state[_slot] = ""
+def get_patent_score(pipeline, abstract, claims):
+    """Score a patent's abstract and claims and publish the verdict.
+
+    Calls ``pipeline`` once per text and writes two session-state entries:
+    ``score`` (mean of the two top-prediction scores, formatted as a
+    percentage with two decimals) and ``viability`` (the chosen label).
+
+    Args:
+        pipeline: Callable taking a text and returning a sequence whose first
+            item is a mapping with ``"label"`` and ``"score"`` keys.
+        abstract: Abstract text to classify.
+        claims: Claims text to classify.
+    """
+    abstract_top = pipeline(abstract)[0]
+    claims_top = pipeline(claims)[0]
+
+    mean_score = (abstract_top["score"] + claims_top["score"]) / 2
+    st.session_state.score = "{:.2f}".format(mean_score * 100)
+
+    # Keep the label from whichever text scored higher; when both texts agree
+    # either branch yields the same label. The comparison must be score vs.
+    # score: comparing a float score with the other result's label string
+    # raises TypeError.
+    if abstract_top["score"] > claims_top["score"]:
+        st.session_state.viability = abstract_top["label"]
+    else:
+        st.session_state.viability = claims_top["label"]
+# NOTE(review): Streamlit re-executes this script on every interaction, so the
+# model and dataset below are reloaded each time; consider caching them.
+checkpoint_file = "./checkpoint-3024"
+model = AutoModelForSequenceClassification.from_pretrained(checkpoint_file)
+tokenizer = AutoTokenizer.from_pretrained(checkpoint_file)
+# Bound to its own name so the imported `pipeline` factory is not shadowed.
+# NOTE(review): the notebook trains with truncation enabled; long claims text
+# may need truncation at inference too -- confirm.
+classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
+dataset_dict = load_dataset('HUPD/hupd',
+                            name='sample',
+                            data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
+                            icpr_label=None,
+                            train_filing_start_date='2016-01-01',
+                            train_filing_end_date='2016-01-21',
+                            val_filing_start_date='2016-01-22',
+                            val_filing_end_date='2016-01-31',
+                            )
+dataset = dataset_dict["train"]
+# Take the first ten rows in a single slice; indexing `dataset["title"][i]`
+# inside a loop materialises the whole column on every access.
+sample = dataset[:10]
+abstract_dict = dict(zip(sample["title"], sample["abstract"]))
+claims_dict = dict(zip(sample["title"], sample["claims"]))
+st.title("Patent Viability Score Checker")
+chosen_patent = st.selectbox(
+    "Choose a patent to run the checker on", options=abstract_dict.keys())
+abstract = st.text_area(
+    label="Abstract",
+    value=abstract_dict[chosen_patent]
+)
+claims = st.text_area(
+    label="Claims",
+    value=claims_dict[chosen_patent]
+)
+# `st.button` forwards positional callback arguments through `args`; it has no
+# `options` parameter, so passing one raises TypeError.
+st.button("Check Viability", on_click=get_patent_score,
+          args=(classifier, abstract, claims))
+st.markdown(body="Outcome: {}, Score: {}%".format(
+    st.session_state.viability, st.session_state.score))
+# Milestone-2
+# if "sentiment" not in st.session_state:
+#     st.session_state.sentiment = ""
+# if "score" not in st.session_state:
+#     st.session_state.score = ""
+# def run_model(text_in, model_in):
+#     classifier = pipeline(task="sentiment-analysis",
+#                           model=model_in)
+#     analysis = classifier(text_in)
+#     st.session_state.sentiment = analysis[0]["label"]
+#     st.session_state.score = "{:.2f}".format(analysis[0]["score"] * 100)
+# models_available = {"Roberta Large English": "siebert/sentiment-roberta-large-english",
+#                     "Generic": "Seethal/sentiment_analysis_generic_dataset",
+#                     "Twitter Roberta": "cardiffnlp/twitter-roberta-base-sentiment"}
+# st.title("Sentiment Analysis Web Application")
+# text_input = st.text_area(
+#     label="Enter the text to analyze", value="I Love Pizza")
+# model_picked = st.selectbox(
+#     "Choose a model to run on", options=models_available.keys())
+# st.button("Submit", on_click=run_model, args=(
+#     text_input, models_available[model_picked]))
+# st.markdown(body="Sentiment: {}, Confidence Score: {} %".format(
+#     st.session_state.sentiment, st.session_state.score))