MJ committed on
Commit
83e19cd
1 Parent(s): 54d8a35

Added milestone-3 files

Browse files
Files changed (2) hide show
  1. Milestone_3.ipynb +1251 -0
  2. app.py +90 -22
Milestone_3.ipynb ADDED
@@ -0,0 +1,1251 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": []
7
+ },
8
+ "kernelspec": {
9
+ "name": "python3",
10
+ "display_name": "Python 3"
11
+ },
12
+ "language_info": {
13
+ "name": "python"
14
+ },
15
+ "accelerator": "GPU",
16
+ "gpuClass": "standard",
17
+ "widgets": {
18
+ "application/vnd.jupyter.widget-state+json": {
19
+ "44bae0dd4d024583a4942516604af83a": {
20
+ "model_module": "@jupyter-widgets/controls",
21
+ "model_name": "HBoxModel",
22
+ "model_module_version": "1.5.0",
23
+ "state": {
24
+ "_dom_classes": [],
25
+ "_model_module": "@jupyter-widgets/controls",
26
+ "_model_module_version": "1.5.0",
27
+ "_model_name": "HBoxModel",
28
+ "_view_count": null,
29
+ "_view_module": "@jupyter-widgets/controls",
30
+ "_view_module_version": "1.5.0",
31
+ "_view_name": "HBoxView",
32
+ "box_style": "",
33
+ "children": [
34
+ "IPY_MODEL_dc7945ddd9844c9286f5d7aeb0a87e2c",
35
+ "IPY_MODEL_b215c40be67f4e3c9556343a5e8b6a8f",
36
+ "IPY_MODEL_ae9d91d81a414ea5a48d4a0374ee7cc5"
37
+ ],
38
+ "layout": "IPY_MODEL_5111fe0aaaa54f329374a1c3dedc6981"
39
+ }
40
+ },
41
+ "dc7945ddd9844c9286f5d7aeb0a87e2c": {
42
+ "model_module": "@jupyter-widgets/controls",
43
+ "model_name": "HTMLModel",
44
+ "model_module_version": "1.5.0",
45
+ "state": {
46
+ "_dom_classes": [],
47
+ "_model_module": "@jupyter-widgets/controls",
48
+ "_model_module_version": "1.5.0",
49
+ "_model_name": "HTMLModel",
50
+ "_view_count": null,
51
+ "_view_module": "@jupyter-widgets/controls",
52
+ "_view_module_version": "1.5.0",
53
+ "_view_name": "HTMLView",
54
+ "description": "",
55
+ "description_tooltip": null,
56
+ "layout": "IPY_MODEL_17b0e3d10405412dab8a0c5e99d78c5f",
57
+ "placeholder": "​",
58
+ "style": "IPY_MODEL_d88273c0d390433cad8e2ebb810fdb6d",
59
+ "value": "100%"
60
+ }
61
+ },
62
+ "b215c40be67f4e3c9556343a5e8b6a8f": {
63
+ "model_module": "@jupyter-widgets/controls",
64
+ "model_name": "FloatProgressModel",
65
+ "model_module_version": "1.5.0",
66
+ "state": {
67
+ "_dom_classes": [],
68
+ "_model_module": "@jupyter-widgets/controls",
69
+ "_model_module_version": "1.5.0",
70
+ "_model_name": "FloatProgressModel",
71
+ "_view_count": null,
72
+ "_view_module": "@jupyter-widgets/controls",
73
+ "_view_module_version": "1.5.0",
74
+ "_view_name": "ProgressView",
75
+ "bar_style": "success",
76
+ "description": "",
77
+ "description_tooltip": null,
78
+ "layout": "IPY_MODEL_f1375d9369b9429fa5ee83101f225bac",
79
+ "max": 2,
80
+ "min": 0,
81
+ "orientation": "horizontal",
82
+ "style": "IPY_MODEL_31cea131c8714b4883daceee0b3a4414",
83
+ "value": 2
84
+ }
85
+ },
86
+ "ae9d91d81a414ea5a48d4a0374ee7cc5": {
87
+ "model_module": "@jupyter-widgets/controls",
88
+ "model_name": "HTMLModel",
89
+ "model_module_version": "1.5.0",
90
+ "state": {
91
+ "_dom_classes": [],
92
+ "_model_module": "@jupyter-widgets/controls",
93
+ "_model_module_version": "1.5.0",
94
+ "_model_name": "HTMLModel",
95
+ "_view_count": null,
96
+ "_view_module": "@jupyter-widgets/controls",
97
+ "_view_module_version": "1.5.0",
98
+ "_view_name": "HTMLView",
99
+ "description": "",
100
+ "description_tooltip": null,
101
+ "layout": "IPY_MODEL_32fd51563a5149a8b20e463a85f5ca0a",
102
+ "placeholder": "​",
103
+ "style": "IPY_MODEL_516719f733c6440fb9bbd15ca3dc037a",
104
+ "value": " 2/2 [00:00<00:00, 67.36it/s]"
105
+ }
106
+ },
107
+ "5111fe0aaaa54f329374a1c3dedc6981": {
108
+ "model_module": "@jupyter-widgets/base",
109
+ "model_name": "LayoutModel",
110
+ "model_module_version": "1.2.0",
111
+ "state": {
112
+ "_model_module": "@jupyter-widgets/base",
113
+ "_model_module_version": "1.2.0",
114
+ "_model_name": "LayoutModel",
115
+ "_view_count": null,
116
+ "_view_module": "@jupyter-widgets/base",
117
+ "_view_module_version": "1.2.0",
118
+ "_view_name": "LayoutView",
119
+ "align_content": null,
120
+ "align_items": null,
121
+ "align_self": null,
122
+ "border": null,
123
+ "bottom": null,
124
+ "display": null,
125
+ "flex": null,
126
+ "flex_flow": null,
127
+ "grid_area": null,
128
+ "grid_auto_columns": null,
129
+ "grid_auto_flow": null,
130
+ "grid_auto_rows": null,
131
+ "grid_column": null,
132
+ "grid_gap": null,
133
+ "grid_row": null,
134
+ "grid_template_areas": null,
135
+ "grid_template_columns": null,
136
+ "grid_template_rows": null,
137
+ "height": null,
138
+ "justify_content": null,
139
+ "justify_items": null,
140
+ "left": null,
141
+ "margin": null,
142
+ "max_height": null,
143
+ "max_width": null,
144
+ "min_height": null,
145
+ "min_width": null,
146
+ "object_fit": null,
147
+ "object_position": null,
148
+ "order": null,
149
+ "overflow": null,
150
+ "overflow_x": null,
151
+ "overflow_y": null,
152
+ "padding": null,
153
+ "right": null,
154
+ "top": null,
155
+ "visibility": null,
156
+ "width": null
157
+ }
158
+ },
159
+ "17b0e3d10405412dab8a0c5e99d78c5f": {
160
+ "model_module": "@jupyter-widgets/base",
161
+ "model_name": "LayoutModel",
162
+ "model_module_version": "1.2.0",
163
+ "state": {
164
+ "_model_module": "@jupyter-widgets/base",
165
+ "_model_module_version": "1.2.0",
166
+ "_model_name": "LayoutModel",
167
+ "_view_count": null,
168
+ "_view_module": "@jupyter-widgets/base",
169
+ "_view_module_version": "1.2.0",
170
+ "_view_name": "LayoutView",
171
+ "align_content": null,
172
+ "align_items": null,
173
+ "align_self": null,
174
+ "border": null,
175
+ "bottom": null,
176
+ "display": null,
177
+ "flex": null,
178
+ "flex_flow": null,
179
+ "grid_area": null,
180
+ "grid_auto_columns": null,
181
+ "grid_auto_flow": null,
182
+ "grid_auto_rows": null,
183
+ "grid_column": null,
184
+ "grid_gap": null,
185
+ "grid_row": null,
186
+ "grid_template_areas": null,
187
+ "grid_template_columns": null,
188
+ "grid_template_rows": null,
189
+ "height": null,
190
+ "justify_content": null,
191
+ "justify_items": null,
192
+ "left": null,
193
+ "margin": null,
194
+ "max_height": null,
195
+ "max_width": null,
196
+ "min_height": null,
197
+ "min_width": null,
198
+ "object_fit": null,
199
+ "object_position": null,
200
+ "order": null,
201
+ "overflow": null,
202
+ "overflow_x": null,
203
+ "overflow_y": null,
204
+ "padding": null,
205
+ "right": null,
206
+ "top": null,
207
+ "visibility": null,
208
+ "width": null
209
+ }
210
+ },
211
+ "d88273c0d390433cad8e2ebb810fdb6d": {
212
+ "model_module": "@jupyter-widgets/controls",
213
+ "model_name": "DescriptionStyleModel",
214
+ "model_module_version": "1.5.0",
215
+ "state": {
216
+ "_model_module": "@jupyter-widgets/controls",
217
+ "_model_module_version": "1.5.0",
218
+ "_model_name": "DescriptionStyleModel",
219
+ "_view_count": null,
220
+ "_view_module": "@jupyter-widgets/base",
221
+ "_view_module_version": "1.2.0",
222
+ "_view_name": "StyleView",
223
+ "description_width": ""
224
+ }
225
+ },
226
+ "f1375d9369b9429fa5ee83101f225bac": {
227
+ "model_module": "@jupyter-widgets/base",
228
+ "model_name": "LayoutModel",
229
+ "model_module_version": "1.2.0",
230
+ "state": {
231
+ "_model_module": "@jupyter-widgets/base",
232
+ "_model_module_version": "1.2.0",
233
+ "_model_name": "LayoutModel",
234
+ "_view_count": null,
235
+ "_view_module": "@jupyter-widgets/base",
236
+ "_view_module_version": "1.2.0",
237
+ "_view_name": "LayoutView",
238
+ "align_content": null,
239
+ "align_items": null,
240
+ "align_self": null,
241
+ "border": null,
242
+ "bottom": null,
243
+ "display": null,
244
+ "flex": null,
245
+ "flex_flow": null,
246
+ "grid_area": null,
247
+ "grid_auto_columns": null,
248
+ "grid_auto_flow": null,
249
+ "grid_auto_rows": null,
250
+ "grid_column": null,
251
+ "grid_gap": null,
252
+ "grid_row": null,
253
+ "grid_template_areas": null,
254
+ "grid_template_columns": null,
255
+ "grid_template_rows": null,
256
+ "height": null,
257
+ "justify_content": null,
258
+ "justify_items": null,
259
+ "left": null,
260
+ "margin": null,
261
+ "max_height": null,
262
+ "max_width": null,
263
+ "min_height": null,
264
+ "min_width": null,
265
+ "object_fit": null,
266
+ "object_position": null,
267
+ "order": null,
268
+ "overflow": null,
269
+ "overflow_x": null,
270
+ "overflow_y": null,
271
+ "padding": null,
272
+ "right": null,
273
+ "top": null,
274
+ "visibility": null,
275
+ "width": null
276
+ }
277
+ },
278
+ "31cea131c8714b4883daceee0b3a4414": {
279
+ "model_module": "@jupyter-widgets/controls",
280
+ "model_name": "ProgressStyleModel",
281
+ "model_module_version": "1.5.0",
282
+ "state": {
283
+ "_model_module": "@jupyter-widgets/controls",
284
+ "_model_module_version": "1.5.0",
285
+ "_model_name": "ProgressStyleModel",
286
+ "_view_count": null,
287
+ "_view_module": "@jupyter-widgets/base",
288
+ "_view_module_version": "1.2.0",
289
+ "_view_name": "StyleView",
290
+ "bar_color": null,
291
+ "description_width": ""
292
+ }
293
+ },
294
+ "32fd51563a5149a8b20e463a85f5ca0a": {
295
+ "model_module": "@jupyter-widgets/base",
296
+ "model_name": "LayoutModel",
297
+ "model_module_version": "1.2.0",
298
+ "state": {
299
+ "_model_module": "@jupyter-widgets/base",
300
+ "_model_module_version": "1.2.0",
301
+ "_model_name": "LayoutModel",
302
+ "_view_count": null,
303
+ "_view_module": "@jupyter-widgets/base",
304
+ "_view_module_version": "1.2.0",
305
+ "_view_name": "LayoutView",
306
+ "align_content": null,
307
+ "align_items": null,
308
+ "align_self": null,
309
+ "border": null,
310
+ "bottom": null,
311
+ "display": null,
312
+ "flex": null,
313
+ "flex_flow": null,
314
+ "grid_area": null,
315
+ "grid_auto_columns": null,
316
+ "grid_auto_flow": null,
317
+ "grid_auto_rows": null,
318
+ "grid_column": null,
319
+ "grid_gap": null,
320
+ "grid_row": null,
321
+ "grid_template_areas": null,
322
+ "grid_template_columns": null,
323
+ "grid_template_rows": null,
324
+ "height": null,
325
+ "justify_content": null,
326
+ "justify_items": null,
327
+ "left": null,
328
+ "margin": null,
329
+ "max_height": null,
330
+ "max_width": null,
331
+ "min_height": null,
332
+ "min_width": null,
333
+ "object_fit": null,
334
+ "object_position": null,
335
+ "order": null,
336
+ "overflow": null,
337
+ "overflow_x": null,
338
+ "overflow_y": null,
339
+ "padding": null,
340
+ "right": null,
341
+ "top": null,
342
+ "visibility": null,
343
+ "width": null
344
+ }
345
+ },
346
+ "516719f733c6440fb9bbd15ca3dc037a": {
347
+ "model_module": "@jupyter-widgets/controls",
348
+ "model_name": "DescriptionStyleModel",
349
+ "model_module_version": "1.5.0",
350
+ "state": {
351
+ "_model_module": "@jupyter-widgets/controls",
352
+ "_model_module_version": "1.5.0",
353
+ "_model_name": "DescriptionStyleModel",
354
+ "_view_count": null,
355
+ "_view_module": "@jupyter-widgets/base",
356
+ "_view_module_version": "1.2.0",
357
+ "_view_name": "StyleView",
358
+ "description_width": ""
359
+ }
360
+ },
361
+ "7876733281784505a7cce1549c4d4002": {
362
+ "model_module": "@jupyter-widgets/controls",
363
+ "model_name": "HBoxModel",
364
+ "model_module_version": "1.5.0",
365
+ "state": {
366
+ "_dom_classes": [],
367
+ "_model_module": "@jupyter-widgets/controls",
368
+ "_model_module_version": "1.5.0",
369
+ "_model_name": "HBoxModel",
370
+ "_view_count": null,
371
+ "_view_module": "@jupyter-widgets/controls",
372
+ "_view_module_version": "1.5.0",
373
+ "_view_name": "HBoxView",
374
+ "box_style": "",
375
+ "children": [
376
+ "IPY_MODEL_54ad98acf60a429b8689f2f39b83d679",
377
+ "IPY_MODEL_8c0b927c2cc945f9bc796d574dbe734b",
378
+ "IPY_MODEL_b28bb08b3f2a4529815182714df85d24"
379
+ ],
380
+ "layout": "IPY_MODEL_83005ceb9c614856994e3a3973b5b211"
381
+ }
382
+ },
383
+ "54ad98acf60a429b8689f2f39b83d679": {
384
+ "model_module": "@jupyter-widgets/controls",
385
+ "model_name": "HTMLModel",
386
+ "model_module_version": "1.5.0",
387
+ "state": {
388
+ "_dom_classes": [],
389
+ "_model_module": "@jupyter-widgets/controls",
390
+ "_model_module_version": "1.5.0",
391
+ "_model_name": "HTMLModel",
392
+ "_view_count": null,
393
+ "_view_module": "@jupyter-widgets/controls",
394
+ "_view_module_version": "1.5.0",
395
+ "_view_name": "HTMLView",
396
+ "description": "",
397
+ "description_tooltip": null,
398
+ "layout": "IPY_MODEL_bbd9ff305af5489fbea28e37695fd86d",
399
+ "placeholder": "​",
400
+ "style": "IPY_MODEL_a22736b7c4bd48c081b2d7696708a6e8",
401
+ "value": "Map: 100%"
402
+ }
403
+ },
404
+ "8c0b927c2cc945f9bc796d574dbe734b": {
405
+ "model_module": "@jupyter-widgets/controls",
406
+ "model_name": "FloatProgressModel",
407
+ "model_module_version": "1.5.0",
408
+ "state": {
409
+ "_dom_classes": [],
410
+ "_model_module": "@jupyter-widgets/controls",
411
+ "_model_module_version": "1.5.0",
412
+ "_model_name": "FloatProgressModel",
413
+ "_view_count": null,
414
+ "_view_module": "@jupyter-widgets/controls",
415
+ "_view_module_version": "1.5.0",
416
+ "_view_name": "ProgressView",
417
+ "bar_style": "",
418
+ "description": "",
419
+ "description_tooltip": null,
420
+ "layout": "IPY_MODEL_282755013feb4326827a731c1cbf2da1",
421
+ "max": 9094,
422
+ "min": 0,
423
+ "orientation": "horizontal",
424
+ "style": "IPY_MODEL_c66db78a54004b719e7c576406ac261b",
425
+ "value": 9094
426
+ }
427
+ },
428
+ "b28bb08b3f2a4529815182714df85d24": {
429
+ "model_module": "@jupyter-widgets/controls",
430
+ "model_name": "HTMLModel",
431
+ "model_module_version": "1.5.0",
432
+ "state": {
433
+ "_dom_classes": [],
434
+ "_model_module": "@jupyter-widgets/controls",
435
+ "_model_module_version": "1.5.0",
436
+ "_model_name": "HTMLModel",
437
+ "_view_count": null,
438
+ "_view_module": "@jupyter-widgets/controls",
439
+ "_view_module_version": "1.5.0",
440
+ "_view_name": "HTMLView",
441
+ "description": "",
442
+ "description_tooltip": null,
443
+ "layout": "IPY_MODEL_97424e4bbbdb4536ae10f65c5352ee27",
444
+ "placeholder": "​",
445
+ "style": "IPY_MODEL_0ee137e1367546bd8738a52935ee9b95",
446
+ "value": " 9094/9094 [00:44<00:00, 262.05 examples/s]"
447
+ }
448
+ },
449
+ "83005ceb9c614856994e3a3973b5b211": {
450
+ "model_module": "@jupyter-widgets/base",
451
+ "model_name": "LayoutModel",
452
+ "model_module_version": "1.2.0",
453
+ "state": {
454
+ "_model_module": "@jupyter-widgets/base",
455
+ "_model_module_version": "1.2.0",
456
+ "_model_name": "LayoutModel",
457
+ "_view_count": null,
458
+ "_view_module": "@jupyter-widgets/base",
459
+ "_view_module_version": "1.2.0",
460
+ "_view_name": "LayoutView",
461
+ "align_content": null,
462
+ "align_items": null,
463
+ "align_self": null,
464
+ "border": null,
465
+ "bottom": null,
466
+ "display": null,
467
+ "flex": null,
468
+ "flex_flow": null,
469
+ "grid_area": null,
470
+ "grid_auto_columns": null,
471
+ "grid_auto_flow": null,
472
+ "grid_auto_rows": null,
473
+ "grid_column": null,
474
+ "grid_gap": null,
475
+ "grid_row": null,
476
+ "grid_template_areas": null,
477
+ "grid_template_columns": null,
478
+ "grid_template_rows": null,
479
+ "height": null,
480
+ "justify_content": null,
481
+ "justify_items": null,
482
+ "left": null,
483
+ "margin": null,
484
+ "max_height": null,
485
+ "max_width": null,
486
+ "min_height": null,
487
+ "min_width": null,
488
+ "object_fit": null,
489
+ "object_position": null,
490
+ "order": null,
491
+ "overflow": null,
492
+ "overflow_x": null,
493
+ "overflow_y": null,
494
+ "padding": null,
495
+ "right": null,
496
+ "top": null,
497
+ "visibility": "hidden",
498
+ "width": null
499
+ }
500
+ },
501
+ "bbd9ff305af5489fbea28e37695fd86d": {
502
+ "model_module": "@jupyter-widgets/base",
503
+ "model_name": "LayoutModel",
504
+ "model_module_version": "1.2.0",
505
+ "state": {
506
+ "_model_module": "@jupyter-widgets/base",
507
+ "_model_module_version": "1.2.0",
508
+ "_model_name": "LayoutModel",
509
+ "_view_count": null,
510
+ "_view_module": "@jupyter-widgets/base",
511
+ "_view_module_version": "1.2.0",
512
+ "_view_name": "LayoutView",
513
+ "align_content": null,
514
+ "align_items": null,
515
+ "align_self": null,
516
+ "border": null,
517
+ "bottom": null,
518
+ "display": null,
519
+ "flex": null,
520
+ "flex_flow": null,
521
+ "grid_area": null,
522
+ "grid_auto_columns": null,
523
+ "grid_auto_flow": null,
524
+ "grid_auto_rows": null,
525
+ "grid_column": null,
526
+ "grid_gap": null,
527
+ "grid_row": null,
528
+ "grid_template_areas": null,
529
+ "grid_template_columns": null,
530
+ "grid_template_rows": null,
531
+ "height": null,
532
+ "justify_content": null,
533
+ "justify_items": null,
534
+ "left": null,
535
+ "margin": null,
536
+ "max_height": null,
537
+ "max_width": null,
538
+ "min_height": null,
539
+ "min_width": null,
540
+ "object_fit": null,
541
+ "object_position": null,
542
+ "order": null,
543
+ "overflow": null,
544
+ "overflow_x": null,
545
+ "overflow_y": null,
546
+ "padding": null,
547
+ "right": null,
548
+ "top": null,
549
+ "visibility": null,
550
+ "width": null
551
+ }
552
+ },
553
+ "a22736b7c4bd48c081b2d7696708a6e8": {
554
+ "model_module": "@jupyter-widgets/controls",
555
+ "model_name": "DescriptionStyleModel",
556
+ "model_module_version": "1.5.0",
557
+ "state": {
558
+ "_model_module": "@jupyter-widgets/controls",
559
+ "_model_module_version": "1.5.0",
560
+ "_model_name": "DescriptionStyleModel",
561
+ "_view_count": null,
562
+ "_view_module": "@jupyter-widgets/base",
563
+ "_view_module_version": "1.2.0",
564
+ "_view_name": "StyleView",
565
+ "description_width": ""
566
+ }
567
+ },
568
+ "282755013feb4326827a731c1cbf2da1": {
569
+ "model_module": "@jupyter-widgets/base",
570
+ "model_name": "LayoutModel",
571
+ "model_module_version": "1.2.0",
572
+ "state": {
573
+ "_model_module": "@jupyter-widgets/base",
574
+ "_model_module_version": "1.2.0",
575
+ "_model_name": "LayoutModel",
576
+ "_view_count": null,
577
+ "_view_module": "@jupyter-widgets/base",
578
+ "_view_module_version": "1.2.0",
579
+ "_view_name": "LayoutView",
580
+ "align_content": null,
581
+ "align_items": null,
582
+ "align_self": null,
583
+ "border": null,
584
+ "bottom": null,
585
+ "display": null,
586
+ "flex": null,
587
+ "flex_flow": null,
588
+ "grid_area": null,
589
+ "grid_auto_columns": null,
590
+ "grid_auto_flow": null,
591
+ "grid_auto_rows": null,
592
+ "grid_column": null,
593
+ "grid_gap": null,
594
+ "grid_row": null,
595
+ "grid_template_areas": null,
596
+ "grid_template_columns": null,
597
+ "grid_template_rows": null,
598
+ "height": null,
599
+ "justify_content": null,
600
+ "justify_items": null,
601
+ "left": null,
602
+ "margin": null,
603
+ "max_height": null,
604
+ "max_width": null,
605
+ "min_height": null,
606
+ "min_width": null,
607
+ "object_fit": null,
608
+ "object_position": null,
609
+ "order": null,
610
+ "overflow": null,
611
+ "overflow_x": null,
612
+ "overflow_y": null,
613
+ "padding": null,
614
+ "right": null,
615
+ "top": null,
616
+ "visibility": null,
617
+ "width": null
618
+ }
619
+ },
620
+ "c66db78a54004b719e7c576406ac261b": {
621
+ "model_module": "@jupyter-widgets/controls",
622
+ "model_name": "ProgressStyleModel",
623
+ "model_module_version": "1.5.0",
624
+ "state": {
625
+ "_model_module": "@jupyter-widgets/controls",
626
+ "_model_module_version": "1.5.0",
627
+ "_model_name": "ProgressStyleModel",
628
+ "_view_count": null,
629
+ "_view_module": "@jupyter-widgets/base",
630
+ "_view_module_version": "1.2.0",
631
+ "_view_name": "StyleView",
632
+ "bar_color": null,
633
+ "description_width": ""
634
+ }
635
+ },
636
+ "97424e4bbbdb4536ae10f65c5352ee27": {
637
+ "model_module": "@jupyter-widgets/base",
638
+ "model_name": "LayoutModel",
639
+ "model_module_version": "1.2.0",
640
+ "state": {
641
+ "_model_module": "@jupyter-widgets/base",
642
+ "_model_module_version": "1.2.0",
643
+ "_model_name": "LayoutModel",
644
+ "_view_count": null,
645
+ "_view_module": "@jupyter-widgets/base",
646
+ "_view_module_version": "1.2.0",
647
+ "_view_name": "LayoutView",
648
+ "align_content": null,
649
+ "align_items": null,
650
+ "align_self": null,
651
+ "border": null,
652
+ "bottom": null,
653
+ "display": null,
654
+ "flex": null,
655
+ "flex_flow": null,
656
+ "grid_area": null,
657
+ "grid_auto_columns": null,
658
+ "grid_auto_flow": null,
659
+ "grid_auto_rows": null,
660
+ "grid_column": null,
661
+ "grid_gap": null,
662
+ "grid_row": null,
663
+ "grid_template_areas": null,
664
+ "grid_template_columns": null,
665
+ "grid_template_rows": null,
666
+ "height": null,
667
+ "justify_content": null,
668
+ "justify_items": null,
669
+ "left": null,
670
+ "margin": null,
671
+ "max_height": null,
672
+ "max_width": null,
673
+ "min_height": null,
674
+ "min_width": null,
675
+ "object_fit": null,
676
+ "object_position": null,
677
+ "order": null,
678
+ "overflow": null,
679
+ "overflow_x": null,
680
+ "overflow_y": null,
681
+ "padding": null,
682
+ "right": null,
683
+ "top": null,
684
+ "visibility": null,
685
+ "width": null
686
+ }
687
+ },
688
+ "0ee137e1367546bd8738a52935ee9b95": {
689
+ "model_module": "@jupyter-widgets/controls",
690
+ "model_name": "DescriptionStyleModel",
691
+ "model_module_version": "1.5.0",
692
+ "state": {
693
+ "_model_module": "@jupyter-widgets/controls",
694
+ "_model_module_version": "1.5.0",
695
+ "_model_name": "DescriptionStyleModel",
696
+ "_view_count": null,
697
+ "_view_module": "@jupyter-widgets/base",
698
+ "_view_module_version": "1.2.0",
699
+ "_view_name": "StyleView",
700
+ "description_width": ""
701
+ }
702
+ }
703
+ }
704
+ }
705
+ },
706
+ "cells": [
707
+ {
708
+ "cell_type": "code",
709
+ "source": [
710
+ "! pip install datasets -q\n",
711
+ "! pip install transformers -q \n",
712
+ "! pip install evaluate -q\n",
713
+ "! pip install accelerate -q"
714
+ ],
715
+ "metadata": {
716
+ "id": "4a1gg5YCc3NA"
717
+ },
718
+ "execution_count": null,
719
+ "outputs": []
720
+ },
721
+ {
722
+ "cell_type": "code",
723
+ "source": [
724
+ "from google.colab import drive\n",
725
+ "drive.mount('/content/drive')\n",
726
+ "%cd \"/content/drive/MyDrive/Colab Notebooks/Project\""
727
+ ],
728
+ "metadata": {
729
+ "id": "WzMdj31Ktf_I",
730
+ "colab": {
731
+ "base_uri": "https://localhost:8080/"
732
+ },
733
+ "outputId": "4214aef0-e79c-4075-f745-134c4978b291"
734
+ },
735
+ "execution_count": null,
736
+ "outputs": [
737
+ {
738
+ "output_type": "stream",
739
+ "name": "stdout",
740
+ "text": [
741
+ "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n",
742
+ "/content/drive/MyDrive/Colab Notebooks/Project\n"
743
+ ]
744
+ }
745
+ ]
746
+ },
747
+ {
748
+ "cell_type": "code",
749
+ "source": [
750
+ "import transformers\n",
751
+ "from datasets import load_dataset, ClassLabel\n",
752
+ "from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments\n",
753
+ "import numpy as np\n",
754
+ "import evaluate"
755
+ ],
756
+ "metadata": {
757
+ "id": "rhBp0k13AqRC"
758
+ },
759
+ "execution_count": null,
760
+ "outputs": []
761
+ },
762
+ {
763
+ "cell_type": "markdown",
764
+ "source": [
765
+ "# Loading the Dataset"
766
+ ],
767
+ "metadata": {
768
+ "id": "8X7Or5qactFF"
769
+ }
770
+ },
771
+ {
772
+ "cell_type": "code",
773
+ "source": [
774
+ "dataset_dict = load_dataset('HUPD/hupd',\n",
775
+ " name='sample',\n",
776
+ " data_files=\"https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather\", \n",
777
+ " icpr_label=None,\n",
778
+ " train_filing_start_date='2016-01-01',\n",
779
+ " train_filing_end_date='2016-01-21',\n",
780
+ " val_filing_start_date='2016-01-22',\n",
781
+ " val_filing_end_date='2016-01-31',\n",
782
+ ")"
783
+ ],
784
+ "metadata": {
785
+ "id": "vm7-_ncug7I6",
786
+ "colab": {
787
+ "base_uri": "https://localhost:8080/",
788
+ "height": 86,
789
+ "referenced_widgets": [
790
+ "44bae0dd4d024583a4942516604af83a",
791
+ "dc7945ddd9844c9286f5d7aeb0a87e2c",
792
+ "b215c40be67f4e3c9556343a5e8b6a8f",
793
+ "ae9d91d81a414ea5a48d4a0374ee7cc5",
794
+ "5111fe0aaaa54f329374a1c3dedc6981",
795
+ "17b0e3d10405412dab8a0c5e99d78c5f",
796
+ "d88273c0d390433cad8e2ebb810fdb6d",
797
+ "f1375d9369b9429fa5ee83101f225bac",
798
+ "31cea131c8714b4883daceee0b3a4414",
799
+ "32fd51563a5149a8b20e463a85f5ca0a",
800
+ "516719f733c6440fb9bbd15ca3dc037a"
801
+ ]
802
+ },
803
+ "outputId": "4a7ca506-e35f-4b1e-d33c-550edb540dc1"
804
+ },
805
+ "execution_count": null,
806
+ "outputs": [
807
+ {
808
+ "output_type": "stream",
809
+ "name": "stderr",
810
+ "text": [
811
+ "WARNING:datasets.builder:Found cached dataset hupd (/root/.cache/huggingface/datasets/HUPD___hupd/sample-85e70a41d39c65dd/0.0.0/6920d2def8fd7767046c0470603357f76866e5a09c97e19571896bfdca521142)\n"
812
+ ]
813
+ },
814
+ {
815
+ "output_type": "display_data",
816
+ "data": {
817
+ "text/plain": [
818
+ " 0%| | 0/2 [00:00<?, ?it/s]"
819
+ ],
820
+ "application/vnd.jupyter.widget-view+json": {
821
+ "version_major": 2,
822
+ "version_minor": 0,
823
+ "model_id": "44bae0dd4d024583a4942516604af83a"
824
+ }
825
+ },
826
+ "metadata": {}
827
+ }
828
+ ]
829
+ },
830
+ {
831
+ "cell_type": "code",
832
+ "source": [
833
+ "raw_training_data = dataset_dict[\"train\"]\n",
834
+ "validation_data = dataset_dict[\"validation\"]"
835
+ ],
836
+ "metadata": {
837
+ "id": "FNPuthOVhJFg"
838
+ },
839
+ "execution_count": null,
840
+ "outputs": []
841
+ },
842
+ {
843
+ "cell_type": "markdown",
844
+ "source": [
845
+ "# Filtering Dataset to only include the relevant variables"
846
+ ],
847
+ "metadata": {
848
+ "id": "Wo-cOQEYmfbF"
849
+ }
850
+ },
851
+ {
852
+ "cell_type": "code",
853
+ "source": [
854
+ "features_to_remove = ['patent_number', 'title', 'background', 'summary', 'description', 'cpc_label', \n",
855
+ " 'ipc_label', 'filing_date', 'patent_issue_date', 'date_published', 'examiner_id']\n",
856
+ "# Removing irrelevant columns\n",
857
+ "raw_training_data = raw_training_data.remove_columns(features_to_remove)\n",
858
+ "validation_data = validation_data.remove_columns(features_to_remove)\n",
859
+ "\n",
860
+ "# Renaming Column names to match expected input\n",
861
+ "raw_training_data = raw_training_data.rename_column('decision', 'labels')\n",
862
+ "validation_data = validation_data.rename_column('decision', 'labels')"
863
+ ],
864
+ "metadata": {
865
+ "id": "8p0aweR7jwHF"
866
+ },
867
+ "execution_count": null,
868
+ "outputs": []
869
+ },
870
+ {
871
+ "cell_type": "markdown",
872
+ "source": [
873
+ "# Converting Dataset labels to encoded values"
874
+ ],
875
+ "metadata": {
876
+ "id": "pKay62q50mAQ"
877
+ }
878
+ },
879
+ {
880
+ "cell_type": "code",
881
+ "source": [
882
+ "features = raw_training_data.features.copy()\n",
883
+ "features[\"labels\"] = ClassLabel(names = [\"REJECTED\", \"PENDING\", \"ACCEPTED\"])\n",
884
+ "raw_training_data = raw_training_data.cast(features)\n",
885
+ "\n",
886
+ "features = validation_data.features.copy()\n",
887
+ "features[\"labels\"] = ClassLabel(names = [\"REJECTED\", \"PENDING\", \"ACCEPTED\"])\n",
888
+ "validation_data = validation_data.cast(features)"
889
+ ],
890
+ "metadata": {
891
+ "colab": {
892
+ "base_uri": "https://localhost:8080/"
893
+ },
894
+ "id": "ece-OlYxyJ7e",
895
+ "outputId": "8f42ef75-9bef-41fa-cf4c-1f9a1f9c88f9"
896
+ },
897
+ "execution_count": null,
898
+ "outputs": [
899
+ {
900
+ "output_type": "stream",
901
+ "name": "stderr",
902
+ "text": [
903
+ "WARNING:datasets.arrow_dataset:Loading cached processed dataset at /root/.cache/huggingface/datasets/HUPD___hupd/sample-85e70a41d39c65dd/0.0.0/6920d2def8fd7767046c0470603357f76866e5a09c97e19571896bfdca521142/cache-e851499ec526ea46.arrow\n",
904
+ "WARNING:datasets.arrow_dataset:Loading cached processed dataset at /root/.cache/huggingface/datasets/HUPD___hupd/sample-85e70a41d39c65dd/0.0.0/6920d2def8fd7767046c0470603357f76866e5a09c97e19571896bfdca521142/cache-1c918e033c2ee87e.arrow\n"
905
+ ]
906
+ }
907
+ ]
908
+ },
909
+ {
910
+ "cell_type": "markdown",
911
+ "source": [
912
+ "# Getting a Pre-Trained Model"
913
+ ],
914
+ "metadata": {
915
+ "id": "OQnpksYyh8KZ"
916
+ }
917
+ },
918
+ {
919
+ "cell_type": "code",
920
+ "source": [
921
+ "model_name = 'distilbert-base-cased'\n",
922
+ "\n",
923
+ "label2id = {\n",
924
+ " \"REJECTED\" : 0,\n",
925
+ " \"PENDING\" : 1,\n",
926
+ " \"ACCEPTED\": 2\n",
927
+ "}\n",
928
+ "\n",
929
+ "id2label = {\n",
930
+ " 0 : \"REJECTED\",\n",
931
+ " 1 : \"PENDING\",\n",
932
+ " 2 : \"ACCEPTED\"\n",
933
+ "}\n",
934
+ "\n",
935
+ "model = AutoModelForSequenceClassification.from_pretrained(\n",
936
+ " model_name, \n",
937
+ " num_labels = 3,\n",
938
+ " id2label=id2label,\n",
939
+ " label2id=label2id\n",
940
+ ")\n",
941
+ "\n",
942
+ "tokenizer = AutoTokenizer.from_pretrained(model_name)"
943
+ ],
944
+ "metadata": {
945
+ "id": "2a6MmqVai9EL",
946
+ "colab": {
947
+ "base_uri": "https://localhost:8080/"
948
+ },
949
+ "outputId": "d7020e61-fca6-49a6-bd7d-ceaae253bfb8"
950
+ },
951
+ "execution_count": null,
952
+ "outputs": [
953
+ {
954
+ "output_type": "stream",
955
+ "name": "stderr",
956
+ "text": [
957
+ "Some weights of the model checkpoint at distilbert-base-cased were not used when initializing DistilBertForSequenceClassification: ['vocab_projector.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_projector.bias']\n",
958
+ "- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
959
+ "- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
960
+ "Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-cased and are newly initialized: ['classifier.bias', 'pre_classifier.bias', 'pre_classifier.weight', 'classifier.weight']\n",
961
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
962
+ ]
963
+ }
964
+ ]
965
+ },
966
+ {
967
+ "cell_type": "code",
968
+ "source": [
969
def tokenize_function(data):
    """Tokenize the "claims" column of a batch with the module-level tokenizer.

    Sequences are padded with padding="max_length" and truncated when too
    long, so every returned encoding has the same length.

    NOTE(review): the "abstract" column is not part of the returned encoding.
    An earlier tokenizer call on data["abstract"] was assigned to the same
    variable and then immediately overwritten by the claims encoding, so it
    never influenced the result; it is dropped here to avoid tokenizing every
    batch twice. If abstracts are supposed to feed the model as well, they
    must be combined with the claims explicitly -- confirm the intent.

    Args:
        data: Batch mapping that provides a "claims" entry.

    Returns:
        The tokenizer output for data["claims"].
    """
    return tokenizer(data["claims"], padding="max_length", truncation=True)
973
+ ],
974
+ "metadata": {
975
+ "id": "WMkiB9nF8Q6Z"
976
+ },
977
+ "execution_count": null,
978
+ "outputs": []
979
+ },
980
+ {
981
+ "cell_type": "code",
982
+ "source": [
983
+ "tokenized_training_data = raw_training_data.map(tokenize_function, batched = True)\n",
984
+ "tokenized_validation_data = validation_data.map(tokenize_function, batched = True)"
985
+ ],
986
+ "metadata": {
987
+ "id": "OzMvG9xd9Fct",
988
+ "colab": {
989
+ "base_uri": "https://localhost:8080/",
990
+ "height": 54,
991
+ "referenced_widgets": [
992
+ "7876733281784505a7cce1549c4d4002",
993
+ "54ad98acf60a429b8689f2f39b83d679",
994
+ "8c0b927c2cc945f9bc796d574dbe734b",
995
+ "b28bb08b3f2a4529815182714df85d24",
996
+ "83005ceb9c614856994e3a3973b5b211",
997
+ "bbd9ff305af5489fbea28e37695fd86d",
998
+ "a22736b7c4bd48c081b2d7696708a6e8",
999
+ "282755013feb4326827a731c1cbf2da1",
1000
+ "c66db78a54004b719e7c576406ac261b",
1001
+ "97424e4bbbdb4536ae10f65c5352ee27",
1002
+ "0ee137e1367546bd8738a52935ee9b95"
1003
+ ]
1004
+ },
1005
+ "outputId": "50fa0346-1191-47c1-b0de-6bef83fe0597"
1006
+ },
1007
+ "execution_count": null,
1008
+ "outputs": [
1009
+ {
1010
+ "output_type": "stream",
1011
+ "name": "stderr",
1012
+ "text": [
1013
+ "WARNING:datasets.arrow_dataset:Loading cached processed dataset at /root/.cache/huggingface/datasets/HUPD___hupd/sample-85e70a41d39c65dd/0.0.0/6920d2def8fd7767046c0470603357f76866e5a09c97e19571896bfdca521142/cache-76692ae19051dcfe.arrow\n"
1014
+ ]
1015
+ },
1016
+ {
1017
+ "output_type": "display_data",
1018
+ "data": {
1019
+ "text/plain": [
1020
+ "Map: 0%| | 0/9094 [00:00<?, ? examples/s]"
1021
+ ],
1022
+ "application/vnd.jupyter.widget-view+json": {
1023
+ "version_major": 2,
1024
+ "version_minor": 0,
1025
+ "model_id": "7876733281784505a7cce1549c4d4002"
1026
+ }
1027
+ },
1028
+ "metadata": {}
1029
+ }
1030
+ ]
1031
+ },
1032
+ {
1033
+ "cell_type": "code",
1034
+ "source": [
1035
+ "# Removing Text Columns\n",
1036
+ "training_data = tokenized_training_data\n",
1037
+ "training_data = training_data.remove_columns([\"abstract\", \"claims\"])\n",
1038
+ "validation_data = tokenized_validation_data\n",
1039
+ "validation_data = validation_data.remove_columns([\"abstract\", \"claims\"])\n",
1040
+ "# Setting to return tensors\n",
1041
+ "training_data.set_format(\"torch\")\n",
1042
+ "validation_data.set_format(\"torch\")"
1043
+ ],
1044
+ "metadata": {
1045
+ "id": "gVEzcKUMq6ch"
1046
+ },
1047
+ "execution_count": null,
1048
+ "outputs": []
1049
+ },
1050
+ {
1051
+ "cell_type": "code",
1052
+ "source": [
1053
+ "# smaller_training_data = training_data.shuffle(seed = 129).select(range(1000))\n",
1054
+ "# smaller_validation_data = validation_data.shuffle(seed = 129).select(range(750))"
1055
+ ],
1056
+ "metadata": {
1057
+ "id": "9-g0Q76A9TXj"
1058
+ },
1059
+ "execution_count": null,
1060
+ "outputs": []
1061
+ },
1062
+ {
1063
+ "cell_type": "code",
1064
+ "source": [
1065
+ "accuracy = evaluate.load(\"accuracy\")\n",
1066
+ "\n",
1067
def compute_metrics(eval_pred):
    """Score one evaluation pass with the module-level `accuracy` metric.

    Args:
        eval_pred: Pair that unpacks into (logits, labels).

    Returns:
        Whatever `accuracy.compute` returns for the predicted class ids
        (index of the largest logit along axis 1) against the labels.
    """
    raw_scores, gold_labels = eval_pred
    predicted_ids = np.argmax(raw_scores, axis=1)
    return accuracy.compute(predictions=predicted_ids, references=gold_labels)
1071
+ ],
1072
+ "metadata": {
1073
+ "id": "UjSGNyMP5KZo"
1074
+ },
1075
+ "execution_count": null,
1076
+ "outputs": []
1077
+ },
1078
+ {
1079
+ "cell_type": "code",
1080
+ "source": [
1081
+ "training_args = TrainingArguments(\n",
1082
+ " output_dir=\"Bert-Patent-Model-2\",\n",
1083
+ " per_device_train_batch_size=4,\n",
1084
+ " per_device_eval_batch_size=4,\n",
1085
+ " num_train_epochs=12,\n",
1086
+ " weight_decay=0.01,\n",
1087
+ " evaluation_strategy=\"epoch\",\n",
1088
+ " save_strategy=\"epoch\",\n",
1089
+ " load_best_model_at_end=True,\n",
1090
+ " fp16=True,\n",
1091
+ " gradient_accumulation_steps=16,\n",
1092
+ " optim=\"adafactor\",\n",
1093
+ " resume_from_checkpoint=\"./Bert-Patent-Model/checkpoint-504\"\n",
1094
+ ")\n",
1095
+ "\n",
1096
+ "trainer = Trainer(\n",
1097
+ " model = model,\n",
1098
+ " args=training_args,\n",
1099
+ " train_dataset=training_data,\n",
1100
+ " eval_dataset=validation_data,\n",
1101
+ " tokenizer=tokenizer,\n",
1102
+ " compute_metrics=compute_metrics\n",
1103
+ ")"
1104
+ ],
1105
+ "metadata": {
1106
+ "id": "1wUBYokkBPmp"
1107
+ },
1108
+ "execution_count": null,
1109
+ "outputs": []
1110
+ },
1111
+ {
1112
+ "cell_type": "code",
1113
+ "source": [
1114
+ "transformers.logging.set_verbosity_info()"
1115
+ ],
1116
+ "metadata": {
1117
+ "id": "MSlmjffDEb4c"
1118
+ },
1119
+ "execution_count": null,
1120
+ "outputs": []
1121
+ },
1122
+ {
1123
+ "cell_type": "code",
1124
+ "source": [
1125
+ "trainer.train()"
1126
+ ],
1127
+ "metadata": {
1128
+ "colab": {
1129
+ "base_uri": "https://localhost:8080/",
1130
+ "height": 486
1131
+ },
1132
+ "id": "E1QAvQEeCBrR",
1133
+ "outputId": "2a64ed66-7c5e-4dab-818e-06fabc1a70cf"
1134
+ },
1135
+ "execution_count": null,
1136
+ "outputs": [
1137
+ {
1138
+ "output_type": "display_data",
1139
+ "data": {
1140
+ "text/plain": [
1141
+ "<IPython.core.display.HTML object>"
1142
+ ],
1143
+ "text/html": [
1144
+ "\n",
1145
+ " <div>\n",
1146
+ " \n",
1147
+ " <progress value='3024' max='3024' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
1148
+ " [3024/3024 1:17:47, Epoch 11/12]\n",
1149
+ " </div>\n",
1150
+ " <table border=\"1\" class=\"dataframe\">\n",
1151
+ " <thead>\n",
1152
+ " <tr style=\"text-align: left;\">\n",
1153
+ " <th>Epoch</th>\n",
1154
+ " <th>Training Loss</th>\n",
1155
+ " <th>Validation Loss</th>\n",
1156
+ " <th>Accuracy</th>\n",
1157
+ " </tr>\n",
1158
+ " </thead>\n",
1159
+ " <tbody>\n",
1160
+ " <tr>\n",
1161
+ " <td>0</td>\n",
1162
+ " <td>No log</td>\n",
1163
+ " <td>0.932718</td>\n",
1164
+ " <td>0.556081</td>\n",
1165
+ " </tr>\n",
1166
+ " <tr>\n",
1167
+ " <td>1</td>\n",
1168
+ " <td>0.713200</td>\n",
1169
+ " <td>1.062583</td>\n",
1170
+ " <td>0.537387</td>\n",
1171
+ " </tr>\n",
1172
+ " <tr>\n",
1173
+ " <td>2</td>\n",
1174
+ " <td>0.713200</td>\n",
1175
+ " <td>1.149405</td>\n",
1176
+ " <td>0.545854</td>\n",
1177
+ " </tr>\n",
1178
+ " <tr>\n",
1179
+ " <td>3</td>\n",
1180
+ " <td>0.484300</td>\n",
1181
+ " <td>1.394087</td>\n",
1182
+ " <td>0.518474</td>\n",
1183
+ " </tr>\n",
1184
+ " <tr>\n",
1185
+ " <td>4</td>\n",
1186
+ " <td>0.484300</td>\n",
1187
+ " <td>1.625637</td>\n",
1188
+ " <td>0.520013</td>\n",
1189
+ " </tr>\n",
1190
+ " <tr>\n",
1191
+ " <td>5</td>\n",
1192
+ " <td>0.234500</td>\n",
1193
+ " <td>1.928906</td>\n",
1194
+ " <td>0.534638</td>\n",
1195
+ " </tr>\n",
1196
+ " <tr>\n",
1197
+ " <td>6</td>\n",
1198
+ " <td>0.234500</td>\n",
1199
+ " <td>2.101890</td>\n",
1200
+ " <td>0.535188</td>\n",
1201
+ " </tr>\n",
1202
+ " <tr>\n",
1203
+ " <td>7</td>\n",
1204
+ " <td>0.113600</td>\n",
1205
+ " <td>2.447903</td>\n",
1206
+ " <td>0.521553</td>\n",
1207
+ " </tr>\n",
1208
+ " <tr>\n",
1209
+ " <td>8</td>\n",
1210
+ " <td>0.113600</td>\n",
1211
+ " <td>2.633792</td>\n",
1212
+ " <td>0.512756</td>\n",
1213
+ " </tr>\n",
1214
+ " <tr>\n",
1215
+ " <td>9</td>\n",
1216
+ " <td>0.052100</td>\n",
1217
+ " <td>3.018095</td>\n",
1218
+ " <td>0.529250</td>\n",
1219
+ " </tr>\n",
1220
+ " <tr>\n",
1221
+ " <td>10</td>\n",
1222
+ " <td>0.052100</td>\n",
1223
+ " <td>3.211678</td>\n",
1224
+ " <td>0.522542</td>\n",
1225
+ " </tr>\n",
1226
+ " <tr>\n",
1227
+ " <td>11</td>\n",
1228
+ " <td>0.022200</td>\n",
1229
+ " <td>3.319586</td>\n",
1230
+ " <td>0.523532</td>\n",
1231
+ " </tr>\n",
1232
+ " </tbody>\n",
1233
+ "</table><p>"
1234
+ ]
1235
+ },
1236
+ "metadata": {}
1237
+ },
1238
+ {
1239
+ "output_type": "execute_result",
1240
+ "data": {
1241
+ "text/plain": [
1242
+ "TrainOutput(global_step=3024, training_loss=0.26791910230916327, metrics={'train_runtime': 4668.6932, 'train_samples_per_second': 41.518, 'train_steps_per_second': 0.648, 'total_flos': 2.563329616742707e+16, 'train_loss': 0.26791910230916327, 'epoch': 11.98})"
1243
+ ]
1244
+ },
1245
+ "metadata": {},
1246
+ "execution_count": 18
1247
+ }
1248
+ ]
1249
+ }
1250
+ ]
1251
+ }
app.py CHANGED
@@ -1,35 +1,103 @@
1
  import streamlit as st
2
- from transformers import pipeline
 
3
 
4
- if "sentiment" not in st.session_state:
5
- st.session_state.sentiment = ""
 
6
 
7
  if "score" not in st.session_state:
8
  st.session_state.score = ""
9
 
10
 
11
- def run_sentiment_model(text_in, model_in):
12
- classifier = pipeline(task="sentiment-analysis",
13
- model=model_in)
14
- analysis = classifier(text_in)
15
- st.session_state.sentiment = analysis[0]["label"]
16
- st.session_state.score = "{:.2f}".format(analysis[0]["score"] * 100)
 
 
 
 
 
 
 
 
 
17
 
18
 
19
- models_available = {"Roberta Large English": "siebert/sentiment-roberta-large-english",
20
- "Generic": "Seethal/sentiment_analysis_generic_dataset",
21
- "Twitter Roberta": "cardiffnlp/twitter-roberta-base-sentiment"}
 
22
 
23
- st.title("Sentiment Analysis Section (Milestone-2)")
24
- text_input = st.text_area(
25
- label="Enter the text to analyze", value="I Love Pizza")
26
- model_picked = st.selectbox(
27
- "Choose a model to run on", options=models_available.keys())
 
 
 
 
28
 
29
- st.button("Submit", on_click=run_sentiment_model, args=(
30
- text_input, models_available[model_picked]))
31
 
32
- st.markdown(body="Sentiment: {}, Confidence Score: {} %".format(
33
- st.session_state.sentiment, st.session_state.score))
34
 
35
- st.title("Patentability Score Section (Milestone-3)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
3
+ from datasets import load_dataset
4
 
5
+ # Milestone-3
6
# Seed the session-state slots the page reads, so the result line can be
# rendered before the checker has been run for the first time.
for _state_key in ("viability", "score"):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = ""
11
 
12
 
13
def get_patent_score(pipeline, abstract, claims):
    """Classify a patent's abstract and claims and store the combined verdict.

    Runs `pipeline` once on `abstract` and once on `claims`, then writes two
    values into `st.session_state`:

    * `score` -- the mean of the two top-prediction scores, formatted as a
      percentage string with two decimals.
    * `viability` -- the label both predictions share, or, when they
      disagree, the label of the prediction with the higher score (the
      claims label wins a tie).

    Args:
        pipeline: Callable that takes a text and returns a sequence whose
            first item is a mapping with "label" and "score" entries.
        abstract: Abstract text to classify.
        claims: Claims text to classify.
    """
    abstract_top = pipeline(abstract)[0]
    claims_top = pipeline(claims)[0]

    mean_score = (abstract_top["score"] + claims_top["score"]) / 2
    st.session_state.score = "{:.2f}".format(mean_score * 100)

    if abstract_top["label"] == claims_top["label"]:
        st.session_state.viability = abstract_top["label"]
    # Break the disagreement on score vs. score. The previous code compared
    # the abstract score against the claims *label*, i.e. a number against a
    # label string, which cannot order the two predictions.
    elif abstract_top["score"] > claims_top["score"]:
        st.session_state.viability = abstract_top["label"]
    else:
        st.session_state.viability = claims_top["label"]
28
 
29
 
30
# Directory holding the fine-tuned checkpoint (model weights and tokenizer
# files), resolved relative to the working directory.
checkpoint_file = "./checkpoint-3024"
model = AutoModelForSequenceClassification.from_pretrained(checkpoint_file)
tokenizer = AutoTokenizer.from_pretrained(checkpoint_file)
# NOTE: this rebinds the imported `pipeline` factory to the pipeline object it
# creates. The name is kept because the rest of the script passes `pipeline`
# to the button callback; the factory is no longer reachable after this line.
pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)

dataset_dict = load_dataset('HUPD/hupd',
                            name='sample',
                            data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
                            icpr_label=None,
                            train_filing_start_date='2016-01-01',
                            train_filing_end_date='2016-01-21',
                            val_filing_start_date='2016-01-22',
                            val_filing_end_date='2016-01-31',
                            )

dataset = dataset_dict["train"]

# Offer the first (up to) 10 training patents in the UI, keyed by title.
# Slicing once fetches just those rows; the earlier loop indexed the "title",
# "abstract" and "claims" columns by name on every iteration and raised if the
# split held fewer than 10 rows. As before, a repeated title keeps the text of
# its last occurrence.
sample_rows = dataset[:10]
abstract_dict = dict(zip(sample_rows["title"], sample_rows["abstract"]))
claims_dict = dict(zip(sample_rows["title"], sample_rows["claims"]))
53
+
54
# Page layout: pick one of the preloaded patents, optionally edit its abstract
# and claims, run the classifier, and show the stored outcome.
st.title("Patent Viability Score Checker")

chosen_patent = st.selectbox(
    "Choose a patent to run the checker on", options=abstract_dict.keys())
abstract = st.text_area(
    label="Abstract",
    value=abstract_dict[chosen_patent]
)
claims = st.text_area(
    label="Claims",
    value=claims_dict[chosen_patent]
)

# The callback's positional arguments go in `args` (the same keyword the
# Milestone-2 button used); `options` is a selectbox keyword, not a button one.
st.button("Check Viability", on_click=get_patent_score,
          args=(pipeline, abstract, claims))

# Reads the values written by get_patent_score; both are empty strings until
# the button has been pressed once.
st.markdown(body="Outcome: {}, Score: {}%".format(
    st.session_state.viability, st.session_state.score))
72
+
73
+ # Milestone-2
74
+ # if "sentiment" not in st.session_state:
75
+ # st.session_state.sentiment = ""
76
+
77
+ # if "score" not in st.session_state:
78
+ # st.session_state.score = ""
79
+
80
+
81
+ # def run_model(text_in, model_in):
82
+ # classifier = pipeline(task="sentiment-analysis",
83
+ # model=model_in)
84
+ # analysis = classifier(text_in)
85
+ # st.session_state.sentiment = analysis[0]["label"]
86
+ # st.session_state.score = "{:.2f}".format(analysis[0]["score"] * 100)
87
+
88
+
89
+ # models_available = {"Roberta Large English": "siebert/sentiment-roberta-large-english",
90
+ # "Generic": "Seethal/sentiment_analysis_generic_dataset",
91
+ # "Twitter Roberta": "cardiffnlp/twitter-roberta-base-sentiment"}
92
+
93
+ # st.title("Sentiment Analysis Web Application")
94
+ # text_input = st.text_area(
95
+ # label="Enter the text to analyze", value="I Love Pizza")
96
+ # model_picked = st.selectbox(
97
+ # "Choose a model to run on", options=models_available.keys())
98
+
99
+ # st.button("Submit", on_click=run_model, args=(
100
+ # text_input, models_available[model_picked]))
101
+
102
+ # st.markdown(body="Sentiment: {}, Confidence Score: {} %".format(
103
+ # st.session_state.sentiment, st.session_state.score))