EmicoBinsfinder committed on
Commit
e785b84
1 Parent(s): 00db84e

Changing Layout and Model Call

Browse files
Files changed (1) hide show
  1. app.py +83 -100
app.py CHANGED
@@ -4,7 +4,7 @@ import pandas as pd
4
  import torch
5
  import torch.nn as nn
6
  import transformers
7
- from transformers import AutoTokenizer, AutoConfig, LlamaForCausalLM, LlamaTokenizer, GenerationConfig, AutoModel
8
  import pandas as pd
9
  import tensorflow as tf
10
  import numpy as np
@@ -27,14 +27,14 @@ extra_stopwords = ['ii', 'iii'] # Can add extra stopwords to be removed from dat
27
  all_stopwords.extend(extra_stopwords)
28
 
29
  ########### GET CLAIMED TRAINED MODEL ###########
30
- auth_token = os.environ.get("AUTH_TOKEN_SECRET")
31
- #model_path = os.environ.get("MODEL_PATH")
32
 
33
  model = LlamaForCausalLM.from_pretrained(
34
- "Claimed/CapybaraProper", use_auth_token=auth_token,
35
  load_in_8bit=True,
36
- device_map="auto") #low_cpu_mem_usage=True)
37
- tokenizer = LlamaTokenizer.from_pretrained("Claimed/CapybaraProper", use_auth_token=auth_token)
 
38
  ########## DEFINING FUNCTIONS ###################
39
 
40
  def mean_pooling(model_output, attention_mask):
@@ -108,7 +108,6 @@ def convert_saved_embeddings(embedding_string):
108
  embedding = torch.from_numpy(embedding)
109
  return embedding
110
 
111
-
112
  ########## LOADING PRE-COMPUTED EMBEDDINGS ##########
113
 
114
  def clean_data(input, type='Dataframe'):
@@ -182,13 +181,8 @@ def classifier(userin, SearchType):
182
  broad_scope_predictions = broad_scope_class_predictor(class_embeddings, in_emb, SearchType, Number, Sensitivity='High')
183
 
184
  return broad_scope_predictions
185
-
186
 
187
  def generateresponse(history, temp, top_p, tokens):
188
-
189
- """
190
- Model definition here:
191
- """
192
 
193
  global model
194
  global tokenizer
@@ -200,36 +194,22 @@ def generateresponse(history, temp, top_p, tokens):
200
  {user}
201
  ### Response:"""
202
 
203
- inputs = tokenizer(
204
- PROMPT,
205
- return_tensors="pt",
206
- )
207
- input_ids = inputs["input_ids"].cuda()
208
-
209
- generation_config = GenerationConfig(
210
  temperature=temp,
211
  top_p=top_p,
212
- repetition_penalty=1.15,
213
- )
214
- print("Generating...")
215
- generation_output = model.generate(
216
- input_ids=input_ids,
217
- generation_config=generation_config,
218
- return_dict_in_generate=True,
219
- output_scores=True,
220
- max_new_tokens=tokens,
221
- )
222
- output = []
223
- for s in generation_output.sequences:
224
- output.append(tokenizer.decode(s))
225
- print(tokenizer.decode(s))
226
-
227
- outputs = (output[0].split('### Response:'))[1]
228
 
229
  response = f"Response: {outputs}"
230
- history[-1][1] = response
231
- print(history)
232
- return history
233
 
234
 
235
  def run_model(userin, dropd):
@@ -239,39 +219,61 @@ def run_model(userin, dropd):
239
 
240
  if dropd in ["Apparatus Claim", "Method of Use Claim", "Method Claim"]:
241
  PROMPT = claim_selector(userin, dropd)
242
- elif dropd in ["Generate a Description", "Generate a Abstract", "What are the Benefits/Technical Effects"]:
243
  PROMPT = desc_selector(userin, dropd)
244
 
245
- inputs = tokenizer(
246
- PROMPT,
247
- return_tensors="pt",
248
- )
249
- input_ids = inputs["input_ids"].cuda()
250
-
251
- generation_config = GenerationConfig(
252
- temperature=0.6,
253
  top_p=0.95,
254
- repetition_penalty=1.15,
255
- )
256
- print("Generating...")
257
- generation_output = model.generate(
258
- input_ids=input_ids,
259
- generation_config=generation_config,
260
- return_dict_in_generate=True,
261
- output_scores=True,
262
- max_new_tokens=256,
263
- )
264
- output = []
265
- for s in generation_output.sequences:
266
- output.append(tokenizer.decode(s))
267
- print(tokenizer.decode(s))
268
-
269
- outputs = (output[0].split('### Response:'))[1]
270
 
271
  response = f"Response: {outputs}"
272
-
273
  return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
 
 
 
 
275
  def claim_selector(userin, dropd):
276
 
277
  PROMPT = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
@@ -302,13 +304,10 @@ with gr.Blocks(title='Claimed', theme=theme) as demo:
302
 
303
  gr.Markdown("""
304
  # CLAIMED - A GENERATIVE TOOLKIT FOR PATENT ATTORNEYS
305
-
306
  The patenting process can be incredibly time-consuming and expensive. We're on a mission to change that.
307
 
308
  Welcome to our demo! We've trained Meta's Llama on over 200k entries, with a focus on tasks related to the intellectual property domain.
309
-
310
  Please note that this is for research purposes and shouldn't be used commercially.
311
-
312
  None of the outputs of this model, taken in part or in its entirety, constitutes legal advice. If you are seeking protection for your intellectual property, consult a registered patent/trademark attorney.
313
 
314
  """)
@@ -331,7 +330,7 @@ with gr.Blocks(title='Claimed', theme=theme) as demo:
331
  gr.Markdown("""
332
  Use this tool to expand your patent claim into a description. You can also use this tool to generate abstracts and give you ideas about the benefit of an invention by changing the settings in the dropdown menu.
333
  """)
334
- Descriptionchoices = gr.Dropdown(["Generate a Description", "Generate a Abstract", "What are the Benefits/Technical Effects"], label='Choose Generation Type Here')
335
  with gr.Row(scale=1, min_width=600):
336
 
337
  text1 = gr.Textbox(label="Input",
@@ -340,24 +339,21 @@ with gr.Blocks(title='Claimed', theme=theme) as demo:
340
  with gr.Row():
341
  btn = gr.Button("Submit")
342
  btn.click(fn=desc_selector, inputs=[text1, Descriptionchoices]).then(run_model, inputs=[text1, Descriptionchoices], outputs=text2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
343
 
344
- # with gr.Tab("Knowledge Graph"):
345
- # gr.Markdown("""
346
- # Use this tool to generate a knowledge graph of your invention. This will help highlight the links between features.
347
- # """)
348
- # with gr.Row(scale=1, min_width=600):
349
- # text1 = gr.Textbox(label="Input",
350
- # placeholder='Type in your idea here!')
351
- # text2 = gr.Textbox(label="Output")
352
-
353
- # with gr.Tab("Prosecution Ideator"):
354
- # gr.Markdown("""
355
- # Use this tool to generate ideas for how to overcome objections to novelty and inventive step. Outputs are in the problem-solution format.
356
- # """)
357
- # with gr.Row(scale=1, min_width=600):
358
- # text1 = gr.Textbox(label="Input",
359
- # placeholder='Type in your idea here!')
360
- # text2 = gr.Textbox(label="Output")
361
 
362
  # with gr.Tab("Claimed Infill"):
363
  # gr.Markdown("""
@@ -376,7 +372,6 @@ with gr.Blocks(title='Claimed', theme=theme) as demo:
376
  with gr.Tab("CPC Search Tool"):
377
  gr.Markdown("""
378
  Use this tool to classify your invention according to the Cooperative Patent Classification system.
379
-
380
  Click on the link to initiate either an Espacenet or Google Patents classification search using the generated classifications. You can specify which you would like using the dropdown menu.
381
  """)
382
 
@@ -389,18 +384,11 @@ with gr.Blocks(title='Claimed', theme=theme) as demo:
389
  classify_btn = gr.Button("Classify")
390
  classify_btn.click(fn=classifier, inputs=[userin, ClassifyChoices] , outputs=output)
391
 
392
-
393
  gr.Markdown("""
394
-
395
  # THE CHATBOT
396
-
397
  Do you want a bit more freedom over the outputs you generate? No worries, you can use a chatbot version of our model below. You can ask it anything.
398
-
399
  If you're concerned about a particular output, hit the flag button and we will use that information to improve the model.
400
-
401
-
402
  """)
403
-
404
 
405
  chatbot = gr.Chatbot([], elem_id="Claimed Assistant").style(height=500)
406
  with gr.Row():
@@ -422,16 +410,11 @@ with gr.Blocks(title='Claimed', theme=theme) as demo:
422
 
423
  gr.Markdown("""
424
  # HAVE AN IDEA? GET IT CLAIMED
425
-
426
  In the future, we are looking to expand our model's capabilities further to assist in a range of IP related tasks.
427
-
428
  If you are interested in using a more powerful model that we have trained, or if you have any suggestions of features you would like to see us add, please get in touch!
429
-
430
  As far as data is concerned, you have nothing to worry about! We don't store any of your inputs to use for further training, we're not OpenAI.
431
 
432
  """)
433
-
434
- #
435
-
436
  demo.queue(concurrency_count=9)
437
- demo.launch()
 
 
4
  import torch
5
  import torch.nn as nn
6
  import transformers
7
+ from transformers import AutoTokenizer, AutoConfig, LlamaForCausalLM, LlamaTokenizer, GenerationConfig, AutoModel, pipeline
8
  import pandas as pd
9
  import tensorflow as tf
10
  import numpy as np
 
27
  all_stopwords.extend(extra_stopwords)
28
 
29
  ########### GET CLAIMED TRAINED MODEL ###########
30
+ tokenizer = LlamaTokenizer.from_pretrained("samwit/koala-7b")
 
31
 
32
  model = LlamaForCausalLM.from_pretrained(
33
+ "samwit/koala-7b",
34
  load_in_8bit=True,
35
+ device_map='auto',
36
+ )
37
+
38
  ########## DEFINING FUNCTIONS ###################
39
 
40
  def mean_pooling(model_output, attention_mask):
 
108
  embedding = torch.from_numpy(embedding)
109
  return embedding
110
 
 
111
  ########## LOADING PRE-COMPUTED EMBEDDINGS ##########
112
 
113
  def clean_data(input, type='Dataframe'):
 
181
  broad_scope_predictions = broad_scope_class_predictor(class_embeddings, in_emb, SearchType, Number, Sensitivity='High')
182
 
183
  return broad_scope_predictions
 
184
 
185
  def generateresponse(history, temp, top_p, tokens):
 
 
 
 
186
 
187
  global model
188
  global tokenizer
 
194
  {user}
195
  ### Response:"""
196
 
197
+ pipe = pipeline(
198
+ "text-generation",
199
+ model=model,
200
+ tokenizer=tokenizer,
201
+ max_length=512,
 
 
202
  temperature=temp,
203
  top_p=top_p,
204
+ repetition_penalty=1.15
205
+ )
206
+
207
+ outputs = pipe(PROMPT)
208
+ outputs = outputs[0]['generated_text']
209
+ outputs = str(outputs).split('### Response')[1]
 
 
 
 
 
 
 
 
 
 
210
 
211
  response = f"Response: {outputs}"
212
+ return response
 
 
213
 
214
 
215
  def run_model(userin, dropd):
 
219
 
220
  if dropd in ["Apparatus Claim", "Method of Use Claim", "Method Claim"]:
221
  PROMPT = claim_selector(userin, dropd)
222
+ elif dropd in ["Generate a Description Paragraph", "Generate a Abstract", "What are the Benefits/Technical Effects"]:
223
  PROMPT = desc_selector(userin, dropd)
224
 
225
+ pipe = pipeline(
226
+ "text-generation",
227
+ model=model,
228
+ tokenizer=tokenizer,
229
+ max_length=512,
230
+ temperature=0.7,
 
 
231
  top_p=0.95,
232
+ repetition_penalty=1.15
233
+ )
234
+
235
+ outputs = pipe(PROMPT)
236
+
237
+ outputs = outputs[0]['generated_text']
238
+ outputs = str(outputs).split('### Response')[1]
 
 
 
 
 
 
 
 
 
239
 
240
  response = f"Response: {outputs}"
 
241
  return response
242
+
243
+ def prosecute(application, priorart, dropd):
244
+
245
+ global model
246
+ global tokenizer
247
+
248
+ pipe = pipeline(
249
+ "text-generation",
250
+ model=model,
251
+ tokenizer=tokenizer,
252
+ max_length=512,
253
+ temperature=0.7,
254
+ top_p=0.95,
255
+ repetition_penalty=1.15
256
+ )
257
+
258
+ PROMPT = f"""
259
+ Below is an instruction that describes a task. Write a response that appropriately completes the request.
260
+ ### Instruction:
261
+ Draft a patent novelty/inventive step argument using the {dropd} approach:
262
+
263
+ Application: {application}
264
+
265
+ Prior Art: {priorart}
266
+
267
+ ### Response:"""
268
+
269
+ outputs = pipe(PROMPT)
270
+
271
+ outputs = outputs[0]['generated_text']
272
+ outputs = str(outputs).split('### Response')[1]
273
 
274
+ response = f"Response: {outputs}"
275
+ return response
276
+
277
  def claim_selector(userin, dropd):
278
 
279
  PROMPT = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
 
304
 
305
  gr.Markdown("""
306
  # CLAIMED - A GENERATIVE TOOLKIT FOR PATENT ATTORNEYS
 
307
  The patenting process can be incredibly time-consuming and expensive. We're on a mission to change that.
308
 
309
  Welcome to our demo! We've trained Meta's Llama on over 200k entries, with a focus on tasks related to the intellectual property domain.
 
310
  Please note that this is for research purposes and shouldn't be used commercially.
 
311
  None of the outputs of this model, taken in part or in its entirety, constitutes legal advice. If you are seeking protection for your intellectual property, consult a registered patent/trademark attorney.
312
 
313
  """)
 
330
  gr.Markdown("""
331
  Use this tool to expand your patent claim into a description. You can also use this tool to generate abstracts and give you ideas about the benefit of an invention by changing the settings in the dropdown menu.
332
  """)
333
+ Descriptionchoices = gr.Dropdown(["Generate a Description Paragraph", "Generate a Abstract", "What are the Benefits/Technical Effects"], label='Choose Generation Type Here')
334
  with gr.Row(scale=1, min_width=600):
335
 
336
  text1 = gr.Textbox(label="Input",
 
339
  with gr.Row():
340
  btn = gr.Button("Submit")
341
  btn.click(fn=desc_selector, inputs=[text1, Descriptionchoices]).then(run_model, inputs=[text1, Descriptionchoices], outputs=text2)
342
+
343
+ with gr.Tab("Prosecution Beta"):
344
+ gr.Markdown("""
345
+ Use this tool to generate ideas for how to overcome objections to novelty and inventive step. Outputs are in the EPO's problem-solution format. For now, this tool only works on relatively short inputs, so maybe try with some simple claims or short paragraphs.
346
+ """)
347
+ dropd = gr.Dropdown(["Problem Solution", "Windsurfing/Pozzoli"], label='Choose Generation Type Here')
348
+ with gr.Row(scale=1, min_width=600):
349
+ with gr.Column():
350
+ application = gr.Text(label="Present Invention")
351
+ priorart = gr.Text(label="Prior Art Document")
352
+ text2 = gr.Textbox(label="Output")
353
+ with gr.Row():
354
+ btn = gr.Button("Submit")
355
+ btn.click(fn=prosecute, inputs=[application, priorart, dropd], outputs=text2)
356
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
357
 
358
  # with gr.Tab("Claimed Infill"):
359
  # gr.Markdown("""
 
372
  with gr.Tab("CPC Search Tool"):
373
  gr.Markdown("""
374
  Use this tool to classify your invention according to the Cooperative Patent Classification system.
 
375
  Click on the link to initiate either an Espacenet or Google Patents classification search using the generated classifications. You can specify which you would like using the dropdown menu.
376
  """)
377
 
 
384
  classify_btn = gr.Button("Classify")
385
  classify_btn.click(fn=classifier, inputs=[userin, ClassifyChoices] , outputs=output)
386
 
 
387
  gr.Markdown("""
 
388
  # THE CHATBOT
 
389
  Do you want a bit more freedom over the outputs you generate? No worries, you can use a chatbot version of our model below. You can ask it anything.
 
390
  If you're concerned about a particular output, hit the flag button and we will use that information to improve the model.
 
 
391
  """)
 
392
 
393
  chatbot = gr.Chatbot([], elem_id="Claimed Assistant").style(height=500)
394
  with gr.Row():
 
410
 
411
  gr.Markdown("""
412
  # HAVE AN IDEA? GET IT CLAIMED
 
413
  In the future, we are looking to expand our model's capabilities further to assist in a range of IP related tasks.
 
414
  If you are interested in using a more powerful model that we have trained, or if you have any suggestions of features you would like to see us add, please get in touch!
 
415
  As far as data is concerned, you have nothing to worry about! We don't store any of your inputs to use for further training, we're not OpenAI.
416
 
417
  """)
 
 
 
418
  demo.queue(concurrency_count=9)
419
+ demo.launch(share=True)
420
+