Spaces:
Runtime error
Runtime error
EmicoBinsfinder
committed on
Commit
•
e785b84
1
Parent(s):
00db84e
Changing Layout and Model Call
Browse files
app.py
CHANGED
@@ -4,7 +4,7 @@ import pandas as pd
|
|
4 |
import torch
|
5 |
import torch.nn as nn
|
6 |
import transformers
|
7 |
-
from transformers import AutoTokenizer, AutoConfig, LlamaForCausalLM, LlamaTokenizer, GenerationConfig, AutoModel
|
8 |
import pandas as pd
|
9 |
import tensorflow as tf
|
10 |
import numpy as np
|
@@ -27,14 +27,14 @@ extra_stopwords = ['ii', 'iii'] # Can add extra stopwords to be removed from dat
|
|
27 |
all_stopwords.extend(extra_stopwords)
|
28 |
|
29 |
########### GET CLAIMED TRAINED MODEL ###########
|
30 |
-
|
31 |
-
#model_path = os.environ.get("MODEL_PATH")
|
32 |
|
33 |
model = LlamaForCausalLM.from_pretrained(
|
34 |
-
|
35 |
load_in_8bit=True,
|
36 |
-
device_map=
|
37 |
-
|
|
|
38 |
########## DEFINING FUNCTIONS ###################
|
39 |
|
40 |
def mean_pooling(model_output, attention_mask):
|
@@ -108,7 +108,6 @@ def convert_saved_embeddings(embedding_string):
|
|
108 |
embedding = torch.from_numpy(embedding)
|
109 |
return embedding
|
110 |
|
111 |
-
|
112 |
########## LOADING PRE-COMPUTED EMBEDDINGS ##########
|
113 |
|
114 |
def clean_data(input, type='Dataframe'):
|
@@ -182,13 +181,8 @@ def classifier(userin, SearchType):
|
|
182 |
broad_scope_predictions = broad_scope_class_predictor(class_embeddings, in_emb, SearchType, Number, Sensitivity='High')
|
183 |
|
184 |
return broad_scope_predictions
|
185 |
-
|
186 |
|
187 |
def generateresponse(history, temp, top_p, tokens):
|
188 |
-
|
189 |
-
"""
|
190 |
-
Model definition here:
|
191 |
-
"""
|
192 |
|
193 |
global model
|
194 |
global tokenizer
|
@@ -200,36 +194,22 @@ def generateresponse(history, temp, top_p, tokens):
|
|
200 |
{user}
|
201 |
### Response:"""
|
202 |
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
generation_config = GenerationConfig(
|
210 |
temperature=temp,
|
211 |
top_p=top_p,
|
212 |
-
repetition_penalty=1.15
|
213 |
-
)
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
return_dict_in_generate=True,
|
219 |
-
output_scores=True,
|
220 |
-
max_new_tokens=tokens,
|
221 |
-
)
|
222 |
-
output = []
|
223 |
-
for s in generation_output.sequences:
|
224 |
-
output.append(tokenizer.decode(s))
|
225 |
-
print(tokenizer.decode(s))
|
226 |
-
|
227 |
-
outputs = (output[0].split('### Response:'))[1]
|
228 |
|
229 |
response = f"Response: {outputs}"
|
230 |
-
|
231 |
-
print(history)
|
232 |
-
return history
|
233 |
|
234 |
|
235 |
def run_model(userin, dropd):
|
@@ -239,39 +219,61 @@ def run_model(userin, dropd):
|
|
239 |
|
240 |
if dropd in ["Apparatus Claim", "Method of Use Claim", "Method Claim"]:
|
241 |
PROMPT = claim_selector(userin, dropd)
|
242 |
-
elif dropd in ["Generate a Description", "Generate a Abstract", "What are the Benefits/Technical Effects"]:
|
243 |
PROMPT = desc_selector(userin, dropd)
|
244 |
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
generation_config = GenerationConfig(
|
252 |
-
temperature=0.6,
|
253 |
top_p=0.95,
|
254 |
-
repetition_penalty=1.15
|
255 |
-
)
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
output_scores=True,
|
262 |
-
max_new_tokens=256,
|
263 |
-
)
|
264 |
-
output = []
|
265 |
-
for s in generation_output.sequences:
|
266 |
-
output.append(tokenizer.decode(s))
|
267 |
-
print(tokenizer.decode(s))
|
268 |
-
|
269 |
-
outputs = (output[0].split('### Response:'))[1]
|
270 |
|
271 |
response = f"Response: {outputs}"
|
272 |
-
|
273 |
return response
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
274 |
|
|
|
|
|
|
|
275 |
def claim_selector(userin, dropd):
|
276 |
|
277 |
PROMPT = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
|
@@ -302,13 +304,10 @@ with gr.Blocks(title='Claimed', theme=theme) as demo:
|
|
302 |
|
303 |
gr.Markdown("""
|
304 |
# CLAIMED - A GENERATIVE TOOLKIT FOR PATENT ATTORNEYS
|
305 |
-
|
306 |
The patenting process can be incredibly time-consuming and expensive. We're on a mission to change that.
|
307 |
|
308 |
Welcome to our demo! We've trained Meta's Llama on over 200k entries, with a focus on tasks related to the intellectual property domain.
|
309 |
-
|
310 |
Please note that this is for research purposes and shouldn't be used commercially.
|
311 |
-
|
312 |
None of the outputs of this model, taken in part or in its entirety, constitutes legal advice. If you are seeking protection for your intellectual property, consult a registered patent/trademark attorney.
|
313 |
|
314 |
""")
|
@@ -331,7 +330,7 @@ with gr.Blocks(title='Claimed', theme=theme) as demo:
|
|
331 |
gr.Markdown("""
|
332 |
Use this tool to expand your patent claim into a description. You can also use this tool to generate abstracts and give you ideas about the benefit of an invention by changing the settings in the dropdown menu.
|
333 |
""")
|
334 |
-
Descriptionchoices = gr.Dropdown(["Generate a Description", "Generate a Abstract", "What are the Benefits/Technical Effects"], label='Choose Generation Type Here')
|
335 |
with gr.Row(scale=1, min_width=600):
|
336 |
|
337 |
text1 = gr.Textbox(label="Input",
|
@@ -340,24 +339,21 @@ with gr.Blocks(title='Claimed', theme=theme) as demo:
|
|
340 |
with gr.Row():
|
341 |
btn = gr.Button("Submit")
|
342 |
btn.click(fn=desc_selector, inputs=[text1, Descriptionchoices]).then(run_model, inputs=[text1, Descriptionchoices], outputs=text2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
343 |
|
344 |
-
# with gr.Tab("Knowledge Graph"):
|
345 |
-
# gr.Markdown("""
|
346 |
-
# Use this tool to generate a knowledge graph of your invention. This will help highlight the links between features.
|
347 |
-
# """)
|
348 |
-
# with gr.Row(scale=1, min_width=600):
|
349 |
-
# text1 = gr.Textbox(label="Input",
|
350 |
-
# placeholder='Type in your idea here!')
|
351 |
-
# text2 = gr.Textbox(label="Output")
|
352 |
-
|
353 |
-
# with gr.Tab("Prosecution Ideator"):
|
354 |
-
# gr.Markdown("""
|
355 |
-
# Use this tool to generate ideas for how to overcome objections to novelty and inventive step. Outputs are in the problem-solution format.
|
356 |
-
# """)
|
357 |
-
# with gr.Row(scale=1, min_width=600):
|
358 |
-
# text1 = gr.Textbox(label="Input",
|
359 |
-
# placeholder='Type in your idea here!')
|
360 |
-
# text2 = gr.Textbox(label="Output")
|
361 |
|
362 |
# with gr.Tab("Claimed Infill"):
|
363 |
# gr.Markdown("""
|
@@ -376,7 +372,6 @@ with gr.Blocks(title='Claimed', theme=theme) as demo:
|
|
376 |
with gr.Tab("CPC Search Tool"):
|
377 |
gr.Markdown("""
|
378 |
Use this tool to classify your invention according to the Cooperative Patent Classification system.
|
379 |
-
|
380 |
Click on the link to initiate either an Espacenet or Google Patents classification search using the generated classifications. You can specify which you would like using the dropdown menu.
|
381 |
""")
|
382 |
|
@@ -389,18 +384,11 @@ with gr.Blocks(title='Claimed', theme=theme) as demo:
|
|
389 |
classify_btn = gr.Button("Classify")
|
390 |
classify_btn.click(fn=classifier, inputs=[userin, ClassifyChoices] , outputs=output)
|
391 |
|
392 |
-
|
393 |
gr.Markdown("""
|
394 |
-
|
395 |
# THE CHATBOT
|
396 |
-
|
397 |
Do you want a bit more freedom over the outputs you generate? No worries, you can use a chatbot version of our model below. You can ask it anything.
|
398 |
-
|
399 |
If you're concerned about a particular output, hit the flag button and we will use that information to improve the model.
|
400 |
-
|
401 |
-
|
402 |
""")
|
403 |
-
|
404 |
|
405 |
chatbot = gr.Chatbot([], elem_id="Claimed Assistant").style(height=500)
|
406 |
with gr.Row():
|
@@ -422,16 +410,11 @@ with gr.Blocks(title='Claimed', theme=theme) as demo:
|
|
422 |
|
423 |
gr.Markdown("""
|
424 |
# HAVE AN IDEA? GET IT CLAIMED
|
425 |
-
|
426 |
In the future, we are looking to expand our model's capabilities further to assist in a range of IP related tasks.
|
427 |
-
|
428 |
If you are interested in using a more powerful model that we have trained, or if you have any suggestions of features you would like to see us add, please get in touch!
|
429 |
-
|
430 |
As far as data is concerned, you have nothing to worry about! We don't store any of your inputs to use for further training, we're not OpenAI.
|
431 |
|
432 |
""")
|
433 |
-
|
434 |
-
#
|
435 |
-
|
436 |
demo.queue(concurrency_count=9)
|
437 |
-
demo.launch()
|
|
|
|
4 |
import torch
|
5 |
import torch.nn as nn
|
6 |
import transformers
|
7 |
+
from transformers import AutoTokenizer, AutoConfig, LlamaForCausalLM, LlamaTokenizer, GenerationConfig, AutoModel, pipeline
|
8 |
import pandas as pd
|
9 |
import tensorflow as tf
|
10 |
import numpy as np
|
|
|
27 |
all_stopwords.extend(extra_stopwords)
|
28 |
|
29 |
########### GET CLAIMED TRAINED MODEL ###########
|
30 |
+
tokenizer = LlamaTokenizer.from_pretrained("samwit/koala-7b")
|
|
|
31 |
|
32 |
model = LlamaForCausalLM.from_pretrained(
|
33 |
+
"samwit/koala-7b",
|
34 |
load_in_8bit=True,
|
35 |
+
device_map='auto',
|
36 |
+
)
|
37 |
+
|
38 |
########## DEFINING FUNCTIONS ###################
|
39 |
|
40 |
def mean_pooling(model_output, attention_mask):
|
|
|
108 |
embedding = torch.from_numpy(embedding)
|
109 |
return embedding
|
110 |
|
|
|
111 |
########## LOADING PRE-COMPUTED EMBEDDINGS ##########
|
112 |
|
113 |
def clean_data(input, type='Dataframe'):
|
|
|
181 |
broad_scope_predictions = broad_scope_class_predictor(class_embeddings, in_emb, SearchType, Number, Sensitivity='High')
|
182 |
|
183 |
return broad_scope_predictions
|
|
|
184 |
|
185 |
def generateresponse(history, temp, top_p, tokens):
|
|
|
|
|
|
|
|
|
186 |
|
187 |
global model
|
188 |
global tokenizer
|
|
|
194 |
{user}
|
195 |
### Response:"""
|
196 |
|
197 |
+
pipe = pipeline(
|
198 |
+
"text-generation",
|
199 |
+
model=model,
|
200 |
+
tokenizer=tokenizer,
|
201 |
+
max_length=512,
|
|
|
|
|
202 |
temperature=temp,
|
203 |
top_p=top_p,
|
204 |
+
repetition_penalty=1.15
|
205 |
+
)
|
206 |
+
|
207 |
+
outputs = pipe(PROMPT)
|
208 |
+
outputs = outputs[0]['generated_text']
|
209 |
+
outputs = str(outputs).split('### Response')[1]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
210 |
|
211 |
response = f"Response: {outputs}"
|
212 |
+
return response
|
|
|
|
|
213 |
|
214 |
|
215 |
def run_model(userin, dropd):
|
|
|
219 |
|
220 |
if dropd in ["Apparatus Claim", "Method of Use Claim", "Method Claim"]:
|
221 |
PROMPT = claim_selector(userin, dropd)
|
222 |
+
elif dropd in ["Generate a Description Paragraph", "Generate a Abstract", "What are the Benefits/Technical Effects"]:
|
223 |
PROMPT = desc_selector(userin, dropd)
|
224 |
|
225 |
+
pipe = pipeline(
|
226 |
+
"text-generation",
|
227 |
+
model=model,
|
228 |
+
tokenizer=tokenizer,
|
229 |
+
max_length=512,
|
230 |
+
temperature=0.7,
|
|
|
|
|
231 |
top_p=0.95,
|
232 |
+
repetition_penalty=1.15
|
233 |
+
)
|
234 |
+
|
235 |
+
outputs = pipe(PROMPT)
|
236 |
+
|
237 |
+
outputs = outputs[0]['generated_text']
|
238 |
+
outputs = str(outputs).split('### Response')[1]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
239 |
|
240 |
response = f"Response: {outputs}"
|
|
|
241 |
return response
|
242 |
+
|
243 |
+
def prosecute(application, priorart, dropd):
    """Draft a novelty/inventive-step argument for an application against prior art.

    Builds an instruction-style prompt from the three inputs, runs it through
    a ``text-generation`` pipeline wrapping the module-level ``model`` and
    ``tokenizer``, and returns the text that follows the ``### Response:``
    marker, prefixed with ``"Response: "``.

    Args:
        application: Text describing the present invention.
        priorart: Text of the prior art document to argue against.
        dropd: Name of the argumentation approach, inserted verbatim into the
            instruction line of the prompt.

    Returns:
        A string of the form ``"Response: <generated text>"``.
    """
    # Marker that ends the prompt; everything after it is the model's answer.
    response_marker = "### Response:"

    # `model` and `tokenizer` are only read here, so no `global` declaration
    # is needed.
    # NOTE(review): max_length presumably bounds prompt + generated tokens, so
    # long inputs may leave little room for the answer - confirm against the
    # transformers generation docs before raising or replacing it.
    # NOTE(review): temperature/top_p may only take effect when sampling is
    # enabled - confirm whether do_sample needs to be set explicitly.
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=512,
        temperature=0.7,
        top_p=0.95,
        repetition_penalty=1.15,
    )

    # Assembled line by line so the prompt text does not depend on how this
    # function happens to be indented.
    prompt = (
        "\n"
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request.\n"
        "### Instruction:\n"
        f"Draft a patent novelty/inventive step argument using the {dropd} approach:\n"
        "\n"
        f"Application: {application}\n"
        "\n"
        f"Prior Art: {priorart}\n"
        "\n"
        f"{response_marker}"
    )

    generated = str(pipe(prompt)[0]['generated_text'])

    # Split on the full marker (including the colon) so the answer does not
    # start with a stray ":"; fall back to the whole text instead of raising
    # IndexError when the marker is not present in the output.
    pieces = generated.split(response_marker)
    answer = pieces[1] if len(pieces) > 1 else generated

    return f"Response: {answer}"
|
276 |
+
|
277 |
def claim_selector(userin, dropd):
|
278 |
|
279 |
PROMPT = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
|
|
|
304 |
|
305 |
gr.Markdown("""
|
306 |
# CLAIMED - A GENERATIVE TOOLKIT FOR PATENT ATTORNEYS
|
|
|
307 |
The patenting process can be incredibly time-consuming and expensive. We're on a mission to change that.
|
308 |
|
309 |
Welcome to our demo! We've trained Meta's Llama on over 200k entries, with a focus on tasks related to the intellectual property domain.
|
|
|
310 |
Please note that this is for research purposes and shouldn't be used commercially.
|
|
|
311 |
None of the outputs of this model, taken in part or in its entirety, constitutes legal advice. If you are seeking protection for your intellectual property, consult a registered patent/trademark attorney.
|
312 |
|
313 |
""")
|
|
|
330 |
gr.Markdown("""
|
331 |
Use this tool to expand your patent claim into a description. You can also use this tool to generate abstracts and give you ideas about the benefit of an invention by changing the settings in the dropdown menu.
|
332 |
""")
|
333 |
+
Descriptionchoices = gr.Dropdown(["Generate a Description Paragraph", "Generate a Abstract", "What are the Benefits/Technical Effects"], label='Choose Generation Type Here')
|
334 |
with gr.Row(scale=1, min_width=600):
|
335 |
|
336 |
text1 = gr.Textbox(label="Input",
|
|
|
339 |
with gr.Row():
|
340 |
btn = gr.Button("Submit")
|
341 |
btn.click(fn=desc_selector, inputs=[text1, Descriptionchoices]).then(run_model, inputs=[text1, Descriptionchoices], outputs=text2)
|
342 |
+
|
343 |
+
with gr.Tab("Prosecution Beta"):
|
344 |
+
gr.Markdown("""
|
345 |
+
Use this tool to generate ideas for how to overcome objections to novelty and inventive step. Outputs are in the EPO's problem-solution format. For now, this tool only works on relatively short inputs, so maybe try with some simple claims or short paragraphs.
|
346 |
+
""")
|
347 |
+
dropd = gr.Dropdown(["Problem Solution", "Windsurfing/Pozzoli"], label='Choose Generation Type Here')
|
348 |
+
with gr.Row(scale=1, min_width=600):
|
349 |
+
with gr.Column():
|
350 |
+
application = gr.Text(label="Present Invention")
|
351 |
+
priorart = gr.Text(label="Prior Art Document")
|
352 |
+
text2 = gr.Textbox(label="Output")
|
353 |
+
with gr.Row():
|
354 |
+
btn = gr.Button("Submit")
|
355 |
+
btn.click(fn=prosecute, inputs=[application, priorart, dropd], outputs=text2)
|
356 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
357 |
|
358 |
# with gr.Tab("Claimed Infill"):
|
359 |
# gr.Markdown("""
|
|
|
372 |
with gr.Tab("CPC Search Tool"):
|
373 |
gr.Markdown("""
|
374 |
Use this tool to classify your invention according to the Cooperative Patent Classification system.
|
|
|
375 |
Click on the link to initiate either an Espacenet or Google Patents classification search using the generated classifications. You can specify which you would like using the dropdown menu.
|
376 |
""")
|
377 |
|
|
|
384 |
classify_btn = gr.Button("Classify")
|
385 |
classify_btn.click(fn=classifier, inputs=[userin, ClassifyChoices] , outputs=output)
|
386 |
|
|
|
387 |
gr.Markdown("""
|
|
|
388 |
# THE CHATBOT
|
|
|
389 |
Do you want a bit more freedom over the outputs you generate? No worries, you can use a chatbot version of our model below. You can ask it anything.
|
|
|
390 |
If you're concerned about a particular output, hit the flag button and we will use that information to improve the model.
|
|
|
|
|
391 |
""")
|
|
|
392 |
|
393 |
chatbot = gr.Chatbot([], elem_id="Claimed Assistant").style(height=500)
|
394 |
with gr.Row():
|
|
|
410 |
|
411 |
gr.Markdown("""
|
412 |
# HAVE AN IDEA? GET IT CLAIMED
|
|
|
413 |
In the future, we are looking to expand our model's capabilities further to assist in a range of IP related tasks.
|
|
|
414 |
If you are interested in using a more powerful model that we have trained, or if you have any suggestions of features you would like to see us add, please get in touch!
|
|
|
415 |
As far as data is concerned, you have nothing to worry about! We don't store any of your inputs to use for further training, we're not OpenAI.
|
416 |
|
417 |
""")
|
|
|
|
|
|
|
418 |
demo.queue(concurrency_count=9)
|
419 |
+
demo.launch(share=True)
|
420 |
+
|