Spaces:

GT4SD
/

PatentToolkit

Runtime error

App Files Files Community

EmicoBinsfinder committed on Apr 15, 2023

Commit

e785b84

•

1 Parent(s): 00db84e

Changing Layout and Model Call

Browse files

Files changed (1) hide show

app.py +83 -100

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ import pandas as pd
 import torch
 import torch.nn as nn
 import transformers
-from transformers import AutoTokenizer, AutoConfig, LlamaForCausalLM, LlamaTokenizer, GenerationConfig, AutoModel
 import pandas as pd
 import tensorflow as tf
 import numpy as np
@@ -27,14 +27,14 @@ extra_stopwords = ['ii', 'iii'] # Can add extra stopwords to be removed from dat
 all_stopwords.extend(extra_stopwords)
 ########### GET CLAIMED TRAINED MODEL ###########
-auth_token = os.environ.get("AUTH_TOKEN_SECRET")
-#model_path = os.environ.get("MODEL_PATH")
 model = LlamaForCausalLM.from_pretrained(
-     "Claimed/CapybaraProper", use_auth_token=auth_token,
     load_in_8bit=True,
-    device_map="auto") #low_cpu_mem_usage=True)
-tokenizer = LlamaTokenizer.from_pretrained("Claimed/CapybaraProper", use_auth_token=auth_token)
 ########## DEFINING FUNCTIONS ###################
 def mean_pooling(model_output, attention_mask):
@@ -108,7 +108,6 @@ def convert_saved_embeddings(embedding_string):
     embedding = torch.from_numpy(embedding)
     return embedding
 ########## LOADING PRE-COMPUTED EMBEDDINGS ##########
 def clean_data(input, type='Dataframe'):
@@ -182,13 +181,8 @@ def classifier(userin, SearchType):
     broad_scope_predictions = broad_scope_class_predictor(class_embeddings, in_emb, SearchType, Number, Sensitivity='High')
     return broad_scope_predictions
 def generateresponse(history, temp, top_p, tokens):
-    """
-    Model definition here:
-    """
     global model
     global tokenizer
@@ -200,36 +194,22 @@ def generateresponse(history, temp, top_p, tokens):
     {user}
     ### Response:"""
-    inputs = tokenizer(
-        PROMPT,
-        return_tensors="pt",
-    )
-    input_ids = inputs["input_ids"].cuda()
-    generation_config = GenerationConfig(
         temperature=temp,
         top_p=top_p,
-        repetition_penalty=1.15,
-    )
-    print("Generating...")
-    generation_output = model.generate(
-        input_ids=input_ids,
-        generation_config=generation_config,
-        return_dict_in_generate=True,
-        output_scores=True,
-        max_new_tokens=tokens,
-    )
-    output = []
-    for s in generation_output.sequences:
-        output.append(tokenizer.decode(s))
-        print(tokenizer.decode(s))
-    outputs = (output[0].split('### Response:'))[1]
     response = f"Response: {outputs}"
-    history[-1][1] = response
-    print(history)
-    return history
 def run_model(userin, dropd):
@@ -239,39 +219,61 @@ def run_model(userin, dropd):
     if dropd in ["Apparatus Claim", "Method of Use Claim", "Method Claim"]:
         PROMPT = claim_selector(userin, dropd)
-    elif dropd in ["Generate a Description", "Generate a Abstract", "What are the Benefits/Technical Effects"]:
         PROMPT = desc_selector(userin, dropd)
-    inputs = tokenizer(
-        PROMPT,
-        return_tensors="pt",
-    )
-    input_ids = inputs["input_ids"].cuda()
-    generation_config = GenerationConfig(
-        temperature=0.6,
         top_p=0.95,
-        repetition_penalty=1.15,
-    )
-    print("Generating...")
-    generation_output = model.generate(
-        input_ids=input_ids,
-        generation_config=generation_config,
-        return_dict_in_generate=True,
-        output_scores=True,
-        max_new_tokens=256,
-    )
-    output = []
-    for s in generation_output.sequences:
-        output.append(tokenizer.decode(s))
-        print(tokenizer.decode(s))
-    outputs = (output[0].split('### Response:'))[1]
     response = f"Response: {outputs}"
     return response
 def claim_selector(userin, dropd):
     PROMPT = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
@@ -302,13 +304,10 @@ with gr.Blocks(title='Claimed', theme=theme) as demo:
     gr.Markdown("""
     # CLAIMED - A GENERATIVE TOOLKIT FOR PATENT ATTORNEYS
     The patenting process can be incredibly time-consuming and expensive. We're on a mission to change that.
     Welcome to our demo! We've trained Meta's Llama on over 200k entries, with a focus on tasks related to the intellectual property domain.
     Please note that this is for research purposes and shouldn't be used commercially.
     None of the outputs of this model, taken in part or in its entirety, constitutes legal advice. If you are seeking protection for you intellectual property, consult a registered patent/trademark attorney.
     """)
@@ -331,7 +330,7 @@ with gr.Blocks(title='Claimed', theme=theme) as demo:
         gr.Markdown("""
         Use this tool to expand your patent claim into a description. You can also use this tool to generate abstracts and give you ideas about the benefit of an invention by changing the settings in the dropdown menu.
          """)
-        Descriptionchoices = gr.Dropdown(["Generate a Description", "Generate a Abstract", "What are the Benefits/Technical Effects"], label='Choose Generation Type Here')
         with gr.Row(scale=1, min_width=600):
             text1 = gr.Textbox(label="Input",
@@ -340,24 +339,21 @@ with gr.Blocks(title='Claimed', theme=theme) as demo:
         with gr.Row():
             btn = gr.Button("Submit")
             btn.click(fn=desc_selector, inputs=[text1, Descriptionchoices]).then(run_model, inputs=[text1, Descriptionchoices], outputs=text2)
-    # with gr.Tab("Knowledge Graph"):
-    #     gr.Markdown("""
-    #     Use this tool to generate a knowledge graph of your invention. This will help highlight the links between features.
-    #     """)
-    #     with gr.Row(scale=1, min_width=600):
-    #         text1 = gr.Textbox(label="Input",
-    #                           placeholder='Type in your idea here!')
-    #         text2 = gr.Textbox(label="Output")
-    # with gr.Tab("Prosecution Ideator"):
-    #     gr.Markdown("""
-    #     Use this tool to generate ideas for how to overcome objections to novelty and inventive step. Outputs are in the problem-solution format.
-    #     """)
-    #     with gr.Row(scale=1, min_width=600):
-    #         text1 = gr.Textbox(label="Input",
-    #                           placeholder='Type in your idea here!')
-    #         text2 = gr.Textbox(label="Output")
     # with gr.Tab("Claimed Infill"):
     #     gr.Markdown("""
@@ -376,7 +372,6 @@ with gr.Blocks(title='Claimed', theme=theme) as demo:
     with gr.Tab("CPC Search Tool"):
         gr.Markdown("""
         Use this tool to classify your invention according to the Cooperative Patent Classification system.
         Click on the link to initiate either an Espacenet or Google Patents classification search using the generated classifications. You can specify which you would like using the dropdown menu.
         """)
@@ -389,18 +384,11 @@ with gr.Blocks(title='Claimed', theme=theme) as demo:
             classify_btn = gr.Button("Classify")
             classify_btn.click(fn=classifier, inputs=[userin, ClassifyChoices] , outputs=output)
     gr.Markdown("""
     # THE CHATBOT
     Do you want a bit more freedom over the outputs you generate? No worries, you can use a chatbot version of our model below. You can ask it anything.
     If you're concerned about a particular output, hit the flag button and we will use that information to improve the model.
     """)
     chatbot = gr.Chatbot([], elem_id="Claimed Assistant").style(height=500)
     with gr.Row():
@@ -422,16 +410,11 @@ with gr.Blocks(title='Claimed', theme=theme) as demo:
     gr.Markdown("""
     # HAVE AN IDEA? GET IT CLAIMED
     In the future, we are looking to expand our model's capabilities further to assist in a range of IP related tasks.
     If you are interested in using a more powerful model that we have trained, or if you have any suggestions of features you would like to see us add, please get in touch!
     As far as data is concerned, you have nothing to worry about! We don't store any of your inputs to use for further training, we're not OpenAI.
     """)
-#
 demo.queue(concurrency_count=9)
-demo.launch()

 import torch
 import torch.nn as nn
 import transformers
+from transformers import AutoTokenizer, AutoConfig, LlamaForCausalLM, LlamaTokenizer, GenerationConfig, AutoModel, pipeline
 import pandas as pd
 import tensorflow as tf
 import numpy as np
 all_stopwords.extend(extra_stopwords)
 ########### GET CLAIMED TRAINED MODEL ###########
+tokenizer = LlamaTokenizer.from_pretrained("samwit/koala-7b")
 model = LlamaForCausalLM.from_pretrained(
+    "samwit/koala-7b",
     load_in_8bit=True,
+    device_map='auto',
+)
 ########## DEFINING FUNCTIONS ###################
 def mean_pooling(model_output, attention_mask):
     embedding = torch.from_numpy(embedding)
     return embedding
 ########## LOADING PRE-COMPUTED EMBEDDINGS ##########
 def clean_data(input, type='Dataframe'):
     broad_scope_predictions = broad_scope_class_predictor(class_embeddings, in_emb, SearchType, Number, Sensitivity='High')
     return broad_scope_predictions
 def generateresponse(history, temp, top_p, tokens):
     global model
     global tokenizer
     {user}
     ### Response:"""
+    pipe = pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        max_length=512,
         temperature=temp,
         top_p=top_p,
+        repetition_penalty=1.15
+    )
+    outputs = pipe(PROMPT)
+    outputs = outputs[0]['generated_text']
+    outputs = str(outputs).split('### Response')[1]
     response = f"Response: {outputs}"
+    return response
 def run_model(userin, dropd):
     if dropd in ["Apparatus Claim", "Method of Use Claim", "Method Claim"]:
         PROMPT = claim_selector(userin, dropd)
+    elif dropd in ["Generate a Description Paragraph", "Generate a Abstract", "What are the Benefits/Technical Effects"]:
         PROMPT = desc_selector(userin, dropd)
+    pipe = pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        max_length=512,
+        temperature=0.7,
         top_p=0.95,
+        repetition_penalty=1.15
+    )
+    outputs = pipe(PROMPT)
+    outputs = outputs[0]['generated_text']
+    outputs = str(outputs).split('### Response')[1]
     response = f"Response: {outputs}"
     return response
+def prosecute(application, priorart, dropd):
+    """Draft a patent novelty/inventive step argument with the loaded model.
+
+    Args:
+        application: Text describing the present invention.
+        priorart: Text of the prior art document to argue against.
+        dropd: Name of the argumentation approach; it is inserted verbatim
+            into the instruction part of the prompt.
+
+    Returns:
+        The model's completion prefixed with "Response: ".
+    """
+    response_marker = '### Response:'
+    PROMPT = f"""
+    Below is an instruction that describes a task. Write a response that appropriately completes the request.
+    ### Instruction:
+    Draft a patent novelty/inventive step argument using the {dropd} approach:
+    Application: {application}
+    Prior Art: {priorart}
+    ### Response:"""
+    # NOTE(review): max_length appears to cap prompt + completion together,
+    # so long inputs leave little room for the answer - confirm against the
+    # transformers generation docs before raising it.
+    pipe = pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        max_length=512,
+        temperature=0.7,
+        top_p=0.95,
+        repetition_penalty=1.15
+    )
+    generated = str(pipe(PROMPT)[0]['generated_text'])
+    # generated_text is expected to echo the prompt before the completion.
+    # Cutting the prompt off by length (instead of splitting on the marker)
+    # keeps user input that itself contains "### Response" from being
+    # mistaken for the start of the answer.
+    if generated.startswith(PROMPT):
+        completion = generated[len(PROMPT):]
+    else:
+        # Fallback: split on the full marker *including* the colon so the
+        # answer does not start with a stray ":"; if the marker is missing
+        # return the whole text rather than raising IndexError.
+        _, found, tail = generated.partition(response_marker)
+        completion = tail if found else generated
+    # As before, drop anything after a second response marker that the model
+    # may have produced by continuing the template.
+    completion = completion.split('### Response')[0].strip()
+    response = f"Response: {completion}"
+    return response
 def claim_selector(userin, dropd):
     PROMPT = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
     gr.Markdown("""
     # CLAIMED - A GENERATIVE TOOLKIT FOR PATENT ATTORNEYS
     The patenting process can be incredibly time-consuming and expensive. We're on a mission to change that.
     Welcome to our demo! We've trained Meta's Llama on over 200k entries, with a focus on tasks related to the intellectual property domain.
     Please note that this is for research purposes and shouldn't be used commercially.
     None of the outputs of this model, taken in part or in its entirety, constitutes legal advice. If you are seeking protection for you intellectual property, consult a registered patent/trademark attorney.
     """)
         gr.Markdown("""
         Use this tool to expand your patent claim into a description. You can also use this tool to generate abstracts and give you ideas about the benefit of an invention by changing the settings in the dropdown menu.
          """)
+        Descriptionchoices = gr.Dropdown(["Generate a Description Paragraph", "Generate a Abstract", "What are the Benefits/Technical Effects"], label='Choose Generation Type Here')
         with gr.Row(scale=1, min_width=600):
             text1 = gr.Textbox(label="Input",
         with gr.Row():
             btn = gr.Button("Submit")
             btn.click(fn=desc_selector, inputs=[text1, Descriptionchoices]).then(run_model, inputs=[text1, Descriptionchoices], outputs=text2)
+    with gr.Tab("Prosecution Beta"):
+        gr.Markdown("""
+        Use this tool to generate ideas for how to overcome objections to novelty and inventive step. Outputs are in the EPO's problem-solution format. For now, this tool only works on relatively short inputs, so maybe try with some simple claims or short paragraphs.
+         """)
+        dropd = gr.Dropdown(["Problem Solution", "Windsurfing/Pozzoli"], label='Choose Generation Type Here')
+        with gr.Row(scale=1, min_width=600):
+            with gr.Column():
+              application = gr.Text(label="Present Invention")
+              priorart = gr.Text(label="Prior Art Document")
+            text2 = gr.Textbox(label="Output")
+        with gr.Row():
+            btn = gr.Button("Submit")
+            btn.click(fn=prosecute, inputs=[application, priorart, dropd], outputs=text2)
     # with gr.Tab("Claimed Infill"):
     #     gr.Markdown("""
     with gr.Tab("CPC Search Tool"):
         gr.Markdown("""
         Use this tool to classify your invention according to the Cooperative Patent Classification system.
         Click on the link to initiate either an Espacenet or Google Patents classification search using the generated classifications. You can specify which you would like using the dropdown menu.
         """)
             classify_btn = gr.Button("Classify")
             classify_btn.click(fn=classifier, inputs=[userin, ClassifyChoices] , outputs=output)
     gr.Markdown("""
     # THE CHATBOT
     Do you want a bit more freedom over the outputs you generate? No worries, you can use a chatbot version of our model below. You can ask it anything.
     If you're concerned about a particular output, hit the flag button and we will use that information to improve the model.
     """)
     chatbot = gr.Chatbot([], elem_id="Claimed Assistant").style(height=500)
     with gr.Row():
     gr.Markdown("""
     # HAVE AN IDEA? GET IT CLAIMED
     In the future, we are looking to expand our model's capabilities further to assist in a range of IP related tasks.
     If you are interested in using a more powerful model that we have trained, or if you have any suggestions of features you would like to see us add, please get in touch!
     As far as data is concerned, you have nothing to worry about! We don't store any of your inputs to use for further training, we're not OpenAI.
     """)
 demo.queue(concurrency_count=9)
+demo.launch(share=True)