Spaces:
Runtime error
Runtime error
import streamlit as st | |
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer | |
from datasets import load_dataset | |
# Milestone-3 | |
# Seed the two session-state slots read by the results line further down, so
# the first script run can render before any prediction has been stored.
for _state_key in ("viability", "score"):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = ""
def get_patent_score(pipeline, abstract, claims):
    """Classify a patent's abstract and claims and store the combined verdict.

    The two texts are run through ``pipeline`` separately. Nothing is
    returned; the outcome is written to Streamlit session state, which lets
    the function be used directly as a widget ``on_click`` callback.

    Args:
        pipeline: Callable text classifier. For each input it must return a
            sequence whose first item is a mapping with ``"label"`` and
            ``"score"`` keys.
        abstract: Abstract text of the patent.
        claims: Claims text of the patent.

    Side effects:
        ``st.session_state.score`` is set to the mean of the two confidence
        scores, expressed as a percentage string with two decimals.
        ``st.session_state.viability`` is set to the label of the more
        confident prediction (the claims label wins an exact tie).
    """
    # `pad_to_max_length` is deprecated in transformers; `padding=True` is the
    # supported spelling and pads only to the longest sequence in the call.
    tokenizer_kwargs = {"padding": True, "truncation": True}
    abstract_pred = pipeline(abstract, **tokenizer_kwargs)[0]
    claims_pred = pipeline(claims, **tokenizer_kwargs)[0]

    # NOTE(review): when the two labels disagree this still averages
    # confidences that belong to different labels -- confirm that is the
    # intended meaning of the displayed score.
    mean_confidence = (abstract_pred["score"] + claims_pred["score"]) / 2
    st.session_state.score = "{:.2f}".format(mean_confidence * 100)

    # If both labels agree either one is correct; otherwise the more confident
    # prediction decides. A single comparison covers both situations.
    if abstract_pred["score"] > claims_pred["score"]:
        st.session_state.viability = abstract_pred["label"]
    else:
        st.session_state.viability = claims_pred["label"]
# Local checkpoint directory, resolved relative to the working directory.
checkpoint_file = "./checkpoint-3024"

# NOTE(review): Streamlit re-executes this script on every interaction, so the
# model and tokenizer are reloaded each time -- consider caching them.
model = AutoModelForSequenceClassification.from_pretrained(checkpoint_file)
tokenizer = AutoTokenizer.from_pretrained(
    checkpoint_file,
    pad_to_max_length=True,
)

# This rebinds the module-level name `pipeline` from the transformers factory
# function to the classifier it builds; the UI code below relies on that name.
pipeline = pipeline(
    task="text-classification",
    model=model,
    tokenizer=tokenizer,
)
# Load the HUPD "sample" configuration, restricted to the filing-date windows
# given below for the train and validation splits.
dataset_dict = load_dataset(
    "HUPD/hupd",
    name="sample",
    data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
    icpr_label=None,
    train_filing_start_date="2016-01-01",
    train_filing_end_date="2016-01-21",
    val_filing_start_date="2016-01-22",
    val_filing_end_date="2016-01-31",
)
dataset = dataset_dict["train"]

# Read the preview rows with one slice instead of indexing whole columns on
# every loop iteration. A slice also tolerates a split with fewer than ten
# rows, where positional indexing would raise IndexError.
sample_rows = dataset[:10]

# Title -> text lookup tables backing the patent selector. As before, a
# repeated title keeps the text of its last occurrence.
abstract_dict = dict(zip(sample_rows["title"], sample_rows["abstract"]))
claims_dict = dict(zip(sample_rows["title"], sample_rows["claims"]))
st.title("Patent Viability Score Checker")

chosen_patent = st.selectbox(
    "Choose a patent to run the checker on",
    options=list(abstract_dict),
)

# Both text areas are pre-filled from the selected patent and stay editable,
# so the user can score modified text as well.
abstract = st.text_area(
    label="Abstract",
    value=abstract_dict[chosen_patent],
)
claims = st.text_area(
    label="Claims",
    value=claims_dict[chosen_patent],
)

st.button(
    "Check Viability",
    on_click=get_patent_score,
    args=(pipeline, abstract, claims),
)

# Score the current inputs BEFORE rendering the result. Previously this call
# came after st.markdown, so the page showed the outcome of the previous
# script run (and nothing at all on first load).
get_patent_score(pipeline=pipeline, abstract=abstract, claims=claims)

st.markdown(
    body="Outcome: {}, Score: {}%".format(
        st.session_state.viability,
        st.session_state.score,
    )
)
# Milestone-2 | |
# if "sentiment" not in st.session_state: | |
# st.session_state.sentiment = "" | |
# if "score" not in st.session_state: | |
# st.session_state.score = "" | |
# def run_model(text_in, model_in): | |
# classifier = pipeline(task="sentiment-analysis", | |
# model=model_in) | |
# analysis = classifier(text_in) | |
# st.session_state.sentiment = analysis[0]["label"] | |
# st.session_state.score = "{:.2f}".format(analysis[0]["score"] * 100) | |
# models_available = {"Roberta Large English": "siebert/sentiment-roberta-large-english", | |
# "Generic": "Seethal/sentiment_analysis_generic_dataset", | |
# "Twitter Roberta": "cardiffnlp/twitter-roberta-base-sentiment"} | |
# st.title("Sentiment Analysis Web Application") | |
# text_input = st.text_area( | |
# label="Enter the text to analyze", value="I Love Pizza") | |
# model_picked = st.selectbox( | |
# "Choose a model to run on", options=models_available.keys()) | |
# st.button("Submit", on_click=run_model, args=( | |
# text_input, models_available[model_picked])) | |
# st.markdown(body="Sentiment: {}, Confidence Score: {} %".format( | |
# st.session_state.sentiment, st.session_state.score)) | |