Spaces:

nam194
/

Review_company_analysis_and_Resume_parsing

Running

Review_company_analysis_and_Resume_parsing

File size: 3,780 Bytes

9e5b4bd
 
988450a
4e55f8f
 
988450a
4e55f8f
 
 
 
988450a
 
4e55f8f
 
 
 
 
 
 
 
 
 
 
 
 
 
1984dbe
988450a
 
4e55f8f
 
 
988450a
246d50e
988450a
4e55f8f
 
 
 
 
246d50e
 
 
 
 
4e55f8f
 
 
 
 
 
 
988450a
79a32e1
988450a
9e5b4bd
 
 
 
 
 
4e55f8f
 
79a32e1
 
988450a
 
9e5b4bd
ebfa8f4
9e5b4bd
988450a
9e5b4bd
988450a
 
9e5b4bd
988450a
9e5b4bd

import numpy as np
import gradio as gr
from imports import *
from huggingface_hub import login
login(token="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")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dict_ = {
    0: "negative",
    1: "positive",
    2: "neutral"}
tokenizer_sent = AutoTokenizer.from_pretrained("nam194/sentiment", use_fast=False)
model_sent = AutoModelForSequenceClassification.from_pretrained("nam194/sentiment", num_labels=3, use_auth_token=True).to(device)
def cvt2cls(data):
    data = list(set(data))
    try: 
        data.remove(20)
    except:
        pass
    for i, num in enumerate(data):
        if num == 20:
            continue
        if num>=10:
            data[i] -= 10
    return data
ner_tags = {0: 'B-chỗ để xe', 1: 'B-con người', 2: 'B-công việc', 3: 'B-cơ sở vật chất', 4: 'B-dự án', 5: 'B-lương', 6: 'B-môi trường làm việc', 7: 'B-ot/thời gian', 8: 'B-văn phòng', 9: 'B-đãi ngộ', 10: 'I-chỗ để xe', 11: 'I-con người', 12: 'I-công việc', 13: 'I-cơ sở vật chất', 14: 'I-dự án', 15: 'I-lương', 16: 'I-môi trường làm việc', 17: 'I-ot/thời gian', 18: 'I-văn phòng', 19: 'I-đãi ngộ', 20: 'O'}
topic_tags = {0: 'chỗ để xe', 1: 'con người', 2: 'công việc', 3: 'cơ sở vật chất', 4: 'dự án', 5: 'lương', 6: 'môi trường làm việc', 7: 'ot/thời gian', 8: 'văn phòng', 9: 'đãi ngộ'}
config = RobertaConfig.from_pretrained("nam194/ner", num_labels=21)
tokenizer_topic = AutoTokenizer.from_pretrained("nam194/ner", use_fast=False) 
model_topic = PhoBertLstmCrf.from_pretrained("nam194/ner", config=config, from_tf=False).to(device)
model_topic.resize_token_embeddings(len(tokenizer_topic)) 


def sentiment(sent: str):
    sent_ = normalize(text=sent) # segment input sentence, maybe raise ConnectionError: HTTPConnectionPool())
    input_sent = torch.tensor([tokenizer_sent.encode(sent_)]).to(device)
    with torch.no_grad():
        out_sent = model_sent(input_sent)
        logits_sent = out_sent.logits.softmax(dim=-1).tolist()[0]
        pred_sent = dict_[np.argmax(logits_sent)]

    sent = replace_all(text=sent) # segment input sentence, maybe raise ConnectionError: HTTPConnectionPool())
    sent_segment = sent.split(".")
    for i, s in enumerate(sent_segment):
        s = s.strip()
        sent_segment[i] = underthesea.word_tokenize(s, format="text").split()
    dump = [[i, 'O'] for s in sent_segment for i in s]
    dump_set = NerDataset(feature_for_phobert([dump], tokenizer=tokenizer_topic, use_crf=True))    
    dump_iter = DataLoader(dump_set, batch_size=1)
    with torch.no_grad():
        for idx, batch in enumerate(dump_iter):
            batch = { k:v.to(device) for k, v in batch.items() }        
            outputs = model_topic(**batch)
    pred_topic = list(set([topic_tags[i] for i in cvt2cls(outputs["tags"][0])]))
    return str({"sentiment": pred_sent, "topic": pred_topic})
    

def flip_image(x):
    return np.fliplr(x)


with gr.Blocks() as demo:
    gr.Markdown("Demo projects Review Company and Resume parser phase 1.")
    with gr.Tab("Review Company"):
        text_input = gr.Textbox(label="Input sentence:", placeholder="input here...")
        text_output = gr.Textbox(label="Result:")
        text_button = gr.Button("Predict")
    with gr.Tab("Extract infomation from resume"):
        with gr.Row():
            image_input = gr.Image()
            image_output = gr.Image()
        image_button = gr.Button("Predict")

    # with gr.Accordion("Open for More!"):
    #     gr.Markdown("Look at me...")

    text_button.click(sentiment, inputs=text_input, outputs=text_output)
    image_button.click(flip_image, inputs=image_input, outputs=image_output)

demo.launch()