Spaces:
Sleeping
Sleeping
File size: 2,781 Bytes
19d4726 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
import logging
import os
from typing import List, Tuple
import gradio as gr
import pandas as pd
import spacy
from transformers import AutoModelForTokenClassification, AutoTokenizer
# Load the Portuguese spaCy pipeline; on a fresh environment the model
# package is missing, so fall back to downloading it once and retrying.
try:
    nlp = spacy.load("pt_core_news_sm")
except Exception:
    os.system("python -m spacy download pt_core_news_sm")
    nlp = spacy.load("pt_core_news_sm")
# BERTimbau fine-tuned for Portuguese POS token classification, plus its
# matching tokenizer (both fetched from the Hugging Face hub).
model = AutoModelForTokenClassification.from_pretrained("Emanuel/bertimbau-base-pos")
tokenizer = AutoTokenizer.from_pretrained("Emanuel/bertimbau-base-pos")
# Root logger at DEBUG so predict() can emit per-token diagnostics.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
def predict(text, nlp, logger=None) -> Tuple[List[str], List[str]]:
    """Run POS tagging over ``text``.

    Tokenizes with the given spaCy pipeline, feeds the words to the
    module-level BERT token-classification model, and assigns each word
    the label predicted for its first sub-word piece.

    Args:
        text: Input sentence (Portuguese).
        nlp: spaCy pipeline, used only for word tokenization.
        logger: Optional logger; when ``None`` nothing is logged.

    Returns:
        ``(tokens, labels)``: parallel lists of word strings and POS tags.
    """
    doc = nlp(text)
    tokens = [token.text for token in doc]
    # BUG FIX: logger defaults to None, but the original called
    # logger.info() unconditionally here, so predict(text, nlp) raised
    # AttributeError. Guard it like the per-token logging below.
    if logger is not None:
        logger.info("Starting predictions for sentence: {}".format(text))
    input_tokens = tokenizer(
        tokens,
        return_tensors="pt",
        is_split_into_words=True,
        return_offsets_mapping=True,
        return_special_tokens_mask=True,
    )
    output = model(input_tokens["input_ids"])
    i_token = 0
    labels = []
    for off, is_special_token, pred in zip(
        input_tokens["offset_mapping"][0],
        input_tokens["special_tokens_mask"][0],
        output.logits[0],
    ):
        # Skip [CLS]/[SEP] and continuation sub-word pieces (character
        # offset > 0): only the first piece of each word gets the label.
        if is_special_token or off[0] > 0:
            continue
        # Use the public id2label mapping instead of poking __dict__.
        label = model.config.id2label[int(pred.argmax(axis=-1))]
        if logger is not None:
            logger.info("{}, {}, {}".format(off, tokens[i_token], label))
        labels.append(label)
        i_token += 1
    return tokens, labels
def text_analysis(text):
    """Tag ``text`` and return the two Gradio outputs.

    Returns a list of ``(token, label)`` pairs for gr.HighlightedText
    (with a plain space after each token so word boundaries render) and
    a DataFrame with one row per token for gr.Dataframe.
    """
    tokens, labels = predict(text, nlp, logger)
    table = pd.DataFrame(
        {
            "token": tokens,
            "etiqueta": labels,
        }
    )
    highlighted = []
    for pair in zip(tokens, labels):
        highlighted.append(pair)
        highlighted.append((" ", None))
    return highlighted, table
# Read the static assets once at startup. FIX: the original used bare
# open(...).read(), leaking three file handles and decoding with the
# locale's default codec; use context managers and pin UTF-8 (the HTML
# and examples contain accented Portuguese text).
with open("style.css", encoding="utf-8") as f:
    css = f.read()
with open("top.html", encoding="utf-8") as f:
    top_html = f.read()
with open("bottom.html", encoding="utf-8") as f:
    bottom_html = f.read()

# Assemble the UI: input textbox -> (highlighted tags, token table).
with gr.Blocks(css=css) as demo:
    gr.HTML(top_html)
    text = gr.Textbox(placeholder="Insira um texto...", label="Texto de entrada")
    output_highlighted = gr.HighlightedText()
    output_df = gr.Dataframe()
    submit_btn = gr.Button("Enviar")
    submit_btn.click(
        fn=text_analysis, inputs=text, outputs=[output_highlighted, output_df]
    )
    examples = gr.Examples(
        examples=[
            [
                "A população não poderia ter acesso a relatórios que explicassem, por exemplo, os motivos exatos de atrasos em obras de linhas e estações."
            ],
            ["Filme 'Star Wars : Os Últimos Jedi' ganha trailer definitivo; assista."],
        ],
        inputs=[text],
        label="Exemplos",
    )
    gr.HTML(bottom_html)
demo.launch()
|