|
|
|
import os
import subprocess
import sys

# Install the CPU-only PyTorch wheels at start-up, before `transformers` is
# imported further down in this file.
#
# The command is passed as an argument list (no shell string to quote or
# inject into) and is run through the interpreter that is executing this
# script, so the packages land in the environment this process actually
# imports from -- a bare `pip3` found on PATH may belong to another Python.
_TORCH_INSTALL_COMMAND = [
    sys.executable, "-m", "pip", "install",
    "torch==1.10.1+cpu",
    "torchvision==0.11.2+cpu",
    "torchaudio==0.10.1+cpu",
    "-f", "https://download.pytorch.org/whl/cpu/torch_stable.html",
]

# Best effort, like the original `os.system` call: a failed install does not
# abort start-up, but it is no longer silent.
_torch_install = subprocess.run(_TORCH_INSTALL_COMMAND, check=False)
if _torch_install.returncode != 0:
    print(
        "warning: installing the CPU PyTorch wheels failed with exit status "
        f"{_torch_install.returncode}",
        file=sys.stderr,
    )
|
|
|
import gradio as gr |
|
from transformers import pipeline |
|
|
|
import spacy |
|
from spacy import displacy |
|
|
|
# Class id -> tag string. The position of each tag in the tuple is its class
# id (0..6). `postprocess` opens an entity on a tag starting with 'B' and
# extends it over tags starting with 'I'; id 0 maps to '0' (the digit zero),
# which starts with neither, so it never opens or extends an entity.
ner_map = dict(enumerate((
    '0',
    'B-OSOBA',
    'I-OSOBA',
    'B-ORGANIZÁCIA',
    'I-ORGANIZÁCIA',
    'B-LOKALITA',
    'I-LOKALITA',
)))
|
|
|
# Options handed to `displacy.render(..., style="ent")` in `apply_ner`.
options = {}
# Entity labels listed for the renderer.
options["ents"] = ["OSOBA", "ORGANIZÁCIA", "LOKALITA"]
# Colour associated with each of those labels.
options["colors"] = {
    "OSOBA": "lightblue",
    "ORGANIZÁCIA": "lightcoral",
    "LOKALITA": "lightgreen",
}
|
|
|
# Transformers pipeline for the 'ner' task, backed by the
# "crabz/slovakbert-ner" checkpoint; `apply_ner` calls it once per request.
ner_pipeline = pipeline('ner', model="crabz/slovakbert-ner")

# Blank spaCy pipeline for language code "sk". `set_entities` uses it only to
# turn the raw sentence into a Doc that the entity spans can be attached to.
nlp = spacy.blank("sk")
|
|
|
|
|
def postprocess(classifications, label_map=None):
    """Collapse per-token NER predictions into merged entity spans.

    An entity starts at every token whose tag begins with ``'B'`` and runs
    over the directly following tokens whose tags begin with ``'I'``. Two
    consecutive entities with the same type where the first one ends at the
    exact character offset at which the next one starts are fused into a
    single span.

    Args:
        classifications: Sequence of per-token predictions. Each item is a
            mapping with an ``'entity'`` key (a class id found in the label
            map) and ``'start'`` / ``'end'`` character offsets.
        label_map: Optional mapping from class id to tag string
            (``'B-<TYPE>'``, ``'I-<TYPE>'``; any other tag means "no
            entity"). Defaults to the module-level ``ner_map``.

    Returns:
        A list of ``(entity_type, start, end)`` tuples, in the order in which
        the entities begin in ``classifications``.

    Raises:
        KeyError: If a class id is missing from the label map.
    """
    if label_map is None:
        label_map = ner_map

    # Resolve every tag once instead of on each comparison.
    tags = [label_map[prediction['entity']] for prediction in classifications]

    entities = []
    for index, (prediction, tag) in enumerate(zip(classifications, tags)):
        # Only a B- tag opens an entity; an I- tag that is not preceded by a
        # B- run is ignored, exactly like the "no entity" tag.
        if not tag.startswith('B'):
            continue

        # NOTE: the continuation check looks only at the 'I' prefix, not at
        # the entity type, so an I- tag of a different type still extends the
        # current entity (behaviour kept from the original implementation).
        last = index
        while last + 1 < len(tags) and tags[last + 1].startswith('I'):
            last += 1

        entity_type = tag.split('-')[1]
        start = prediction['start']
        end = classifications[last]['end']

        # Entities are produced in input order, so a same-type span that
        # touches this one can only be the entity emitted just before it.
        # Merging here replaces the former repeated pairwise rescan and keeps
        # the result in document order.
        if entities and entities[-1][0] == entity_type and entities[-1][2] == start:
            _, previous_start, previous_end = entities[-1]
            entities[-1] = (entity_type, min(previous_start, start), max(previous_end, end))
        else:
            entities.append((entity_type, start, end))

    return entities
|
|
|
|
|
def set_entities(sentence, entities):
    """Build a spaCy Doc for ``sentence`` with ``entities`` attached.

    Args:
        sentence: The raw input text.
        entities: Iterable of ``(label, start_char, end_char)`` tuples, as
            returned by ``postprocess``.

    Returns:
        The Doc produced by the module-level ``nlp`` pipeline, with
        ``doc.ents`` set to the spans that could be mapped onto its tokens.
    """
    doc = nlp(sentence)

    spans = []
    for label, start, end in entities:
        # The offsets come from the transformer tokenizer and need not fall
        # on spaCy token boundaries. With the default strict alignment
        # `char_span` returns None in that case, and a None inside `doc.ents`
        # raises. "expand" snaps the span outwards to whole tokens instead.
        span = doc.char_span(start, end, label=label, alignment_mode="expand")
        if span is not None:
            spans.append(span)

    # `doc.ents` rejects overlapping spans; expansion (or duplicate
    # predictions) can produce them, so drop duplicates/overlaps first.
    doc.ents = spacy.util.filter_spans(spans)
    return doc
|
|
|
|
|
def apply_ner(sentence: str):
    """Run NER on ``sentence`` and return the displaCy entity rendering.

    The sentence is classified token by token with ``ner_pipeline``, the
    predictions are merged into entity spans by ``postprocess``, the spans
    are attached to a spaCy Doc by ``set_entities``, and the Doc is rendered
    with displaCy's "ent" style using the module-level ``options``.

    Args:
        sentence: Text to annotate.

    Returns:
        Whatever ``displacy.render`` returns for the annotated Doc (used as
        the "html" output of the Gradio interface).
    """
    token_predictions = ner_pipeline(sentence)
    annotated_doc = set_entities(sentence, postprocess(token_predictions))
    return displacy.render(annotated_doc, style="ent", options=options)
|
|
|
|
|
# Sentences offered as clickable examples in the UI.
_EXAMPLE_SENTENCES = (
    "Laboratóriá Úradu verejného zdravotníctva sekvenovaním potvrdili výskyt ďalších "
    "štyroch prípadov variantu omikron na Slovensku.",
    "Čaputová opakovane tvrdí, že \"spravodlivosť na Slovensku neplatí vždy pre všetkých "
    "rovnako\".",
    "Informácie o týchto veľkolepých plánoch prišli týždeň po tom, ako sa japonský "
    "miliardár Jusaku Maezawa vrátil z 12-dňového pobytu na Medzinárodnej vesmírnej stanici "
    "(ISS), čím sa stal prvým vesmírnym turistom, ktorý cestoval na ISS za viac ako desať "
    "rokov.",
    "Minister financií a líder mandátovo najsilnejšieho hnutia OĽaNO Igor Matovič "
    "upozorňuje, že následky tretej vlny budú na Slovensku veľmi veľké.",
    "Začiatkom roka 2021 sa objavili nezhody medzi Richardom Sulíkom a šéfom hnutia OĽANO "
    "Igorom Matovičom, ktoré v istej miere pretrvávajú aj dodnes.",
)

# Text shown under the title of the demo.
_DESCRIPTION = (
    "Named-entity recognition (NER) labels named-entities in unstructured text. This "
    "implementation supports three labels: person (OSOBA), organization (ORGANIZÁCIA) and "
    "location (LOKALITA). You can try out one of the examples below or type your own "
    "sentence. Don't forget to use double quotes (\" \") instead of curved quotes („ “)"
)

# Text-in / HTML-out web interface around `apply_ner`. Each example is
# wrapped in its own one-element list, one value per input component.
intf = gr.Interface(
    fn=apply_ner,
    inputs="text",
    outputs="html",
    title='Slovak Named Entity Recognition',
    allow_flagging=False,
    examples=[[sentence] for sentence in _EXAMPLE_SENTENCES],
    description=_DESCRIPTION,
    article="",
)

# Start serving the interface.
intf.launch()
|
|