sk-ner / app.py
crabz's picture
uncomment pytorch installation
dad5c8f
raw
history blame
4.67 kB
# -*- coding: utf-8 -*-
import os
# Install the pinned CPU-only PyTorch wheels at start-up, before `transformers`
# is imported further down this file.
#
# pip is invoked as `<this interpreter> -m pip` with an argument list rather
# than through a shell string, so the packages land in the environment that is
# actually running this script and nothing is subject to shell parsing.
import subprocess
import sys

_torch_install = subprocess.run(
    [
        sys.executable, "-m", "pip", "install",
        "torch==1.10.1+cpu",
        "torchvision==0.11.2+cpu",
        "torchaudio==0.10.1+cpu",
        "-f", "https://download.pytorch.org/whl/cpu/torch_stable.html",
    ],
    check=False,  # best-effort, as before: a failed install must not abort start-up here
)
if _torch_install.returncode != 0:
    # Surface the failure instead of dropping the exit status silently.
    print(
        "warning: PyTorch installation exited with status "
        f"{_torch_install.returncode}",
        file=sys.stderr,
    )
import gradio as gr
from transformers import pipeline
import spacy
from spacy import displacy
# Class id -> BIO tag. The position in the list below is the class id.
# Id 0 maps to the string '0' (digit zero); `postprocess` treats id 0 as
# "not an entity" and only inspects the leading 'B' / 'I' of the other tags.
ner_map = dict(enumerate([
    '0',
    'B-OSOBA',
    'I-OSOBA',
    'B-ORGANIZÁCIA',
    'I-ORGANIZÁCIA',
    'B-LOKALITA',
    'I-LOKALITA',
]))
# Highlight colour for each entity type shown by displaCy.
_ENTITY_COLORS = {
    "OSOBA": "lightblue",
    "ORGANIZÁCIA": "lightcoral",
    "LOKALITA": "lightgreen",
}

# displaCy render options: the entity types to display (in the order they are
# declared above) and the colour used for each of them.
options = {
    "ents": list(_ENTITY_COLORS),
    "colors": _ENTITY_COLORS,
}
# Model id handed to the `transformers` pipeline, and the language code of the
# blank spaCy pipeline.
_NER_MODEL_ID = "crabz/slovakbert-ner"
_SPACY_LANG = "sk"

# Token-classification pipeline that produces the per-token predictions
# consumed by `postprocess`.
ner_pipeline = pipeline(task='ner', model=_NER_MODEL_ID)

# Blank spaCy pipeline; `set_entities` calls it to turn a sentence into a Doc.
nlp = spacy.blank(_SPACY_LANG)
def postprocess(classifications, label_map=None):
    """Collapse token-level NER predictions into labelled character spans.

    A span starts at every token tagged ``B-<TYPE>`` and is extended over the
    ``I-...`` tokens that directly follow it. Spans of the same type that
    touch (one ends at the exact offset where the next begins) are then fused
    into a single span.

    Args:
        classifications: Sequence of token predictions in text order. Each
            item is a mapping with an ``'entity'`` class id (a key of
            ``label_map``) and ``'start'`` / ``'end'`` character offsets.
        label_map: Mapping from class id to tag string. The first character
            of a tag says whether it begins (``'B'``) or continues (``'I'``)
            an entity; the entity type is the part after the first ``'-'``.
            Defaults to the module-level ``ner_map``.

    Returns:
        A list of ``(entity_type, start, end)`` tuples ordered by position in
        the text. An ``I-`` token with no preceding ``B-`` token is dropped.

    Raises:
        KeyError: if a non-zero class id is missing from ``label_map``.
    """
    if label_map is None:
        label_map = ner_map

    token_count = len(classifications)
    spans = []
    for index, token in enumerate(classifications):
        # Class id 0 means "not part of an entity".
        if token['entity'] == 0:
            continue
        tag = label_map[token['entity']]
        if tag[0] != 'B':
            # Continuation tokens are consumed by the look-ahead below.
            continue
        # Extend the span over every directly following continuation token.
        last = index
        while (last + 1 < token_count
               and label_map[classifications[last + 1]['entity']][0] == 'I'):
            last += 1
        spans.append((tag.split('-')[1], token['start'], classifications[last]['end']))

    # Fuse touching spans of the same type in one pass. Sorting by position
    # makes touching spans neighbours and keeps the result in text order.
    spans.sort(key=lambda span: (span[1], span[2]))
    fused = []
    for label, start, end in spans:
        if fused and fused[-1][0] == label and fused[-1][2] == start:
            fused[-1] = (label, fused[-1][1], max(fused[-1][2], end))
        else:
            fused.append((label, start, end))
    return fused
def set_entities(sentence, entities):
    """Build a spaCy ``Doc`` for ``sentence`` annotated with ``entities``.

    Args:
        sentence: The text to tokenize with the module-level ``nlp`` pipeline.
        entities: Iterable of ``(label, start, end)`` tuples, where ``start``
            and ``end`` are character offsets into ``sentence``.

    Returns:
        The ``Doc`` with ``doc.ents`` set to the spans that could be mapped
        onto its tokens.
    """
    doc = nlp(sentence)
    spans = []
    for label, start, end in entities:
        # The offsets come from a different tokenizer and need not fall on
        # spaCy token boundaries. "expand" snaps them outwards to whole
        # tokens instead of yielding None as the default strict mode does.
        span = doc.char_span(start, end, label=label, alignment_mode="expand")
        if span is not None:
            spans.append(span)
    # `doc.ents` rejects overlapping spans, which expansion can create;
    # filter_spans keeps a non-overlapping subset (longest span wins).
    doc.ents = spacy.util.filter_spans(spans)
    return doc
def apply_ner(sentence: str):
    """Run named-entity recognition on ``sentence`` and render the result.

    The sentence is passed through ``ner_pipeline``, the token predictions are
    grouped into spans by ``postprocess``, the spans are attached to a spaCy
    ``Doc`` by ``set_entities``, and that ``Doc`` is rendered with displaCy's
    ``"ent"`` style using the module-level ``options``.

    Returns:
        Whatever ``displacy.render`` returns for the annotated document.
    """
    spans = postprocess(ner_pipeline(sentence))
    annotated = set_entities(sentence, spans)
    return displacy.render(annotated, style="ent", options=options)
# Sample sentences offered in the UI; each one becomes a single-input example.
_EXAMPLE_SENTENCES = (
    "Laboratóriá Úradu verejného zdravotníctva sekvenovaním potvrdili výskyt ďalších "
    "štyroch prípadov variantu omikron na Slovensku.",
    "Čaputová opakovane tvrdí, že \"spravodlivosť na Slovensku neplatí vždy pre všetkých "
    "rovnako\".",
    "Informácie o týchto veľkolepých plánoch prišli týždeň po tom, ako sa japonský "
    "miliardár Jusaku Maezawa vrátil z 12-dňového pobytu na Medzinárodnej vesmírnej stanici "
    "(ISS), čím sa stal prvým vesmírnym turistom, ktorý cestoval na ISS za viac ako desať "
    "rokov.",
    "Minister financií a líder mandátovo najsilnejšieho hnutia OĽaNO Igor Matovič "
    "upozorňuje, že následky tretej vlny budú na Slovensku veľmi veľké.",
    "Začiatkom roka 2021 sa objavili nezhody medzi Richardom Sulíkom a šéfom hnutia OĽANO "
    "Igorom Matovičom, ktoré v istej miere pretrvávajú aj dodnes.",
)

# Text passed as the interface description.
_DESCRIPTION = (
    "Named-entity recognition (NER) labels named-entities in unstructured text. This "
    "implementation supports three labels: person (OSOBA), organization (ORGANIZÁCIA) and "
    "location (LOKALITA). You can try out one of the examples below or type your own "
    "sentence. Don't forget to use double quotes (\" \") instead of curved quotes („ “)"
)

# Text-in / HTML-out interface around `apply_ner`, with flagging turned off.
intf = gr.Interface(
    fn=apply_ner,
    inputs="text",
    outputs="html",
    title='Slovak Named Entity Recognition',
    allow_flagging=False,
    examples=[[text] for text in _EXAMPLE_SENTENCES],
    description=_DESCRIPTION,
    article="",
)

# Start the app as soon as this file is executed.
intf.launch()