address-extraction / predict.py
duoquote
Update model files and tokenizer configuration
d1aed23
raw
history blame contribute delete
No virus
2.71 kB
import torch
import orjson
from transformers import pipeline
from transformers import BertTokenizerFast, AutoTokenizer
# The tokenizer is loaded by hub name, while the model weights come from the
# local ./model directory.
tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-turkish-cased")

# Token-classification ("ner") pipeline shared by the rest of this script.
nlp = pipeline(
    task="ner",
    model="./model",
    tokenizer=tokenizer,
    # transformers convention: 0 = first CUDA device, -1 = CPU.
    device=-1 if not torch.cuda.is_available() else 0,
)
def _close_entity(entity, entities):
    """Turn the summed score of *entity* into a mean and append it to *entities*."""
    entity["score"] /= entity["token_count"]
    entities.append(entity)


def get_entities(tokens):
    """Merge token-level ``B-``/``I-`` predictions into entity spans.

    Args:
        tokens: Iterable of dicts, each with the keys ``"entity"`` (a tag
            such as ``"B-xxx"`` or ``"I-xxx"``), ``"start"``, ``"end"`` and
            ``"score"``.

    Returns:
        A list of dicts with the keys ``"label"`` (the tag without its
        two-character prefix), ``"ranges"`` (``[start, end]`` taken from the
        first and last merged token), ``"score"`` (mean of the merged token
        scores) and ``"token_count"`` (number of merged tokens).

    Merging rules:
        * ``B-`` closes any open entity and opens a new one.
        * ``I-`` with no open entity is ignored.
        * ``I-`` extends the open entity; if its label differs from the open
          entity's label, the token is still absorbed but the entity is
          closed right after it.
        * Any other tag closes the open entity.
    """
    entities = []
    entity = None
    for token in tokens:
        tag = token["entity"]
        if tag.startswith("B-"):
            if entity is not None:
                _close_entity(entity, entities)
            entity = {
                "label": tag[2:],
                "ranges": [token["start"], token["end"]],
                "score": token["score"],
                "token_count": 1,
            }
        elif tag.startswith("I-"):
            if entity is None:
                # Continuation tag without a preceding "B-": nothing to extend.
                continue
            entity["ranges"][1] = token["end"]
            entity["token_count"] += 1
            entity["score"] += token["score"]
            if entity["label"] != tag[2:]:
                # Label mismatch: the token was absorbed above, but it ends
                # the current entity.
                _close_entity(entity, entities)
                entity = None
        else:
            if entity is not None:
                _close_entity(entity, entities)
            entity = None
    # Flush an entity that is still open when the input ends.
    if entity is not None:
        _close_entity(entity, entities)
    return entities
def process(text):
    """Run the NER pipeline on *text* and print a report of what was found.

    Prints one line per merged entity (matched substring, label, mean score
    as a percentage), then the mean score over all tokens returned by the
    pipeline, the number of entities, and a separator line.

    Args:
        text: The string to analyse.

    Returns:
        None. All output goes to stdout.
    """
    nlp_output = nlp(text)
    entities = get_entities(nlp_output)
    for entity in entities:
        start, end = entity["ranges"]
        print(f"{text[start:end]:<35} {entity['label']:>15} {entity['score'] * 100:.2f}%")
    # The pipeline may return no tokens at all; report 0.0 instead of
    # dividing by zero.
    if nlp_output:
        average_score = sum(token["score"] for token in nlp_output) / len(nlp_output)
    else:
        average_score = 0.0
    print("Average Score: ", average_score)
    print("Labels Found: ", len(entities))
    print("-" * 70)
if __name__ == "__main__":
    # Demo: run a handful of sample addresses through the model first.
    examples = [
        "Osmangazi Mahallesi, Hoca Ahmet Yesevi Cd. No:34, 16050 Osmangazi/Bursa",
        "Karşıyaka Mahallesi, Mavişehir Caddesi No: 91, Daire 4, 35540 Karşıyaka/İzmir",
        "Selçuklu Mahallesi, Atatürk Bulvarı No: 55, 42050 Selçuklu/Konya",
        "Alsancak Mahallesi, 1475. Sk. No:3, 35220 Konak/İzmir",
        "Kocatepe Mahallesi, Yaşam Caddesi 3. Sokak No:4, 06420 Bayrampaşa/İstanbul",
    ]
    for example in examples:
        print(example)
        process(example)

    # Interactive loop: an empty line or end-of-input (Ctrl-D, closed or
    # piped stdin) ends the session.
    while True:
        try:
            text = input("Enter text: ")
        except EOFError:
            # Finish the prompt line so the shell prompt starts cleanly.
            print()
            break
        if not text:
            break
        process(text)