Spaces:
Runtime error
Runtime error
import spacy | |
from spacy.matcher import Matcher | |
def get_entities(sent):
    """Extract a subject entity and an object entity from a sentence.

    The sentence is parsed with the module-level ``nlp`` pipeline and its
    tokens are scanned left to right.  Compound words and modifiers seen
    before a subject/object token are prepended to that token's text.

    Args:
        sent: The sentence text handed to ``nlp``.

    Returns:
        A two-element list ``[subject, object]``.  Either element is an
        empty string when no token with a matching dependency label was
        found.  When several tokens qualify, the last one wins.
    """
    # NOTE(review): ``nlp`` is expected to be a spaCy pipeline defined at
    # module level; it is not created in this function -- confirm it exists.
    subject = ""
    obj = ""

    prev_dep = ""   # dependency tag of the previous non-punctuation token
    prev_text = ""  # text of the previous non-punctuation token

    prefix = ""     # pending compound word(s)
    modifier = ""   # pending modifier word(s)

    for tok in nlp(sent):
        dep = tok.dep_

        # Punctuation carries no entity information; skip it entirely so it
        # does not become the "previous token" either.
        if dep == "punct":
            continue

        # Compound word: remember it, glued to a directly preceding compound.
        if dep == "compound":
            prefix = tok.text
            if prev_dep == "compound":
                prefix = prev_text + " " + tok.text

        # Modifier (amod, nmod, ...): remember it, glued to a directly
        # preceding compound.
        if dep.endswith("mod"):
            modifier = tok.text
            if prev_dep == "compound":
                modifier = prev_text + " " + tok.text

        # Subject token: emit the entity and reset the pending words.
        # Membership test instead of ``find(...) == True``, which only
        # matched when the substring happened to sit at index 1.
        if "subj" in dep:
            # Join only the non-empty parts to avoid doubled inner spaces.
            subject = " ".join(
                part for part in (modifier, prefix, tok.text) if part
            )
            prefix = ""
            modifier = ""

        # Object token: emit the entity (pending words are kept as-is).
        if "obj" in dep:
            obj = " ".join(
                part for part in (modifier, prefix, tok.text) if part
            )

        # Track the previous token for the compound-chaining checks above.
        prev_dep = dep
        prev_text = tok.text

    return [subject.strip(), obj.strip()]
# Lazily-built spaCy resources shared by every get_relation() call.
_RELATION_CACHE = {}


def _relation_tools():
    """Return the cached ``(nlp, matcher)`` pair, building it on first use."""
    if not _RELATION_CACHE:
        nlp = spacy.load('en_core_web_sm')
        matcher = Matcher(nlp.vocab)
        # ROOT token, optionally followed by a preposition, an agent and an
        # adjective (each optional element may appear at most once).
        pattern = [{'DEP': 'ROOT'},
                   {'DEP': 'prep', 'OP': "?"},
                   {'DEP': 'agent', 'OP': "?"},
                   {'POS': 'ADJ', 'OP': "?"}]
        matcher.add('matching_pattern', patterns=[pattern])
        # Store both only after everything succeeded so a failed build is
        # retried on the next call instead of leaving a half-filled cache.
        _RELATION_CACHE["nlp"] = nlp
        _RELATION_CACHE["matcher"] = matcher
    return _RELATION_CACHE["nlp"], _RELATION_CACHE["matcher"]


def get_relation(sent):
    """Return the relation phrase of a sentence.

    The sentence is parsed with the ``en_core_web_sm`` pipeline and matched
    against a pattern anchored on the ROOT token.  The text of the last
    match reported by the matcher is returned.

    Args:
        sent: The sentence text to analyse.

    Returns:
        The matched span's text, or an empty string when the matcher finds
        nothing (for example for empty input).
    """
    # Loading the model and compiling the pattern is expensive, so it is
    # done once and reused rather than repeated on every call.
    nlp, matcher = _relation_tools()
    doc = nlp(sent)
    matches = matcher(doc)

    # Without this guard an empty match list raised IndexError.
    if not matches:
        return ""

    # Each match is a (match_id, start, end) triple of token offsets.
    _, start, end = matches[-1]
    return doc[start:end].text