|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from os import path |
|
|
|
class UDlexPT:
    """In-memory lexicon built from a word master list and one TSV file per tag.

    Two kinds of files are read at construction time:

    * ``WORDmaster.txt`` -- one line per word, ``word,TAG1 TAG2 ...``
      (the tags after the comma are separated by single spaces).
    * ``<TAG>.tsv`` for every tag in ``self.tags`` -- tab-separated lines
      whose first column is the word and whose second and third columns
      are stored as a two-element entry for that word.
      NOTE(review): columns two and three look like lemma and
      morphological features -- confirm against the data files.

    Attributes:
        tags: the tag names handled by the lexicon (fixed list).
        master: maps each word to the list of tags from the master file.
        words: number of non-blank lines read from the master file.
        entries: total number of non-blank lines read from all tag files.
        t: list parallel to ``tags``; ``t[i]`` maps a word to its list of
            ``[column2, column3]`` entries for ``tags[i]``.
    """

    def __init__(self, data_dir=None, encoding=None):
        """Load the lexicon files and print a per-tag summary table.

        Args:
            data_dir: directory holding ``WORDmaster.txt`` and the
                ``<TAG>.tsv`` files. Defaults to the directory of this
                module, which is where the files were always looked up.
            encoding: text encoding passed to ``open``. ``None`` keeps the
                platform default, as before.

        Raises:
            OSError: if any of the data files cannot be opened.
            ValueError: if the master file names a tag not in ``self.tags``.
            IndexError: if a non-blank line has too few fields.
        """
        self.tags = ["ADJ", "ADP", "ADV", "AUX", "CCONJ", "DET", "INTJ",
                     "NOUN", "NUM", "PRON", "SCONJ", "VERB"]
        self.master = {}
        self.words = 0
        self.entries = 0
        if data_dir is None:
            data_dir = path.dirname(__file__)

        # Per-tag counters, all indexed like self.tags:
        #   master_count  - master words that list the tag
        #   unambiguous   - master words whose only tag is this one
        #   table_entries - lines read from the tag's own .tsv file
        master_count = [0] * len(self.tags)
        unambiguous = [0] * len(self.tags)
        table_entries = [0] * len(self.tags)

        # path.join (unlike "+ '/'") stays relative when data_dir is empty
        # instead of silently pointing at the filesystem root.
        master_file = path.join(data_dir, "WORDmaster.txt")
        with open(master_file, encoding=encoding) as infile:
            for line in infile:
                # Strip only the newline: slicing off the last character
                # would truncate a final line that lacks a trailing newline.
                line = line.rstrip("\n")
                if not line:
                    continue  # tolerate blank lines
                fields = line.split(",")
                word_tags = fields[1].split(" ")
                self.master[fields[0]] = word_tags
                self.words += 1
                if len(word_tags) == 1:
                    unambiguous[self.tags.index(word_tags[0])] += 1
                for tag in word_tags:
                    master_count[self.tags.index(tag)] += 1

        self.t = []
        for i, tag in enumerate(self.tags):
            table = {}
            tag_file = path.join(data_dir, tag + ".tsv")
            with open(tag_file, encoding=encoding) as infile:
                for line in infile:
                    line = line.rstrip("\n")
                    if not line:
                        continue  # tolerate blank lines
                    fields = line.split("\t")
                    table.setdefault(fields[0], []).append(
                        [fields[1], fields[2]])
                    self.entries += 1
                    table_entries[i] += 1
            self.t.append(table)

        # Summary, formatted as rows of a LaTeX table. The header row has
        # four cells while the data rows have five (the last one being the
        # per-tag .tsv line count); kept as-is so the output is unchanged.
        print("UDlexPT read with", self.words, "distinct words and",
              self.entries, "entries")
        print("{:5} & {:6} & {:6} & {:6} \\\\ \\hline".format(
            "tag", "total", "amb", "non-amb"))
        row = "{:5} & {:6} & {:6} & {:6} & {:6} \\\\ \\hline"
        for i, tag in enumerate(self.tags):
            print(row.format(tag,
                             master_count[i],
                             master_count[i] - unambiguous[i],
                             unambiguous[i],
                             table_entries[i]))
        total_unambiguous = sum(unambiguous)
        print(row.format("total", self.words,
                         self.words - total_unambiguous,
                         total_unambiguous, sum(table_entries)))

    def sget(self, word):
        """Return every entry of *word* across all tags the master lists.

        Each result item is ``[column2, tag, column3]``. Words unknown to
        the master list give ``[]``. A tag listed in the master file whose
        table has no entry for the word contributes nothing (it used to
        raise ``TypeError``).
        """
        ans = []
        for tag in self.master.get(word, []):
            for entry in self.t[self.tags.index(tag)].get(word, []):
                ans.append([entry[0], tag, entry[1]])
        return ans

    def exists(self, word):
        """Return True if *word* appears in the master list."""
        return word in self.master

    def pget(self, word, tag):
        """Return the entries of *word* for one *tag* as ``[column2, tag, column3]``.

        Returns ``[]`` when the tag's table has no such word.

        Raises:
            ValueError: if *tag* is not one of ``self.tags``.
        """
        entries = self.t[self.tags.index(tag)].get(word, [])
        return [[entry[0], tag, entry[1]] for entry in entries]

    def pexists(self, word, tag):
        """Return True if the table of *tag* has an entry for *word*.

        Raises:
            ValueError: if *tag* is not one of ``self.tags``.
        """
        return word in self.t[self.tags.index(tag)]

    def theTags(self, word):
        """Return the master-list tags of *word*, or ``[]`` if unknown.

        For a known word this is the list stored in ``self.master`` itself,
        not a copy.
        """
        return self.master.get(word, [])
|
|
|
|