maxmon
committed on
Commit
•
9078d0a
1
Parent(s):
c0116dd
refactor: entity_extract.py
Browse files
utils/anno/cls/text_classification.py
CHANGED
@@ -31,12 +31,14 @@ def text_classification(src_txt, type_arr):
|
|
31 |
|
32 |
if __name__ == '__main__':
|
33 |
# type_arr = ['好评', '差评']
|
34 |
-
type_arr_txt = "是差评、不是差评"
|
|
|
35 |
type_arr = txt_2_list(type_arr_txt)
|
36 |
txts = [
|
37 |
-
'这个商品真不错',
|
38 |
-
'用着不行',
|
39 |
-
'没用过这么好的东西'
|
|
|
40 |
]
|
41 |
for txt in txts:
|
42 |
result = text_classification(txt, type_arr)
|
|
|
31 |
|
32 |
if __name__ == '__main__':
|
33 |
# type_arr = ['好评', '差评']
|
34 |
+
# type_arr_txt = "是差评、不是差评"
|
35 |
+
type_arr_txt = "天气查询、股票查询、其他"
|
36 |
type_arr = txt_2_list(type_arr_txt)
|
37 |
txts = [
|
38 |
+
# '这个商品真不错',
|
39 |
+
# '用着不行',
|
40 |
+
# '没用过这么好的东西',
|
41 |
+
'今天天气怎么样',
|
42 |
]
|
43 |
for txt in txts:
|
44 |
result = text_classification(txt, type_arr)
|
utils/anno/ner/entity_extract.py
CHANGED
@@ -36,6 +36,9 @@ def extract_named_entities(src_txt, type_arr):
|
|
36 |
for item in j:
|
37 |
s = item['start']
|
38 |
e = item['end']
|
|
|
|
|
|
|
39 |
# 修正标注错误的实体坐标
|
40 |
if src_txt[s:e] != item['name']:
|
41 |
for i in range(len(src_txt)):
|
|
|
36 |
for item in j:
|
37 |
s = item['start']
|
38 |
e = item['end']
|
39 |
+
# 过滤非目标实体类型
|
40 |
+
if not type_arr.__contains__(item['type']):
|
41 |
+
continue
|
42 |
# 修正标注错误的实体坐标
|
43 |
if src_txt[s:e] != item['name']:
|
44 |
for i in range(len(src_txt)):
|