maxmon committed on
Commit
9078d0a
1 Parent(s): c0116dd

refactor: entity_extract.py

Browse files
utils/anno/cls/text_classification.py CHANGED
@@ -31,12 +31,14 @@ def text_classification(src_txt, type_arr):
31
 
32
  if __name__ == '__main__':
33
  # type_arr = ['好评', '差评']
34
- type_arr_txt = "是差评、不是差评"
 
35
  type_arr = txt_2_list(type_arr_txt)
36
  txts = [
37
- '这个商品真不错',
38
- '用着不行',
39
- '没用过这么好的东西'
 
40
  ]
41
  for txt in txts:
42
  result = text_classification(txt, type_arr)
 
31
 
32
  if __name__ == '__main__':
33
  # type_arr = ['好评', '差评']
34
+ # type_arr_txt = "是差评、不是差评"
35
+ type_arr_txt = "天气查询、股票查询、其他"
36
  type_arr = txt_2_list(type_arr_txt)
37
  txts = [
38
+ # '这个商品真不错',
39
+ # '用着不行',
40
+ # '没用过这么好的东西',
41
+ '今天天气怎么样',
42
  ]
43
  for txt in txts:
44
  result = text_classification(txt, type_arr)
utils/anno/ner/entity_extract.py CHANGED
@@ -36,6 +36,9 @@ def extract_named_entities(src_txt, type_arr):
36
  for item in j:
37
  s = item['start']
38
  e = item['end']
 
 
 
39
  # 修正标注错误的实体坐标
40
  if src_txt[s:e] != item['name']:
41
  for i in range(len(src_txt)):
 
36
  for item in j:
37
  s = item['start']
38
  e = item['end']
39
+ # 过滤非目标实体类型
40
+ if not type_arr.__contains__(item['type']):
41
+ continue
42
  # 修正标注错误的实体坐标
43
  if src_txt[s:e] != item['name']:
44
  for i in range(len(src_txt)):