Spaces:

maxmon
/

auto_anno

Runtime error

maxmon committed on May 10, 2023

Commit

9078d0a

•

1 Parent(s): c0116dd

refactor: entity_extract.py

Files changed (2) hide show

utils/anno/cls/text_classification.py CHANGED Viewed

@@ -31,12 +31,14 @@ def text_classification(src_txt, type_arr):
 if __name__ == '__main__':
     # type_arr = ['好评', '差评']
-    type_arr_txt = "是差评、不是差评"
     type_arr = txt_2_list(type_arr_txt)
     txts = [
-        '这个商品真不错',
-        '用着不行',
-        '没用过这么好的东西'
     ]
     for txt in txts:
         result = text_classification(txt, type_arr)

 if __name__ == '__main__':
     # type_arr = ['好评', '差评']
+    # type_arr_txt = "是差评、不是差评"
+    type_arr_txt = "天气查询、股票查询、其他"
     type_arr = txt_2_list(type_arr_txt)
     txts = [
+        # '这个商品真不错',
+        # '用着不行',
+        # '没用过这么好的东西',
+        '今天天气怎么样',
     ]
     for txt in txts:
         result = text_classification(txt, type_arr)

utils/anno/ner/entity_extract.py CHANGED Viewed

@@ -36,6 +36,9 @@ def extract_named_entities(src_txt, type_arr):
     for item in j:
         s = item['start']
         e = item['end']
         # 修正标注错误的实体坐标
         if src_txt[s:e] != item['name']:
             for i in range(len(src_txt)):

     for item in j:
         s = item['start']
         e = item['end']
+        # 过滤非目标实体类型
+        if not type_arr.__contains__(item['type']):
+            continue
         # 修正标注错误的实体坐标
         if src_txt[s:e] != item['name']:
             for i in range(len(src_txt)):