auto_anno/utils/format/txt_2_list.py
maxmon
feat: 优化NER和分类的格式化结果
c6ad667
raw
history blame
332 Bytes
import re
def txt_2_list(txt):
    """Split a free-form text into a list of cleaned, non-empty items.

    The text is split on the separator characters in ``split_token``; every
    character in ``rm_token`` (quotes, sentence punctuation, brackets) is then
    stripped from each fragment. Fragments that are empty, or that become
    empty once those characters are removed, are dropped.

    Args:
        txt: The text to split. ``None`` or an empty string yields ``[]``.

    Returns:
        A list of the remaining fragments, ordered from longest to shortest.
        Fragments of equal length keep their original relative order.
    """
    if not txt:
        return []

    # Characters that separate one item from the next.
    split_token = r'[ ,、,;;《》<>]'
    # Characters removed from inside each item.
    rm_token = r'["\'”“‘’。.!!?? 【】\[\]]'

    cleaned = (re.sub(rm_token, '', item) for item in re.split(split_token, txt))
    # Filter AFTER cleaning: a fragment consisting only of removable
    # characters would otherwise survive as an empty string.
    arr = [item for item in cleaned if item]

    # Sort from longest to shortest (list.sort is stable).
    arr.sort(key=len, reverse=True)
    return arr