maxmon
committed on
Commit
•
fe413ad
1
Parent(s):
9078d0a
chore: v0.2
Browse files
app.py
CHANGED
@@ -18,12 +18,22 @@ def auto_anno(txt, types_txt, radio, need_trans=False):
|
|
18 |
result = f'{txt}\n{result}'
|
19 |
return result
|
20 |
|
21 |
-
input1 = gr.Textbox(lines=3, label="输入原句")
|
22 |
-
input2 = gr.Textbox(lines=3, label="输入类别")
|
23 |
output = gr.Textbox(label="输出结果")
|
24 |
-
radio = gr.Radio(["文本分类", "实体抽取"], label="算法类型")
|
25 |
checkbox = gr.Checkbox(label="翻译成中文")
|
26 |
|
27 |
if __name__ == '__main__':
|
28 |
-
demo = gr.Interface(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
demo.launch(share=False)
|
|
|
18 |
result = f'{txt}\n{result}'
|
19 |
return result
|
20 |
|
21 |
+
input1 = gr.Textbox(lines=3, label="输入原句", value="Hello world!")
|
22 |
+
input2 = gr.Textbox(lines=3, label="输入类别", value="友好、不友好")
|
23 |
output = gr.Textbox(label="输出结果")
|
24 |
+
radio = gr.Radio(["文本分类", "实体抽取"], label="算法类型", value="文本分类")
|
25 |
checkbox = gr.Checkbox(label="翻译成中文")
|
26 |
|
27 |
if __name__ == '__main__':
|
28 |
+
demo = gr.Interface(
|
29 |
+
fn=auto_anno,
|
30 |
+
description='自动标注,使用了openai免费接口,1分钟内只能请求3次,如遇报错请稍后再试,或clone项目到本地后用自己的key替换。如有疑问欢迎联系微信 maqijun123456',
|
31 |
+
inputs=[input1, input2, radio, checkbox],
|
32 |
+
examples=[
|
33 |
+
['前四个月我国外贸进出口同比增长 5.8%', '政治;经济;科技;文化;娱乐;民生;军事;教育;环保;其它', '文本分类', False],
|
34 |
+
['There is a cat trapped on the Avenue of Happiness', '地点', '实体抽取', True],
|
35 |
+
['联系方式:18812345678,联系地址:幸福大街20号', '手机号、地址', '实体抽取', False],
|
36 |
+
],
|
37 |
+
outputs=[output]
|
38 |
+
)
|
39 |
demo.launch(share=False)
|
utils/anno/cls/__pycache__/text_classification.cpython-310.pyc
CHANGED
Binary files a/utils/anno/cls/__pycache__/text_classification.cpython-310.pyc and b/utils/anno/cls/__pycache__/text_classification.cpython-310.pyc differ
|
|
utils/anno/cls/text_classification.py
CHANGED
@@ -8,8 +8,9 @@ from utils.format.txt_2_list import txt_2_list
|
|
8 |
# Set up your API key
|
9 |
openai.api_key = openai_key
|
10 |
|
11 |
-
def text_classification(src_txt, type_arr):
|
12 |
-
|
|
|
13 |
# Call the OpenAI API
|
14 |
completion = openai.ChatCompletion.create(
|
15 |
model="gpt-3.5-turbo",
|
@@ -35,11 +36,14 @@ if __name__ == '__main__':
|
|
35 |
type_arr_txt = "天气查询、股票查询、其他"
|
36 |
type_arr = txt_2_list(type_arr_txt)
|
37 |
txts = [
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
'今天天气怎么样',
|
|
|
|
|
|
|
42 |
]
|
43 |
for txt in txts:
|
44 |
-
result = text_classification(txt, type_arr)
|
45 |
print(txt, result)
|
|
|
8 |
# Set up your API key
|
9 |
openai.api_key = openai_key
|
10 |
|
11 |
+
def text_classification(src_txt, type_arr, history=[]):
|
12 |
+
history_txt = ''.join([f'输入|```{q}```输出|{a}\n' for q, a in history])
|
13 |
+
user = f"你是一个聪明而且有百年经验的文本分类器. 你的任务是从一段文本里面提取出相应的分类结果签。你的回答必须用统一的格式。文本用```符号分割。分类类型保存在一个数组里{type_arr}\n{history_txt}输入|```{src_txt}```输出|"
|
14 |
# Call the OpenAI API
|
15 |
completion = openai.ChatCompletion.create(
|
16 |
model="gpt-3.5-turbo",
|
|
|
36 |
type_arr_txt = "天气查询、股票查询、其他"
|
37 |
type_arr = txt_2_list(type_arr_txt)
|
38 |
txts = [
|
39 |
+
'这个商品真不错',
|
40 |
+
'用着不行',
|
41 |
+
'没用过这么好的东西',
|
42 |
+
# '今天天气怎么样',
|
43 |
+
]
|
44 |
+
history = [
|
45 |
+
['这个商品真不错', ['其他']],
|
46 |
]
|
47 |
for txt in txts:
|
48 |
+
result = text_classification(txt, type_arr, history)
|
49 |
print(txt, result)
|
utils/anno/ner/__pycache__/entity_extract.cpython-310.pyc
CHANGED
Binary files a/utils/anno/ner/__pycache__/entity_extract.cpython-310.pyc and b/utils/anno/ner/__pycache__/entity_extract.cpython-310.pyc differ
|
|
utils/anno/ner/entity_extract.py
CHANGED
@@ -49,8 +49,9 @@ def extract_named_entities(src_txt, type_arr):
|
|
49 |
if ready_keys.__contains__(ready_key):
|
50 |
continue
|
51 |
item['start'] = i
|
52 |
-
item['end'] = i + len(item['name'])
|
53 |
break
|
|
|
|
|
54 |
# 将在实体类型里的放入结果
|
55 |
result.append(item)
|
56 |
ready_key = get_ready_key(item['name'], item['type'], item['start'])
|
|
|
49 |
if ready_keys.__contains__(ready_key):
|
50 |
continue
|
51 |
item['start'] = i
|
|
|
52 |
break
|
53 |
+
# 确保实体结尾坐标正确
|
54 |
+
item['end'] = item['start'] + len(item['name'])
|
55 |
# 将在实体类型里的放入结果
|
56 |
result.append(item)
|
57 |
ready_key = get_ready_key(item['name'], item['type'], item['start'])
|