Cran-May committed on
Commit
d9df139
1 Parent(s): 39ad10c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +213 -0
app.py ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# UI framework and stdlib utilities.
# NOTE(review): copy, random, requests, time and sys are imported but unused
# in the visible code — kept in case other tooling relies on them.
import gradio as gr

import copy
import random
import os
import requests
import time
import sys

# NOTE(review): installing packages at runtime via os.system is fragile; this
# pattern is typical of hosted demo environments (e.g. HF Spaces) where
# llama-cpp-python must be (re)built with specific CPU SIMD flags. Prefer a
# requirements.txt / build step where possible. The flags assume an
# AVX-512-capable host — confirm against the deployment hardware.
os.system("pip install --upgrade pip")
os.system('''CMAKE_ARGS="-DLLAMA_AVX512=ON -DLLAMA_AVX512_VBMI=ON -DLLAMA_AVX512_VNNI=ON -DLLAMA_FP16_VA=ON -DLLAMA_WASM_SIMD=ON" pip install llama-cpp-python''')

# Imported only after the runtime install above so the freshly built wheel is used.
from huggingface_hub import snapshot_download
from llama_cpp import Llama
15
+
16
+
17
# Fixed system message prepended to every conversation (see get_system_tokens()).
SYSTEM_PROMPT = '''You are a helpful, respectful and honest INTP-T AI Assistant named "Cecilia" in English or "塞西莉亚" in Chinese.
You are good at speaking English and Chinese.
You are talking to a human User. If the question is meaningless, please explain the reason and don't share false information.
You are based on Cecilia model, trained by "SSFW NLPark" team, not related to GPT, LLaMA, Meta, Mistral or OpenAI.
Let's work this out in a step by step way to be sure we have the right answer.\n\n'''
# Special token ids used to frame chat turns in the prompt.
# NOTE(review): these ids are vocabulary-specific — confirm they match the
# tokenizer of the GGUF model loaded below; a mismatch silently corrupts the
# prompt format.
SYSTEM_TOKEN = 1587
USER_TOKEN = 8192
BOT_TOKEN = 12435
LINEBREAK_TOKEN = 13  # presumably the "\n" token id — TODO confirm


# Maps a chat role name to its framing token id (consumed by get_message_tokens()).
ROLE_TOKENS = {
    "user": USER_TOKEN,
    "bot": BOT_TOKEN,
    "system": SYSTEM_TOKEN
}
33
+
34
+
35
def get_message_tokens(model, role, content):
    """Tokenize one chat message and frame it with role / linebreak / EOS tokens.

    Returns the token list laid out as:
        [tokens[0], ROLE_TOKENS[role], LINEBREAK_TOKEN, <content tokens...>, EOS]
    (the tokenizer's own leading token stays at index 0).
    """
    tokens = model.tokenize(content.encode("utf-8"))
    # Splice the role marker and a linebreak in right after the leading token.
    tokens[1:1] = [ROLE_TOKENS[role], LINEBREAK_TOKEN]
    tokens.append(model.token_eos())
    return tokens
41
+
42
+
43
def get_system_tokens(model):
    """Return the framed token sequence for the fixed SYSTEM_PROMPT message."""
    return get_message_tokens(model, role="system", content=SYSTEM_PROMPT)
46
+
47
+
48
# GGUF checkpoint to serve: repo on the Hugging Face Hub and the single
# quantized file to fetch from it.
repo_name = "LoneStriker/openbuddy-mistral-10b-v17.1-32k-GGUF"
model_name = "openbuddy-mistral-10b-v17.1-32k-Q5_K_M.gguf"

# Download only the one matching file into the working directory.
snapshot_download(repo_id=repo_name, local_dir=".", allow_patterns=model_name)

# Load the model once at module import; all request handlers share this instance.
model = Llama(
    model_path=model_name,
    n_ctx=2000,
    n_parts=1,
)

# Per-reply generation budget.
# NOTE(review): 4096 exceeds n_ctx=2000 above, so the context window — not
# this constant — is the effective cap in practice; verify intended limits.
max_new_tokens = 4096
60
+
61
def user(message, history):
    """Append the user's message (with no answer yet) to the chat history.

    Returns ("", updated_history): the empty string clears the input textbox.
    """
    return "", history + [[message, None]]
64
+
65
+
66
def bot(
    history,
    system_prompt,
    top_p,
    top_k,
    temp
):
    """Stream the model's reply for the newest user turn in `history`.

    Yields the full chat history after each generated token so the Gradio
    Chatbot can render the answer incrementally.

    NOTE(review): `system_prompt` is received from the UI textbox but never
    used — get_system_tokens() always encodes the module-level SYSTEM_PROMPT.
    """
    # Start every prompt with the (fixed) system message; copy so the cached
    # list from get_system_tokens() is never mutated.
    tokens = get_system_tokens(model)[:]
    tokens.append(LINEBREAK_TOKEN)

    # Re-encode all completed turns (every pair except the newest one).
    for user_message, bot_message in history[:-1]:
        message_tokens = get_message_tokens(model=model, role="user", content=user_message)
        tokens.extend(message_tokens)
        if bot_message:
            message_tokens = get_message_tokens(model=model, role="bot", content=bot_message)
            tokens.extend(message_tokens)

    # The newest user message has no answer yet — encode just its text.
    last_user_message = history[-1][0]
    message_tokens = get_message_tokens(model=model, role="user", content=last_user_message)
    tokens.extend(message_tokens)

    # Open the assistant's turn: BOS + bot-role token + linebreak.
    role_tokens = [model.token_bos(), BOT_TOKEN, LINEBREAK_TOKEN]
    tokens.extend(role_tokens)
    generator = model.generate(
        tokens,
        top_k=top_k,
        top_p=top_p,
        temp=temp
    )

    # Stream tokens until EOS or the max_new_tokens budget is exhausted,
    # updating the last history entry in place on every step.
    partial_text = ""
    for i, token in enumerate(generator):
        if token == model.token_eos() or (max_new_tokens is not None and i >= max_new_tokens):
            break
        partial_text += model.detokenize([token]).decode("utf-8", "ignore")
        history[-1][1] = partial_text
        yield history
103
+
104
+
105
# ---------------------------------------------------------------------------
# Gradio UI: chat transcript, sampling controls, and event wiring.
# NOTE(review): the `.style(...)` component method and
# `queue(concurrency_count=...)` belong to the Gradio 3.x API and were removed
# in 4.x — confirm the pinned Gradio version before upgrading.
# ---------------------------------------------------------------------------
with gr.Blocks(
    theme=gr.themes.Soft()
) as demo:
    gr.Markdown(f"""<h1><center>JWorld-Cecilia-人工智能助理</center></h1>""")
    gr.Markdown(value="""这儿是一个中文模型的部署。
在多种类型的语料库上进行训练。
本节目由上海师范大学附属外国语中学 & JWorld NLPark 赞助播出""")

    with gr.Row():
        with gr.Column(scale=5):
            # Main chat transcript.
            chatbot = gr.Chatbot(label="以真理之名").style(height=400)
            with gr.Row():
                with gr.Column():
                    # Message input; cleared by user() on submit.
                    msg = gr.Textbox(
                        label="来问问 Cecilia 吧……",
                        placeholder="Cecilia, 抵达战场……",
                        show_label=True,
                    ).style(container=True)
                    submit = gr.Button("Submit / 开凹!")
                    stop = gr.Button("Stop / 全局时空断裂")
                    clear = gr.Button("Clear / 打扫群内垃圾")
        # Sampling controls, collapsed by default.
        with gr.Accordion(label='进阶设置/Advanced options', open=False):
            with gr.Column(min_width=80, scale=1):
                with gr.Tab(label="设置参数"):
                    top_p = gr.Slider(
                        minimum=0.0,
                        maximum=1.0,
                        value=0.9,
                        step=0.05,
                        interactive=True,
                        label="Top-p",
                    )
                    top_k = gr.Slider(
                        minimum=10,
                        maximum=100,
                        value=30,
                        step=5,
                        interactive=True,
                        label="Top-k",
                    )
                    temp = gr.Slider(
                        minimum=0.0,
                        maximum=2.0,
                        value=0.5,
                        step=0.01,
                        interactive=True,
                        label="情感温度 / Temperature"
                    )
                with gr.Column():
                    # NOTE(review): edits here are passed to bot() but ignored
                    # there — bot() always uses the module-level SYSTEM_PROMPT.
                    system_prompt = gr.Textbox(label="系统提示词", placeholder="", value=SYSTEM_PROMPT, interactive=True)

    with gr.Row():
        gr.Markdown(
            """警告：该模型可能会生成事实上或道德上不正确的文本。NLPark和 Cecilia 对此不承担任何责任。"""
        )


    # Pressing Enter: append user turn (unqueued), then stream bot() (queued).
    submit_event = msg.submit(
        fn=user,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=False,
    ).success(
        fn=bot,
        inputs=[
            chatbot,
            system_prompt,
            top_p,
            top_k,
            temp
        ],
        outputs=chatbot,
        queue=True,
    )

    # Pressing the button: identical chain to the Enter-key path above.
    submit_click_event = submit.click(
        fn=user,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=False,
    ).success(
        fn=bot,
        inputs=[
            chatbot,
            system_prompt,
            top_p,
            top_k,
            temp
        ],
        outputs=chatbot,
        queue=True,
    )

    # Stop generation: cancels any in-flight bot() stream from either path.
    stop.click(
        fn=None,
        inputs=None,
        outputs=None,
        cancels=[submit_event, submit_click_event],
        queue=False,
    )

    # Clear history: reset the Chatbot component to empty.
    clear.click(lambda: None, None, chatbot, queue=False)

# Single generation worker; the queue holds at most 128 waiting requests.
demo.queue(max_size=128, concurrency_count=1)
demo.launch()