IlyaGusev committed on
Commit 3055013
1 Parent(s): bae456b

Update app.py

Files changed (1)
  1. app.py +47 -54
app.py CHANGED
@@ -26,16 +26,6 @@ from llama_cpp import Llama
 
 
 SYSTEM_PROMPT = "Ты — Сайга, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им."
-SYSTEM_TOKEN = 1788
-USER_TOKEN = 1404
-BOT_TOKEN = 9225
-LINEBREAK_TOKEN = 13
-
-ROLE_TOKENS = {
-    "user": USER_TOKEN,
-    "bot": BOT_TOKEN,
-    "system": SYSTEM_TOKEN
-}
 
 LOADER_MAPPING = {
     ".csv": (CSVLoader, {}),
@@ -52,37 +42,42 @@ LOADER_MAPPING = {
     ".txt": (TextLoader, {"encoding": "utf8"}),
 }
 
-directory = "."
-model_url = "https://huggingface.co/IlyaGusev/saiga2_13b_gguf/resolve/main/model-q4_K.gguf"
-repo_name = "IlyaGusev/saiga2_13b_gguf"
-model_name = "model-q4_K.gguf"
-final_model_path = os.path.join(directory, model_name)
-embedder_name = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
-
-print("Downloading all files...")
-rm_files = [os.path.join(directory, f) for f in os.listdir(directory)]
-for f in rm_files:
-    if os.path.isfile(f):
-        os.remove(f)
-    else:
-        shutil.rmtree(f)
-
-if not os.path.exists(final_model_path):
-    with open(final_model_path, "wb") as f:
-        http_get(model_url, f)
-    os.chmod(final_model_path, 0o777)
-print("Files downloaded!")
-
-model = Llama(
-    model_path=final_model_path,
-    n_ctx=2000,
-    n_parts=1,
-)
-
-print("Model loaded!")
-
-max_new_tokens = 1500
-embeddings = HuggingFaceEmbeddings(model_name=embedder_name)
+
+def load_model(
+    directory: str = ".",
+    model_name: str = "model-q4_K.gguf",
+    model_url: str = "https://huggingface.co/IlyaGusev/saiga2_13b_gguf/resolve/main/model-q4_K.gguf"
+):
+    final_model_path = os.path.join(directory, model_name)
+
+    print("Downloading all files...")
+    rm_files = [os.path.join(directory, f) for f in os.listdir(directory)]
+    for f in rm_files:
+        if os.path.isfile(f):
+            os.remove(f)
+        else:
+            shutil.rmtree(f)
+
+    if not os.path.exists(final_model_path):
+        with open(final_model_path, "wb") as f:
+            http_get(model_url, f)
+        os.chmod(final_model_path, 0o777)
+    print("Files downloaded!")
+
+    model = Llama(
+        model_path=final_model_path,
+        n_ctx=2000,
+        n_parts=1,
+    )
+
+    print("Model loaded!")
+    return model
+
+MAX_NEW_TOKENS = 1500
+EMBEDDER_NAME = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
+EMBEDDER = HuggingFaceEmbeddings(model_name=EMBEDDER_NAME)
+MODEL = load_model()
+
 
 def get_uuid():
     return str(uuid4())
@@ -97,11 +92,9 @@ def load_single_document(file_path: str) -> Document:
 
 
 def get_message_tokens(model, role, content):
-    message_tokens = model.tokenize(content.encode("utf-8"))
-    message_tokens.insert(1, ROLE_TOKENS[role])
-    message_tokens.insert(2, LINEBREAK_TOKEN)
-    message_tokens.append(model.token_eos())
-    return message_tokens
+    content = f"{role}\n{content}\n</s>"
+    content = content.encode("utf-8")
+    return model.tokenize(content, special=True)
 
 
 def get_system_tokens(model):
@@ -136,7 +129,7 @@ def build_index(file_paths, db, chunk_size, chunk_overlap, file_warning):
 
     db = Chroma.from_documents(
         fixed_documents,
-        embeddings,
+        EMBEDDER,
         client_settings=Settings(
             anonymized_telemetry=False
         )
@@ -151,7 +144,7 @@ def user(message, history, system_prompt):
 
 
 def retrieve(history, db, retrieved_docs, k_documents):
-    context = ""
+    retrieved_docs = ""
     if db:
         last_user_message = history[-1][0]
         retriever = db.as_retriever(search_kwargs={"k": k_documents})
@@ -172,25 +165,25 @@ def bot(
     if not history:
         return
 
-    tokens = get_system_tokens(model)[:]
+    tokens = get_system_tokens(MODEL)[:]
     tokens.append(LINEBREAK_TOKEN)
 
     for user_message, bot_message in history[:-1]:
-        message_tokens = get_message_tokens(model=model, role="user", content=user_message)
+        message_tokens = get_message_tokens(model=MODEL, role="user", content=user_message)
         tokens.extend(message_tokens)
         if bot_message:
-            message_tokens = get_message_tokens(model=model, role="bot", content=bot_message)
+            message_tokens = get_message_tokens(model=MODEL, role="bot", content=bot_message)
             tokens.extend(message_tokens)
 
     last_user_message = history[-1][0]
     if retrieved_docs:
         last_user_message = f"Контекст: {retrieved_docs}\n\nИспользуя контекст, ответь на вопрос: {last_user_message}"
-    message_tokens = get_message_tokens(model=model, role="user", content=last_user_message)
+    message_tokens = get_message_tokens(model=MODEL, role="user", content=last_user_message)
     tokens.extend(message_tokens)
 
-    role_tokens = [model.token_bos(), BOT_TOKEN, LINEBREAK_TOKEN]
+    role_tokens = [MODEL.token_bos(), BOT_TOKEN, LINEBREAK_TOKEN]
    tokens.extend(role_tokens)
-    generator = model.generate(
+    generator = MODEL.generate(
         tokens,
         top_k=top_k,
         top_p=top_p,
@@ -199,9 +192,9 @@
 
     partial_text = ""
     for i, token in enumerate(generator):
-        if token == model.token_eos() or (max_new_tokens is not None and i >= max_new_tokens):
+        if token == MODEL.token_eos() or (MAX_NEW_TOKENS is not None and i >= MAX_NEW_TOKENS):
            break
-        partial_text += model.detokenize([token]).decode("utf-8", "ignore")
+        partial_text += MODEL.detokenize([token]).decode("utf-8", "ignore")
         history[-1][1] = partial_text
         yield history
 
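Note on the prompt change: the removed SYSTEM_TOKEN/USER_TOKEN/BOT_TOKEN constants hard-coded tokenizer ids, while the new get_message_tokens formats each turn as plain text and lets llama-cpp-python tokenize it (special=True resolves "</s>" to the real EOS id, and tokenize() prepends BOS by default). A minimal sketch of how a full Saiga-style prompt could be assembled from this helper; build_prompt_tokens and the history format are illustrative, not part of the commit:

from llama_cpp import Llama

def get_message_tokens(model: Llama, role: str, content: str) -> list:
    # Each turn becomes "<s>{role}\n{content}\n</s>" once BOS is added.
    text = f"{role}\n{content}\n</s>"
    return model.tokenize(text.encode("utf-8"), special=True)

def build_prompt_tokens(model: Llama, system_prompt: str, history: list) -> list:
    # System turn first, then alternating user/bot turns from
    # history = [(user_message, bot_message), ...], then an open "bot" turn
    # so generation continues from the assistant's side.
    tokens = get_message_tokens(model, "system", system_prompt)
    for user_message, bot_message in history:
        tokens += get_message_tokens(model, "user", user_message)
        if bot_message:
            tokens += get_message_tokens(model, "bot", bot_message)
    tokens += model.tokenize("bot\n".encode("utf-8"), special=True)
    return tokens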
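The generation loop now reads the module-level MODEL and MAX_NEW_TOKENS. A standalone sketch of the same streaming pattern; the sampling defaults below are placeholders (the app takes them from its Gradio sliders):

def stream_completion(model, prompt_tokens, max_new_tokens=1500,
                      top_k=30, top_p=0.9, temp=0.2, repeat_penalty=1.1):
    # Decode token by token so partial text can be streamed to the UI,
    # stopping on EOS or after max_new_tokens tokens, as bot() does.
    generator = model.generate(
        prompt_tokens,
        top_k=top_k,
        top_p=top_p,
        temp=temp,
        repeat_penalty=repeat_penalty,
    )
    partial_text = ""
    for i, token in enumerate(generator):
        if token == model.token_eos() or i >= max_new_tokens:
            break
        partial_text += model.detokenize([token]).decode("utf-8", "ignore")
        yield partial_text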
 
 
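The retrieval path is only partly visible in this diff (the renamed first line of retrieve() and the Chroma.from_documents call that now uses EMBEDDER). A rough sketch of that pattern under the older LangChain/chromadb API the file appears to use; the k value and the way documents are joined are assumptions, not taken from the diff:

from chromadb.config import Settings
from langchain.vectorstores import Chroma

def build_db(fixed_documents, embedder):
    # Index the prepared chunks with the multilingual sentence embedder.
    return Chroma.from_documents(
        fixed_documents,
        embedder,
        client_settings=Settings(anonymized_telemetry=False),
    )

def retrieve_context(db, last_user_message, k_documents=4):
    # Fetch the top-k chunks for the last user message and join them into the
    # retrieved_docs string that bot() prepends as "Контекст: ...".
    retriever = db.as_retriever(search_kwargs={"k": k_documents})
    docs = retriever.get_relevant_documents(last_user_message)
    return "\n\n".join(doc.page_content for doc in docs)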