IlyaGusev committed on
Commit
eaf0bb2
1 Parent(s): e55c02f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -11
app.py CHANGED
@@ -95,11 +95,6 @@ def get_system_tokens(model):
95
  return get_message_tokens(model, **system_message)
96
 
97
 
98
- def upload_files(files, file_paths):
99
- file_paths = [f.name for f in files]
100
- return file_paths
101
-
102
-
103
  def process_text(text):
104
  lines = text.split("\n")
105
  lines = [line for line in lines if len(line.strip()) > 2]
@@ -109,17 +104,24 @@ def process_text(text):
109
  return text
110
 
111
 
 
 
 
 
 
112
  def build_index(file_paths, db, chunk_size, chunk_overlap, file_warning):
113
  documents = [load_single_document(path) for path in file_paths]
114
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
115
  documents = text_splitter.split_documents(documents)
 
116
  fixed_documents = []
117
  for doc in documents:
118
  doc.page_content = process_text(doc.page_content)
119
  if not doc.page_content:
120
  continue
121
  fixed_documents.append(doc)
122
-
 
123
  db = Chroma.from_documents(
124
  fixed_documents,
125
  EMBEDDER,
@@ -127,15 +129,11 @@ def build_index(file_paths, db, chunk_size, chunk_overlap, file_warning):
127
  anonymized_telemetry=False
128
  )
129
  )
 
130
  file_warning = f"Загружено {len(fixed_documents)} фрагментов! Можно задавать вопросы."
131
  return db, file_warning
132
 
133
 
134
- def user(message, history, system_prompt):
135
- new_history = history + [[message, None]]
136
- return "", new_history
137
-
138
-
139
  def retrieve(history, db, retrieved_docs, k_documents):
140
  retrieved_docs = ""
141
  if db:
@@ -145,6 +143,11 @@ def retrieve(history, db, retrieved_docs, k_documents):
145
  retrieved_docs = "\n\n".join([doc.page_content for doc in docs])
146
  return retrieved_docs
147
 
 
 
 
 
 
148
 
149
  def bot(
150
  history,
 
95
  return get_message_tokens(model, **system_message)
96
 
97
 
 
 
 
 
 
98
  def process_text(text):
99
  lines = text.split("\n")
100
  lines = [line for line in lines if len(line.strip()) > 2]
 
104
  return text
105
 
106
 
107
def upload_files(files, file_paths):
    """Return the filesystem paths of the uploaded files.

    Args:
        files: Iterable of uploaded items. Each item is either an object
            that exposes its path through a ``name`` attribute or a path
            string. ``None`` (or an empty collection) means nothing was
            uploaded.
        file_paths: Previous value of the path list. It is ignored and
            replaced; the parameter is kept so the signature callers rely
            on stays unchanged.

    Returns:
        A new list with one path per uploaded item, in the same order.
    """
    # Nothing uploaded: return an empty list instead of failing on iteration.
    if not files:
        return []
    # Path strings are passed through untouched; anything else is expected
    # to carry its path in ``.name`` (the shape the original code assumed).
    return [item if isinstance(item, str) else item.name for item in files]
110
+
111
+
112
  def build_index(file_paths, db, chunk_size, chunk_overlap, file_warning):
113
  documents = [load_single_document(path) for path in file_paths]
114
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
115
  documents = text_splitter.split_documents(documents)
116
+ print("Documents after split:", len(documents))
117
  fixed_documents = []
118
  for doc in documents:
119
  doc.page_content = process_text(doc.page_content)
120
  if not doc.page_content:
121
  continue
122
  fixed_documents.append(doc)
123
+ print("Documents after processing:", len(fixed_documents))
124
+
125
  db = Chroma.from_documents(
126
  fixed_documents,
127
  EMBEDDER,
 
129
  anonymized_telemetry=False
130
  )
131
  )
132
+ print("Embeddings calculated!")
133
  file_warning = f"Загружено {len(fixed_documents)} фрагментов! Можно задавать вопросы."
134
  return db, file_warning
135
 
136
 
 
 
 
 
 
137
  def retrieve(history, db, retrieved_docs, k_documents):
138
  retrieved_docs = ""
139
  if db:
 
143
  retrieved_docs = "\n\n".join([doc.page_content for doc in docs])
144
  return retrieved_docs
145
 
146
+
147
def user(message, history, system_prompt):
    """Record a new user message as an unanswered turn in the chat history.

    Args:
        message: Text the user just submitted.
        history: Existing list of ``[user_text, reply]`` turns. It is not
            modified; a new list is built instead.
        system_prompt: Accepted but not used by this function.

    Returns:
        A tuple ``("", updated_history)``. ``updated_history`` is ``history``
        with ``[message, None]`` appended. The empty string presumably
        clears the input box and the ``None`` slot is presumably filled in
        by a later step -- confirm against the UI wiring.
    """
    pending_turn = [message, None]
    updated_history = history + [pending_turn]
    return "", updated_history
150
+
151
 
152
  def bot(
153
  history,