Spaces:

IlyaGusev
/

saiga_13b_llamacpp_retrieval_qa

Paused

App Files Community

IlyaGusev committed on Jan 14

Commit

eaf0bb2

•

1 Parent(s): e55c02f

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -11

app.py CHANGED Viewed

@@ -95,11 +95,6 @@ def get_system_tokens(model):
     return get_message_tokens(model, **system_message)
-def upload_files(files, file_paths):
-    file_paths = [f.name for f in files]
-    return file_paths
 def process_text(text):
     lines = text.split("\n")
     lines = [line for line in lines if len(line.strip()) > 2]
@@ -109,17 +104,24 @@ def process_text(text):
     return text
 def build_index(file_paths, db, chunk_size, chunk_overlap, file_warning):
     documents = [load_single_document(path) for path in file_paths]
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
     documents = text_splitter.split_documents(documents)
     fixed_documents = []
     for doc in documents:
         doc.page_content = process_text(doc.page_content)
         if not doc.page_content:
             continue
         fixed_documents.append(doc)
     db = Chroma.from_documents(
         fixed_documents,
         EMBEDDER,
@@ -127,15 +129,11 @@ def build_index(file_paths, db, chunk_size, chunk_overlap, file_warning):
             anonymized_telemetry=False
         )
     )
     file_warning = f"Загружено {len(fixed_documents)} фрагментов! Можно задавать вопросы."
     return db, file_warning
-def user(message, history, system_prompt):
-    new_history = history + [[message, None]]
-    return "", new_history
 def retrieve(history, db, retrieved_docs, k_documents):
     retrieved_docs = ""
     if db:
@@ -145,6 +143,11 @@ def retrieve(history, db, retrieved_docs, k_documents):
         retrieved_docs = "\n\n".join([doc.page_content for doc in docs])
     return retrieved_docs
 def bot(
     history,

     return get_message_tokens(model, **system_message)
 def process_text(text):
     lines = text.split("\n")
     lines = [line for line in lines if len(line.strip()) > 2]
     return text
+def upload_files(files, file_paths):
+    file_paths = [f.name for f in files]
+    return file_paths
 def build_index(file_paths, db, chunk_size, chunk_overlap, file_warning):
     documents = [load_single_document(path) for path in file_paths]
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
     documents = text_splitter.split_documents(documents)
+    print("Documents after split:", len(documents))
     fixed_documents = []
     for doc in documents:
         doc.page_content = process_text(doc.page_content)
         if not doc.page_content:
             continue
         fixed_documents.append(doc)
+    print("Documents after processing:", len(fixed_documents))
     db = Chroma.from_documents(
         fixed_documents,
         EMBEDDER,
             anonymized_telemetry=False
         )
     )
+    print("Embeddings calculated!")
     file_warning = f"Загружено {len(fixed_documents)} фрагментов! Можно задавать вопросы."
     return db, file_warning
 def retrieve(history, db, retrieved_docs, k_documents):
     retrieved_docs = ""
     if db:
         retrieved_docs = "\n\n".join([doc.page_content for doc in docs])
     return retrieved_docs
+def user(message, history, system_prompt):
+    new_history = history + [[message, None]]
+    return "", new_history
 def bot(
     history,