Odi

Running on Zero

eaglesarezzo committed on Sep 26

Commit

e6fab7f

•

1 Parent(s): ca44406

Update rag_backend.py

Files changed (1) hide show

rag_backend.py CHANGED Viewed

@@ -27,8 +27,23 @@ class Backend:
     def create_index_for_query_engine(self, matched_path):
-        documents = SimpleDirectoryReader(input_dir=matched_path).load_data()
         storage_context = StorageContext.from_defaults()
         nodes = SentenceSplitter(chunk_size=256, chunk_overlap=64, paragraph_separator="\n\n").get_nodes_from_documents(documents)
         index = VectorStoreIndex(nodes, embed_model=self.embed_model)

     def create_index_for_query_engine(self, matched_path):
+        print(f"Attempting to read files from: {matched_path}")
+        documents = []
+        for root, dirs, files in os.walk(matched_path):
+            for file in files:
+                file_path = os.path.join(root, file)
+                try:
+                    with open(file_path, 'r', encoding='utf-8') as f:
+                        content = f.read()
+                        doc = Document(text=content, metadata={"source": file_path})
+                        documents.append(doc)
+                    print(f"Successfully read file: {file_path}")
+                except Exception as e:
+                    print(f"Error reading file {file_path}: {str(e)}")
+        print(f"Number of documents loaded: {len(documents)}")
         storage_context = StorageContext.from_defaults()
         nodes = SentenceSplitter(chunk_size=256, chunk_overlap=64, paragraph_separator="\n\n").get_nodes_from_documents(documents)
         index = VectorStoreIndex(nodes, embed_model=self.embed_model)