Spaces:
eaglesarezzo
/
Running on Zero

eaglesarezzo committed on
Commit
e6fab7f
1 Parent(s): ca44406

Update rag_backend.py

Browse files
Files changed (1) hide show
  1. rag_backend.py +17 -2
rag_backend.py CHANGED
@@ -27,8 +27,23 @@ class Backend:
27
 
28
 
29
  def create_index_for_query_engine(self, matched_path):
30
-
31
- documents = SimpleDirectoryReader(input_dir=matched_path).load_data()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  storage_context = StorageContext.from_defaults()
33
  nodes = SentenceSplitter(chunk_size=256, chunk_overlap=64, paragraph_separator="\n\n").get_nodes_from_documents(documents)
34
  index = VectorStoreIndex(nodes, embed_model=self.embed_model)
 
27
 
28
 
29
  def create_index_for_query_engine(self, matched_path):
30
+ print(f"Attempting to read files from: {matched_path}")
31
+
32
+ documents = []
33
+ for root, dirs, files in os.walk(matched_path):
34
+ for file in files:
35
+ file_path = os.path.join(root, file)
36
+ try:
37
+ with open(file_path, 'r', encoding='utf-8') as f:
38
+ content = f.read()
39
+ doc = Document(text=content, metadata={"source": file_path})
40
+ documents.append(doc)
41
+ print(f"Successfully read file: {file_path}")
42
+ except Exception as e:
43
+ print(f"Error reading file {file_path}: {str(e)}")
44
+
45
+ print(f"Number of documents loaded: {len(documents)}")
46
+
47
  storage_context = StorageContext.from_defaults()
48
  nodes = SentenceSplitter(chunk_size=256, chunk_overlap=64, paragraph_separator="\n\n").get_nodes_from_documents(documents)
49
  index = VectorStoreIndex(nodes, embed_model=self.embed_model)