Spaces:

sabazo
/

insurance_advisor_wb

Sleeping

App Files Community

sabazo committed on Jul 9

Commit

429c288

•

2 Parent(s): 6d93a64 af07445

Merge pull request #19 from almutareb/fetch_vectortosre_hfspace

Browse files

Files changed (2) hide show

rag_app/loading_data/load_S3_vector_stores.py +33 -31
rag_app/structured_tools/structured_tools.py +3 -1

rag_app/loading_data/load_S3_vector_stores.py CHANGED Viewed

@@ -32,41 +32,43 @@ embeddings = SentenceTransformerEmbeddings(model_name=model_name)
 ## FAISS
 def get_faiss_vs():
-    # Initialize an S3 client with unsigned configuration for public access
-    s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
-    # Define the destination for the downloaded file
-    VS_DESTINATION = FAISS_INDEX_PATH + ".zip"
-    try:
-        # Download the pre-prepared vectorized index from the S3 bucket
-        print("Downloading the pre-prepared FAISS vectorized index from S3...")
-        s3.download_file(S3_LOCATION, FAISS_VS_NAME, VS_DESTINATION)
-        # Extract the downloaded zip file
-        with zipfile.ZipFile(VS_DESTINATION, 'r') as zip_ref:
-            zip_ref.extractall('./vectorstore/')
-        print("Download and extraction completed.")
-        return FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
-    except Exception as e:
-        print(f"Error during downloading or extracting from S3: {e}", file=sys.stderr)
-    #faissdb = FAISS.load_local(FAISS_INDEX_PATH, embeddings)
 ## Chroma DB
 def get_chroma_vs():
-    # Initialize an S3 client with unsigned configuration for public access
-    s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
-    VS_DESTINATION = CHROMA_DIRECTORY+".zip"
-    try:
-        # Download the pre-prepared vectorized index from the S3 bucket
-        print("Downloading the pre-prepared chroma vectorstore from S3...")
-        s3.download_file(S3_LOCATION, CHROMA_VS_NAME, VS_DESTINATION)
-        with zipfile.ZipFile(VS_DESTINATION, 'r') as zip_ref:
-            zip_ref.extractall('./vectorstore/')
-        print("Download and extraction completed.")
-        chromadb = Chroma(persist_directory=CHROMA_DIRECTORY, embedding_function=embeddings)
-        #chromadb.get()
-    except Exception as e:
-        print(f"Error during downloading or extracting from S3: {e}", file=sys.stderr)

 ## FAISS
 def get_faiss_vs():
+    if os.listdir(FAISS_INDEX_PATH) == 0:
+        # Initialize an S3 client with unsigned configuration for public access
+        s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
+        # Define the destination for the downloaded file
+        VS_DESTINATION = FAISS_INDEX_PATH + ".zip"
+        try:
+            # Download the pre-prepared vectorized index from the S3 bucket
+            print("Downloading the pre-prepared FAISS vectorized index from S3...")
+            s3.download_file(S3_LOCATION, FAISS_VS_NAME, VS_DESTINATION)
+            # Extract the downloaded zip file
+            with zipfile.ZipFile(VS_DESTINATION, 'r') as zip_ref:
+                zip_ref.extractall('./vectorstore/')
+            print("Download and extraction completed.")
+            return FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
+        except Exception as e:
+            print(f"Error during downloading or extracting from S3: {e}", file=sys.stderr)
+        #faissdb = FAISS.load_local(FAISS_INDEX_PATH, embeddings)
 ## Chroma DB
 def get_chroma_vs():
+    if os.listdir(CHROMA_DIRECTORY) == 0:
+        # Initialize an S3 client with unsigned configuration for public access
+        s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
+        VS_DESTINATION = CHROMA_DIRECTORY+".zip"
+        try:
+            # Download the pre-prepared vectorized index from the S3 bucket
+            print("Downloading the pre-prepared chroma vectorstore from S3...")
+            s3.download_file(S3_LOCATION, CHROMA_VS_NAME, VS_DESTINATION)
+            with zipfile.ZipFile(VS_DESTINATION, 'r') as zip_ref:
+                zip_ref.extractall('./vectorstore/')
+            print("Download and extraction completed.")
+            chromadb = Chroma(persist_directory=CHROMA_DIRECTORY, embedding_function=embeddings)
+            #chromadb.get()
+        except Exception as e:
+            print(f"Error during downloading or extracting from S3: {e}", file=sys.stderr)

rag_app/structured_tools/structured_tools.py CHANGED Viewed

@@ -8,7 +8,7 @@ from langchain_community.embeddings.sentence_transformer import (
 )
 from langchain_community.vectorstores import Chroma
 import ast
 import chromadb
 from rag_app.utils.utils import (
@@ -23,6 +23,8 @@ import os
 persist_directory = os.getenv('VECTOR_DATABASE_LOCATION')
 embedding_model = os.getenv("EMBEDDING_MODEL")
 @tool
 def memory_search(query:str) -> str:

 )
 from langchain_community.vectorstores import Chroma
 import ast
+from rag_app.loading_data.load_S3_vector_stores import get_chroma_vs
 import chromadb
 from rag_app.utils.utils import (
 persist_directory = os.getenv('VECTOR_DATABASE_LOCATION')
 embedding_model = os.getenv("EMBEDDING_MODEL")
+if os.listdir(persist_directory) == 0:
+    get_chroma_vs()
 @tool
 def memory_search(query:str) -> str: