dtyago committed
Commit 11fcf53
1 Parent(s): 045145e

Implemented async for performance gain

Files changed (3):
  1. app/api/userchat.py +1 -1
  2. app/main.py +4 -3
  3. app/utils/chat_rag.py +37 -34
app/api/userchat.py CHANGED
@@ -11,7 +11,7 @@ async def chat_with_llama(user_input: str = Body(..., embed=True), current_user:
     # Example logic for model inference (pseudo-code, adjust as necessary)
     try:
         user_id = current_user["user_id"]
-        model_response = llm_infer(user_collection_name=sanitize_collection_name(user_id), prompt=user_input)
+        model_response = await llm_infer(user_collection_name=sanitize_collection_name(user_id), prompt=user_input)
         # Optionally, store chat history
         # chromadb_face_helper.store_chat_history(user_id=current_user["user_id"], user_input=user_input, model_response=model_response)
     except Exception as e:
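Note: because llm_infer is now a coroutine, the handler must await it; calling it without await would hand back a coroutine object instead of the model's reply. A minimal sketch of the calling side under that assumption (the route path, get_current_user dependency, and response shape are illustrative, not taken from the repo):

    from fastapi import APIRouter, Body, Depends

    router = APIRouter()

    @router.post("/user/chat")  # route path assumed for illustration
    async def chat_with_llama(
        user_input: str = Body(..., embed=True),
        current_user: dict = Depends(get_current_user),  # hypothetical auth dependency
    ):
        user_id = current_user["user_id"]
        # Await the async RAG pipeline; the event loop can serve other
        # requests while this one is in flight.
        model_response = await llm_infer(
            user_collection_name=sanitize_collection_name(user_id),
            prompt=user_input,
        )
        return {"ai_response": model_response}  # response shape assumed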
app/main.py CHANGED
@@ -11,7 +11,7 @@ from admin import admin_functions as admin
 from utils.db import UserFaceEmbeddingFunction,ChromaDBFaceHelper
 from api import userlogin, userlogout, userchat, userupload
 from utils.db import ChromaDBFaceHelper
-from utils.chat_rag import load_llm
+from utils.chat_rag import LlamaModelSingleton
 
 app = FastAPI()
 
@@ -42,8 +42,9 @@ async def startup_event():
     chromadb_face_helper = ChromaDBFaceHelper(db_path) # Used by APIs
 
     # Perform any other startup tasks here
-    # Load the LLM is a singleton class call
-    load_llm()
+    # Preload the LLM model
+    await LlamaModelSingleton.get_instance()
+    print("LLM model loaded and ready.")
 
     print(f"MODEL_PATH in main.py = {os.getenv('MODEL_PATH')} ")
 
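Note: awaiting the singleton inside the startup hook works because FastAPI runs startup handlers on the event loop, so the model is fully loaded before the first request arrives and every later get_instance() call returns the cached object. On newer FastAPI versions the same preload can be written with a lifespan context instead of the now-deprecated @app.on_event("startup"); a minimal sketch, assuming only LlamaModelSingleton from this commit:

    from contextlib import asynccontextmanager
    from fastapi import FastAPI

    from utils.chat_rag import LlamaModelSingleton

    @asynccontextmanager
    async def lifespan(app: FastAPI):
        # Warm the model once before the app starts serving traffic.
        await LlamaModelSingleton.get_instance()
        yield  # application handles requests here

    app = FastAPI(lifespan=lifespan)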
app/utils/chat_rag.py CHANGED
@@ -2,6 +2,7 @@
 import os
 import re
 import hashlib
+import asyncio
 
 from langchain.document_loaders import PyPDFLoader
 
@@ -47,7 +48,7 @@ def sanitize_collection_name(email):
 
 
 # Modify vectordb initialization to be dynamic based on user_id
-def get_vectordb_for_user(user_collection_name):
+async def get_vectordb_for_user(user_collection_name):
     # Get Chromadb location
     CHROMADB_LOC = os.getenv('CHROMADB_LOC')
 
@@ -60,9 +61,9 @@ def get_vectordb_for_user(user_collection_name):
 
 vectordb_cache = {}
 
-def get_vectordb_for_user_cached(user_collection_name):
+async def get_vectordb_for_user_cached(user_collection_name):
     if user_collection_name not in vectordb_cache:
-        vectordb_cache[user_collection_name] = get_vectordb_for_user(user_collection_name)
+        vectordb_cache[user_collection_name] = await get_vectordb_for_user(user_collection_name)
     return vectordb_cache[user_collection_name]
 
 
@@ -93,42 +94,44 @@ def pdf_to_vec(filename, user_collection_name):
     return(vectordb)
     #return collection # Return the collection as the asset
 
+
+# Assuming LlamaModelSingleton is updated to support async instantiation
 class LlamaModelSingleton:
     _instance = None
 
-    def __new__(cls):
+    @classmethod
+    async def get_instance(cls):
         if cls._instance is None:
-            print('Loading LLM model...')
-            cls._instance = super(LlamaModelSingleton, cls).__new__(cls)
-
-            # Model loading logic
-            model_path = os.getenv("MODEL_PATH")
-            cls._instance.llm = LlamaCpp(
-                #streaming = True,
-                model_path=model_path,
-                n_gpu_layers=-1,
-                n_batch=512,
-                temperature=0.1,
-                top_p=1,
-                #verbose=False,
-                #callback_manager=callback_manager,
-                max_tokens=2000,
-            )
-            print(f'Model loaded from {model_path}')
-        return cls._instance.llm
-
-
-def load_llm():
-    return LlamaModelSingleton()
+            cls._instance = cls._load_llm()  # Assuming _load_llm is synchronous, if not, use an executor
+        return cls._instance
+
+    @staticmethod
+    def _load_llm():
+        print('Loading LLM model...')
+        model_path = os.getenv("MODEL_PATH")
+        llm = LlamaCpp(
+            model_path=model_path,
+            n_gpu_layers=-1,
+            n_batch=512,
+            temperature=0.1,
+            top_p=1,
+            max_tokens=2000,
+        )
+        print(f'Model loaded from {model_path}')
+        return llm
+
+async def load_llm():
+    return await LlamaModelSingleton.get_instance()
+
 
 
 
 #step 5, to instantiate once to create default_chain,router_chain,destination_chains into chain and set vectordb. so will not re-create per prompt
-def default_chain(llm, user_collection_name):
+async def default_chain(llm, user_collection_name):
     # Get Chromadb location
     CHROMADB_LOC = os.getenv('CHROMADB_LOC')
 
-    vectordb = get_vectordb_for_user_cached(user_collection_name) # Use the dynamic vectordb based on user_id
+    vectordb = await get_vectordb_for_user_cached(user_collection_name) # Use the dynamic vectordb based on user_id
     sum_template = """
     As a machine learning education specialist, our expertise is pivotal in deepening the comprehension of complex machine learning concepts for both educators and students.
 
@@ -209,13 +212,13 @@ def default_chain(llm, user_collection_name):
     return default_chain,router_chain,destination_chains
 
 # Adjust llm_infer to accept user_id and use it for user-specific processing
-def llm_infer(user_collection_name, prompt):
+async def llm_infer(user_collection_name, prompt):
 
-    llm = load_llm() # load_llm is singleton for entire system
+    llm = await load_llm() # load_llm is singleton for entire system
 
-    vectordb = get_vectordb_for_user_cached(user_collection_name) # Vector collection for each us.
+    vectordb = await get_vectordb_for_user_cached(user_collection_name) # Vector collection for each us.
 
-    default_chain, router_chain, destination_chains = get_or_create_chain(user_collection_name, llm) # Now user-specific
+    default_chain, router_chain, destination_chains = await get_or_create_chain(user_collection_name, llm) # Now user-specific
 
     chain = MultiPromptChain(
         router_chain=router_chain,
@@ -231,13 +234,13 @@ def llm_infer(user_collection_name, prompt):
 # Assuming a simplified caching mechanism for demonstration
 chain_cache = {}
 
-def get_or_create_chain(user_collection_name, llm):
+async def get_or_create_chain(user_collection_name, llm):
     if 'default_chain' in chain_cache and 'router_chain' in chain_cache:
         default_chain = chain_cache['default_chain']
         router_chain = chain_cache['router_chain']
         destination_chains = chain_cache['destination_chains']
     else:
-        vectordb = get_vectordb_for_user_cached(user_collection_name) # User-specific vector database
+        vectordb = await get_vectordb_for_user_cached(user_collection_name) # User-specific vector database
         sum_template = """
         As a machine learning education specialist, our expertise is pivotal in deepening the comprehension of complex machine learning concepts for both educators and students.
 
246