dtyago committed
Commit 10399f1
1 Parent(s): 16b83b8

Implemented LLM model and wired it to APIs

README.md CHANGED
@@ -42,6 +42,6 @@ EduConnect/
 │   └── images/           # UI rendering images for administration page
 ├── Dockerfile             # Docker configuration for setting up the environment
 ├── requirements.txt       # Lists all Python library dependencies
-├── entrypoint.sh
+├── entrypoint.sh          # Downloads the model from Hugging Face
 └── .env                   # Environment variables for configuration settings
 ```
app/admin/admin_functions.py CHANGED
@@ -20,7 +20,7 @@ async def register_user(db, email: str, name: str, role: str, file: UploadFile =
     :return: email
     """
     unique_filename = f"{email}.jpg"  # Use the email as the filename
-    file_path = f"/home/user/data/tmp/{unique_filename}"  # Specify your upload directory
+    file_path = f"/home/user/data/tmp/{unique_filename}"  # Specify our upload directory

     # Ensure the directory exists
     os.makedirs(os.path.dirname(file_path), exist_ok=True)
@@ -35,7 +35,7 @@ async def register_user(db, email: str, name: str, role: str, file: UploadFile =

     if cropped_face is not None:

-        # Here you can store the embeddings along with user details in ChromaDB
+        # Here we can store the embeddings along with user details in ChromaDB
         # chroma_db.save_embeddings(user_id, embeddings)
         db.upsert(images=[cropped_face], ids=[email], metadatas=[{"name": name, "role": role}])
         return {"status": "User registered successfully", "image": cropped_face}
@@ -68,7 +68,7 @@ def verify_admin_password(submitted_user: str, submitted_password: str) -> bool:
     return False

 # Additional Admin Functions
-# You could include other administrative functionalities here, such as:
+# We could include other administrative functionalities here, such as:
 # - Listing all registered users.
 # - Moderating chat messages or viewing chat history.
 # - Managing system settings or configurations.
app/api/userchat.py CHANGED
@@ -1,16 +1,23 @@
 from fastapi import APIRouter, Depends, HTTPException, Body
 from ..dependencies import get_current_user
+from ..utils.chat_rag import llm_infer
 from typing import Any

 router = APIRouter()

 @router.post("/user/chat")
 async def chat_with_llama(user_input: str = Body(..., embed=True), current_user: Any = Depends(get_current_user)):
-    # Implement your logic to interact with LlamaV2 LLM here.
-    # Example response, replace with actual chat logic
-    chat_response = "Hello, how can I assist you today?"
+    # Example logic for model inference (pseudo-code, adjust as necessary)
+    try:
+        user_id = current_user["user_id"]
+        model_response = llm_infer(user_id=user_id, prompt=user_input)
+        # Optionally, store chat history
+        # chromadb_face_helper.store_chat_history(user_id=current_user["user_id"], user_input=user_input, model_response=model_response)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
     return {
-        "response": chat_response,
+        "ai_response": model_response,
         "user_id": current_user["user_id"],
         "name": current_user["name"],
         "role": current_user["role"]
app/api/userlogout.py CHANGED
@@ -1,5 +1,5 @@
 from fastapi import APIRouter, Depends, HTTPException
-from ..utils.db import tinydb_helper  # Ensure this import is correct based on your project structure
+from ..utils.db import tinydb_helper  # Ensure this import is correct based on our project structure
 from ..dependencies import oauth2_scheme

 router = APIRouter()
app/api/userupload.py CHANGED
@@ -1,22 +1,38 @@
 from typing import Any
-from fastapi import APIRouter, Depends, UploadFile, File
+from fastapi import APIRouter, Depends, UploadFile, File, HTTPException
 import os
 from app.dependencies import get_current_user
+# Assuming a utility for processing PDFs and generating embeddings
+from ..utils.doc_ingest import ingest_document

 router = APIRouter()

 @router.post("/user/upload")
 async def upload_file(file: UploadFile = File(...), current_user: Any = Depends(get_current_user)):
+    if file.content_type != "application/pdf":
+        raise HTTPException(status_code=400, detail="Unsupported file type. Please upload a PDF.")
+
     upload_dir = "/home/user/data/uploads"
-    # Ensure the upload directory exists
-    os.makedirs(upload_dir, exist_ok=True)  # This creates the directory if it doesn't exist, does nothing otherwise
+    os.makedirs(upload_dir, exist_ok=True)

     file_location = f"{upload_dir}/{file.filename}"
     with open(file_location, "wb") as buffer:
         contents = await file.read()
         buffer.write(contents)
+
+    try:
+        # Process PDF and store embeddings
+        ingest_document(file_location, current_user["user_id"])
+    except Exception as e:
+        # If processing fails, attempt to clean up the file before re-raising the error
+        os.remove(file_location)
+        raise HTTPException(status_code=500, detail=f"Failed to process file: {e}")
+
+    # Clean up file in uploads directory after successful processing
+    os.remove(file_location)
+
     return {
-        "status": "File uploaded successfully.",
+        "status": "File uploaded and processed successfully.",
         "user_id": current_user["user_id"],
         "name": current_user["name"],
         "role": current_user["role"]
app/dependencies.py CHANGED
@@ -2,7 +2,7 @@ from fastapi import Depends, HTTPException, status
 from fastapi.security import OAuth2PasswordBearer
 from jose import jwt, JWTError
 from .utils.db import tinydb_helper  # Ensure correct import path
-from .utils.jwt_utils import SECRET_KEY, ALGORITHM  # Ensure these are defined in your jwt_utils.py
+from .utils.jwt_utils import SECRET_KEY, ALGORITHM  # Ensure these are defined in our jwt_utils.py

 oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")

app/main.py CHANGED
@@ -16,7 +16,7 @@ CHROMADB_LOC = "/home/user/data/chromadb"
 app = FastAPI()

 # Add middleware
-# Set all origins to wildcard for simplicity, but you should limit this in production
+# Set all origins to wildcard for simplicity, but we should limit this in production
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
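The comment above notes that the wildcard origin should be tightened in production. One possible restriction is sketched below; it is illustrative only, and the origin shown is a placeholder rather than part of this commit.

```python
# Hypothetical production configuration; replace the origin with the real front-end URL.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["https://educonnect.example.edu"],  # placeholder origin
    allow_credentials=True,
    allow_methods=["GET", "POST"],
    allow_headers=["Authorization", "Content-Type"],
)
```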
app/utils/chat_rag.py CHANGED
@@ -1,4 +1,5 @@
 #list of librarys for requirement.txt
+import os
 from langchain.document_loaders import PyPDFLoader

 # Import embeddings module from langchain for vector representations of text
@@ -26,7 +27,20 @@ from langchain.chains import ConversationalRetrievalChain
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

-def pdf_to_vec(filename):
+CHROMADB_LOC = "/home/user/data/chromadb"
+
+# Modify vectordb initialization to be dynamic based on user_id
+def get_vectordb_for_user(user_id):
+    collection_name = f"user_{user_id}_collection"
+    vectordb = Chroma(
+        collection_name=collection_name,
+        embedding_function=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2'),
+        persist_directory=f"{CHROMADB_LOC}/{collection_name}",  # Optional: separate directory for each user's data
+    )
+    return vectordb
+
+
+def pdf_to_vec(filename, collection_name):
     document = []
     loader = PyPDFLoader(filename)
     document.extend(loader.load())  #which library is this from?
@@ -41,7 +55,7 @@ def pdf_to_vec(filename):
     document_chunks = document_splitter.split_documents(document)  #which library is this from?

     # Create a Chroma vector database from the document chunks with the specified embeddings, and set a directory for persistence
-    vectordb = Chroma.from_documents(document_chunks, embedding=embeddings, persist_directory='./data')  ## change to GUI path
+    vectordb = Chroma.from_documents(document_chunks, embedding=embeddings, collection_name=collection_name, persist_directory=CHROMADB_LOC)  ## change to GUI path

     # Persist the created vector database to disk in the specified directory
     vectordb.persist()  #this is mandatory?
@@ -49,11 +63,19 @@ def pdf_to_vec(filename):
     return(vectordb)
     #return collection # Return the collection as the asset

-def load_llm():
-    #callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
-    llm = LlamaCpp(
+class LlamaModelSingleton:
+    _instance = None
+
+    def __new__(cls):
+        if cls._instance is None:
+            print('Loading LLM model...')
+            cls._instance = super(LlamaModelSingleton, cls).__new__(cls)
+
+            # Model loading logic
+            model_path = os.getenv("MODEL_PATH")
+            cls._instance.llm = LlamaCpp(
         #streaming = True,
-        model_path="/content/llama-2-7b-mini-ibased.Q5_K_M.gguf",  #/content/data/llama-2-7b-mcq_2-gguf.gguf. # change to GUI path. llama-2-7b-mini-ibased.Q5_K_M.gguf llama-2-7b-mcq_2.Q5_K_M.gguf
+        model_path=model_path,
         #n_gpu_layers=-1,
         n_batch=512,
         temperature=0.1,
@@ -61,17 +83,23 @@ def load_llm():
         #verbose=False,
         #callback_manager=callback_manager,
         max_tokens=2000,
-    )
-    return llm
+            )
+            print(f'Model loaded from {model_path}')
+        return cls._instance.llm


-#step 5, to instantiate once to create default_chain,router_chain,destination_chains into chain and set vectordb. so will not re-create per prompt
-def default_chain():
+def load_llm():
+    return LlamaModelSingleton()
+
+
+#step 5, to instantiate once to create default_chain,router_chain,destination_chains into chain and set vectordb. so will not re-create per prompt
+def default_chain(llm, user_id):
+    vectordb = get_vectordb_for_user(user_id)  # Use the dynamic vectordb based on user_id
     sum_template = """
-    As a machine learning education specialist, your expertise is pivotal in deepening the comprehension of complex machine learning concepts for both educators and students.
+    As a machine learning education specialist, our expertise is pivotal in deepening the comprehension of complex machine learning concepts for both educators and students.

-    Your role entails:
+    our role entails:

     Providing Detailed Explanations: Deliver comprehensive answers to these questions, elucidating the underlying technical principles.
     Assisting in Exam Preparation: Support educators in formulating sophisticated exam and quiz questions, including MCQs, accompanied by thorough explanations.
@@ -82,9 +110,9 @@ def default_chain():
     {input}"""

     mcq_template = """
-    As a machine learning education specialist, your expertise is pivotal in deepening the comprehension of complex machine learning concepts for both educators and students.
+    As a machine learning education specialist, our expertise is pivotal in deepening the comprehension of complex machine learning concepts for both educators and students.

-    Your role entails:
+    our role entails:
     Crafting Insightful Questions: Develop thought-provoking questions that explore the intricacies of machine learning topics.
     Generating MCQs: Create MCQs for each machine learning topic, comprising a question, four choices (A-D), and the correct answer, along with a rationale explaining the answer.

@@ -140,7 +168,14 @@ def default_chain():

     return default_chain,router_chain,destination_chains

-def llm_infer(default_chain,router_chain,destination_chains,prompt):
+# Adjust llm_infer to accept user_id and use it for user-specific processing
+def llm_infer(user_id, prompt):
+
+    llm = load_llm()  # load_llm is a singleton for the entire system
+
+    vectordb = get_vectordb_for_user(user_id)  # Vector collection for each user
+
+    default_chain, router_chain, destination_chains = get_or_create_chain(user_id, llm)  # Now user-specific

     chain = MultiPromptChain(
         router_chain=router_chain,
@@ -151,4 +186,93 @@ def llm_infer(default_chain,router_chain,destination_chains,prompt):
     )
     response = chain.run(prompt)

-    return response
+    return response
+
+# Assuming a simplified caching mechanism for demonstration
+chain_cache = {}
+
+def get_or_create_chain(user_id, llm):
+    if 'default_chain' in chain_cache and 'router_chain' in chain_cache:
+        default_chain = chain_cache['default_chain']
+        router_chain = chain_cache['router_chain']
+        destination_chains = chain_cache['destination_chains']
+    else:
+        vectordb = get_vectordb_for_user(user_id)  # User-specific vector database
+        sum_template = """
+        As a machine learning education specialist, our expertise is pivotal in deepening the comprehension of complex machine learning concepts for both educators and students.
+
+        our role entails:
+
+        Providing Detailed Explanations: Deliver comprehensive answers to these questions, elucidating the underlying technical principles.
+        Assisting in Exam Preparation: Support educators in formulating sophisticated exam and quiz questions, including MCQs, accompanied by thorough explanations.
+        Summarizing Course Material: Distill key information from course materials, articulating complex ideas within the context of advanced machine learning practices.
+
+        Objective: to summarize and explain the key points.
+        Here is the question:
+        {input}"""
+
+        mcq_template = """
+        As a machine learning education specialist, our expertise is pivotal in deepening the comprehension of complex machine learning concepts for both educators and students.
+
+        our role entails:
+        Crafting Insightful Questions: Develop thought-provoking questions that explore the intricacies of machine learning topics.
+        Generating MCQs: Create MCQs for each machine learning topic, comprising a question, four choices (A-D), and the correct answer, along with a rationale explaining the answer.
+
+        Objective: to create multiple choice questions in this format
+        [question:
+        options A:
+        options B:
+        options C:
+        options D:
+        correct_answer:
+        explanation:]
+
+        Here is the question:
+        {input}"""
+
+        prompt_infos = [
+            {
+                "name": "SUMMARIZE",
+                "description": "Good for summarizing and explaining",
+                "prompt_template": sum_template,
+            },
+            {
+                "name": "MCQ",
+                "description": "Good for creating multiple choice questions",
+                "prompt_template": mcq_template,
+            },
+        ]
+
+        destination_chains = {}
+
+        for p_info in prompt_infos:
+            name = p_info["name"]
+            prompt_template = p_info["prompt_template"]
+            prompt = PromptTemplate(template=prompt_template, input_variables=["input"])
+            chain = LLMChain(llm=llm, prompt=prompt)
+            destination_chains[name] = chain
+        #default_chain = ConversationChain(llm=llm, output_key="text")
+        #memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
+
+        default_chain = ConversationalRetrievalChain.from_llm(llm=llm,
+            retriever=vectordb.as_retriever(search_kwargs={'k': 3}),
+            verbose=True, output_key="text")
+
+        destinations = [f"{p['name']}: {p['description']}" for p in prompt_infos]
+        destinations_str = "\n".join(destinations)
+        router_template = MULTI_PROMPT_ROUTER_TEMPLATE.format(destinations=destinations_str)
+        router_prompt = PromptTemplate(
+            template=router_template,
+            input_variables=["input"],
+            output_parser=RouterOutputParser(),
+        )
+        router_chain = LLMRouterChain.from_llm(llm, router_prompt)
+        #
+        chain_cache['default_chain'] = default_chain
+        chain_cache['router_chain'] = router_chain
+        chain_cache['destination_chains'] = destination_chains

+    # Here we can adapt the chains if needed based on the user_id, for example, by adjusting the vectordb retriever
+    # This is where user-specific adaptations occur
+
+    return default_chain, router_chain, destination_chains
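Since `LlamaModelSingleton` reads the model location from the `MODEL_PATH` environment variable (expected to be set after entrypoint.sh downloads the GGUF file), a minimal smoke test of the new helpers outside FastAPI might look like the sketch below. The model path shown is a placeholder, not part of this commit.

```python
import os

# Placeholder path; in the container this would point at the downloaded GGUF file.
os.environ.setdefault("MODEL_PATH", "/home/user/data/models/model.gguf")

from app.utils.chat_rag import llm_infer

# Loads the LlamaCpp model once (singleton) and routes the prompt to the
# MCQ or SUMMARIZE chain, or to the retrieval-backed default chain.
print(llm_infer(user_id="demo_user", prompt="Create one MCQ about overfitting."))
```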
app/utils/db.py CHANGED
@@ -20,7 +20,7 @@ class TinyDBHelper:
     def query_token(self, user_id: str, token: str) -> bool:
         """Query to check if the token exists and is valid."""
         User = Query()
-        # Assuming your tokens table contains 'user_id', 'token', and 'expires_at'
+        # Assuming our tokens table contains 'user_id', 'token', and 'expires_at'
         result = self.tokens_table.search((User.user_id == user_id) & (User.token == token))
         # Optionally, check if the token is expired
         expires_at = datetime.fromisoformat(result[0]['expires_at'])
app/utils/doc_ingest.py ADDED
@@ -0,0 +1,19 @@
+# utils/doc_ingest.py
+from .chat_rag import pdf_to_vec
+
+def ingest_document(file_location: str, user_id: str):
+    """
+    Process and ingest a document into a user-specific vector database.
+
+    :param file_location: The location of the uploaded file on the server.
+    :param user_id: The ID of the user uploading the document.
+    """
+    # Construct a unique collection name based on user_id
+    collection_name = f"user_{user_id}_collection"
+
+    try:
+        vectordb = pdf_to_vec(file_location, collection_name)
+        print("Document processed and ingested successfully into user-specific collection.")
+    except Exception as e:
+        print(f"Error processing document for user {user_id}: {e}")
+        raise
docs/design_principles.md ADDED
@@ -0,0 +1,101 @@
+# DESIGN PRINCIPLES
+
+## VectorDB collections
+
+Given that our EduConnect project is an academic project with a scale limited to not more than 10 users, the approach of creating separate vector stores (ChromaDB collections) for each user becomes highly feasible and manageable. This small scale alleviates concerns about scalability and management overhead that would be significant in a larger, production-level system. Here's how we can effectively implement and manage user-specific vector stores under these conditions:
+
+### Implementation Strategy for Small Scale
+
+1. **Simplified Database Management**: With a maximum of 10 users, managing separate ChromaDB collections becomes straightforward. We can manually monitor and maintain these collections without the need for automated scalability solutions.
+
+2. **Personalized Data Handling**: This setup allows for a high degree of personalization in data handling and retrieval. Each user's interactions and uploads can be contained within their dedicated collection, ensuring data isolation and relevance.
+
+3. **Performance Considerations**: Performance issues related to managing multiple collections are negligible at this scale. Each user's collection will be relatively small, ensuring quick access and query times.
+
+4. **Security and Privacy**: Maintaining separate collections for each user naturally enhances data privacy, as there is a clear separation of data at the database level.
+
+### Example Adjustments
+
+Given the small scale of our project, we might not need to implement complex dynamic collection management. Instead, we can hard-code the logic to create or select a collection based on the user ID. Here is a simplified example adjustment to our document ingestion logic:
+
+```python
+# utils/doc_ingest.py
+def ingest_document(file_location: str, user_id: str):
+    """
+    Process and ingest a document into a user-specific vector database.
+
+    :param file_location: The location of the uploaded file on the server.
+    :param user_id: The ID of the user uploading the document.
+    """
+    # Construct a unique collection name based on user_id
+    collection_name = f"user_{user_id}_collection"
+
+    try:
+        vectordb = pdf_to_vec(file_location, collection_name)
+        print("Document processed and ingested successfully into user-specific collection.")
+    except Exception as e:
+        print(f"Error processing document for user {user_id}: {e}")
+        raise
+```
+
+For `pdf_to_vec`, ensure it uses the `collection_name` to store the embeddings in the correct user-specific collection:
+
+```python
+def pdf_to_vec(filename, collection_name):
+    # Logic to process the PDF and store its embeddings in vectordb
+    # Use collection_name for ChromaDB collection
+    # This function will now be more aware of user-specific storage requirements
+```
+
+### Final Notes
+
+Given the academic nature and small scale of our project, focusing on implementing clean, maintainable code that clearly demonstrates the functionality and benefits of user-specific data handling is more valuable than worrying about scalability. This approach also serves as a good model for how similar systems could be architected to scale with more users, by introducing more automated and dynamic management of resources and collections.
+
+## DEFAULT CHAIN
+
+Configuring `default_chain` for each chat interaction, especially when it involves setting up multiple components like template parsing, vector database retrieval, and language model routing for every single request, could indeed introduce overhead and potentially impact performance. This overhead is particularly concerning if the configuration process is resource-intensive, involving complex computations or significant memory allocation.
+
+### Strategies to Optimize Performance
+
+1. **Caching Common Components**: Components that don't change frequently, such as prompt templates and certain chain configurations, can be cached. This way, we avoid re-initializing these components for every chat interaction. We can initialize these components once and reuse them across chat sessions.
+
+2. **Lazy Initialization**: Only initialize certain parts of the chain when they are actually needed. If certain prompts or chains are used more frequently than others, we could prioritize their initialization and delay others until they're required.
+
+3. **Preconfigured Chain Templates**: If the customization per user is limited to a few parameters (such as the vector database they're interacting with), consider creating a preconfigured template for the chains that can be quickly cloned or adapted per user session with minimal overhead.
+
+4. **Efficient Retrieval Mechanism**: For the vector database retriever used in `ConversationalRetrievalChain`, ensure that the mechanism to switch between user-specific databases is optimized. This might mean having a lightweight way of switching context without needing to reload or reinitialize the entire database connection or retrieval logic.
+
+### Implementation Example
+
+Here's an example of how we might implement a caching mechanism for `default_chain` components that are common across users:
+
+```python
+# Assuming a simplified caching mechanism for demonstration
+chain_cache = {}
+
+def get_or_create_chain(user_id, llm):
+    if 'default_chain' in chain_cache and 'router_chain' in chain_cache:
+        default_chain = chain_cache['default_chain']
+        router_chain = chain_cache['router_chain']
+        destination_chains = chain_cache['destination_chains']
+    else:
+        vectordb = get_vectordb_for_user(user_id)  # User-specific vector database
+        # Configuration for default_chain, router_chain, and destination_chains as before
+        # [...]
+        chain_cache['default_chain'] = default_chain
+        chain_cache['router_chain'] = router_chain
+        chain_cache['destination_chains'] = destination_chains
+
+    # Here we can adapt the chains if needed based on the user_id, for example, by adjusting the vectordb retriever
+    # This is where user-specific adaptations occur
+
+    return default_chain, router_chain, destination_chains
+```
+
+### Key Points
+
+- **Reuse and Cache**: Reuse components wherever possible, caching configurations that are static or common across interactions.
+- **Minimize Dynamic Configuration**: Minimize the amount of dynamic configuration needed per interaction by using templates and parameters that can be easily switched out.
+- **Optimize Data Layer**: Ensure the data layer (e.g., user-specific vector databases) is optimized for quick switching or context updates to prevent it from becoming a bottleneck.
+
+Adopting these strategies will help maintain responsiveness and efficiency in our chat application, ensuring that overhead from setting up `default_chain` for each interaction is minimized.
requirements.txt CHANGED
@@ -1,6 +1,6 @@
 fastapi==0.95.2 # Core framework for building APIs.
 uvicorn[standard]==0.18.3 # ASGI server for FastAPI, supports live reloading.
-requests==2.28.* # For making HTTP requests, if needed by your app.
+requests==2.28.* # For making HTTP requests, if needed by our app.
 torch==1.11.* # PyTorch, for handling deep learning models.
 sentencepiece==0.1.* # For chat text processing
 mtcnn==0.1.1 # For face detection in images.