dtyago committed on
Commit
a30f2aa
•
1 Parent(s): 045f16c

Plant UML docs

Browse files
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
README.md CHANGED
@@ -46,6 +46,6 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
46
  ├── .env # Environment variables for configuration settings**
47
  ├── ipynb/ # Jupyter notebooks used for fine-tuning / transfer learning model
48
  │ ├── facenet/ # Jupyter Notebooks for Facenet / MTCNN test
49
- │ ├── llm/ # Jupyter Notebooks for LLM training / validation
50
  └── docs/ # Project Documents (ONLY MARKDOWNs and UML diagrams)
51
  ```
 
46
  ├── .env # Environment variables for configuration settings**
47
  ├── ipynb/ # Jupyter notebooks used for fine-tuning / transfer learning model
48
  │ ├── facenet/ # Jupyter Notebooks for Facenet / MTCNN test
49
+ │ └── llm/ # Jupyter Notebooks for LLM training / validation
50
  └── docs/ # Project Documents (ONLY MARKDOWNs and UML diagrams)
51
  ```
docs/EduConnect_class.puml ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @startuml EduConnect
2
+ class FastAPIApp {
3
+ }
4
+
5
+ class User {
6
+ }
7
+
8
+ class Authentication {
9
+ }
10
+
11
+ class JWTToken {
12
+ }
13
+
14
+ class TinyDB {
15
+ }
16
+
17
+ class FileHandler {
18
+ }
19
+
20
+ class ChatHandler {
21
+ }
22
+
23
+ class DocumentIngestor {
24
+ }
25
+
26
+ class FaceVerifier {
27
+ }
28
+
29
+ FastAPIApp --> User : manages
30
+ FastAPIApp --> Authentication : uses
31
+ Authentication --> JWTToken : generates/validates
32
+ User --> TinyDB : stores/retrieves data
33
+ FastAPIApp --> FileHandler : uploads/downloads files
34
+ FastAPIApp --> ChatHandler : sends/receives messages
35
+ FastAPIApp --> DocumentIngestor : processes documents
36
+ FastAPIApp --> FaceVerifier : verifies user face
37
+
38
+ @enduml
docs/EduConnect_seq.puml ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @startuml EduConnect
2
+ actor User
3
+ participant "FastAPIApp" as app
4
+ participant "Authentication" as auth
5
+ database "TinyDB" as db
6
+ participant "FileHandler" as fileHandler
7
+ participant "ChatHandler" as chat
8
+ participant "DocumentIngestor" as docIngestor
9
+
10
+ == User Login ==
11
+ User -> app : login()
12
+ app -> auth : authenticate()
13
+ auth -> db : verifyUser()
14
+ db --> auth : userValidated
15
+ auth --> app : token
16
+ app --> User : token
17
+
18
+ == Chat Message ==
19
+ User -> app : sendMessage()
20
+ app -> chat : processMessage()
21
+ chat --> User : response
22
+
23
+ == Upload Document ==
24
+ User -> app : uploadDocument()
25
+ app -> fileHandler : saveFile()
26
+ fileHandler -> docIngestor : processDocument()
27
+ docIngestor --> fileHandler : docProcessed
28
+ fileHandler --> User : uploadSuccess
29
+
30
+ @enduml
docs/api/userchat_class.puml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @startuml userchat
2
+ class APIRouter {
3
+ }
4
+
5
+ class LLMInfer {
6
+ +llm_infer(user_collection_name: str, prompt: str): str
7
+ }
8
+
9
+ class SanitizeCollectionName {
10
+ +sanitize_collection_name(name: str): str
11
+ }
12
+
13
+ class Dependencies {
14
+ +get_current_user(): dict
15
+ }
16
+
17
+ APIRouter ..> LLMInfer : uses
18
+ APIRouter ..> SanitizeCollectionName : uses
19
+ APIRouter ..> Dependencies : uses
20
+ @enduml
docs/api/userchat_seq.puml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @startuml userchat
2
+ actor User
3
+ participant "APIRouter" as router
4
+ participant "Dependencies" as dependencies
5
+ participant "SanitizeCollectionName" as sanitizer
6
+ participant "LLMInfer" as llm
7
+
8
+ User -> router : chat_with_llama(user_input)
9
+ activate router
10
+
11
+ router -> dependencies : get_current_user()
12
+ activate dependencies
13
+ dependencies --> router : current_user
14
+ deactivate dependencies
15
+
16
+ router -> sanitizer : sanitize_collection_name(current_user["user_id"])
17
+ activate sanitizer
18
+ sanitizer --> router : sanitized_name
19
+ deactivate sanitizer
20
+
21
+ router -> llm : llm_infer(sanitized_name, user_input)
22
+ activate llm
23
+ llm --> router : model_response
24
+ deactivate llm
25
+
26
+ router --> User : {"ai_response": model_response, "user_id", "name", "role"}
27
+ deactivate router
28
+ @enduml
docs/api/userlogin_class.puml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @startuml userlogin
2
+ class UserLogin {
3
+ +user_login(file: UploadFile): dict
4
+ }
5
+
6
+ class FaceVerification {
7
+ +verify_user_face(file_path: str): Optional[dict]
8
+ }
9
+
10
+ class JWTToken {
11
+ +create_access_token(data: dict, expires_delta: Optional[timedelta]): string
12
+ }
13
+
14
+ class TinyDBHelper {
15
+ +insert_token(user_id: string, token: string, expires_at: string): void
16
+ }
17
+
18
+ UserLogin ..> FaceVerification : uses
19
+ FaceVerification ..> JWTToken : uses
20
+ JWTToken ..> TinyDBHelper : uses
21
+ @enduml
docs/api/userlogin_seq.puml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @startuml userlogin
2
+ actor Client
3
+ participant "UserLogin" as login
4
+ participant "FaceVerification" as verification
5
+ participant "JWTToken" as jwt
6
+ database "TinyDB" as db
7
+
8
+ Client -> login : user_login(file)
9
+ activate login
10
+
11
+ login -> verification : verify_user_face(file_path)
12
+ activate verification
13
+ verification --> login : verification_result
14
+ deactivate verification
15
+
16
+ login -> jwt : create_access_token(user_id, metadata)
17
+ activate jwt
18
+ jwt --> login : access_token
19
+ deactivate jwt
20
+
21
+ login -> db : insert_token(user_id, access_token, expires_at)
22
+ activate db
23
+ db --> login
24
+ deactivate db
25
+
26
+ login --> Client : {access_token, user_id, name, role}
27
+ deactivate login
28
+ @enduml
docs/api/userlogout_class.puml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @startuml userlogout
2
+ class APIRouter {
3
+ }
4
+
5
+ class TinyDBHelper {
6
+ +query_token(user_id: string, token: string): bool
7
+ +remove_token_by_value(token: string): void
8
+ }
9
+
10
+ class OAuth2Scheme {
11
+ }
12
+
13
+ APIRouter ..> TinyDBHelper : uses
14
+ APIRouter ..> OAuth2Scheme : depends on
15
+ @enduml
docs/api/userlogout_seq.puml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @startuml userlogout
2
+ actor Client
3
+ participant "APIRouter" as router
4
+ participant "OAuth2Scheme" as oauth2
5
+ participant "TinyDBHelper" as db
6
+
7
+ Client -> router : user_logout(token)
8
+ activate router
9
+
10
+ router -> oauth2 : get token from request
11
+ activate oauth2
12
+ oauth2 --> router : token
13
+ deactivate oauth2
14
+
15
+ router -> db : query_token(user_id, token)
16
+ activate db
17
+ alt if token exists
18
+ db --> router : true
19
+ router -> db : remove_token_by_value(token)
20
+ db --> router : token removed
21
+ else
22
+ db --> router : false
23
+ router --> Client : {"detail": "Token not found."}
24
+ end
25
+ deactivate db
26
+
27
+ router --> Client : {"message": "User logged out successfully"}
28
+ deactivate router
29
+ @enduml
docs/api/userupload_class.puml ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @startuml userupload
2
+ class APIRouter {
3
+ }
4
+
5
+ class UploadFile {
6
+ }
7
+
8
+ class FileDependencies {
9
+ }
10
+
11
+ class DocumentIngestor {
12
+ +ingest_document(file_location: str, collection_name: str): void
13
+ }
14
+
15
+ class Sanitizer {
16
+ +sanitize_collection_name(name: str): str
17
+ }
18
+
19
+ APIRouter ..> UploadFile : receives
20
+ APIRouter ..> FileDependencies : uses
21
+ UploadFile ..> DocumentIngestor : processed by
22
+ APIRouter ..> Sanitizer : uses
23
+ @enduml
docs/api/userupload_seq.puml ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @startuml userupload
2
+ actor Client
3
+ participant "APIRouter" as router
4
+ participant "UploadFile" as file
5
+ participant "Sanitizer" as sanitizer
6
+ participant "DocumentIngestor" as ingestor
7
+
8
+ Client -> router : upload_file(file)
9
+ activate router
10
+
11
+ router -> file : check_content_type()
12
+ activate file
13
+ file --> router : valid/invalid
14
+ deactivate file
15
+
16
+ alt if file is valid
17
+ router -> sanitizer : sanitize_collection_name(current_user["user_id"])
18
+ activate sanitizer
19
+ sanitizer --> router : sanitized_name
20
+ deactivate sanitizer
21
+
22
+ router -> ingestor : ingest_document(file_location, sanitized_name)
23
+ activate ingestor
24
+ ingestor --> router : success/failure
25
+ deactivate ingestor
26
+
27
+ router --> Client : {"status": "File uploaded and processed successfully."}
28
+ else if file is invalid
29
+ router --> Client : {"status": "Unsupported file type."}
30
+ end
31
+
32
+ deactivate router
33
+ @enduml
docs/dependencies_class.puml ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @startuml dependencies
2
+ class OAuth2PasswordBearer {
3
+ }
4
+
5
+ class JWTError {
6
+ }
7
+
8
+ class HTTPException {
9
+ }
10
+
11
+ class TinyDBHelper {
12
+ +query_token(user_id: string, token: string): bool
13
+ }
14
+
15
+ class Dependencies {
16
+ +get_current_user(token: str): dict
17
+ }
18
+
19
+ OAuth2PasswordBearer ..> Dependencies : token
20
+ Dependencies ..> JWTError : catch errors
21
+ Dependencies ..> HTTPException : throw on failure
22
+ Dependencies ..> TinyDBHelper : validate token
23
+ @enduml
docs/dependencies_seq.puml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @startuml dependencies
2
+ actor Client
3
+ participant "OAuth2PasswordBearer" as oauth2
4
+ participant "Dependencies" as dependencies
5
+ participant "JWTError" as jwtError
6
+ participant "HTTPException" as httpException
7
+ database "TinyDBHelper" as db
8
+
9
+ Client -> dependencies : get_current_user(token)
10
+ activate dependencies
11
+
12
+ dependencies -> oauth2 : token
13
+ oauth2 --> dependencies : token
14
+ dependencies -> dependencies : decode_jwt(token)
15
+ activate dependencies
16
+ alt success
17
+ dependencies --> dependencies : payload
18
+ else JWTError
19
+ dependencies -> jwtError : raise credentials_exception
20
+ jwtError --> dependencies : HTTP_401_UNAUTHORIZED
21
+ end
22
+ deactivate dependencies
23
+
24
+ dependencies -> db : query_token(user_id, token)
25
+ activate db
26
+ alt token valid
27
+ db --> dependencies : true
28
+ else token invalid
29
+ db --> dependencies : false
30
+ dependencies -> httpException : raise expiry_exception
31
+ httpException --> dependencies : HTTP_401_UNAUTHORIZED
32
+ end
33
+ deactivate db
34
+
35
+ dependencies --> Client : user_details
36
+ deactivate dependencies
37
+ @enduml
docs/design_principles.md DELETED
@@ -1,101 +0,0 @@
1
- # DESIGN PRINCIPLES
2
-
3
- ## VectorDB collections
4
-
5
- Given that our EduConnect project is an academic project with a scale limited to not more than 10 users, the approach of creating separate vector stores (ChromaDB collections) for each user becomes highly feasible and manageable. This small scale alleviates concerns about scalability and management overhead that would be significant in a larger, production-level system. Here's how we can effectively implement and manage user-specific vector stores under these conditions:
6
-
7
- ### Implementation Strategy for Small Scale
8
-
9
- 1. **Simplified Database Management**: With a maximum of 10 users, managing separate ChromaDB collections becomes straightforward. We can manually monitor and maintain these collections without the need for automated scalability solutions.
10
-
11
- 2. **Personalized Data Handling**: This setup allows for a high degree of personalization in data handling and retrieval. Each user's interactions and uploads can be contained within their dedicated collection, ensuring data isolation and relevance.
12
-
13
- 3. **Performance Considerations**: Performance issues related to managing multiple collections are negligible at this scale. Each user's collection will be relatively small, ensuring quick access and query times.
14
-
15
- 4. **Security and Privacy**: Maintaining separate collections for each user naturally enhances data privacy, as there is a clear separation of data at the database level.
16
-
17
- ### Example Adjustments
18
-
19
- Given the small scale of our project, we might not need to implement complex dynamic collection management. Instead, we can hard-code the logic to create or select a collection based on the user ID. Here is a simplified example adjustment to our document ingestion logic:
20
-
21
- ```python
22
- # utils/doc_ingest.py
23
- def ingest_document(file_location: str, user_id: str):
24
- """
25
- Process and ingest a document into a user-specific vector database.
26
-
27
- :param file_location: The location of the uploaded file on the server.
28
- :param user_id: The ID of the user uploading the document.
29
- """
30
- # Construct a unique collection name based on user_id
31
- collection_name = f"user_{user_id}_collection"
32
-
33
- try:
34
- vectordb = pdf_to_vec(file_location, collection_name)
35
- print("Document processed and ingested successfully into user-specific collection.")
36
- except Exception as e:
37
- print(f"Error processing document for user {user_id}: {e}")
38
- raise
39
- ```
40
-
41
- For `pdf_to_vec`, ensure it uses the `collection_name` to store the embeddings in the correct user-specific collection:
42
-
43
- ```python
44
- def pdf_to_vec(filename, collection_name):
45
- # Logic to process the PDF and store its embeddings in vectordb
46
- # Use collection_name for ChromaDB collection
47
- # This function will now be more aware of user-specific storage requirements
48
- ```
49
-
50
- ### Final Notes
51
-
52
- Given the academic nature and small scale of our project, focusing on implementing clean, maintainable code that clearly demonstrates the functionality and benefits of user-specific data handling is more valuable than worrying about scalability. This approach also serves as a good model for how similar systems could be architected to scale with more users, by introducing more automated and dynamic management of resources and collections.
53
-
54
- ## DEFAULT CHAIN
55
-
56
- Configuring `default_chain` for each chat interaction, especially when it involves setting up multiple components like template parsing, vector database retrieval, and language model routing for every single request, could indeed introduce overhead and potentially impact performance. This overhead is particularly concerning if the configuration process is resource-intensive, involving complex computations or significant memory allocation.
57
-
58
- ### Strategies to Optimize Performance
59
-
60
- 1. **Caching Common Components**: Components that don't change frequently, such as prompt templates and certain chain configurations, can be cached. This way, we avoid re-initializing these components for every chat interaction. We can initialize these components once and reuse them across chat sessions.
61
-
62
- 2. **Lazy Initialization**: Only initialize certain parts of the chain when they are actually needed. If certain prompts or chains are used more frequently than others, we could prioritize their initialization and delay others until they're required.
63
-
64
- 3. **Preconfigured Chain Templates**: If the customization per user is limited to a few parameters (such as the vector database they're interacting with), consider creating a preconfigured template for the chains that can be quickly cloned or adapted per user session with minimal overhead.
65
-
66
- 4. **Efficient Retrieval Mechanism**: For the vector database retriever used in `ConversationalRetrievalChain`, ensure that the mechanism to switch between user-specific databases is optimized. This might mean having a lightweight way of switching context without needing to reload or reinitialize the entire database connection or retrieval logic.
67
-
68
- ### Implementation Example
69
-
70
- Here's an example of how we might implement a caching mechanism for `default_chain` components that are common across users:
71
-
72
- ```python
73
- # Assuming a simplified caching mechanism for demonstration
74
- chain_cache = {}
75
-
76
- def get_or_create_chain(user_id, llm):
77
- if 'default_chain' in chain_cache and 'router_chain' in chain_cache:
78
- default_chain = chain_cache['default_chain']
79
- router_chain = chain_cache['router_chain']
80
- destination_chains = chain_cache['destination_chains']
81
- else:
82
- vectordb = get_vectordb_for_user(user_id) # User-specific vector database
83
- # Configuration for default_chain, router_chain, and destination_chains as before
84
- # [...]
85
- chain_cache['default_chain'] = default_chain
86
- chain_cache['router_chain'] = router_chain
87
- chain_cache['destination_chains'] = destination_chains
88
-
89
- # Here we can adapt the chains if needed based on the user_id, for example, by adjusting the vectordb retriever
90
- # This is where user-specific adaptations occur
91
-
92
- return default_chain, router_chain, destination_chains
93
- ```
94
-
95
- ### Key Points
96
-
97
- - **Reuse and Cache**: Reuse components wherever possible, caching configurations that are static or common across interactions.
98
- - **Minimize Dynamic Configuration**: Minimize the amount of dynamic configuration needed per interaction by using templates and parameters that can be easily switched out.
99
- - **Optimize Data Layer**: Ensure the data layer (e.g., user-specific vector databases) is optimized for quick switching or context updates to prevent it from becoming a bottleneck.
100
-
101
- Adopting these strategies will help maintain responsiveness and efficiency in our chat application, ensuring that overhead from setting up `default_chain` for each interaction is minimized.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
docs/main_class.puml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @startuml main
2
+ class FastAPI {
3
+ }
4
+
5
+ class CORSMiddleware {
6
+ }
7
+
8
+ class StaticFiles {
9
+ }
10
+
11
+ class Jinja2Templates {
12
+ }
13
+
14
+ class AdminFunctions {
15
+ }
16
+
17
+ class UserFaceEmbeddingFunction {
18
+ }
19
+
20
+ class ChromaDBFaceHelper {
21
+ }
22
+
23
+ class UserLoginRouter {
24
+ }
25
+
26
+ class UserLogoutRouter {
27
+ }
28
+
29
+ class UserChatRouter {
30
+ }
31
+
32
+ class UserUploadRouter {
33
+ }
34
+
35
+ FastAPI --> CORSMiddleware : uses
36
+ FastAPI --> StaticFiles : serves
37
+ FastAPI --> Jinja2Templates : renders
38
+ FastAPI --> AdminFunctions : utilizes
39
+ FastAPI --> UserFaceEmbeddingFunction : utilizes
40
+ FastAPI --> ChromaDBFaceHelper : utilizes
41
+ FastAPI --> UserLoginRouter : includes
42
+ FastAPI --> UserLogoutRouter : includes
43
+ FastAPI --> UserChatRouter : includes
44
+ FastAPI --> UserUploadRouter : includes
45
+ @enduml
docs/main_seq.puml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @startuml main
2
+ actor Admin
3
+ participant "FastAPI" as app
4
+ participant "AdminFunctions" as admin
5
+ participant "Jinja2Templates" as templates
6
+
7
+ Admin -> app : POST /admin/login
8
+ activate app
9
+
10
+ app -> admin : verify_admin_password(username, password)
11
+ activate admin
12
+ admin --> app : verification_result
13
+ deactivate admin
14
+
15
+ alt verification success
16
+ app -> templates : TemplateResponse("admin_register_user.html")
17
+ templates --> Admin
18
+ else verification failure
19
+ app -> templates : TemplateResponse("admin_login.html", {"error": "Invalid password"})
20
+ templates --> Admin
21
+ end
22
+
23
+ deactivate app
24
+ @enduml
docs/utils/chat_rag_class.puml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @startuml chat_rag
2
+ class PyPDFLoader {
3
+ }
4
+
5
+ class HuggingFaceEmbeddings {
6
+ }
7
+
8
+ class CharacterTextSplitter {
9
+ }
10
+
11
+ class Chroma {
12
+ }
13
+
14
+ class LlamaCpp {
15
+ }
16
+
17
+ class LlamaModelSingleton {
18
+ {static} get_instance()
19
+ _load_llm()
20
+ }
21
+
22
+ PyPDFLoader --> HuggingFaceEmbeddings : uses
23
+ HuggingFaceEmbeddings --> CharacterTextSplitter : uses
24
+ CharacterTextSplitter --> Chroma : creates vectors
25
+ LlamaCpp ..> LlamaModelSingleton : instance
26
+ @enduml
docs/utils/chat_rag_seq.puml ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @startuml chat_rag
2
+ actor User
3
+ participant "PyPDFLoader" as loader
4
+ participant "HuggingFaceEmbeddings" as embeddings
5
+ participant "CharacterTextSplitter" as splitter
6
+ participant "Chroma" as vectordb
7
+
8
+ User -> loader : load(filename)
9
+ activate loader
10
+ loader --> User : document
11
+ deactivate loader
12
+
13
+ User -> embeddings : init(model_name)
14
+ activate embeddings
15
+ embeddings --> User : embeddings
16
+ deactivate embeddings
17
+
18
+ User -> splitter : split_documents(document)
19
+ activate splitter
20
+ splitter --> User : document_chunks
21
+ deactivate splitter
22
+
23
+ User -> vectordb : from_documents(document_chunks, embeddings)
24
+ activate vectordb
25
+ vectordb --> User : vectordb
26
+ deactivate vectordb
27
+ @enduml
docs/utils/db_class.puml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @startuml db
2
+ class TinyDBHelper {
3
+ -db : TinyDB
4
+ -tokens_table
5
+ +insert_token(user_id : string, token : string, expires_at : string) : void
6
+ +query_token(user_id : string, token : string) : bool
7
+ +remove_token_by_value(token : string) : void
8
+ }
9
+
10
+ class UserFaceEmbeddingFunction {
11
+ -facenet : FaceNet
12
+ +__call__(input : Images) : Embeddings
13
+ }
14
+
15
+ class ChromaDBFaceHelper {
16
+ -client
17
+ -user_faces_db
18
+ +query_user_face(presented_face : Any, n_results : int) : dict
19
+ +print_query_results(query_results : dict) : void
20
+ }
21
+
22
+ @enduml
docs/utils/db_seq.puml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @startuml db
2
+ actor User
3
+ participant "ChromaDBFaceHelper" as helper
4
+ participant "UserFaceEmbeddingFunction" as embedder
5
+ database "Chroma DB" as db
6
+
7
+ User -> helper: query_user_face(presented_face, n_results)
8
+ activate helper
9
+
10
+ helper -> embedder: __call__(presented_face)
11
+ activate embedder
12
+ embedder -> db: query(embeddings)
13
+ activate db
14
+ db --> embedder: query_results
15
+ deactivate db
16
+
17
+ embedder --> helper: embeddings
18
+ deactivate embedder
19
+
20
+ helper --> User: query_results
21
+ deactivate helper
22
+ @enduml
docs/utils/doc_ingest_class.puml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ @startuml doc_ingest
2
+ class DocumentIngestor {
3
+ +ingest_document(file_location: str, collection_name: str): void
4
+ }
5
+
6
+ class VectorDB {
7
+ +pdf_to_vec(file_location: str, collection_name: str): VectorDB
8
+ }
9
+ DocumentIngestor --> VectorDB : uses
10
+ @enduml
docs/utils/doc_ingest_seq.puml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @startuml doc_ingest
2
+ actor User
3
+ participant "DocumentIngestor" as ingestor
4
+ participant "VectorDB" as vectordb
5
+
6
+ User -> ingestor : ingest_document(file_location, collection_name)
7
+ activate ingestor
8
+
9
+ ingestor -> vectordb : pdf_to_vec(file_location, collection_name)
10
+ activate vectordb
11
+ vectordb --> ingestor : vectordb
12
+ deactivate vectordb
13
+
14
+ ingestor --> User : "Document processed and ingested successfully"
15
+ deactivate ingestor
16
+ @enduml
docs/utils/download_model_class.puml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @startuml download_model
2
+ class ModelDownloader {
3
+ -model_file_path : string
4
+ -model_name : string
5
+ -model_dir : string
6
+ +model_file_exists_and_valid(model_file_path : string) : boolean
7
+ +write_model_path_to_txt_file(model_file_path : string) : void
8
+ +download_hf_model() : void
9
+ +download_gguf_model() : void
10
+ +download_model() : void
11
+ }
12
+ @enduml
docs/utils/download_model_seq.puml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @startuml download_model
2
+ actor User
3
+ participant "Environment" as env
4
+ participant "ModelDownloader" as downloader
5
+
6
+ User -> downloader: download_model()
7
+ activate downloader
8
+
9
+ downloader -> env: Get MODEL_CLASS
10
+ activate env
11
+ env --> downloader: 'hf' or 'gguf'
12
+ deactivate env
13
+
14
+ alt MODEL_CLASS is 'hf'
15
+ downloader -> downloader: download_hf_model()
16
+ activate downloader
17
+ downloader -> downloader: model_file_exists_and_valid()
18
+ downloader -> downloader: write_model_path_to_txt_file()
19
+ deactivate downloader
20
+ else MODEL_CLASS is 'gguf'
21
+ downloader -> downloader: download_gguf_model()
22
+ activate downloader
23
+ downloader -> downloader: model_file_exists_and_valid()
24
+ downloader -> downloader: write_model_path_to_txt_file()
25
+ deactivate downloader
26
+ else Unsupported model class
27
+ downloader -> User: Print "Unsupported model class"
28
+ end
29
+
30
+ deactivate downloader
31
+ @enduml
docs/utils/ec_image_utils_class.puml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ @startuml ec_image_utils
2
+ class ImageUtils {
3
+ {static} load_image(filename : string) : image
4
+ {static} normalize(img : image) : image
5
+ {static} detect_faces_with_mtcnn(img : image) : List[boundingBox]
6
+ {static} crop_faces_to_160x160(img : image, bounding_boxes : List[boundingBox]) : imageArray
7
+ {static} get_user_cropped_image_from_photo(filename : string) : image
8
+ }
9
+ @enduml
docs/utils/ec_image_utils_seq.puml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @startuml ec_image_utils
2
+ actor User
3
+ participant "ImageUtils" as utils
4
+
5
+ User -> utils : get_user_cropped_image_from_photo(filename)
6
+ activate utils
7
+
8
+ utils -> utils : load_image(filename)
9
+ activate utils
10
+ utils --> utils : img
11
+ deactivate utils
12
+
13
+ utils -> utils : detect_faces_with_mtcnn(img)
14
+ activate utils
15
+ utils --> utils : bounding_boxes
16
+ deactivate utils
17
+
18
+ utils -> utils : crop_faces_to_160x160(img, bounding_boxes)
19
+ activate utils
20
+ utils --> utils : cropped_faces
21
+ deactivate utils
22
+
23
+ utils --> User : cropped_face[0]
24
+ deactivate utils
25
+
26
+ @enduml
docs/utils/jwt_utils_class.puml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ @startuml jwt_utils
2
+ class JWTUtils {
3
+ +encode_jwt(data: dict): string
4
+ +decode_jwt(token: string): dict
5
+ +create_access_token(data: dict, expires_delta: Union[timedelta, None]): string
6
+ }
7
+ @enduml
docs/utils/jwt_utils_seq.puml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @startuml jwt_utils
2
+ actor Client
3
+ participant "JWTUtils" as utils
4
+
5
+ Client -> utils : create_access_token(data, expires_delta)
6
+ activate utils
7
+
8
+ utils -> utils : encode_jwt(data)
9
+ activate utils
10
+ utils -> utils : Setting expiration
11
+ utils --> utils : encoded_jwt
12
+ deactivate utils
13
+
14
+ utils --> Client : encoded_jwt
15
+ deactivate utils
16
+ @enduml