abadesalex committed on
Commit
714be4e
1 Parent(s): 6f1a9e1
Api/app/__pycache__/main.cpython-310.pyc CHANGED
Binary files a/Api/app/__pycache__/main.cpython-310.pyc and b/Api/app/__pycache__/main.cpython-310.pyc differ
 
Api/app/main.py CHANGED
@@ -1,13 +1,17 @@
 
 
1
  import uvicorn
2
  from fastapi import APIRouter, FastAPI
3
  from fastapi.middleware.cors import CORSMiddleware
4
  from fastapi.responses import FileResponse
5
  from fastapi.staticfiles import StaticFiles
6
 
7
- from app.modules.querySearch.routes.querySearch_route import \
8
- router as query_search_routes
9
- from app.modules.uploadDocument.routes.uploadDocument_route import \
10
- router as upload_file_routes
 
 
11
 
12
  app = FastAPI()
13
 
@@ -41,12 +45,21 @@ async def root():
41
  # Serve static files from the 'out/_next/static' directory
42
  app.mount("/_next/static", StaticFiles(directory="app/out/_next/static"), name="static")
43
 
 
44
  # Serve the main index.html
45
  @app.get("/")
46
  def read_root():
47
  return FileResponse("app/out/index.html")
48
 
49
 
 
 
 
 
 
 
 
 
50
 
51
 
52
  app.include_router(app_router)
 
1
+ import logging
2
+
3
  import uvicorn
4
  from fastapi import APIRouter, FastAPI
5
  from fastapi.middleware.cors import CORSMiddleware
6
  from fastapi.responses import FileResponse
7
  from fastapi.staticfiles import StaticFiles
8
 
9
+ from app.modules.querySearch.routes.querySearch_route import (
10
+ router as query_search_routes,
11
+ )
12
+ from app.modules.uploadDocument.routes.uploadDocument_route import (
13
+ router as upload_file_routes,
14
+ )
15
 
16
  app = FastAPI()
17
 
 
45
  # Serve static files from the 'out/_next/static' directory
46
  app.mount("/_next/static", StaticFiles(directory="app/out/_next/static"), name="static")
47
 
48
+
49
  # Serve the main index.html
50
  @app.get("/")
51
  def read_root():
52
  return FileResponse("app/out/index.html")
53
 
54
 
55
@app.on_event("startup")
async def startup_event():
    """Emit an informational log record when the application starts."""
    startup_message = "Application is starting up..."
    logging.info(startup_message)
58
+
59
+
60
@app.on_event("shutdown")
async def shutdown_event():
    """Emit an informational log record when the application shuts down."""
    shutdown_message = "Application is shutting down..."
    logging.info(shutdown_message)
63
 
64
 
65
  app.include_router(app_router)
Api/app/modules/__pycache__/model.cpython-310.pyc ADDED
Binary file (377 Bytes). View file
 
Api/app/modules/model.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ from transformers import pipeline
3
+
4
+
5
+ model = SentenceTransformer("paraphrase-MiniLM-L6-v2")
6
+ qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
Api/app/modules/querySearch/features/__pycache__/querySearch_feature.cpython-310.pyc CHANGED
Binary files a/Api/app/modules/querySearch/features/__pycache__/querySearch_feature.cpython-310.pyc and b/Api/app/modules/querySearch/features/__pycache__/querySearch_feature.cpython-310.pyc differ
 
Api/app/modules/querySearch/features/querySearch_feature.py CHANGED
@@ -2,9 +2,9 @@ import json
2
  from typing import List, Tuple
3
 
4
  import numpy as np
5
- from fastapi.responses import JSONResponse
6
- from sentence_transformers import SentenceTransformer
7
- from transformers import pipeline
8
 
9
  from app.db_local_storage.files_db import VECTOR_FILES_DIRECTORY
10
  from app.db_local_storage.in_memory_db import query_response_storage
@@ -45,7 +45,6 @@ class QuerySearchFeature:
45
 
46
  query_response_storage.append(response_query)
47
 
48
-
49
  return {
50
  "message": response["answer"],
51
  "context_used": context,
@@ -56,7 +55,7 @@ class QuerySearchFeature:
56
  async def semantic_search(
57
  query: str, chunks: List[str], embeddings: np.ndarray, model
58
  ) -> List[str]:
59
- query_embedding = model.encode([query])
60
  similarities = np.dot(embeddings, query_embedding.T).flatten()
61
  top_indices = np.argsort(-similarities)[:3]
62
  return [chunks[i] for i in top_indices]
 
2
  from typing import List, Tuple
3
 
4
  import numpy as np
5
+ # from fastapi.responses import JSONResponse
6
+ # from sentence_transformers import SentenceTransformer
7
+ # from transformers import pipeline
8
 
9
  from app.db_local_storage.files_db import VECTOR_FILES_DIRECTORY
10
  from app.db_local_storage.in_memory_db import query_response_storage
 
45
 
46
  query_response_storage.append(response_query)
47
 
 
48
  return {
49
  "message": response["answer"],
50
  "context_used": context,
 
55
  async def semantic_search(
56
  query: str, chunks: List[str], embeddings: np.ndarray, model
57
  ) -> List[str]:
58
+ query_embedding = model.encode([query], clean_up_tokenization_spaces=False)
59
  similarities = np.dot(embeddings, query_embedding.T).flatten()
60
  top_indices = np.argsort(-similarities)[:3]
61
  return [chunks[i] for i in top_indices]
Api/app/modules/querySearch/routes/__pycache__/querySearch_route.cpython-310.pyc CHANGED
Binary files a/Api/app/modules/querySearch/routes/__pycache__/querySearch_route.cpython-310.pyc and b/Api/app/modules/querySearch/routes/__pycache__/querySearch_route.cpython-310.pyc differ
 
Api/app/modules/querySearch/routes/querySearch_route.py CHANGED
@@ -1,19 +1,19 @@
1
  from fastapi import APIRouter, Form, HTTPException
2
  from fastapi.responses import JSONResponse
3
- from sentence_transformers import SentenceTransformer
4
- from transformers import pipeline
5
 
6
- from app.modules.querySearch.features.querySearch_feature import QuerySearchFeature
7
  from app.modules.querySearch.controllers.querySearch_controller import (
8
  QuerySearchController,
9
  )
10
- from app.db_local_storage.in_memory_db import query_response_storage
11
-
12
 
13
  router = APIRouter()
14
 
15
- model = SentenceTransformer("paraphrase-MiniLM-L6-v2")
16
- qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
17
 
18
  querySearchFeature = QuerySearchFeature(model, qa_pipeline)
19
  querySearchController = QuerySearchController(querySearchFeature)
 
1
  from fastapi import APIRouter, Form, HTTPException
2
  from fastapi.responses import JSONResponse
3
+ # from sentence_transformers import SentenceTransformer
4
+ # from transformers import pipeline
5
 
6
+ from app.db_local_storage.in_memory_db import query_response_storage
7
  from app.modules.querySearch.controllers.querySearch_controller import (
8
  QuerySearchController,
9
  )
10
+ from app.modules.model import model, qa_pipeline
11
+ from app.modules.querySearch.features.querySearch_feature import QuerySearchFeature
12
 
13
  router = APIRouter()
14
 
15
+ # model = SentenceTransformer("paraphrase-MiniLM-L6-v2")
16
+ # qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
17
 
18
  querySearchFeature = QuerySearchFeature(model, qa_pipeline)
19
  querySearchController = QuerySearchController(querySearchFeature)
Api/app/modules/uploadDocument/features/__pycache__/createEmbeddings_feature.cpython-310.pyc CHANGED
Binary files a/Api/app/modules/uploadDocument/features/__pycache__/createEmbeddings_feature.cpython-310.pyc and b/Api/app/modules/uploadDocument/features/__pycache__/createEmbeddings_feature.cpython-310.pyc differ
 
Api/app/modules/uploadDocument/features/__pycache__/extractText_feature.cpython-310.pyc CHANGED
Binary files a/Api/app/modules/uploadDocument/features/__pycache__/extractText_feature.cpython-310.pyc and b/Api/app/modules/uploadDocument/features/__pycache__/extractText_feature.cpython-310.pyc differ
 
Api/app/modules/uploadDocument/features/__pycache__/uploadDocument_feature.cpython-310.pyc CHANGED
Binary files a/Api/app/modules/uploadDocument/features/__pycache__/uploadDocument_feature.cpython-310.pyc and b/Api/app/modules/uploadDocument/features/__pycache__/uploadDocument_feature.cpython-310.pyc differ
 
Api/app/modules/uploadDocument/features/createEmbeddings_feature.py CHANGED
@@ -1,27 +1,21 @@
1
  import json
2
  from typing import List
3
 
4
- from sentence_transformers import SentenceTransformer
5
-
6
- from app.db_local_storage.files_db import VECTOR_FILES_DIRECTORY
7
  from app.db_local_storage.vector_files_db import vector_files_db as EMBEDDING_DATA
 
8
 
9
 
10
  class CreateEmbeddingsFeature:
11
 
12
  @staticmethod
13
- def chunk_text(text: str, chunk_size: int = 512) -> List[str]:
14
  chunks = [text[i : i + chunk_size] for i in range(0, len(text), chunk_size)]
15
  return chunks
16
 
17
  @staticmethod
18
  async def create_embeddings(text: str, filename: str) -> List:
19
- # TODO: Check model
20
- model = SentenceTransformer("paraphrase-MiniLM-L6-v2")
21
- chunks = CreateEmbeddingsFeature.chunk_text(text)
22
 
23
- # with open(VECTOR_FILES_DIRECTORY, "r") as file:
24
- # EMBEDDING_DATA = json.load(file)
25
 
26
  id = len(EMBEDDING_DATA) + 1
27
  docoument_index = f"document_{id}"
@@ -31,7 +25,7 @@ class CreateEmbeddingsFeature:
31
  }
32
 
33
  for i, chunk in enumerate(chunks):
34
- embedding = model.encode(chunk).tolist()
35
  embedding_entry = {
36
  "embedding": embedding,
37
  "metadata": {
@@ -42,9 +36,4 @@ class CreateEmbeddingsFeature:
42
  }
43
  EMBEDDING_DATA[docoument_index]["data"].append(embedding_entry)
44
 
45
- # print(EMBEDDING_DATA)
46
-
47
- # with open(VECTOR_FILES_DIRECTORY, "w") as f:
48
- # json.dump(EMBEDDING_DATA, f)
49
-
50
  return
 
1
  import json
2
  from typing import List
3
 
 
 
 
4
  from app.db_local_storage.vector_files_db import vector_files_db as EMBEDDING_DATA
5
+ from app.modules.model import model, qa_pipeline
6
 
7
 
8
  class CreateEmbeddingsFeature:
9
 
10
  @staticmethod
11
+ async def chunk_text(text: str, chunk_size: int = 512) -> List[str]:
12
  chunks = [text[i : i + chunk_size] for i in range(0, len(text), chunk_size)]
13
  return chunks
14
 
15
  @staticmethod
16
  async def create_embeddings(text: str, filename: str) -> List:
 
 
 
17
 
18
+ chunks = CreateEmbeddingsFeature.chunk_text(text)
 
19
 
20
  id = len(EMBEDDING_DATA) + 1
21
  docoument_index = f"document_{id}"
 
25
  }
26
 
27
  for i, chunk in enumerate(chunks):
28
+ embedding = model.encode(chunk, clean_up_tokenization_spaces=False).tolist()
29
  embedding_entry = {
30
  "embedding": embedding,
31
  "metadata": {
 
36
  }
37
  EMBEDDING_DATA[docoument_index]["data"].append(embedding_entry)
38
 
 
 
 
 
 
39
  return
Api/app/modules/uploadDocument/features/extractText_feature.py CHANGED
@@ -10,27 +10,27 @@ from app.db_local_storage.documents_db import documents_text
10
 
11
  class ExtractTextFeature:
12
 
13
- @staticmethod
14
- def ensure_directory_exists(folder_path: str) -> None:
15
- """Ensure that the directory exists."""
16
- if not os.path.exists(folder_path):
17
- os.makedirs(folder_path)
18
-
19
- @staticmethod
20
- async def saveFile(content, filename, directory: str) -> str:
21
- """Save the uploaded file to the specified directory."""
22
- file_path = os.path.join(directory, filename)
23
- with open(file_path, "w") as file:
24
- file.write(content)
25
- return file_path
26
-
27
- @staticmethod
28
- async def save_text_from_pdf(file: UploadFile, text) -> str:
29
-
30
- ExtractTextFeature.ensure_directory_exists(TEXT_FILES_DIRECTORY)
31
- await ExtractTextFeature.saveFile(text, file.filename, TEXT_FILES_DIRECTORY)
32
-
33
- return text
34
 
35
  @staticmethod
36
  async def extract_text_from_pdf(file: UploadFile) -> str:
 
10
 
11
  class ExtractTextFeature:
12
 
13
+ # @staticmethod
14
+ # def ensure_directory_exists(folder_path: str) -> None:
15
+ # """Ensure that the directory exists."""
16
+ # if not os.path.exists(folder_path):
17
+ # os.makedirs(folder_path)
18
+
19
+ # @staticmethod
20
+ # async def saveFile(content, filename, directory: str) -> str:
21
+ # """Save the uploaded file to the specified directory."""
22
+ # file_path = os.path.join(directory, filename)
23
+ # with open(file_path, "w") as file:
24
+ # file.write(content)
25
+ # return file_path
26
+
27
+ # @staticmethod
28
+ # async def save_text_from_pdf(file: UploadFile, text) -> str:
29
+
30
+ # ExtractTextFeature.ensure_directory_exists(TEXT_FILES_DIRECTORY)
31
+ # await ExtractTextFeature.saveFile(text, file.filename, TEXT_FILES_DIRECTORY)
32
+
33
+ # return text
34
 
35
  @staticmethod
36
  async def extract_text_from_pdf(file: UploadFile) -> str:
Api/app/modules/uploadDocument/features/uploadDocument_feature.py CHANGED
@@ -8,30 +8,30 @@ from app.db_local_storage.documents_db import documents_db
8
 
9
  class UploadDocumentFeature:
10
 
11
- @staticmethod
12
- def ensure_directory_exists(folder_path: str) -> None:
13
- """Ensure that the directory exists."""
14
- if not os.path.exists(folder_path):
15
- os.makedirs(folder_path)
16
-
17
- @staticmethod
18
- async def saveFile(document: UploadFile, directory: str) -> str:
19
- """Save the uploaded file to the specified directory."""
20
- file_path = os.path.join(directory, document.filename)
21
- with open(file_path, "wb") as file:
22
- content = await document.read()
23
- file.write(content)
24
- return file
25
-
26
- @staticmethod
27
- async def SaveFileMemory(document: UploadFile) -> str:
28
- """Save the uploaded file to memory."""
29
- UploadDocumentFeature.ensure_directory_exists(FILES_DIRECTORY)
30
- id = len(FILES_NAMES_DATABASE) + 1
31
- FILES_NAMES_DATABASE[id] = document.filename
32
-
33
- file = await UploadDocumentFeature.saveFile(document, FILES_DIRECTORY)
34
- return file
35
 
36
  @staticmethod
37
  async def uploadFile(document: UploadFile) -> Dict[str, str]:
 
8
 
9
  class UploadDocumentFeature:
10
 
11
+ # @staticmethod
12
+ # def ensure_directory_exists(folder_path: str) -> None:
13
+ # """Ensure that the directory exists."""
14
+ # if not os.path.exists(folder_path):
15
+ # os.makedirs(folder_path)
16
+
17
+ # @staticmethod
18
+ # async def saveFile(document: UploadFile, directory: str) -> str:
19
+ # """Save the uploaded file to the specified directory."""
20
+ # file_path = os.path.join(directory, document.filename)
21
+ # with open(file_path, "wb") as file:
22
+ # content = await document.read()
23
+ # file.write(content)
24
+ # return file
25
+
26
+ # @staticmethod
27
+ # async def SaveFileMemory(document: UploadFile) -> str:
28
+ # """Save the uploaded file to memory."""
29
+ # UploadDocumentFeature.ensure_directory_exists(FILES_DIRECTORY)
30
+ # id = len(FILES_NAMES_DATABASE) + 1
31
+ # FILES_NAMES_DATABASE[id] = document.filename
32
+
33
+ # file = await UploadDocumentFeature.saveFile(document, FILES_DIRECTORY)
34
+ # return file
35
 
36
  @staticmethod
37
  async def uploadFile(document: UploadFile) -> Dict[str, str]:
App/my-app/src/services/api/api.config.js CHANGED
@@ -1,4 +1,4 @@
1
- // const baseUrl = "http://localhost:8000/api";
2
- const baseUrl = "https://abadesalex-docurag.hf.space/api";
3
 
4
  export default baseUrl;
 
1
+ const baseUrl = "http://localhost:8000/api";
2
+ // const baseUrl = "https://abadesalex-docurag.hf.space/api";
3
 
4
  export default baseUrl;