abadesalex committed on
Commit
bc0b69d
1 Parent(s): 1a2a05f
Api/app/__pycache__/main.cpython-310.pyc CHANGED
Binary files a/Api/app/__pycache__/main.cpython-310.pyc and b/Api/app/__pycache__/main.cpython-310.pyc differ
 
Api/app/modules/querySearch/features/__pycache__/querySearch_feature.cpython-310.pyc CHANGED
Binary files a/Api/app/modules/querySearch/features/__pycache__/querySearch_feature.cpython-310.pyc and b/Api/app/modules/querySearch/features/__pycache__/querySearch_feature.cpython-310.pyc differ
 
Api/app/modules/querySearch/features/querySearch_feature.py CHANGED
@@ -6,10 +6,12 @@ import numpy as np
 # from sentence_transformers import SentenceTransformer
 # from transformers import pipeline
 
+from app.db_local_storage.vector_files_db import vector_files_db
 from app.db_local_storage.files_db import VECTOR_FILES_DIRECTORY
 from app.db_local_storage.in_memory_db import query_response_storage
 
 
+
 class QuerySearchFeature:
 
     def __init__(self, model, qa_pipeline):
@@ -25,7 +27,8 @@ class QuerySearchFeature:
 
         query_response_storage.append(user_query)
 
-        dataBase = await QuerySearchFeature.load_data()
+        # dataBase = await QuerySearchFeature.load_data()
+        dataBase = vector_files_db
         text_data, embeddings = await QuerySearchFeature.split_dataBase(dataBase)
 
         lexical_results = await QuerySearchFeature.lexical_search(query, text_data)
@@ -55,7 +58,7 @@ class QuerySearchFeature:
     async def semantic_search(
         query: str, chunks: List[str], embeddings: np.ndarray, model
     ) -> List[str]:
-        query_embedding = model.encode([query], clean_up_tokenization_spaces=False)
+        query_embedding = model.encode([query])
         similarities = np.dot(embeddings, query_embedding.T).flatten()
         top_indices = np.argsort(-similarities)[:3]
         return [chunks[i] for i in top_indices]
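Context for the change above: semantic_search now encodes the query without the clean_up_tokenization_spaces keyword and ranks the stored chunk embeddings by a plain dot product, keeping the top three, while the query path reads its data straight from the in-memory vector_files_db instead of load_data(). A minimal, self-contained sketch of that ranking step, assuming a SentenceTransformers-style model whose encode() returns NumPy arrays; the model name and sample chunks are placeholders, not taken from this repo:

```python
# Minimal sketch of the dot-product top-k retrieval used by semantic_search.
# Model name and sample chunks are illustrative placeholders only.
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed model, not from the repo

chunks = [
    "Invoices are stored under the uploads directory.",
    "The API exposes a query endpoint for document search.",
    "Embeddings are kept in an in-memory dict per document.",
]
embeddings = model.encode(chunks)  # shape: (num_chunks, dim)

query_embedding = model.encode(["where are invoices stored?"])  # shape: (1, dim)
similarities = np.dot(embeddings, query_embedding.T).flatten()  # one score per chunk
top_indices = np.argsort(-similarities)[:3]                     # indices of the top-3 scores
print([chunks[i] for i in top_indices])
```

Raw dot-product scores are only rank-equivalent to cosine similarity when the embeddings are normalized; if the chosen model does not normalize its outputs, normalizing the vectors (or computing cosine similarity directly) would be the usual alternative.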
Api/app/modules/uploadDocument/features/__pycache__/createEmbeddings_feature.cpython-310.pyc CHANGED
Binary files a/Api/app/modules/uploadDocument/features/__pycache__/createEmbeddings_feature.cpython-310.pyc and b/Api/app/modules/uploadDocument/features/__pycache__/createEmbeddings_feature.cpython-310.pyc differ
 
Api/app/modules/uploadDocument/features/createEmbeddings_feature.py CHANGED
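Context for the diff below: create_embeddings now awaits chunk_text() and drops the clean_up_tokenization_spaces keyword from model.encode(). A rough sketch of the per-chunk embedding flow this method implements, written synchronously for brevity; chunk_text(), EMBEDDING_DATA, and the metadata layout here are simplified assumptions rather than the repo's exact code:

```python
# Rough sketch of per-chunk embedding creation and in-memory storage.
# chunk_text(), EMBEDDING_DATA and the metadata fields are simplified assumptions.
from typing import Dict, List

from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed model, not from the repo
EMBEDDING_DATA: Dict[str, List[dict]] = {}       # stand-in for the in-memory store


def chunk_text(text: str, size: int = 200) -> List[str]:
    # naive fixed-size chunking; the repo's chunk_text() may differ
    return [text[i : i + size] for i in range(0, len(text), size)]


def create_embeddings(text: str, filename: str) -> List[dict]:
    chunks = chunk_text(text)
    document_index = f"document_{len(EMBEDDING_DATA) + 1}"

    entries = []
    for i, chunk in enumerate(chunks):
        embedding = model.encode(chunk).tolist()  # plain list of floats, JSON-serializable
        entries.append(
            {
                "embedding": embedding,
                "metadata": {"filename": filename, "chunk_index": i, "text": chunk},
            }
        )

    EMBEDDING_DATA[document_index] = entries
    return entries
```

Storing the vectors as plain Python lists keeps the in-memory store JSON-serializable; they need to be converted back into a NumPy array before the dot-product search shown earlier.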
@@ -15,7 +15,7 @@ class CreateEmbeddingsFeature:
     @staticmethod
     async def create_embeddings(text: str, filename: str) -> List:
 
-        chunks = CreateEmbeddingsFeature.chunk_text(text)
+        chunks = await CreateEmbeddingsFeature.chunk_text(text)
 
         id = len(EMBEDDING_DATA) + 1
         docoument_index = f"document_{id}"
@@ -25,7 +25,7 @@
         }
 
         for i, chunk in enumerate(chunks):
-            embedding = model.encode(chunk, clean_up_tokenization_spaces=False).tolist()
+            embedding = model.encode(chunk).tolist()
             embedding_entry = {
                 "embedding": embedding,
                 "metadata": {