renamed method in processing
Browse files- app/engine/processing.py +1 -1
- app/engine/vectorstore.py +0 -1
- app/main.py +1 -42
app/engine/processing.py
CHANGED
@@ -42,7 +42,7 @@ def process_pdf(filepath:str) -> dict:
|
|
42 |
logger.info(f"Successfully vectorized PDF content")
|
43 |
return new_content
|
44 |
|
45 |
-
def
|
46 |
|
47 |
ans = finrag_vectorstore.hybrid_search(query=question, limit=3, alpha=0.8)
|
48 |
return ans
|
|
|
42 |
logger.info(f"Successfully vectorized PDF content")
|
43 |
return new_content
|
44 |
|
45 |
+
def vector_search(question:str) -> List[str]:
|
46 |
|
47 |
ans = finrag_vectorstore.hybrid_search(query=question, limit=3, alpha=0.8)
|
48 |
return ans
|
app/engine/vectorstore.py
CHANGED
@@ -62,7 +62,6 @@ class VectorStore:
|
|
62 |
self.client = WeaviateWCS(endpoint=self.url,
|
63 |
api_key=self.api_key,
|
64 |
model_name_or_path=self.model_path)
|
65 |
-
print('After client creation')
|
66 |
|
67 |
except Exception as e:
|
68 |
# raise Exception(f"Could not create Weaviate client: {e}")
|
|
|
62 |
self.client = WeaviateWCS(endpoint=self.url,
|
63 |
api_key=self.api_key,
|
64 |
model_name_or_path=self.model_path)
|
|
|
65 |
|
66 |
except Exception as e:
|
67 |
# raise Exception(f"Could not create Weaviate client: {e}")
|
app/main.py
CHANGED
@@ -32,8 +32,6 @@ if environment == "dev":
|
|
32 |
)
|
33 |
|
34 |
try:
|
35 |
-
# will not work on HuggingFace
|
36 |
-
# and Liquidity doesn't have the env anyway
|
37 |
load_dotenv(find_dotenv('env'))
|
38 |
|
39 |
except Exception as e:
|
@@ -56,7 +54,7 @@ def read_root():
|
|
56 |
def ping():
|
57 |
""" Testing """
|
58 |
logger.info("Someone is pinging the server")
|
59 |
-
return {"answer": str(random.random() * 100)}
|
60 |
|
61 |
|
62 |
@app.delete("/erase_data/")
|
@@ -168,14 +166,6 @@ async def ragit(question: Question):
|
|
168 |
except Exception as e:
|
169 |
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
|
170 |
|
171 |
-
|
172 |
-
# TODO
|
173 |
-
# reject searches with a search score below a threshold
|
174 |
-
# scrape the tables (and find a way to reject them from the text search -> LLamaparse)
|
175 |
-
# see why the filename in search results is always empty
|
176 |
-
# -> add it to the search results to avoid confusion (between Google and Amazon, for instance)
|
177 |
-
# add python scripts to create index, rag etc
|
178 |
-
|
179 |
if __name__ == '__main__':
|
180 |
import uvicorn
|
181 |
from os import getenv
|
@@ -198,34 +188,3 @@ if __name__ == '__main__':
|
|
198 |
|
199 |
# curl -X POST http://localhost:80/ask/ -H "Content-Type: application/json" -d '{"question": "what is Amazon loss"}'
|
200 |
# curl -X POST http://localhost:80/ragit/ -H "Content-Type: application/json" -d '{"question": "Does ATT have postpaid phone customers?"}'
|
201 |
-
|
202 |
-
|
203 |
-
# TODO
|
204 |
-
# import unittest
|
205 |
-
# from unitesting_utils import load_impact_theory_data
|
206 |
-
|
207 |
-
# class TestSplitContents(unittest.TestCase):
|
208 |
-
# '''
|
209 |
-
# Unit test to ensure proper functionality of split_contents function
|
210 |
-
# '''
|
211 |
-
|
212 |
-
# def test_split_contents(self):
|
213 |
-
# import tiktoken
|
214 |
-
# from llama_index.text_splitter import SentenceSplitter
|
215 |
-
|
216 |
-
# data = load_impact_theory_data()
|
217 |
-
|
218 |
-
# subset = data[:3]
|
219 |
-
# chunk_size = 256
|
220 |
-
# chunk_overlap = 0
|
221 |
-
# encoding = tiktoken.encoding_for_model('gpt-3.5-turbo-0613')
|
222 |
-
# gpt35_txt_splitter = SentenceSplitter(chunk_size=chunk_size, tokenizer=encoding.encode, chunk_overlap=chunk_overlap)
|
223 |
-
# results = split_contents(subset, gpt35_txt_splitter)
|
224 |
-
# self.assertEqual(len(results), 3)
|
225 |
-
# self.assertEqual(len(results[0]), 83)
|
226 |
-
# self.assertEqual(len(results[1]), 178)
|
227 |
-
# self.assertEqual(len(results[2]), 144)
|
228 |
-
# self.assertTrue(isinstance(results, list))
|
229 |
-
# self.assertTrue(isinstance(results[0], list))
|
230 |
-
# self.assertTrue(isinstance(results[0][0], str))
|
231 |
-
# unittest.TextTestRunner().run(unittest.TestLoader().loadTestsFromTestCase(TestSplitContents))
|
|
|
32 |
)
|
33 |
|
34 |
try:
|
|
|
|
|
35 |
load_dotenv(find_dotenv('env'))
|
36 |
|
37 |
except Exception as e:
|
|
|
54 |
def ping():
|
55 |
""" Testing """
|
56 |
logger.info("Someone is pinging the server")
|
57 |
+
return {"answer": str(int(random.random() * 100))}
|
58 |
|
59 |
|
60 |
@app.delete("/erase_data/")
|
|
|
166 |
except Exception as e:
|
167 |
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
|
168 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
169 |
if __name__ == '__main__':
|
170 |
import uvicorn
|
171 |
from os import getenv
|
|
|
188 |
|
189 |
# curl -X POST http://localhost:80/ask/ -H "Content-Type: application/json" -d '{"question": "what is Amazon loss"}'
|
190 |
# curl -X POST http://localhost:80/ragit/ -H "Content-Type: application/json" -d '{"question": "Does ATT have postpaid phone customers?"}'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|