fixed call to main_reflex with uvicorn
Browse files- Dockerfile +1 -1
- app/main.py +93 -53
Dockerfile
CHANGED
@@ -48,4 +48,4 @@ RUN guardrails hub install hub://guardrails/detect_pii
|
|
48 |
RUN guardrails hub install hub://guardrails/qa_relevance_llm_eval
|
49 |
|
50 |
|
51 |
-
CMD ["uvicorn", "app.
|
|
|
48 |
RUN guardrails hub install hub://guardrails/qa_relevance_llm_eval
|
49 |
|
50 |
|
51 |
+
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
app/main.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
|
2 |
import os, random, logging, pickle, shutil
|
3 |
from dotenv import load_dotenv, find_dotenv
|
4 |
from typing import Optional
|
@@ -7,15 +6,21 @@ from pydantic import BaseModel, Field
|
|
7 |
from fastapi import FastAPI, HTTPException, File, UploadFile, status
|
8 |
from fastapi.responses import HTMLResponse
|
9 |
from fastapi.middleware.cors import CORSMiddleware
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
-
from engine.
|
12 |
-
from rag.rag import rag_it
|
13 |
-
|
14 |
-
from engine.logger import logger
|
15 |
|
16 |
-
from settings import datadir
|
17 |
|
18 |
-
|
19 |
|
20 |
app = FastAPI()
|
21 |
|
@@ -37,74 +42,83 @@ try:
|
|
37 |
except Exception as e:
|
38 |
pass
|
39 |
|
40 |
-
|
41 |
-
@app.get("/", response_class=HTMLResponse)
|
42 |
def read_root():
|
43 |
-
logger
|
44 |
return """
|
45 |
<html>
|
46 |
<body>
|
47 |
-
<h1>Welcome to
|
48 |
</body>
|
49 |
</html>
|
50 |
"""
|
51 |
|
52 |
-
|
53 |
-
@app.get("/ping/")
|
54 |
def ping():
|
55 |
""" Testing """
|
56 |
-
logger
|
57 |
return {"answer": str(int(random.random() * 100))}
|
58 |
|
59 |
|
60 |
-
@app.delete("/erase_data/")
|
61 |
def erase_data():
|
62 |
-
""" Erase all files in the data directory
|
|
|
|
|
|
|
|
|
63 |
if len(os.listdir(datadir)) == 0:
|
64 |
-
logger
|
65 |
return {"message": "No data to erase"}
|
66 |
|
67 |
-
|
68 |
-
os.
|
69 |
-
|
|
|
|
|
|
|
|
|
|
|
70 |
return {"message": "All data has been erased"}
|
71 |
|
72 |
|
73 |
-
@app.delete("/empty_collection/")
|
74 |
def delete_vectors():
|
75 |
""" Empty the collection in the vector store """
|
76 |
try:
|
77 |
status = empty_collection()
|
78 |
-
return {
|
79 |
except Exception as e:
|
80 |
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
|
81 |
|
82 |
-
|
|
|
83 |
def list_files():
|
84 |
""" List all files in the data directory """
|
|
|
85 |
files = os.listdir(datadir)
|
86 |
-
logger
|
87 |
return {"files": files}
|
88 |
|
89 |
|
90 |
-
@app.post("/upload/")
|
91 |
# @limiter.limit("5/minute") see 'slowapi' for rate limiting
|
92 |
async def upload_file(file: UploadFile = File(...)):
|
93 |
""" Uploads a file in data directory, for later indexing """
|
94 |
try:
|
95 |
filepath = os.path.join(datadir, file.filename)
|
96 |
-
logger
|
97 |
if os.path.exists(filepath):
|
98 |
-
logger
|
99 |
return {"message": f"File {file.filename} already exists: no processing done"}
|
100 |
|
101 |
else:
|
102 |
-
logger
|
103 |
contents = await file.read()
|
104 |
-
logger
|
105 |
|
106 |
except Exception as e:
|
107 |
-
logger
|
108 |
return {"message": f"Error during file upload: {str(e)}"}
|
109 |
|
110 |
if file.filename.endswith('.pdf'):
|
@@ -112,9 +126,14 @@ async def upload_file(file: UploadFile = File(...)):
|
|
112 |
# let's save the file in /data even if it's temp storage on HF
|
113 |
with open(filepath, 'wb') as f:
|
114 |
f.write(contents)
|
|
|
|
|
|
|
|
|
|
|
115 |
|
116 |
try:
|
117 |
-
logger
|
118 |
new_content = process_pdf(filepath)
|
119 |
success = {"message": f"Successfully uploaded {file.filename}"}
|
120 |
success.update(new_content)
|
@@ -122,15 +141,35 @@ async def upload_file(file: UploadFile = File(...)):
|
|
122 |
|
123 |
except Exception as e:
|
124 |
return {"message": f"Failed to extract text from PDF: {str(e)}"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
else:
|
126 |
-
return {"message": "Only PDF files are accepted"}
|
127 |
|
128 |
|
129 |
-
@app.post("/create_index/")
|
130 |
async def create_index():
|
131 |
""" Create an index for the uploaded files """
|
132 |
|
133 |
-
logger
|
134 |
try:
|
135 |
msg = index_data()
|
136 |
return {"message": msg}
|
@@ -141,50 +180,51 @@ async def create_index():
|
|
141 |
class Question(BaseModel):
|
142 |
question: str
|
143 |
|
144 |
-
@app.post("/ask/")
|
145 |
async def hybrid_search(question: Question):
|
146 |
-
logger
|
147 |
try:
|
148 |
search_results = vector_search(question.question)
|
149 |
-
logger
|
150 |
return {"answer": search_results}
|
151 |
except Exception as e:
|
152 |
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
|
153 |
|
154 |
-
|
155 |
-
@app.post("/ragit/")
|
156 |
async def ragit(question: Question):
|
157 |
-
logger
|
158 |
try:
|
159 |
-
search_results =
|
160 |
-
logger
|
161 |
|
162 |
answer = rag_it(question.question, search_results)
|
163 |
|
164 |
-
logger
|
165 |
return {"answer": answer}
|
166 |
except Exception as e:
|
167 |
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
|
168 |
|
|
|
169 |
if __name__ == '__main__':
|
170 |
import uvicorn
|
171 |
from os import getenv
|
172 |
-
port = int(getenv("PORT",
|
173 |
print(f"Starting server on port {port}")
|
174 |
reload = True if environment == "dev" else False
|
175 |
uvicorn.run("main:app", host="0.0.0.0", port=port, reload=reload)
|
176 |
|
177 |
|
178 |
-
|
179 |
# Examples:
|
180 |
-
# curl -X POST "http://localhost:
|
181 |
-
# curl -X DELETE "http://localhost:
|
182 |
-
# curl -X GET "http://localhost:
|
183 |
|
184 |
-
# hf space is at https://jpbianchi-
|
185 |
-
# code given by https://jpbianchi-
|
186 |
# Space must be public
|
187 |
-
# curl -X POST "https://jpbianchi-
|
188 |
|
189 |
-
# curl -X POST http://localhost:
|
190 |
-
# curl -X POST http://localhost:
|
|
|
|
|
|
1 |
import os, random, logging, pickle, shutil
|
2 |
from dotenv import load_dotenv, find_dotenv
|
3 |
from typing import Optional
|
|
|
6 |
from fastapi import FastAPI, HTTPException, File, UploadFile, status
|
7 |
from fastapi.responses import HTMLResponse
|
8 |
from fastapi.middleware.cors import CORSMiddleware
|
9 |
+
from app.engine.processing import ( # << creates the collection already
|
10 |
+
process_pdf,
|
11 |
+
process_txt,
|
12 |
+
index_data,
|
13 |
+
empty_collection,
|
14 |
+
vector_search,
|
15 |
+
vector_search_raw,
|
16 |
+
)
|
17 |
+
from app.rag.rag import rag_it
|
18 |
|
19 |
+
from app.engine.logger import logger
|
|
|
|
|
|
|
20 |
|
21 |
+
from app.settings import datadir, datadir2
|
22 |
|
23 |
+
EXTENSIONS = ["pdf", "txt"]
|
24 |
|
25 |
app = FastAPI()
|
26 |
|
|
|
42 |
except Exception as e:
|
43 |
pass
|
44 |
|
|
|
|
|
45 |
def read_root():
    """Log the visit and return the static HTML landing page."""
    logger("Title displayed on home page")
    landing_page = """
    <html>
        <body>
            <h1>Welcome to MultiRAG, a RAG system designed by JP Bianchi!</h1>
        </body>
    </html>
    """
    return landing_page
|
54 |
|
55 |
+
# already provided by Reflex
|
56 |
+
# @app.get("/ping/")
|
57 |
def ping():
    """Test endpoint: log the call and answer with a random integer (0-99) as a string."""
    logger("Someone is pinging the server")
    # random.random() is in [0, 1), so the truncated value is in 0..99.
    random_value = int(random.random() * 100)
    return {"answer": str(random_value)}
|
61 |
|
62 |
|
63 |
+
# @app.delete("/erase_data/")
|
64 |
def erase_data():
    """Delete the uploaded documents found directly inside ``datadir``.

    Only top-level entries are considered: ``.DS_Store`` and any name whose
    last dot-separated part (case-insensitive) is listed in ``EXTENSIONS``.
    Everything else in the directory is left in place (per the original
    author's note, that includes the vector store and the parquet file,
    which ``create_index`` is responsible for).

    Returns:
        dict: ``{"message": ...}`` saying whether anything had to be erased.
    """
    if not os.listdir(datadir):
        logger("No data to erase")
        return {"message": "No data to erase"}

    # Entries are removed one by one instead of rmtree'ing datadir:
    # the original author noted that /data can't be deleted on HF.
    for entry in os.listdir(datadir):
        suffix = entry.rpartition('.')[2].lower()
        if entry != '.DS_Store' and suffix not in EXTENSIONS:
            continue
        print(f"Removing {entry}")
        os.remove(os.path.join(datadir, entry))
        # the parquet file is not removed here, create_index does that

    logger("All data has been erased")
    return {"message": "All data has been erased"}
|
83 |
|
84 |
|
85 |
+
# @app.delete("/empty_collection/")
|
86 |
def delete_vectors():
    """Empty the collection in the vector store.

    Returns:
        dict: ``{"message": ...}`` reporting whether ``empty_collection()``
        returned a truthy value (erased) or a falsy one (NOT erased).

    Raises:
        HTTPException: with a 500 status code if ``empty_collection()`` raises.
    """
    try:
        # Do NOT name this local `status`: that would shadow the imported
        # `fastapi.status` for the whole function, and the except branch
        # below would then fail with UnboundLocalError whenever
        # empty_collection() raised before the assignment completed.
        erased = empty_collection()
        return {"message": f"Collection{'' if erased else ' NOT'} erased!"}
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e),
        ) from e
|
93 |
|
94 |
+
|
95 |
+
# @app.get("/list_files/")
|
96 |
def list_files():
    """Return the names of all entries found in the data directory.

    Returns:
        dict: ``{"files": [...]}`` with the result of ``os.listdir(datadir)``.
    """
    print("Listing files")
    entries = os.listdir(datadir)
    logger(f"Files in data directory: {entries}")
    return {"files": entries}
|
102 |
|
103 |
|
104 |
+
# @app.post("/upload/")
|
105 |
# @limiter.limit("5/minute") see 'slowapi' for rate limiting
|
106 |
async def upload_file(file: UploadFile = File(...)):
|
107 |
""" Uploads a file in data directory, for later indexing """
|
108 |
try:
|
109 |
filepath = os.path.join(datadir, file.filename)
|
110 |
+
logger(f"Fiename detected: {file.filename}")
|
111 |
if os.path.exists(filepath):
|
112 |
+
logger(f"File {file.filename} already exists: no processing done")
|
113 |
return {"message": f"File {file.filename} already exists: no processing done"}
|
114 |
|
115 |
else:
|
116 |
+
logger(f"Receiving file: {file.filename}")
|
117 |
contents = await file.read()
|
118 |
+
logger(f"File reception complete!")
|
119 |
|
120 |
except Exception as e:
|
121 |
+
logger(f"Error during file upload: {str(e)}")
|
122 |
return {"message": f"Error during file upload: {str(e)}"}
|
123 |
|
124 |
if file.filename.endswith('.pdf'):
|
|
|
126 |
# let's save the file in /data even if it's temp storage on HF
|
127 |
with open(filepath, 'wb') as f:
|
128 |
f.write(contents)
|
129 |
+
|
130 |
+
# save it also in assets/data because data can be cleared
|
131 |
+
filepath2 = os.path.join(datadir2, file.filename)
|
132 |
+
with open(filepath2, 'wb') as f:
|
133 |
+
f.write(contents)
|
134 |
|
135 |
try:
|
136 |
+
logger(f"Starting to process {file.filename}")
|
137 |
new_content = process_pdf(filepath)
|
138 |
success = {"message": f"Successfully uploaded {file.filename}"}
|
139 |
success.update(new_content)
|
|
|
141 |
|
142 |
except Exception as e:
|
143 |
return {"message": f"Failed to extract text from PDF: {str(e)}"}
|
144 |
+
|
145 |
+
elif file.filename.endswith('.txt'):
|
146 |
+
|
147 |
+
with open(filepath, 'wb') as f:
|
148 |
+
f.write(contents)
|
149 |
+
|
150 |
+
filepath2 = os.path.join(datadir2, file.filename)
|
151 |
+
with open(filepath2, 'wb') as f:
|
152 |
+
f.write(contents)
|
153 |
+
|
154 |
+
try:
|
155 |
+
logger(f"Reading {file.filename}")
|
156 |
+
new_content = process_txt(filepath)
|
157 |
+
success = {"message": f"Successfully uploaded {file.filename}"}
|
158 |
+
success.update(new_content)
|
159 |
+
return success
|
160 |
+
|
161 |
+
except Exception as e:
|
162 |
+
return {"message": f"Failed to extract text from TXT: {str(e)}"}
|
163 |
+
|
164 |
else:
|
165 |
+
return {"message": "Only PDF & txt files are accepted"}
|
166 |
|
167 |
|
168 |
+
# @app.post("/create_index/")
|
169 |
async def create_index():
|
170 |
""" Create an index for the uploaded files """
|
171 |
|
172 |
+
logger("Creating index for uploaded files")
|
173 |
try:
|
174 |
msg = index_data()
|
175 |
return {"message": msg}
|
|
|
180 |
class Question(BaseModel):
    # Request payload accepted by hybrid_search() and ragit();
    # `question` carries the user's query text.
    question: str
|
182 |
|
183 |
+
# @app.post("/ask/")
|
184 |
async def hybrid_search(question: Question):
    """Run ``vector_search`` on the question text and return its result.

    Args:
        question: request payload; only ``question.question`` is used.

    Returns:
        dict: ``{"answer": <vector_search result>}``.

    Raises:
        HTTPException: with a 500 status code if the search (or the logging
        of its result) raises.
    """
    logger(f"Processing question: {question.question}")
    try:
        hits = vector_search(question.question)
        logger(f"Answer: {hits}")
        return {"answer": hits}
    except Exception as exc:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(exc),
        )
|
192 |
|
193 |
+
|
194 |
+
# @app.post("/ragit/")
|
195 |
async def ragit(question: Question):
    """Answer a question by feeding raw search results to ``rag_it``.

    The question text is first passed to ``vector_search_raw``; the question
    and those results are then handed to ``rag_it`` to produce the answer.

    Args:
        question: request payload; only ``question.question`` is used.

    Returns:
        dict: ``{"answer": <rag_it result>}``.

    Raises:
        HTTPException: with a 500 status code if any step raises.
    """
    logger(f"Processing question: {question.question}")
    try:
        raw_hits = vector_search_raw(question.question)
        logger(f"Search results generated: {raw_hits}")

        reply = rag_it(question.question, raw_hits)

        logger(f"Answer: {reply}")
        return {"answer": reply}
    except Exception as exc:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(exc),
        )
|
207 |
|
208 |
+
|
209 |
if __name__ == '__main__':
    # Direct-execution entry point: start uvicorn on $PORT (default 8000),
    # with auto-reload enabled only when `environment` equals "dev".
    import uvicorn
    from os import getenv

    port = int(getenv("PORT", 8000))
    print(f"Starting server on port {port}")
    reload = environment == "dev"
    # NOTE(review): the Dockerfile launches "app.main:app" while this uses
    # "main:app" -- confirm which import string matches how this script is run.
    uvicorn.run("main:app", host="0.0.0.0", port=port, reload=reload)
|
216 |
|
217 |
|
|
|
218 |
# Examples:
|
219 |
+
# curl -X POST "http://localhost:8001/upload" -F "file=@<your_file>.pdf"
|
220 |
+
# curl -X DELETE "http://localhost:8001/erase_data/"
|
221 |
+
# curl -X GET "http://localhost:8001/list_files/"
|
222 |
|
223 |
+
# hf space is at https://jpbianchi-mr.hf.space/
|
224 |
+
# code given by https://jpbianchi-mr.hf.space/docs
|
225 |
# Space must be public
|
226 |
+
# curl -X POST "https://jpbianchi-mr.hf.space/upload/" -F "file=@<your_file>.pdf"
|
227 |
|
228 |
+
# curl -X POST http://localhost:8000/ask/ -H "Content-Type: application/json" -d '{"question": "what is Amazon loss"}'
|
229 |
+
# curl -X POST http://localhost:8000/ragit/ -H "Content-Type: application/json" -d '{"question": "Does ATT have postpaid phone customers?"}'
|
230 |
+
# see more in notebook upload_index.ipynb
|