JPBianchi committed on
Commit
747f388
1 Parent(s): 24f6e72

fixed call to main_reflex with uvicorn

Browse files
Files changed (2) hide show
  1. Dockerfile +1 -1
  2. app/main.py +93 -53
Dockerfile CHANGED
@@ -48,4 +48,4 @@ RUN guardrails hub install hub://guardrails/detect_pii
48
  RUN guardrails hub install hub://guardrails/qa_relevance_llm_eval
49
 
50
 
51
- CMD ["uvicorn", "app.main_reflex:app", "--host", "0.0.0.0", "--port", "7860"]
 
48
  RUN guardrails hub install hub://guardrails/qa_relevance_llm_eval
49
 
50
 
51
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
app/main.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
  import os, random, logging, pickle, shutil
3
  from dotenv import load_dotenv, find_dotenv
4
  from typing import Optional
@@ -7,15 +6,21 @@ from pydantic import BaseModel, Field
7
  from fastapi import FastAPI, HTTPException, File, UploadFile, status
8
  from fastapi.responses import HTMLResponse
9
  from fastapi.middleware.cors import CORSMiddleware
 
 
 
 
 
 
 
 
 
10
 
11
- from engine.processing import process_pdf, index_data, empty_collection, vector_search
12
- from rag.rag import rag_it
13
-
14
- from engine.logger import logger
15
 
16
- from settings import datadir
17
 
18
- os.makedirs(datadir, exist_ok=True)
19
 
20
  app = FastAPI()
21
 
@@ -37,74 +42,83 @@ try:
37
  except Exception as e:
38
  pass
39
 
40
-
41
- @app.get("/", response_class=HTMLResponse)
42
  def read_root():
43
- logger.info("Title displayed on home page")
44
  return """
45
  <html>
46
  <body>
47
- <h1>Welcome to FinExpert, a RAG system designed by JP Bianchi!</h1>
48
  </body>
49
  </html>
50
  """
51
 
52
-
53
- @app.get("/ping/")
54
  def ping():
55
  """ Testing """
56
- logger.info("Someone is pinging the server")
57
  return {"answer": str(int(random.random() * 100))}
58
 
59
 
60
- @app.delete("/erase_data/")
61
  def erase_data():
62
- """ Erase all files in the data directory, but not the vector store """
 
 
 
 
63
  if len(os.listdir(datadir)) == 0:
64
- logger.info("No data to erase")
65
  return {"message": "No data to erase"}
66
 
67
- shutil.rmtree(datadir, ignore_errors=True)
68
- os.mkdir(datadir)
69
- logger.warning("All data has been erased")
 
 
 
 
 
70
  return {"message": "All data has been erased"}
71
 
72
 
73
- @app.delete("/empty_collection/")
74
  def delete_vectors():
75
  """ Empty the collection in the vector store """
76
  try:
77
  status = empty_collection()
78
- return {f"""message": "Collection{'' if status else ' NOT'} erased!"""}
79
  except Exception as e:
80
  raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
81
 
82
- @app.get("/list_files/")
 
83
  def list_files():
84
  """ List all files in the data directory """
 
85
  files = os.listdir(datadir)
86
- logger.info(f"Files in data directory: {files}")
87
  return {"files": files}
88
 
89
 
90
- @app.post("/upload/")
91
  # @limiter.limit("5/minute") see 'slowapi' for rate limiting
92
  async def upload_file(file: UploadFile = File(...)):
93
  """ Uploads a file in data directory, for later indexing """
94
  try:
95
  filepath = os.path.join(datadir, file.filename)
96
- logger.info(f"Fiename detected: {file.filename}")
97
  if os.path.exists(filepath):
98
- logger.warning(f"File {file.filename} already exists: no processing done")
99
  return {"message": f"File {file.filename} already exists: no processing done"}
100
 
101
  else:
102
- logger.info(f"Receiving file: {file.filename}")
103
  contents = await file.read()
104
- logger.info(f"File reception complete!")
105
 
106
  except Exception as e:
107
- logger.error(f"Error during file upload: {str(e)}")
108
  return {"message": f"Error during file upload: {str(e)}"}
109
 
110
  if file.filename.endswith('.pdf'):
@@ -112,9 +126,14 @@ async def upload_file(file: UploadFile = File(...)):
112
  # let's save the file in /data even if it's temp storage on HF
113
  with open(filepath, 'wb') as f:
114
  f.write(contents)
 
 
 
 
 
115
 
116
  try:
117
- logger.info(f"Starting to process {file.filename}")
118
  new_content = process_pdf(filepath)
119
  success = {"message": f"Successfully uploaded {file.filename}"}
120
  success.update(new_content)
@@ -122,15 +141,35 @@ async def upload_file(file: UploadFile = File(...)):
122
 
123
  except Exception as e:
124
  return {"message": f"Failed to extract text from PDF: {str(e)}"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  else:
126
- return {"message": "Only PDF files are accepted"}
127
 
128
 
129
- @app.post("/create_index/")
130
  async def create_index():
131
  """ Create an index for the uploaded files """
132
 
133
- logger.info("Creating index for uploaded files")
134
  try:
135
  msg = index_data()
136
  return {"message": msg}
@@ -141,50 +180,51 @@ async def create_index():
141
  class Question(BaseModel):
142
  question: str
143
 
144
- @app.post("/ask/")
145
  async def hybrid_search(question: Question):
146
- logger.info(f"Processing question: {question.question}")
147
  try:
148
  search_results = vector_search(question.question)
149
- logger.info(f"Answer: {search_results}")
150
  return {"answer": search_results}
151
  except Exception as e:
152
  raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
153
 
154
-
155
- @app.post("/ragit/")
156
  async def ragit(question: Question):
157
- logger.info(f"Processing question: {question.question}")
158
  try:
159
- search_results = vector_search(question.question)
160
- logger.info(f"Search results generated: {search_results}")
161
 
162
  answer = rag_it(question.question, search_results)
163
 
164
- logger.info(f"Answer: {answer}")
165
  return {"answer": answer}
166
  except Exception as e:
167
  raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
168
 
 
169
  if __name__ == '__main__':
170
  import uvicorn
171
  from os import getenv
172
- port = int(getenv("PORT", 80))
173
  print(f"Starting server on port {port}")
174
  reload = True if environment == "dev" else False
175
  uvicorn.run("main:app", host="0.0.0.0", port=port, reload=reload)
176
 
177
 
178
-
179
  # Examples:
180
- # curl -X POST "http://localhost:80/upload" -F "file=@<your_file>.pdf"
181
- # curl -X DELETE "http://localhost:80/erase_data/"
182
- # curl -X GET "http://localhost:80/list_files/"
183
 
184
- # hf space is at https://jpbianchi-finrag.hf.space/
185
- # code given by https://jpbianchi-finrag.hf.space/docs
186
  # Space must be public
187
- # curl -X POST "https://jpbianchi-finrag.hf.space/upload/" -F "file=@<your_file>.pdf"
188
 
189
- # curl -X POST http://localhost:80/ask/ -H "Content-Type: application/json" -d '{"question": "what is Amazon loss"}'
190
- # curl -X POST http://localhost:80/ragit/ -H "Content-Type: application/json" -d '{"question": "Does ATT have postpaid phone customers?"}'
 
 
 
1
  import os, random, logging, pickle, shutil
2
  from dotenv import load_dotenv, find_dotenv
3
  from typing import Optional
 
6
  from fastapi import FastAPI, HTTPException, File, UploadFile, status
7
  from fastapi.responses import HTMLResponse
8
  from fastapi.middleware.cors import CORSMiddleware
9
+ from app.engine.processing import ( # << creates the collection already
10
+ process_pdf,
11
+ process_txt,
12
+ index_data,
13
+ empty_collection,
14
+ vector_search,
15
+ vector_search_raw,
16
+ )
17
+ from app.rag.rag import rag_it
18
 
19
+ from app.engine.logger import logger
 
 
 
20
 
21
+ from app.settings import datadir, datadir2
22
 
23
+ EXTENSIONS = ["pdf", "txt"]
24
 
25
  app = FastAPI()
26
 
 
42
  except Exception as e:
43
  pass
44
 
 
 
45
  def read_root():
46
+ logger("Title displayed on home page")
47
  return """
48
  <html>
49
  <body>
50
+ <h1>Welcome to MultiRAG, a RAG system designed by JP Bianchi!</h1>
51
  </body>
52
  </html>
53
  """
54
 
55
+ # already provided by Reflex
56
+ # @app.get("/ping/")
57
  def ping():
58
  """ Testing """
59
+ logger("Someone is pinging the server")
60
  return {"answer": str(int(random.random() * 100))}
61
 
62
 
63
+ # @app.delete("/erase_data/")
64
  def erase_data():
65
+ """ Erase all files in the data directory at the first level only,
66
+ (in case we would like to use it for something else)
67
+ but not the vector store or the parquet file.
68
+ We can do it since the embeddings are in the parquet file already.
69
+ """
70
  if len(os.listdir(datadir)) == 0:
71
+ logger("No data to erase")
72
  return {"message": "No data to erase"}
73
 
74
+ # if we try to rmtree datadir, it looks like /data can't be deleted on HF
75
+ for f in os.listdir(datadir):
76
+ if f == '.DS_Store' or f.split('.')[-1].lower() in EXTENSIONS:
77
+ print(f"Removing {f}")
78
+ os.remove(os.path.join(datadir, f))
79
+ # we don't remove the parquet file, create_index does that
80
+
81
+ logger("All data has been erased")
82
  return {"message": "All data has been erased"}
83
 
84
 
85
+ # @app.delete("/empty_collection/")
86
  def delete_vectors():
87
  """ Empty the collection in the vector store """
88
  try:
89
  status = empty_collection()
90
+ return {"message": f"Collection{'' if status else ' NOT'} erased!"}
91
  except Exception as e:
92
  raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
93
 
94
+
95
+ # @app.get("/list_files/")
96
  def list_files():
97
  """ List all files in the data directory """
98
+ print("Listing files")
99
  files = os.listdir(datadir)
100
+ logger(f"Files in data directory: {files}")
101
  return {"files": files}
102
 
103
 
104
+ # @app.post("/upload/")
105
  # @limiter.limit("5/minute") see 'slowapi' for rate limiting
106
  async def upload_file(file: UploadFile = File(...)):
107
  """ Uploads a file in data directory, for later indexing """
108
  try:
109
  filepath = os.path.join(datadir, file.filename)
110
+ logger(f"Fiename detected: {file.filename}")
111
  if os.path.exists(filepath):
112
+ logger(f"File {file.filename} already exists: no processing done")
113
  return {"message": f"File {file.filename} already exists: no processing done"}
114
 
115
  else:
116
+ logger(f"Receiving file: {file.filename}")
117
  contents = await file.read()
118
+ logger(f"File reception complete!")
119
 
120
  except Exception as e:
121
+ logger(f"Error during file upload: {str(e)}")
122
  return {"message": f"Error during file upload: {str(e)}"}
123
 
124
  if file.filename.endswith('.pdf'):
 
126
  # let's save the file in /data even if it's temp storage on HF
127
  with open(filepath, 'wb') as f:
128
  f.write(contents)
129
+
130
+ # save it also in assets/data because data can be cleared
131
+ filepath2 = os.path.join(datadir2, file.filename)
132
+ with open(filepath2, 'wb') as f:
133
+ f.write(contents)
134
 
135
  try:
136
+ logger(f"Starting to process {file.filename}")
137
  new_content = process_pdf(filepath)
138
  success = {"message": f"Successfully uploaded {file.filename}"}
139
  success.update(new_content)
 
141
 
142
  except Exception as e:
143
  return {"message": f"Failed to extract text from PDF: {str(e)}"}
144
+
145
+ elif file.filename.endswith('.txt'):
146
+
147
+ with open(filepath, 'wb') as f:
148
+ f.write(contents)
149
+
150
+ filepath2 = os.path.join(datadir2, file.filename)
151
+ with open(filepath2, 'wb') as f:
152
+ f.write(contents)
153
+
154
+ try:
155
+ logger(f"Reading {file.filename}")
156
+ new_content = process_txt(filepath)
157
+ success = {"message": f"Successfully uploaded {file.filename}"}
158
+ success.update(new_content)
159
+ return success
160
+
161
+ except Exception as e:
162
+ return {"message": f"Failed to extract text from TXT: {str(e)}"}
163
+
164
  else:
165
+ return {"message": "Only PDF & txt files are accepted"}
166
 
167
 
168
+ # @app.post("/create_index/")
169
  async def create_index():
170
  """ Create an index for the uploaded files """
171
 
172
+ logger("Creating index for uploaded files")
173
  try:
174
  msg = index_data()
175
  return {"message": msg}
 
180
  class Question(BaseModel):
181
  question: str
182
 
183
+ # @app.post("/ask/")
184
  async def hybrid_search(question: Question):
185
+ logger(f"Processing question: {question.question}")
186
  try:
187
  search_results = vector_search(question.question)
188
+ logger(f"Answer: {search_results}")
189
  return {"answer": search_results}
190
  except Exception as e:
191
  raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
192
 
193
+
194
+ # @app.post("/ragit/")
195
  async def ragit(question: Question):
196
+ logger(f"Processing question: {question.question}")
197
  try:
198
+ search_results = vector_search_raw(question.question)
199
+ logger(f"Search results generated: {search_results}")
200
 
201
  answer = rag_it(question.question, search_results)
202
 
203
+ logger(f"Answer: {answer}")
204
  return {"answer": answer}
205
  except Exception as e:
206
  raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
207
 
208
+
209
  if __name__ == '__main__':
210
  import uvicorn
211
  from os import getenv
212
+ port = int(getenv("PORT", 8000))
213
  print(f"Starting server on port {port}")
214
  reload = True if environment == "dev" else False
215
  uvicorn.run("main:app", host="0.0.0.0", port=port, reload=reload)
216
 
217
 
 
218
  # Examples:
219
+ # curl -X POST "http://localhost:8001/upload" -F "file=@<your_file>.pdf"
220
+ # curl -X DELETE "http://localhost:8001/erase_data/"
221
+ # curl -X GET "http://localhost:8001/list_files/"
222
 
223
+ # hf space is at https://jpbianchi-mr.hf.space/
224
+ # code given by https://jpbianchi-mr.hf.space/docs
225
  # Space must be public
226
+ # curl -X POST "https://jpbianchi-mr.hf.space/upload/" -F "file=@<your_file>.pdf"
227
 
228
+ # curl -X POST http://localhost:8000/ask/ -H "Content-Type: application/json" -d '{"question": "what is Amazon loss"}'
229
+ # curl -X POST http://localhost:8000/ragit/ -H "Content-Type: application/json" -d '{"question": "Does ATT have postpaid phone customers?"}'
230
+ # see more in notebook upload_index.ipynb