Spaces:
Sleeping
Sleeping
cashilaadev
committed on
Commit
•
9b2f63d
1
Parent(s):
cdc4465
Update app.py
Browse files
app.py
CHANGED
@@ -1,95 +1,103 @@
|
|
1 |
-
import os
|
2 |
-
import time
|
3 |
-
from flask import Flask, render_template, jsonify, request
|
4 |
-
from src.helper import download_hugging_face_embeddings
|
5 |
-
from langchain.llms import Replicate
|
6 |
-
from dotenv import load_dotenv
|
7 |
-
from PyPDF2 import PdfReader
|
8 |
-
from langchain.schema import Document
|
9 |
-
from langchain.text_splitter import CharacterTextSplitter
|
10 |
-
|
11 |
-
# Initialize Flask app
|
12 |
-
app = Flask(__name__)
|
13 |
-
|
14 |
-
# Load environment variables
|
15 |
-
load_dotenv()
|
16 |
-
|
17 |
-
# Define the load_pdf function
|
18 |
-
""" def load_pdf(file_path):
|
19 |
-
all_text = ""
|
20 |
-
with open(file_path, 'rb') as file:
|
21 |
-
reader = PdfReader(file)
|
22 |
-
for page in reader.pages:
|
23 |
-
all_text += page.extract_text() + "\n"
|
24 |
-
return all_text if all_text else None
|
25 |
-
"""
|
26 |
-
# Define the text_split function
|
27 |
-
""" def text_split(text):
|
28 |
-
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
29 |
-
document = Document(page_content=text)
|
30 |
-
return text_splitter.split_documents([document])
|
31 |
-
"""
|
32 |
-
# Load and process data
|
33 |
-
#pdf_file_path = "data/Gale Encyclopedia of Medicine Vol. 1 (A-B).pdf" # Update this path to your single PDF file
|
34 |
-
#extracted_data = load_pdf(pdf_file_path)
|
35 |
-
#if extracted_data is None:
|
36 |
-
#raise ValueError("The extracted data is None. Please check the load_pdf function.")
|
37 |
-
|
38 |
-
#print(f"Extracted Data: {extracted_data}")
|
39 |
-
|
40 |
-
# Split the extracted text into chunks
|
41 |
-
#text_chunks = text_split(extracted_data)
|
42 |
-
#if not text_chunks:
|
43 |
-
#raise ValueError("The text_chunks is None or empty. Please check the text_split function.")
|
44 |
-
|
45 |
-
#print(f"Text Chunks: {text_chunks}")
|
46 |
-
|
47 |
-
embeddings = download_hugging_face_embeddings()
|
48 |
-
if embeddings is None:
|
49 |
-
raise ValueError("The embeddings is None. Please check the download_hugging_face_embeddings function.")
|
50 |
-
|
51 |
-
print(f"Embeddings: {embeddings}")
|
52 |
-
|
53 |
-
# Setup CTransformers LLM
|
54 |
-
from langchain.llms import Replicate
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
print(f"
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
app.run(host="0.0.0.0", port=8080, debug=True)
|
|
|
1 |
+
import os
|
2 |
+
import time
|
3 |
+
from flask import Flask, render_template, jsonify, request
|
4 |
+
from src.helper import download_hugging_face_embeddings
|
5 |
+
from langchain.llms import Replicate
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
from PyPDF2 import PdfReader
|
8 |
+
from langchain.schema import Document
|
9 |
+
from langchain.text_splitter import CharacterTextSplitter
|
10 |
+
|
11 |
+
# Initialize Flask app
|
12 |
+
app = Flask(__name__)
|
13 |
+
|
14 |
+
# Load environment variables
|
15 |
+
load_dotenv()
|
16 |
+
|
17 |
+
# Define the load_pdf function
|
18 |
+
""" def load_pdf(file_path):
|
19 |
+
all_text = ""
|
20 |
+
with open(file_path, 'rb') as file:
|
21 |
+
reader = PdfReader(file)
|
22 |
+
for page in reader.pages:
|
23 |
+
all_text += page.extract_text() + "\n"
|
24 |
+
return all_text if all_text else None
|
25 |
+
"""
|
26 |
+
# Define the text_split function
|
27 |
+
""" def text_split(text):
|
28 |
+
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
29 |
+
document = Document(page_content=text)
|
30 |
+
return text_splitter.split_documents([document])
|
31 |
+
"""
|
32 |
+
# Load and process data
|
33 |
+
#pdf_file_path = "data/Gale Encyclopedia of Medicine Vol. 1 (A-B).pdf" # Update this path to your single PDF file
|
34 |
+
#extracted_data = load_pdf(pdf_file_path)
|
35 |
+
#if extracted_data is None:
|
36 |
+
#raise ValueError("The extracted data is None. Please check the load_pdf function.")
|
37 |
+
|
38 |
+
#print(f"Extracted Data: {extracted_data}")
|
39 |
+
|
40 |
+
# Split the extracted text into chunks
|
41 |
+
#text_chunks = text_split(extracted_data)
|
42 |
+
#if not text_chunks:
|
43 |
+
#raise ValueError("The text_chunks is None or empty. Please check the text_split function.")
|
44 |
+
|
45 |
+
#print(f"Text Chunks: {text_chunks}")
|
46 |
+
|
47 |
+
# Obtain the embeddings object from the project helper at import time and
# abort start-up immediately if the helper returned None.
embeddings = download_hugging_face_embeddings()
if embeddings is None:
    raise ValueError("The embeddings is None. Please check the download_hugging_face_embeddings function.")

# Echo what was loaded so it shows up in the start-up output.
print(f"Embeddings: {embeddings}")
|
52 |
+
|
53 |
+
# Import the Replicate LLM wrapper
|
54 |
+
from langchain.llms import Replicate
|
55 |
+
# Setup Replicate LLM
# The API token is a secret and must be supplied through the environment
# (for example a .env file picked up by load_dotenv(), or a deployment
# secret). It must never be hard-coded: any token that has been committed
# to source control should be treated as compromised and rotated.
if not os.environ.get("REPLICATE_API_TOKEN"):
    raise RuntimeError(
        "REPLICATE_API_TOKEN is not set. Define it in the environment or in a .env file."
    )

llm = Replicate(
    model="a16z-infra/llama7b-v2-chat:4f0a4744c7295c024a1de15e1a63c880d3da035fa1f49bfd344fe076074c8eea",
    # Generation settings go in `model_kwargs`; `config` is the CTransformers
    # parameter name and is not a declared field of the Replicate wrapper.
    # NOTE(review): confirm against the installed langchain version (older
    # releases used `input=` for the same purpose).
    model_kwargs={
        'max_new_tokens': 100,  # Maximum number of tokens to generate in response
        'temperature': 0.7,     # Sampling temperature
        'top_k': 50,            # Sample from the 50 most likely next tokens
    },
)
|
65 |
+
|
66 |
+
|
67 |
+
# Flask routes
|
68 |
+
@app.route("/")
def index():
    """Render and return the chat page template for the site root."""
    page = render_template('chat.html')
    return page
|
71 |
+
|
72 |
+
@app.route("/get", methods=["GET", "POST"])
def chat():
    """Return the LLM's reply to the user's chat message.

    The message is read from the ``msg`` request parameter, taken from the
    form body (POST) or the query string (GET).

    Returns:
        The generated text as a plain string on success; a JSON
        ``{"error": ...}`` body with status 400 when ``msg`` is missing or
        blank; a JSON ``{"error": ...}`` body with status 500 when the
        model call fails.
    """
    # request.values merges the query string and the form body, so the GET
    # method advertised by the route works as well as POST. A missing
    # message is a client error, not a server error.
    msg = request.values.get("msg")
    if msg is None or not msg.strip():
        return jsonify({"error": "Missing 'msg' parameter."}), 400

    input_text = msg
    print(f"Received message: {input_text}")

    try:
        # Retrieve response from the model
        result = llm.generate([input_text])
        print(f"LLMResult: {result}")

        # Access the generated text from the result object
        if result.generations and result.generations[0]:
            generated_text = result.generations[0][0].text
        else:
            generated_text = "No response generated."

        print(f"Response: {generated_text}")
        return str(generated_text)
    except Exception as e:
        # Boundary handler: report the failure to the client as JSON
        # instead of letting the request crash.
        print(f"Error: {e}")
        return jsonify({"error": str(e)}), 500
|
101 |
+
|
102 |
+
if __name__ == '__main__':
    # The Werkzeug debugger allows arbitrary code execution from the browser,
    # so it must not be enabled by default on a server bound to all
    # interfaces. Opt in explicitly with FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes")
    app.run(host="0.0.0.0", port=8080, debug=debug_enabled)
|