cashilaadev committed on
Commit
93b9d0f
1 Parent(s): 3a07d4d

Upload 18 files

Browse files
.env ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
# SECURITY: real credentials were previously committed in this file. Treat them
# as compromised: revoke/rotate them with the providers, and supply the new
# values through the deployment's secret store or an untracked local .env.
# Values are intentionally left empty so a missing secret fails loudly.

# Read by src/helper.py and store_index.py as PINECONE_API_KEY.
PINECONE_API_KEY=
# NOTE(review): "usa-east-1" does not look like a valid region identifier
# (store_index.py defaults to "us-east-1") - confirm the intended value.
PINECONE_API_ENV="usa-east-1"
# app.py exports this name to the environment before building the Replicate LLM.
REPLICATE_API_TOKEN=
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9

# Unprivileged account (uid 1000) that the application process runs as.
RUN useradd -m -u 1000 user

WORKDIR /app

# Copy only the dependency manifest first so the install layers below stay
# cached until requirements.txt itself changes.
COPY --chown=user ./requirements.txt requirements.txt

# --no-cache-dir keeps pip's download cache out of the image layers.
RUN pip install --no-cache-dir -r requirements.txt
# NOTE(review): these upgrades override the versions pinned in
# requirements.txt; kept as-is because the code imports newer LangChain
# packages - confirm which versions are actually intended.
RUN pip install --no-cache-dir --upgrade sentence_transformers
RUN pip install --no-cache-dir --upgrade langchain

# NOTE(review): this copies the whole build context, including any .env file;
# add a .dockerignore so secrets are not baked into the image.
COPY --chown=user . /app

# Drop root privileges: the user created above was never switched to, so the
# image default was root.
USER user
ENV HOME=/home/user

CMD ["gunicorn", "app:app", "-b", "0.0.0.0:7860"]
README.md CHANGED
@@ -1,11 +1,10 @@
1
- ---
2
- title: Mybot
3
- emoji: 🐢
4
- colorFrom: green
5
- colorTo: blue
6
- sdk: static
7
- pinned: false
8
- license: apache-2.0
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: Michaelmosesbot
3
+ emoji: 🌍
4
+ colorFrom: gray
5
+ colorTo: red
6
+ sdk: docker
7
+ pinned: false
8
+ ---
9
+
10
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
__pycache__/store_index.cpython-38.pyc ADDED
Binary file (2.61 kB). View file
 
app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ from flask import Flask, render_template, jsonify, request
4
+ from src.helper import download_hugging_face_embeddings
5
+ from langchain.llms import Replicate
6
+ from dotenv import load_dotenv
7
+ from PyPDF2 import PdfReader
8
+ from langchain.schema import Document
9
+ from langchain.text_splitter import CharacterTextSplitter
10
+
11
# Initialize Flask app
app = Flask(__name__)

# Load environment variables from a local .env file, if one is present.
load_dotenv()

# Build the embeddings object once at import time and fail fast if the helper
# returned nothing.
embeddings = download_hugging_face_embeddings()
if embeddings is None:
    raise ValueError("The embeddings is None. Please check the download_hugging_face_embeddings function.")

print(f"Embeddings: {embeddings}")

# Set up the Replicate-hosted LLM.
# The API token must never be hard-coded in source control: it is taken from
# the environment instead. REPLICATE_API_KEY is accepted as a fallback because
# that is the variable name the project's .env file has used.
_replicate_token = os.environ.get("REPLICATE_API_TOKEN") or os.environ.get("REPLICATE_API_KEY")
if not _replicate_token:
    raise RuntimeError(
        "Replicate API token not configured. Set the REPLICATE_API_TOKEN environment variable."
    )
os.environ["REPLICATE_API_TOKEN"] = _replicate_token
llm = Replicate(model="a16z-infra/llama7b-v2-chat:4f0a4744c7295c024a1de15e1a63c880d3da035fa1f49bfd344fe076074c8eea")
57
+
58
+
59
+ # Flask routes
60
@app.route("/")
def index():
    """Serve the chat page by rendering the ``chat.html`` template."""
    page = render_template('chat.html')
    return page
63
+
64
@app.route("/get", methods=["GET", "POST"])
def chat():
    """Answer one chat message with text generated by the module-level ``llm``.

    The message is read from the ``msg`` parameter. ``request.values`` is used
    so the parameter is found in either the form body (POST) or the query
    string (GET), since the route accepts both methods.

    Returns:
        The generated text as the response body on success; a JSON object with
        an ``error`` key and status 400 when ``msg`` is missing or blank, or
        status 500 when generation fails.
    """
    input_text = request.values.get("msg")
    if input_text is None or not input_text.strip():
        # A missing parameter is a client error, not a server failure.
        return jsonify({"error": "Missing required parameter 'msg'."}), 400

    try:
        print(f"Received message: {input_text}")

        # Retrieve response from the model
        result = llm.generate([input_text])
        print(f"LLMResult: {result}")

        # Access the generated text from the result object
        if result.generations and result.generations[0]:
            generated_text = result.generations[0][0].text
        else:
            generated_text = "No response generated."

        print(f"Response: {generated_text}")

        return str(generated_text)
    except Exception:
        # Log the full traceback server-side; do not echo internal exception
        # details back to the client.
        app.logger.exception("Failed to generate a chat response")
        return jsonify({"error": "Failed to generate a response."}), 500
93
+
94
if __name__ == '__main__':
    # Debug mode enables the interactive Werkzeug debugger, which must not be
    # reachable while the server listens on every interface (0.0.0.0). It is
    # therefore opt-in through FLASK_DEBUG instead of always on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes")
    app.run(host="0.0.0.0", port=8080, debug=debug_enabled)
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ctransformers==0.2.5
2
+ sentence-transformers==2.2.2
3
+ pinecone-client
4
+ langchain==0.0.225
5
+ flask
6
+ langchain-community
7
+ pypdf
8
+ python-dotenv
9
+ replicate
10
+ gunicorn
11
+ PyPDF2
12
+ langchain_community
setup.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from setuptools import find_packages, setup
2
+
3
+ setup(
4
+ name='Medical chatbot',
5
+ version='0.0.0',
6
+ description='This is My Medical chatbot',
7
+ author='Michael Moses',
8
+ author_email='[email protected]',
9
+ packages=find_packages(),
10
+ install_requires=[]
11
+
12
+ )
src/__init__.py ADDED
File without changes
src/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (159 Bytes). View file
 
src/__pycache__/helper.cpython-38.pyc ADDED
Binary file (1.49 kB). View file
 
src/helper.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain import PromptTemplate
2
+ from langchain.chains import RetrievalQA
3
+ from langchain.embeddings import HuggingFaceEmbeddings
4
+ from langchain_community.vectorstores import Pinecone
5
+ from dotenv import load_dotenv
6
+ import os
7
+ from pinecone import Pinecone
8
+ from langchain.document_loaders import PyPDFLoader, DirectoryLoader
9
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
10
+ from langchain.prompts import PromptTemplate
11
+ from langchain.llms import CTransformers
12
+ from unittest import loader
13
+
14
+
15
# Populate the process environment from a .env file before reading settings.
load_dotenv()

# Pinecone settings; each is None when the variable is not defined.
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
PINECONE_API_ENV = os.getenv('PINECONE_API_ENV')
19
+
20
+ # Extract pdf data
21
+
22
+
23
+
24
def load_pdf(data):
    """Load every PDF found directly inside a directory.

    Args:
        data: Path of the directory to scan for ``*.pdf`` files.

    Returns:
        The documents produced by ``DirectoryLoader.load()`` using
        ``PyPDFLoader`` for each matching file.
    """
    directory_loader = DirectoryLoader(data,
                                       glob="*.pdf",
                                       loader_cls=PyPDFLoader)

    documents = directory_loader.load()

    # The loaded documents were previously discarded (implicit None return).
    return documents
30
+
31
def text_split(extracted_data):
    """Split documents into chunks of size 500 with an overlap of 20.

    Args:
        extracted_data: Documents to pass to the splitter's
            ``split_documents``.

    Returns:
        The chunks returned by ``RecursiveCharacterTextSplitter``.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    return splitter.split_documents(extracted_data)
36
+
37
def download_hugging_face_embeddings():
    """Return a ``HuggingFaceEmbeddings`` for ``sentence-transformers/all-MiniLM-L6-v2``."""
    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
40
+
41
+
42
+
43
+
src/mike2chatbot.code-workspace ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "folders": [
3
+ {
4
+ "path": ".."
5
+ },
6
+ {
7
+ "path": "../../LifestyleChatbot/healthyliving"
8
+ }
9
+ ],
10
+ "settings": {}
11
+ }
src/prompt.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ prompt_template="""
2
+ You are a helpful assistant called Michael Moses. You should not express yourself as a human or try to be harmful in any way.
3
+
4
+ Use the following pieces of information to answer the user's question. If you don't know the answer, simply state that you don't know; do not attempt to fabricate an answer. Avoid imitating the user's phrasing or style in your response.
5
+
6
+ Context: {context}
7
+ Question: {question}
8
+
9
+ Provide only the helpful answer below:
10
+ Helpful answer:
11
+
12
+ """
13
+
14
+
15
+
static/style.css ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ body,html{
2
+ height: 100%;
3
+ margin: 0;
4
+ background: rgb(44, 47, 59);
5
+ background: -webkit-linear-gradient(to right, rgb(40, 59, 34), rgb(54, 60, 70), rgb(32, 32, 43));
6
+ background: linear-gradient(to right, rgb(38, 51, 61), rgb(50, 55, 65), rgb(33, 33, 78));
7
+ }
8
+
9
+ .chat{
10
+ margin-top: auto;
11
+ margin-bottom: auto;
12
+ }
13
+ .card{
14
+ height: 500px;
15
+ border-radius: 15px !important;
16
+ background-color: rgba(0,0,0,0.4) !important;
17
+ }
18
+ .contacts_body{
19
+ padding: 0.75rem 0 !important;
20
+ overflow-y: auto;
21
+ white-space: nowrap;
22
+ }
23
+ .msg_card_body{
24
+ overflow-y: auto;
25
+ }
26
+ .card-header{
27
+ border-radius: 15px 15px 0 0 !important;
28
+ border-bottom: 0 !important;
29
+ }
30
+ .card-footer{
31
+ border-radius: 0 0 15px 15px !important;
32
+ border-top: 0 !important;
33
+ }
34
+ .container{
35
+ align-content: center;
36
+ }
37
+ .search{
38
+ border-radius: 15px 0 0 15px !important;
39
+ background-color: rgba(0,0,0,0.3) !important;
40
+ border:0 !important;
41
+ color:white !important;
42
+ }
43
+ .search:focus{
44
+ box-shadow:none !important;
45
+ outline:0px !important;
46
+ }
47
+ .type_msg{
48
+ background-color: rgba(0,0,0,0.3) !important;
49
+ border:0 !important;
50
+ color:white !important;
51
+ height: 60px !important;
52
+ overflow-y: auto;
53
+ }
54
+ .type_msg:focus{
55
+ box-shadow:none !important;
56
+ outline:0px !important;
57
+ }
58
+ .attach_btn{
59
+ border-radius: 15px 0 0 15px !important;
60
+ background-color: rgba(0,0,0,0.3) !important;
61
+ border:0 !important;
62
+ color: white !important;
63
+ cursor: pointer;
64
+ }
65
+ .send_btn{
66
+ border-radius: 0 15px 15px 0 !important;
67
+ background-color: rgba(0,0,0,0.3) !important;
68
+ border:0 !important;
69
+ color: white !important;
70
+ cursor: pointer;
71
+ }
72
+ .search_btn{
73
+ border-radius: 0 15px 15px 0 !important;
74
+ background-color: rgba(245, 210, 185, 0.3) !important;
75
+ border:0 !important;
76
+ color: white !important;
77
+ cursor: pointer;
78
+ }
79
+ .contacts{
80
+ list-style: none;
81
+ padding: 0;
82
+ }
83
+ .contacts li{
84
+ width: 100% !important;
85
+ padding: 5px 10px;
86
+ margin-bottom: 15px !important;
87
+ }
88
+ .active{
89
+ background-color: rgba(0,0,0,0.3);
90
+ }
91
+ .user_img{
92
+ height: 70px;
93
+ width: 70px;
94
+ border:1.5px solid #f5f6fa;
95
+
96
+ }
97
+ .user_img_msg{
98
+ height: 40px;
99
+ width: 40px;
100
+ border:1.5px solid #f5f6fa;
101
+
102
+ }
103
+ .img_cont{
104
+ position: relative;
105
+ height: 70px;
106
+ width: 70px;
107
+ }
108
+ .img_cont_msg{
109
+ height: 40px;
110
+ width: 40px;
111
+ }
112
+ .online_icon{
113
+ position: absolute;
114
+ height: 15px;
115
+ width:15px;
116
+ background-color: #4cd137;
117
+ border-radius: 50%;
118
+ bottom: 0.2em;
119
+ right: 0.4em;
120
+ border:1.5px solid white;
121
+ }
122
+ .offline{
123
+ background-color: #c23616 !important;
124
+ }
125
+ .user_info{
126
+ margin-top: auto;
127
+ margin-bottom: auto;
128
+ margin-left: 15px;
129
+ }
130
+ .user_info span{
131
+ font-size: 20px;
132
+ color: white;
133
+ }
134
+ .user_info p{
135
+ font-size: 10px;
136
+ color: rgba(255,255,255,0.6);
137
+ }
138
+ .video_cam{
139
+ margin-left: 50px;
140
+ margin-top: 5px;
141
+ }
142
+ .video_cam span{
143
+ color: white;
144
+ font-size: 20px;
145
+ cursor: pointer;
146
+ margin-right: 20px;
147
+ }
148
+ .msg_cotainer{
149
+ margin-top: auto;
150
+ margin-bottom: auto;
151
+ margin-left: 10px;
152
+ border-radius: 25px;
153
+ background-color: rgb(82, 172, 255);
154
+ padding: 10px;
155
+ position: relative;
156
+ }
157
+ .msg_cotainer_send{
158
+ margin-top: auto;
159
+ margin-bottom: auto;
160
+ margin-right: 10px;
161
+ border-radius: 25px;
162
+ background-color: #58cc71;
163
+ padding: 10px;
164
+ position: relative;
165
+ }
166
+ .msg_time{
167
+ position: absolute;
168
+ left: 0;
169
+ bottom: -15px;
170
+ color: rgba(255,255,255,0.5);
171
+ font-size: 10px;
172
+ }
173
+ .msg_time_send{
174
+ position: absolute;
175
+ right:0;
176
+ bottom: -15px;
177
+ color: rgba(255,255,255,0.5);
178
+ font-size: 10px;
179
+ }
180
+ .msg_head{
181
+ position: relative;
182
+ }
183
+ #action_menu_btn{
184
+ position: absolute;
185
+ right: 10px;
186
+ top: 10px;
187
+ color: white;
188
+ cursor: pointer;
189
+ font-size: 20px;
190
+ }
191
+ .action_menu{
192
+ z-index: 1;
193
+ position: absolute;
194
+ padding: 15px 0;
195
+ background-color: rgba(0,0,0,0.5);
196
+ color: white;
197
+ border-radius: 15px;
198
+ top: 30px;
199
+ right: 15px;
200
+ display: none;
201
+ }
202
+ .action_menu ul{
203
+ list-style: none;
204
+ padding: 0;
205
+ margin: 0;
206
+ }
207
+ .action_menu ul li{
208
+ width: 100%;
209
+ padding: 10px 15px;
210
+ margin-bottom: 5px;
211
+ }
212
+ .action_menu ul li i{
213
+ padding-right: 10px;
214
+ }
215
+ .action_menu ul li:hover{
216
+ cursor: pointer;
217
+ background-color: rgba(0,0,0,0.2);
218
+ }
219
+ @media(max-width: 576px){
220
+ .contacts_card{
221
+ margin-bottom: 15px !important;
222
+ }
223
+ }
store_index.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ from src.helper import PINECONE_API_KEY, text_split, download_hugging_face_embeddings
4
+ from langchain.vectorstores import Pinecone as LangchainPinecone # Alias to avoid confusion
5
+ from dotenv import load_dotenv
6
+ from pinecone import Pinecone, ServerlessSpec
7
+ from langchain_pinecone import PineconeVectorStore
8
+ from PyPDF2 import PdfReader
9
+
10
+ # Define the load_pdf function
11
def load_pdf(file_path):
    """Extract the text of every page of a single PDF file.

    Args:
        file_path: Path to the PDF file to read.

    Returns:
        The page texts concatenated in page order, each followed by a newline,
        or ``None`` when the PDF yields no text at all (no pages, or only
        pages without extractable text), so the caller's ``None`` check
        actually catches an unusable document.
    """
    with open(file_path, 'rb') as file:
        reader = PdfReader(file)
        # `or ""` is defensive: a page with no extractable text must not break
        # the concatenation. Collecting the parts and joining once avoids
        # repeated string re-allocation on large books.
        page_texts = [(page.extract_text() or "") + "\n" for page in reader.pages]

    all_text = "".join(page_texts)
    return all_text if all_text.strip() else None
18
+
19
+ # Define the text_split function
20
def text_split(text):
    """Split a raw text string into chunks via ``CharacterTextSplitter``.

    Args:
        text: The text to split.

    Returns:
        The result of ``split_text`` with ``chunk_size=1000`` and
        ``chunk_overlap=0``.
    """
    from langchain.text_splitter import CharacterTextSplitter
    return CharacterTextSplitter(chunk_size=1000, chunk_overlap=0).split_text(text)
24
+
25
# Load environment variables if not already set
load_dotenv()

# Load and process data
pdf_file_path = "data/Gale Encyclopedia of Medicine Vol. 1 (A-B).pdf"  # Update this path to your single PDF file
extracted_data = load_pdf(pdf_file_path)
if extracted_data is None:
    raise ValueError("The extracted data is None. Please check the load_pdf function.")

# Report only the size: printing the full text of the book floods the console.
print(f"Extracted Data: {len(extracted_data)} characters")

# Split the extracted text into chunks
text_chunks = text_split(extracted_data)
# An empty list is as unusable as None, so reject both.
if not text_chunks:
    raise ValueError("The text_chunks is None or empty. Please check the text_split function.")

print(f"Text Chunks: {len(text_chunks)} chunks")

embeddings = download_hugging_face_embeddings()
if embeddings is None:
    raise ValueError("The embeddings is None. Please check the download_hugging_face_embeddings function.")

print(f"Embeddings: {embeddings}")

# Ensure Pinecone API key is available
api_key = os.environ.get("PINECONE_API_KEY")
if not api_key:
    raise ValueError("PINECONE_API_KEY environment variable not set.")

# Initialize Pinecone client
pc = Pinecone(api_key=api_key)

# Specify cloud and region for the serverless index
cloud = os.environ.get('PINECONE_CLOUD') or 'aws'
region = os.environ.get('PINECONE_REGION') or 'us-east-1'
spec = ServerlessSpec(cloud=cloud, region=region)

# Define the index name
index_name = "healthbot"

# Create the index if it does not exist
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384,  # NOTE(review): presumably the embedding model's output size - confirm
        metric="cosine",
        spec=spec
    )
    # Wait for the index to be ready
    while not pc.describe_index(index_name).status['ready']:
        time.sleep(1)

# Connect to the created index
index = pc.Index(index_name)
80
+
81
+ # Example: Add data to the index with reduced metadata
82
+ # Create a dictionary to simulate external storage of text chunks
83
# In-memory mapping of reference ID -> chunk text (stands in for external storage).
text_chunk_store = {}

def store_text_chunk(text_chunk):
    """Record a chunk in ``text_chunk_store`` and return its reference ID.

    The ID is ``chunk_<n>``, where ``n`` is the number of entries stored
    before this call.
    """
    position = len(text_chunk_store)
    chunk_id = f"chunk_{position}"
    text_chunk_store.update({chunk_id: text_chunk})
    return chunk_id
90
+
91
# Add text chunks to Pinecone with reference IDs.
# Chunks are embedded and upserted in batches: one network round trip per
# chunk makes indexing a whole book needlessly slow.
# NOTE(review): text_chunk_store lives only in this process, so the chunk text
# behind each chunk_id is not persisted anywhere once the script exits.
UPSERT_BATCH_SIZE = 100

for batch_start in range(0, len(text_chunks), UPSERT_BATCH_SIZE):
    batch = text_chunks[batch_start:batch_start + UPSERT_BATCH_SIZE]
    batch_embeddings = embeddings.embed_documents(batch)  # One embedding per chunk, in order
    vectors = []
    for offset, (text_chunk, embedding) in enumerate(zip(batch, batch_embeddings)):
        chunk_id = store_text_chunk(text_chunk)
        vectors.append(
            {
                "id": f"vec_{batch_start + offset}",
                "values": embedding,
                "metadata": {"chunk_id": chunk_id}  # Only store the reference ID as metadata
            }
        )
    index.upsert(vectors=vectors, namespace="ns1")

print("Indexing completed successfully.")
template.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os # Import the os module to interact with the operating system
2
+ from pathlib import Path # Import the Path class from pathlib module for handling file paths
3
+ import logging # Import the logging module for logging messages
4
+
5
# Log each scaffolding step with a timestamp prefix.
logging.basicConfig(level=logging.INFO, format='[%(asctime)s]: %(message)s')

# Project skeleton: every path listed here is created (empty) if needed.
list_of_files = [
    "src/__init__.py",
    "src/helper.py",
    "src/prompt.py",
    ".env",
    "setup.py",
    "research/trials.ipynb",
    "app.py",
    "store_index.py",
    "static/.gitkeep",
    "templates/chat.html",
    ".gitignore",
]

for filepath in list_of_files:
    filepath = Path(filepath)
    filedir, filename = os.path.split(filepath)

    # Make sure the parent directory exists for nested paths.
    if filedir != "":
        os.makedirs(filedir, exist_ok=True)
        logging.info(f"Creating directory: {filedir} for the file {filename}")

    # Leave files that already have content untouched.
    if os.path.exists(filepath) and os.path.getsize(filepath) != 0:
        logging.info(f"{filename} is already created")
        continue

    # Opening in write mode creates the file (or keeps an empty one empty).
    with open(filepath, 'w') as f:
        pass
    logging.info(f"Creating empty file: {filepath}")
templates/chat.html ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Chatbot</title>
7
+ <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.5.2/css/bootstrap.min.css">
8
+ <link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.5.0/css/all.css">
9
+ <style>
10
+ body, html {
11
+ height: 100%;
12
+ margin: 0;
13
+ background-color: #f8f9fa;
14
+ display: flex;
15
+ justify-content: center;
16
+ align-items: center;
17
+ }
18
+ .chat-container {
19
+ width: 100%;
20
+ max-width: 600px;
21
+ height: 80vh;
22
+ border-radius: 10px;
23
+ overflow: hidden;
24
+ display: flex;
25
+ flex-direction: column;
26
+ box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
27
+ background-color: #ffffff;
28
+ }
29
+ .card-header {
30
+ background-color: #007bff;
31
+ color: #fff;
32
+ font-size: 1.2rem;
33
+ padding: 10px 15px;
34
+ }
35
+ .msg_card_body {
36
+ flex: 1;
37
+ overflow-y: auto;
38
+ padding: 15px;
39
+ background-color: #f1f1f1;
40
+ }
41
+ .msg_card_body .d-flex {
42
+ margin-bottom: 10px;
43
+ }
44
+ .msg_cotainer, .msg_cotainer_send {
45
+ padding: 10px;
46
+ border-radius: 20px;
47
+ max-width: 60%;
48
+ word-wrap: break-word;
49
+ }
50
+ .msg_cotainer {
51
+ background-color: #e1ffc7;
52
+ }
53
+ .msg_cotainer_send {
54
+ background-color: #c7eaff;
55
+ }
56
+ .msg_time, .msg_time_send {
57
+ font-size: 10px;
58
+ color: #999;
59
+ margin-top: 5px;
60
+ display: block;
61
+ }
62
+ .img_cont_msg {
63
+ display: flex;
64
+ align-items: center;
65
+ margin: 0 10px;
66
+ }
67
+ .online_icon {
68
+ position: absolute;
69
+ width: 15px;
70
+ height: 15px;
71
+ background: #4cd137;
72
+ border-radius: 50%;
73
+ top: 2px;
74
+ right: 2px;
75
+ }
76
+ .user_img_msg {
77
+ width: 40px;
78
+ height: 40px;
79
+ }
80
+ .rounded-circle {
81
+ border-radius: 50% !important;
82
+ }
83
+ .card-footer {
84
+ background-color: #f1f1f1;
85
+ padding: 10px;
86
+ }
87
+ .type_msg {
88
+ border: none;
89
+ border-top-left-radius: 20px;
90
+ border-bottom-left-radius: 20px;
91
+ padding: 10px;
92
+ width: 100%;
93
+ }
94
+ .send_btn, .clear_btn {
95
+ border: none;
96
+ background: none;
97
+ font-size: 20px;
98
+ cursor: pointer;
99
+ }
100
+ .send_btn {
101
+ color: #007bff;
102
+ }
103
+ .clear_btn {
104
+ color: #dc3545;
105
+ }
106
+ </style>
107
+ </head>
108
+ <body>
109
+ <div class="chat-container">
110
+ <div class="card">
111
+ <div class="card-header d-flex align-items-center">
112
+ <img src="https://www.prdistribution.com/spirit/uploads/pressreleases/2019/newsreleases/d83341deb75c4c4f6b113f27b1e42cd8-chatbot-florence-already-helps-thousands-of-patients-to-remember-their-medication.png" class="rounded-circle user_img" width="40" height="40" alt="Chatbot Avatar">
113
+ <span class="ml-2">Starlets-Lifestyle Chatbot</span>
114
+ </div>
115
+ <div id="messageFormeight" class="card-body msg_card_body">
116
+ <!-- Messages will be appended here -->
117
+ </div>
118
+ <div class="card-footer d-flex">
119
+ <form id="messageArea" class="input-group" style="flex: 1;">
120
+ <input type="text" id="text" name="msg" placeholder="Type your message..." autocomplete="off" class="form-control type_msg" required/>
121
+ <div class="input-group-append">
122
+ <button type="submit" id="send" class="input-group-text send_btn"><i class="fas fa-location-arrow"></i></button>
123
+ <button type="button" id="clearChat" class="input-group-text clear_btn"><i class="fas fa-trash"></i></button>
124
+ </div>
125
+ </form>
126
+ </div>
127
+ </div>
128
+ </div>
129
+
130
+ <script src="https://code.jquery.com/jquery-3.5.1.min.js"></script>
131
+ <script>
132
$(document).ready(function() {
    const BOT_AVATAR_URL = "https://www.prdistribution.com/spirit/uploads/pressreleases/2019/newsreleases/d83341deb75c4c4f6b113f27b1e42cd8-chatbot-florence-already-helps-thousands-of-patients-to-remember-their-medication.png";
    const USER_AVATAR_URL = "https://i.ibb.co/d5b84Xw/Untitled-design.png";
    const $messages = $("#messageFormeight");

    // Format the current local time as H:MM.
    function currentTime() {
        const date = new Date();
        const minute = date.getMinutes();
        return `${date.getHours()}:${minute < 10 ? `0${minute}` : minute}`;
    }

    // Append one chat bubble and scroll it into view.
    // The message is inserted with .text(), never as an HTML string, so
    // neither user input nor the server reply can inject markup or scripts.
    function appendMessage({ text, time, fromUser }) {
        const $bubble = $("<div>")
            .addClass(fromUser ? "msg_cotainer_send" : "msg_cotainer")
            .text(text)
            .append($("<span>").addClass(fromUser ? "msg_time_send" : "msg_time").text(time));
        const $avatar = $("<div>")
            .addClass("img_cont_msg")
            .append(
                $("<img>")
                    .attr("src", fromUser ? USER_AVATAR_URL : BOT_AVATAR_URL)
                    .addClass("rounded-circle user_img_msg")
            );
        const $row = $("<div>")
            .addClass("d-flex mb-4")
            .addClass(fromUser ? "justify-content-end" : "justify-content-start");

        // User messages show the avatar on the right, bot messages on the left.
        if (fromUser) {
            $row.append($bubble, $avatar);
        } else {
            $row.append($avatar, $bubble);
        }

        $messages.append($row);
        $messages.scrollTop($messages[0].scrollHeight);
    }

    // Handle form submission. The send button is type="submit", so a click on
    // it already fires this handler; a separate click handler that re-submits
    // the form would run the handler twice per click.
    $("#messageArea").on("submit", function(event) {
        event.preventDefault(); // Prevent default form submission

        const strTime = currentTime();
        const rawText = $("#text").val().trim();
        if (rawText === "") {
            return;
        }

        $("#text").val(""); // Clear input field
        appendMessage({ text: rawText, time: strTime, fromUser: true });

        // AJAX post request
        $.ajax({
            data: { msg: rawText },
            type: "POST",
            url: "/get", // Replace with your endpoint
        }).done(function(data) {
            appendMessage({ text: String(data), time: strTime, fromUser: false });
        }).fail(function() {
            // Without this the user gets no feedback when the request fails.
            appendMessage({
                text: "Sorry, something went wrong. Please try again.",
                time: strTime,
                fromUser: false,
            });
        });
    });

    // Clear chat history button functionality
    $("#clearChat").click(function() {
        $messages.empty(); // Clear all messages
    });
});
186
+ </script>
187
+ </body>
188
+ </html>