Kirai-Kevin committed on
Commit
cfd52b4
1 Parent(s): a9c7a33

Upload 18 files

Browse files
.env ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ PINECONE_API_KEY="<your-pinecone-api-key>"
2
+ PINECONE_API_ENV="usa-east-1"
3
+ REPLICATE_API_TOKEN="<your-replicate-api-token>"
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9

# Unprivileged account (uid 1000) that the application runs as.
RUN useradd -m -u 1000 user

WORKDIR /app

# Copy the requirements first so the dependency layers stay cached
# when only application code changes.
COPY --chown=user ./requirements.txt requirements.txt

RUN pip install -r requirements.txt
RUN pip install --upgrade sentence_transformers
RUN pip install --upgrade langchain

# NOTE(review): this copies the whole build context, including any local .env
# file, into the image - consider a .dockerignore so secrets are not baked in.
COPY --chown=user . /app

# Drop root before starting the server; the `user` account created above was
# otherwise never used. HOME points at a directory that account can write to.
USER user
ENV HOME=/home/user

CMD ["gunicorn", "app:app", "-b", "0.0.0.0:7860"]
README.md CHANGED
@@ -1,11 +1,10 @@
1
- ---
2
- title: Travellor Bot
3
- emoji: 📊
4
- colorFrom: yellow
5
- colorTo: green
6
- sdk: static
7
- pinned: false
8
- license: apache-2.0
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: KevinKiraiTravelBot
3
+ emoji: 🌍
4
+ colorFrom: gray
5
+ colorTo: red
6
+ sdk: docker
7
+ pinned: false
8
+ ---
9
+
10
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
__pycache__/store_index.cpython-38.pyc ADDED
Binary file (2.61 kB). View file
 
app.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ from flask import Flask, render_template, jsonify, request
4
+ from src.helper import download_hugging_face_embeddings
5
+ from langchain.llms import Replicate
6
+ from dotenv import load_dotenv
7
+ from PyPDF2 import PdfReader
8
+ from langchain.schema import Document
9
+ from langchain.text_splitter import CharacterTextSplitter
10
+
11
+ # Initialize Flask app
12
+ app = Flask(__name__)
13
+
14
+ # Load environment variables
15
+ load_dotenv()
16
+
17
+ # Optional PDF processing functions
18
+ # def load_pdf(file_path):
19
+ # all_text = ""
20
+ # with open(file_path, 'rb') as file:
21
+ # reader = PdfReader(file)
22
+ # for page in reader.pages:
23
+ # all_text += page.extract_text() + "\n"
24
+ # return all_text if all_text else None
25
+
26
+ # def text_split(text):
27
+ # text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
28
+ # document = Document(page_content=text)
29
+ # return text_splitter.split_documents([document])
30
+
31
+ # Load and process data
32
+ # pdf_file_path = "data/Okelloetal.2008TourismanalysisManka.pdf"
33
+ # extracted_data = load_pdf(pdf_file_path)
34
+ # if extracted_data is None:
35
+ # raise ValueError("The extracted data is None. Please check the load_pdf function.")
36
+ # print(f"Extracted Data: {extracted_data}")
37
+
38
+ # Split the extracted text into chunks
39
+ # text_chunks = text_split(extracted_data)
40
+ # if not text_chunks:
41
+ # raise ValueError("The text_chunks is None or empty. Please check the text_split function.")
42
+ # print(f"Text Chunks: {text_chunks}")
43
+
44
# Build the sentence-embedding model once, at import time.
embeddings = download_hugging_face_embeddings()
if embeddings is None:
    raise ValueError("The embeddings is None. Please check the download_hugging_face_embeddings function.")
print(f"Embeddings: {embeddings}")

# The Replicate credential must come from the environment (for example the
# .env file read by load_dotenv() earlier in this module). It must never be
# written into source control.
if not os.environ.get("REPLICATE_API_TOKEN"):
    raise ValueError("REPLICATE_API_TOKEN environment variable not set.")

# Initialize the Replicate model
# NOTE(review): `config` is passed through unchanged from the original code;
# confirm that the installed langchain Replicate wrapper forwards these
# generation settings to the model (it may expect `input`/`model_kwargs`).
llm = Replicate(
    model="a16z-infra/llama7b-v2-chat:4f0a4744c7295c024a1de15e1a63c880d3da035fa1f49bfd344fe076074c8eea",
    config={
        'max_new_tokens': 100,  # Maximum number of tokens to generate in response
        'temperature': 0.7,     # Sampling temperature
        'top_k': 50,            # Sample from the 50 most likely next tokens
    },
)
60
+ # Flask routes
61
@app.route("/")
def index():
    """Serve the chat page rendered from the ``chat.html`` template."""
    page = render_template('chat.html')
    return page
64
+
65
@app.route("/get", methods=["GET", "POST"])
def chat():
    """Return the language model's reply to the submitted chat message.

    The message is read from the ``msg`` request parameter.

    Returns:
        The generated text as the response body on success; a JSON
        ``{"error": ...}`` payload with status 400 when ``msg`` is missing
        or blank, or with status 500 when the model call fails.
    """
    # request.values merges query-string and form data, so the GET method
    # advertised by the route works as well as the form POST.
    msg = request.values.get("msg")
    if not msg or not msg.strip():
        return jsonify({"error": "Missing 'msg' parameter."}), 400

    input_text = msg
    print(f"Received message: {input_text}")

    # Broad catch is deliberate: this is the request boundary, and any
    # failure in the remote model call should become an error response.
    try:
        # Retrieve response from the model
        result = llm.generate([input_text])
        print(f"LLMResult: {result}")

        # Access the generated text from the result object
        if result.generations and result.generations[0]:
            generated_text = result.generations[0][0].text
        else:
            generated_text = "No response generated."
    except Exception as e:
        print(f"Error: {e}")
        return jsonify({"error": str(e)}), 500

    print(f"Response: {generated_text}")
    return str(generated_text)
94
+
95
if __name__ == '__main__':
    # The Werkzeug debugger lets anyone who can reach the port execute
    # arbitrary code, so it must be opted into explicitly instead of being
    # always on while the server is bound to every interface.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes")
    app.run(host="0.0.0.0", port=8080, debug=debug_enabled)
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ctransformers==0.2.5
2
+ sentence-transformers==2.2.2
3
+ pinecone-client
4
+ langchain==0.0.225
5
+ flask
6
+ langchain-community
7
+ pypdf
8
+ python-dotenv
9
+ replicate
10
+ gunicorn
11
+ PyPDF2
12
+ langchain-pinecone
setup.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from setuptools import find_packages, setup
2
+
3
# Distribution metadata for the chatbot package; packages are discovered
# automatically and no install-time requirements are declared here.
_metadata = dict(
    name='Travel chatbot',
    version='0.0.0',
    description='This is My Travel chatbot',
    author='Michael Moses',
    author_email='[email protected]',
    packages=find_packages(),
    install_requires=[],
)

setup(**_metadata)
src/__init__.py ADDED
File without changes
src/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (143 Bytes). View file
 
src/__pycache__/helper.cpython-38.pyc ADDED
Binary file (1.48 kB). View file
 
src/helper.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain import PromptTemplate
2
+ from langchain.chains import RetrievalQA
3
+ from langchain.embeddings import HuggingFaceEmbeddings
4
+ from langchain_community.vectorstores import Pinecone
5
+ from dotenv import load_dotenv
6
+ import os
7
+ from pinecone import Pinecone
8
+ from langchain.document_loaders import PyPDFLoader, DirectoryLoader
9
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
10
+ from langchain.prompts import PromptTemplate
11
+ from langchain.llms import CTransformers
12
+ from unittest import loader
13
+
14
+
15
# Read a local .env file (if any) before looking up the settings below.
load_dotenv()

# Pinecone settings; each is None when the variable is not set.
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
PINECONE_API_ENV = os.getenv('PINECONE_API_ENV')
19
+
20
+ # Extract pdf data
21
+
22
+
23
+
24
def load_pdf(data):
    """Load every ``*.pdf`` file found in the directory *data*.

    Args:
        data: Path of the directory to scan for PDF files.

    Returns:
        The documents produced by the directory loader, using
        ``PyPDFLoader`` for each matched file.
    """
    directory_loader = DirectoryLoader(
        data,
        glob="*.pdf",
        loader_cls=PyPDFLoader,
    )

    documents = directory_loader.load()

    # The original fell off the end here and handed callers None.
    return documents
30
+
31
def text_split(extracted_data):
    """Split loaded documents into chunks of 500 characters with 20 overlap.

    Args:
        extracted_data: Documents to split, as accepted by
            ``RecursiveCharacterTextSplitter.split_documents``.

    Returns:
        The chunks produced by the splitter.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    return splitter.split_documents(extracted_data)
36
+
37
def download_hugging_face_embeddings():
    """Return a ``HuggingFaceEmbeddings`` wrapper for all-MiniLM-L6-v2."""
    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
40
+
41
+
42
+
43
+
src/kevinkirai.code-workspace ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "folders": [
3
+ {
4
+ "path": ".."
5
+ },
6
+ {
7
+ "path": "../../travelChatbot/tourism"
8
+ }
9
+ ],
10
+ "settings": {}
11
+ }
src/prompt.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ prompt_template="""
2
+ You are a wildlife guide bot called Kevin Kirai. You provide information about wildlife and nature to tourists. You should not express yourself as a human or try to be harmful in any way.
3
+ Use the following pieces of information to answer the tourist's question. If you don't know the answer, simply state that you don't know; do not attempt to fabricate an answer. Avoid imitating the tourist's phrasing or style in your response.
4
+ Context: {context}
5
+ Question: {question}
6
+ Provide only the helpful answer below:
7
+ Helpful answer:
8
+ """
static/style.css ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ body,html{
2
+ height: 100%;
3
+ margin: 0;
4
+ background: rgb(44, 47, 59);
5
+ background: -webkit-linear-gradient(to right, rgb(40, 59, 34), rgb(54, 60, 70), rgb(32, 32, 43));
6
+ background: linear-gradient(to right, rgb(38, 51, 61), rgb(50, 55, 65), rgb(33, 33, 78));
7
+ }
8
+
9
+ .chat{
10
+ margin-top: auto;
11
+ margin-bottom: auto;
12
+ }
13
+ .card{
14
+ height: 500px;
15
+ border-radius: 15px !important;
16
+ background-color: rgba(0,0,0,0.4) !important;
17
+ }
18
+ .contacts_body{
19
+ padding: 0.75rem 0 !important;
20
+ overflow-y: auto;
21
+ white-space: nowrap;
22
+ }
23
+ .msg_card_body{
24
+ overflow-y: auto;
25
+ }
26
+ .card-header{
27
+ border-radius: 15px 15px 0 0 !important;
28
+ border-bottom: 0 !important;
29
+ }
30
+ .card-footer{
31
+ border-radius: 0 0 15px 15px !important;
32
+ border-top: 0 !important;
33
+ }
34
+ .container{
35
+ align-content: center;
36
+ }
37
+ .search{
38
+ border-radius: 15px 0 0 15px !important;
39
+ background-color: rgba(0,0,0,0.3) !important;
40
+ border:0 !important;
41
+ color:white !important;
42
+ }
43
+ .search:focus{
44
+ box-shadow:none !important;
45
+ outline:0px !important;
46
+ }
47
+ .type_msg{
48
+ background-color: rgba(0,0,0,0.3) !important;
49
+ border:0 !important;
50
+ color:white !important;
51
+ height: 60px !important;
52
+ overflow-y: auto;
53
+ }
54
+ .type_msg:focus{
55
+ box-shadow:none !important;
56
+ outline:0px !important;
57
+ }
58
+ .attach_btn{
59
+ border-radius: 15px 0 0 15px !important;
60
+ background-color: rgba(0,0,0,0.3) !important;
61
+ border:0 !important;
62
+ color: white !important;
63
+ cursor: pointer;
64
+ }
65
+ .send_btn{
66
+ border-radius: 0 15px 15px 0 !important;
67
+ background-color: rgba(0,0,0,0.3) !important;
68
+ border:0 !important;
69
+ color: white !important;
70
+ cursor: pointer;
71
+ }
72
+ .search_btn{
73
+ border-radius: 0 15px 15px 0 !important;
74
+ background-color: rgba(245, 210, 185, 0.3) !important;
75
+ border:0 !important;
76
+ color: white !important;
77
+ cursor: pointer;
78
+ }
79
+ .contacts{
80
+ list-style: none;
81
+ padding: 0;
82
+ }
83
+ .contacts li{
84
+ width: 100% !important;
85
+ padding: 5px 10px;
86
+ margin-bottom: 15px !important;
87
+ }
88
+ .active{
89
+ background-color: rgba(0,0,0,0.3);
90
+ }
91
+ .user_img{
92
+ height: 70px;
93
+ width: 70px;
94
+ border:1.5px solid #f5f6fa;
95
+
96
+ }
97
+ .user_img_msg{
98
+ height: 40px;
99
+ width: 40px;
100
+ border:1.5px solid #f5f6fa;
101
+
102
+ }
103
+ .img_cont{
104
+ position: relative;
105
+ height: 70px;
106
+ width: 70px;
107
+ }
108
+ .img_cont_msg{
109
+ height: 40px;
110
+ width: 40px;
111
+ }
112
+ .online_icon{
113
+ position: absolute;
114
+ height: 15px;
115
+ width:15px;
116
+ background-color: #4cd137;
117
+ border-radius: 50%;
118
+ bottom: 0.2em;
119
+ right: 0.4em;
120
+ border:1.5px solid white;
121
+ }
122
+ .offline{
123
+ background-color: #c23616 !important;
124
+ }
125
+ .user_info{
126
+ margin-top: auto;
127
+ margin-bottom: auto;
128
+ margin-left: 15px;
129
+ }
130
+ .user_info span{
131
+ font-size: 20px;
132
+ color: white;
133
+ }
134
+ .user_info p{
135
+ font-size: 10px;
136
+ color: rgba(255,255,255,0.6);
137
+ }
138
+ .video_cam{
139
+ margin-left: 50px;
140
+ margin-top: 5px;
141
+ }
142
+ .video_cam span{
143
+ color: white;
144
+ font-size: 20px;
145
+ cursor: pointer;
146
+ margin-right: 20px;
147
+ }
148
+ .msg_cotainer{
149
+ margin-top: auto;
150
+ margin-bottom: auto;
151
+ margin-left: 10px;
152
+ border-radius: 25px;
153
+ background-color: rgb(82, 172, 255);
154
+ padding: 10px;
155
+ position: relative;
156
+ }
157
+ .msg_cotainer_send{
158
+ margin-top: auto;
159
+ margin-bottom: auto;
160
+ margin-right: 10px;
161
+ border-radius: 25px;
162
+ background-color: #58cc71;
163
+ padding: 10px;
164
+ position: relative;
165
+ }
166
+ .msg_time{
167
+ position: absolute;
168
+ left: 0;
169
+ bottom: -15px;
170
+ color: rgba(255,255,255,0.5);
171
+ font-size: 10px;
172
+ }
173
+ .msg_time_send{
174
+ position: absolute;
175
+ right:0;
176
+ bottom: -15px;
177
+ color: rgba(255,255,255,0.5);
178
+ font-size: 10px;
179
+ }
180
+ .msg_head{
181
+ position: relative;
182
+ }
183
+ #action_menu_btn{
184
+ position: absolute;
185
+ right: 10px;
186
+ top: 10px;
187
+ color: white;
188
+ cursor: pointer;
189
+ font-size: 20px;
190
+ }
191
+ .action_menu{
192
+ z-index: 1;
193
+ position: absolute;
194
+ padding: 15px 0;
195
+ background-color: rgba(0,0,0,0.5);
196
+ color: white;
197
+ border-radius: 15px;
198
+ top: 30px;
199
+ right: 15px;
200
+ display: none;
201
+ }
202
+ .action_menu ul{
203
+ list-style: none;
204
+ padding: 0;
205
+ margin: 0;
206
+ }
207
+ .action_menu ul li{
208
+ width: 100%;
209
+ padding: 10px 15px;
210
+ margin-bottom: 5px;
211
+ }
212
+ .action_menu ul li i{
213
+ padding-right: 10px;
214
+ }
215
+ .action_menu ul li:hover{
216
+ cursor: pointer;
217
+ background-color: rgba(0,0,0,0.2);
218
+ }
219
+ @media(max-width: 576px){
220
+ .contacts_card{
221
+ margin-bottom: 15px !important;
222
+ }
223
+ }
store_index.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ from src.helper import PINECONE_API_KEY, text_split, download_hugging_face_embeddings
4
+ from langchain.vectorstores import Pinecone as LangchainPinecone # Alias to avoid confusion
5
+ from dotenv import load_dotenv
6
+ from pinecone import Pinecone, ServerlessSpec
7
+ from langchain_pinecone import PineconeVectorStore
8
+ from PyPDF2 import PdfReader
9
+
10
+ # Define the load_pdf function
11
def load_pdf(file_path):
    """Extract the text of every page of a single PDF file.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The page texts concatenated in order, each followed by a newline,
        or ``None`` when no page yielded any non-whitespace text.
    """
    with open(file_path, 'rb') as file:
        reader = PdfReader(file)
        # Guard against a page yielding no text object at all.
        page_texts = [page.extract_text() or "" for page in reader.pages]

    all_text = "".join(f"{page_text}\n" for page_text in page_texts)

    # Every page contributes at least "\n", so a plain truthiness test would
    # accept a PDF with no extractable text; check for real content instead.
    return all_text if all_text.strip() else None
18
+
19
+ # Define the text_split function
20
def text_split(text):
    """Split a string into chunks using a 1000-character, zero-overlap splitter.

    Args:
        text: The full text to split.

    Returns:
        The chunks produced by ``CharacterTextSplitter.split_text``.
    """
    from langchain.text_splitter import CharacterTextSplitter

    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    chunks = splitter.split_text(text)
    return chunks
24
+
25
+ # Load environment variables if not already set
26
# Load environment variables if not already set
load_dotenv()

# Load and process data
pdf_file_path = "data/Okelloetal.2008TourismanalysisManka.pdf"  # Update this path to your single PDF file
extracted_data = load_pdf(pdf_file_path)
if extracted_data is None:
    raise ValueError("The extracted data is None. Please check the load_pdf function.")

print(f"Extracted Data: {extracted_data}")

# Split the extracted text into chunks
text_chunks = text_split(extracted_data)
# The splitter hands back a list, so an `is None` test can never fire;
# an empty result is the failure that actually needs catching.
if not text_chunks:
    raise ValueError("The text_chunks is None or empty. Please check the text_split function.")

print(f"Text Chunks: {text_chunks}")

# Embedding model used to turn each chunk into a vector
embeddings = download_hugging_face_embeddings()
if embeddings is None:
    raise ValueError("The embeddings is None. Please check the download_hugging_face_embeddings function.")

print(f"Embeddings: {embeddings}")
48
+
49
+ # Ensure Pinecone API key is available
50
# The Pinecone credential is mandatory; stop early when it is absent.
api_key = os.getenv("PINECONE_API_KEY")
if not api_key:
    raise ValueError("PINECONE_API_KEY environment variable not set.")

# Client used for every index operation below
pc = Pinecone(api_key=api_key)

# Serverless placement, overridable through the environment
cloud = os.getenv('PINECONE_CLOUD') or 'aws'
region = os.getenv('PINECONE_REGION') or 'us-east-1'
spec = ServerlessSpec(cloud=cloud, region=region)

# Name of the index that receives the vectors
index_name = "healthbot"

# Create the index on first use, then poll once per second until it is ready.
# NOTE(review): dimension=384 presumably matches the embedding model chosen
# in src/helper.py - confirm if that model changes.
existing_indexes = pc.list_indexes().names()
if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        dimension=384,
        metric="cosine",
        spec=spec,
    )
    while True:
        if pc.describe_index(index_name).status['ready']:
            break
        time.sleep(1)

# Handle to the (now existing) index
index = pc.Index(index_name)
time.sleep(1)
80
+
81
+ # Example: Add data to the index with reduced metadata
82
+ # Create a dictionary to simulate external storage of text chunks
83
# In-memory mapping of reference ID -> chunk text.
text_chunk_store = {}


def store_text_chunk(text_chunk):
    """Record *text_chunk* in ``text_chunk_store`` and return its new ID.

    IDs take the form ``chunk_<n>``, where ``n`` is the number of chunks
    already stored when the call is made.
    """
    reference = "chunk_" + str(len(text_chunk_store))
    text_chunk_store[reference] = text_chunk
    return reference
90
+
91
+ # Add text chunks to Pinecone with reference IDs
92
# Number of vectors sent to Pinecone per request; batching avoids one
# network round trip per chunk.
UPSERT_BATCH_SIZE = 100

# Embed every chunk and pair it with a reference ID.
# NOTE(review): within this script text_chunk_store lives only in memory, so
# the chunk_id metadata cannot be resolved back to text after the process
# exits - confirm whether the text itself should be stored or persisted.
vectors = []
for i, text_chunk in enumerate(text_chunks):
    chunk_id = store_text_chunk(text_chunk)
    vectors.append(
        {
            "id": f"vec_{i}",
            "values": embeddings.embed_query(text_chunk),  # Embed the text chunk
            "metadata": {"chunk_id": chunk_id},  # Only store the reference ID as metadata
        }
    )

# Upload in fixed-size batches to the same namespace as before.
for start in range(0, len(vectors), UPSERT_BATCH_SIZE):
    index.upsert(
        vectors=vectors[start:start + UPSERT_BATCH_SIZE],
        namespace="ns1",
    )

print("Indexing completed successfully.")
template.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os # Import the os module to interact with the operating system
2
+ from pathlib import Path # Import the Path class from pathlib module for handling file paths
3
+ import logging # Import the logging module for logging messages
4
+
5
+ # Configure the logging to display messages with time and message content
6
# Log at INFO level, prefixing each message with its timestamp.
logging.basicConfig(level=logging.INFO, format='[%(asctime)s]: %(message)s')

# Project skeleton: every path here is created (empty) by the loop below
# when it is missing.
list_of_files = [
    # Python package sources
    "src/__init__.py",
    "src/helper.py",
    "src/prompt.py",
    # Environment configuration and packaging
    ".env",
    "setup.py",
    # Research notebook
    "research/trials.ipynb",
    # Application entry point and index-building script
    "app.py",
    "store_index.py",
    # Web assets
    "static/.gitkeep",
    "templates/chat.html",
    # Version-control ignore rules
    ".gitignore",
]
23
+
24
+ # Iterate over each file path in the list
25
# Create each listed file, and its parent directory, if not already present.
for entry in list_of_files:
    filepath = Path(entry)
    filedir, filename = os.path.split(filepath)

    # Top-level files have an empty directory part and need no folder.
    if filedir:
        os.makedirs(filedir, exist_ok=True)
        logging.info(f"Creating directory: {filedir} for the file {filename}")

    # A file that exists and has content is left untouched.
    has_content = os.path.exists(filepath) and os.path.getsize(filepath) != 0
    if has_content:
        logging.info(f"{filename} is already created")
        continue

    # Opening in write mode is enough to create (or keep) an empty file.
    with open(filepath, 'w') as f:
        pass
    logging.info(f"Creating empty file: {filepath}")
templates/chat.html ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>Chatbot</title>
5
+ <link href="//maxcdn.bootstrapcdn.com/bootstrap/4.1.1/css/bootstrap.min.css" rel="stylesheet" id="bootstrap-css">
6
+ <script src="//maxcdn.bootstrapcdn.com/bootstrap/4.1.1/js/bootstrap.min.js"></script>
7
+ <script src="//cdnjs.cloudflare.com/ajax/libs/jquery/3.2.1/jquery.min.js"></script>
8
+ <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.1.3/css/bootstrap.min.css" integrity="sha384-MCw98/SFnGE8fJT3GXwEOngsV7Zt27NXFoaoApmYm81iuXoPkFOJwJ8ERdknLPMO" crossorigin="anonymous">
9
+ <link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.5.0/css/all.css" integrity="sha384-B4dIYHKNBt8Bc12p+WXckhzcICo0wtJAoU8YZTY5qE0Id1GSseTk6S+L3BlXeVIU" crossorigin="anonymous">
10
+ <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
11
+ <link rel="stylesheet" href="static/style.css"/>
12
+ <style>
13
+ body,html {
14
+ height: 100%;
15
+ margin: 0;
16
+ background: rgb(44, 47, 59);
17
+ background: -webkit-linear-gradient(to right, rgb(40, 59, 34), rgb(54, 60, 70), rgb(32, 32, 43));
18
+ background: linear-gradient(to right, rgb(38, 51, 61), rgb(50, 55, 65), rgb(33, 33, 78));
19
+ }
20
+
21
+ .chat {
22
+ margin-top: auto;
23
+ margin-bottom: auto;
24
+ }
25
+ .card {
26
+ height: 500px;
27
+ border-radius: 15px !important;
28
+ background-color: rgba(0,0,0,0.4) !important;
29
+ }
30
+ .contacts_body {
31
+ padding: 0.75rem 0 !important;
32
+ overflow-y: auto;
33
+ white-space: nowrap;
34
+ }
35
+ .msg_card_body {
36
+ overflow-y: auto;
37
+ }
38
+ .card-header {
39
+ border-radius: 15px 15px 0 0 !important;
40
+ border-bottom: 0 !important;
41
+ }
42
+ .card-footer {
43
+ border-radius: 0 0 15px 15px !important;
44
+ border-top: 0 !important;
45
+ }
46
+ .container {
47
+ align-content: center;
48
+ }
49
+ .search {
50
+ border-radius: 15px 0 0 15px !important;
51
+ background-color: rgba(0,0,0,0.3) !important;
52
+ border:0 !important;
53
+ color:white !important;
54
+ }
55
+ .search:focus {
56
+ box-shadow:none !important;
57
+ outline:0px !important;
58
+ }
59
+ .type_msg {
60
+ background-color: rgba(0,0,0,0.3) !important;
61
+ border:0 !important;
62
+ color:white !important;
63
+ height: 60px !important;
64
+ overflow-y: auto;
65
+ }
66
+ .type_msg:focus {
67
+ box-shadow:none !important;
68
+ outline:0px !important;
69
+ }
70
+ .attach_btn {
71
+ border-radius: 15px 0 0 15px !important;
72
+ background-color: rgba(0,0,0,0.3) !important;
73
+ border:0 !important;
74
+ color: white !important;
75
+ cursor: pointer;
76
+ }
77
+ .send_btn {
78
+ border-radius: 0 15px 15px 0 !important;
79
+ background-color: rgba(0,0,0,0.3) !important;
80
+ border:0 !important;
81
+ color: white !important;
82
+ cursor: pointer;
83
+ }
84
+ .search_btn {
85
+ border-radius: 0 15px 15px 0 !important;
86
+ background-color: rgba(0,0,0,0.3) !important;
87
+ border:0 !important;
88
+ color: white !important;
89
+ cursor: pointer;
90
+ }
91
+ .contacts {
92
+ list-style: none;
93
+ padding: 0;
94
+ }
95
+ .contacts li {
96
+ width: 100% !important;
97
+ padding: 5px 10px;
98
+ margin-bottom: 15px !important;
99
+ }
100
+ .active {
101
+ background-color: rgba(0,0,0,0.3);
102
+ }
103
+ .user_img {
104
+ height: 70px;
105
+ width: 70px;
106
+ border:1.5px solid #f5f6fa;
107
+ }
108
+ .user_img_msg {
109
+ height: 40px;
110
+ width: 40px;
111
+ border:1.5px solid #f5f6fa;
112
+ }
113
+ .img_cont {
114
+ position: relative;
115
+ height: 70px;
116
+ width: 70px;
117
+ }
118
+ .img_cont_msg {
119
+ height: 40px;
120
+ width: 40px;
121
+ }
122
+ .online_icon {
123
+ position: absolute;
124
+ height: 15px;
125
+ width:15px;
126
+ background-color: #4cd137;
127
+ border-radius: 50%;
128
+ bottom: 0.2em;
129
+ right: 0.4em;
130
+ border:1.5px solid white;
131
+ }
132
+ .offline {
133
+ background-color: #c23616 !important;
134
+ }
135
+ .user_info {
136
+ margin-top: auto;
137
+ margin-bottom: auto;
138
+ margin-left: 15px;
139
+ }
140
+ .user_info span {
141
+ font-size: 20px;
142
+ color: white;
143
+ }
144
+ .user_info p {
145
+ font-size: 10px;
146
+ color: rgba(255,255,255,0.6);
147
+ }
148
+ .video_cam {
149
+ margin-left: 50px;
150
+ margin-top: 5px;
151
+ }
152
+ .video_cam span {
153
+ color: white;
154
+ font-size: 20px;
155
+ cursor: pointer;
156
+ margin-right: 20px;
157
+ }
158
+ .msg_cotainer {
159
+ margin-top: auto;
160
+ margin-bottom: auto;
161
+ margin-left: 10px;
162
+ border-radius: 25px;
163
+ background-color: rgb(82, 172, 255);
164
+ padding: 10px;
165
+ position: relative;
166
+ }
167
+ .msg_cotainer_send {
168
+ margin-top: auto;
169
+ margin-bottom: auto;
170
+ margin-right: 10px;
171
+ border-radius: 25px;
172
+ background-color: #58cc71;
173
+ padding: 10px;
174
+ position: relative;
175
+ }
176
+ .msg_time {
177
+ position: absolute;
178
+ left: 0;
179
+ bottom: -15px;
180
+ color: rgba(255,255,255,0.5);
181
+ font-size: 10px;
182
+ }
183
+ .msg_time_send {
184
+ position: absolute;
185
+ right:0;
186
+ bottom: -15px;
187
+ color: rgba(255,255,255,0.5);
188
+ font-size: 10px;
189
+ }
190
+ .msg_head {
191
+ position: relative;
192
+ }
193
+ #action_menu_btn {
194
+ position: absolute;
195
+ right: 10px;
196
+ top: 10px;
197
+ color: white;
198
+ cursor: pointer;
199
+ font-size: 20px;
200
+ }
201
+ .action_menu {
202
+ z-index: 1;
203
+ position: absolute;
204
+ padding: 15px 0;
205
+ background-color: rgba(0,0,0,0.5);
206
+ color: white;
207
+ border-radius: 15px;
208
+ top: 30px;
209
+ right: 15px;
210
+ display: none;
211
+ }
212
+ .action_menu ul {
213
+ list-style: none;
214
+ padding: 0;
215
+ margin: 0;
216
+ }
217
+ .action_menu ul li {
218
+ width: 100%;
219
+ padding: 10px 15px;
220
+ margin-bottom: 5px;
221
+ }
222
+ .action_menu ul li i {
223
+ padding-right: 10px;
224
+ }
225
+ .action_menu ul li:hover {
226
+ cursor: pointer;
227
+ background-color: rgba(0,0,0,0.2);
228
+ }
229
+ @media(max-width: 576px) {
230
+ .contacts_card {
231
+ margin-bottom: 15px !important;
232
+ }
233
+ }
234
+ </style>
235
+ </head>
236
+ <body>
237
+ <div class="container-fluid h-100">
238
+ <div class="row justify-content-center h-100">
239
+ <div class="col-md-8 col-xl-6 chat">
240
+ <div class="card">
241
+ <div class="card-header msg_head">
242
+ <div class="d-flex bd-highlight">
243
+ <div class="img_cont">
244
+ <img src="https://img.freepik.com/free-photo/smiling-tourist-woman-using-magnifying-glass-look-map_23-2147653034.jpg" class="rounded-circle user_img">
245
+ <span class="online_icon"></span>
246
+ </div>
247
+ <div class="user_info">
248
+ <span>Travel bot</span>
249
+ <p>Ask me anything!</p>
250
+ </div>
251
+ </div>
252
+ </div>
253
+ <div id="messageFormeight" class="card-body msg_card_body">
254
+ </div>
255
+ <div class="card-footer">
256
+ <form id="messageArea" class="input-group">
257
+ <input type="text" id="text" name="msg" placeholder="Type your message..." autocomplete="off" class="form-control type_msg" required/>
258
+ <div class="input-group-append">
259
+ <button type="submit" id="send" class="input-group-text send_btn"><i class="fas fa-location-arrow"></i></button>
260
+ </div>
261
+ </form>
262
+ </div>
263
+ </div>
264
+ </div>
265
+ </div>
266
+ </div>
267
+
268
+ <script>
269
$(document).ready(function() {
    var USER_AVATAR = "https://i.ibb.co/d5b84Xw/Untitled-design.png";
    var BOT_AVATAR = "https://www.prdistribution.com/spirit/uploads/pressreleases/2019/newsreleases/--waterless-geothermal-gets-installed-in-historic-breakers-mansion-.png";

    // Build one chat row. The message is inserted with .text(), so neither
    // the user's input nor the model's reply is ever interpreted as HTML.
    function buildMessage(text, timeText, avatarUrl, fromUser) {
        var bubble = $("<div>")
            .addClass(fromUser ? "msg_cotainer_send" : "msg_cotainer")
            .text(text)
            .append($("<span>")
                .addClass(fromUser ? "msg_time_send" : "msg_time")
                .text(timeText));
        var avatar = $("<div>")
            .addClass("img_cont_msg")
            .append($("<img>")
                .addClass("rounded-circle user_img_msg")
                .attr("src", avatarUrl));
        var row = $("<div>")
            .addClass("d-flex mb-4")
            .addClass(fromUser ? "justify-content-end" : "justify-content-start");
        // User rows show the bubble first; bot rows show the avatar first.
        return fromUser ? row.append(bubble, avatar) : row.append(avatar, bubble);
    }

    $("#messageArea").on("submit", function(event) {
        // Cancel the native submit first so an error below cannot reload the page.
        event.preventDefault();

        const date = new Date();
        const hour = date.getHours();
        const minute = date.getMinutes();
        // Zero-pad the minutes so 9:05 is not shown as "9:5".
        const str_time = hour + ":" + (minute < 10 ? "0" : "") + minute;
        var rawText = $("#text").val();

        $("#text").val("");
        $("#messageFormeight").append(buildMessage(rawText, str_time, USER_AVATAR, true));
        $.ajax({
            data: {
                msg: rawText,
            },
            type: "POST",
            url: "/get",
        }).done(function(data) {
            $("#messageFormeight").append(buildMessage(data, str_time, BOT_AVATAR, false));
        }).fail(function() {
            // Show a reply even when the server answers with an error status.
            $("#messageFormeight").append(
                buildMessage("Sorry, something went wrong. Please try again.", str_time, BOT_AVATAR, false));
        });
    });
});
293
+ </script>
294
+ </body>
295
+ </html>