thivav committed
Commit e88c82c
1 Parent(s): 143983f

init commit
.github/workflows/sync_to_huggingface_space.yml ADDED
@@ -0,0 +1,20 @@
+ name: Sync to HuggingFace Space
+ on:
+   push:
+     branches: [main]
+
+   # to run this workflow manually from the Actions tab
+   workflow_dispatch:
+
+ jobs:
+   sync-to-hub:
+     runs-on: ubuntu-latest
+     steps:
+       - uses: actions/checkout@v4
+         with:
+           fetch-depth: 0
+           lfs: true
+       - name: Push to hub
+         env:
+           HF_TOKEN: ${{ secrets.HF_TOKEN }}
+         run: git push --force https://thivav:$HF_TOKEN@huggingface.co/spaces/thivav/chat_with_pdf_using_zephyr-7b-beta main
.gitignore ADDED
@@ -0,0 +1,160 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/#use-with-ide
+ .pdm.toml
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
README.md CHANGED
@@ -1,12 +1,24 @@
  ---
- title: Chat With Pdf Using Zephyr-7b-beta
- emoji: 🔥
- colorFrom: yellow
- colorTo: pink
+ title: Chat With Pdf Using Zephyr-7b-Beta
+ emoji: 🗣📢
+ colorFrom: red
+ colorTo: green
  sdk: streamlit
  sdk_version: 1.31.1
  app_file: app.py
- pinned: false
+ pinned: true
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ ![Zephyr-7b-beta](/img/Zephyr-7b.png)
+
+ # Chat with PDF using Zephyr-7b 🗣📢
+
+ #RAG | #Semantic | #Embedding | #HybridSearch | #EnsembleRetriever | #BAAI-Embeddings
+
+ Chat with PDF using the [Zephyr-7b LLM](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)
+
+ - [HuggingFaceH4/zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)
+ - Zephyr-7b is fine-tuned from [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)
+ - [Embeddings](https://huggingface.co/BAAI/bge-base-en-v1.5)
+
+ [Chat with PDF using Zephyr-7b Beta - Playground](https://huggingface.co/spaces/thivav/chat_with_pdf_using_zephyr-7b-beta)
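To make the "#HybridSearch | #EnsembleRetriever" tags in the README concrete before reading `app.py`, here is a minimal sketch of the idea: equal-weight fusion of a BM25 keyword retriever and a dense BAAI-embedding retriever over the same PDF chunks. It reuses only APIs that appear in this commit; `my.pdf` and the `hf_...` token are placeholders.

```python
# Hybrid-search sketch: BM25 (keyword) + Chroma (dense) fused by EnsembleRetriever.
from langchain.retrievers import EnsembleRetriever
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain_community.retrievers import BM25Retriever
from langchain_community.vectorstores import Chroma

docs = PyPDFLoader("my.pdf").load()  # placeholder path
chunks = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100).split_documents(docs)

embeddings = HuggingFaceInferenceAPIEmbeddings(api_key="hf_...", model_name="BAAI/bge-base-en-v1.5")
dense = Chroma.from_documents(chunks, embeddings).as_retriever(search_kwargs={"k": 5})

keyword = BM25Retriever.from_documents(chunks)
keyword.k = 5

# 50/50 weighting, as in app.py; the fused ranking is what feeds the LLM chain.
hybrid = EnsembleRetriever(retrievers=[dense, keyword], weights=[0.5, 0.5])
results = hybrid.get_relevant_documents("What is this document about?")
```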
app.py ADDED
@@ -0,0 +1,182 @@
+ import os
+ import tempfile
+
+ import streamlit as st
+ from langchain.chains import ConversationalRetrievalChain
+ from langchain.memory import ConversationBufferMemory
+ from langchain.retrievers import EnsembleRetriever
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_community.chat_message_histories import StreamlitChatMessageHistory
+ from langchain_community.document_loaders import PyPDFLoader
+ from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
+ from langchain_community.llms import CTransformers
+ from langchain_community.retrievers import BM25Retriever
+ from langchain_community.vectorstores import Chroma
+ from streamlit_extras.add_vertical_space import add_vertical_space
+
+
+ @st.cache_resource(ttl="1h")
+ def get_retriever(pdf_files):
+     """get retriever"""
+
+     docs = []
+     temp_dir = tempfile.TemporaryDirectory()
+     for pdf_file in pdf_files:
+         temp_pdf_file_path = os.path.join(temp_dir.name, pdf_file.name)
+
+         with open(temp_pdf_file_path, "wb") as f:
+             f.write(pdf_file.getvalue())
+
+         loader = PyPDFLoader(temp_pdf_file_path)
+         docs.extend(loader.load())
+
+     text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
+         chunk_size=1500, chunk_overlap=200
+     )
+     chunks = text_splitter.split_documents(docs)
+
+     # get huggingface token from env secret
+     HF_TOKEN = os.environ.get("HF_TOKEN")
+
+     # embeddings
+     embeddings = HuggingFaceInferenceAPIEmbeddings(
+         api_key=HF_TOKEN,
+         model_name="BAAI/bge-base-en-v1.5",
+     )
+
+     # retrieve k
+     k = 5
+
+     # vector retriever
+     vector_store = Chroma.from_documents(chunks, embeddings)
+     vector_retriever = vector_store.as_retriever(search_kwargs={"k": k})
+
+     # keyword (BM25) retriever
+     semantic_retriever = BM25Retriever.from_documents(chunks)
+     semantic_retriever.k = k
+
+     # ensemble retriever
+     ensemble_retriever = EnsembleRetriever(
+         retrievers=[vector_retriever, semantic_retriever], weights=[0.5, 0.5]
+     )
+
+     return ensemble_retriever
+
+
+ @st.cache_resource(ttl="1h")
+ def initialize_llm(_retriever):
+     """initialize llm"""
+
+     # load llm model
+     model_type = "mistral"
+     model_id = "TheBloke/zephyr-7B-beta-GGUF"
+     model_file = "zephyr-7b-beta.Q4_K_S.gguf"
+
+     config = {
+         "max_new_tokens": 2048,
+         "repetition_penalty": 1.1,
+         "temperature": 1,
+         "top_k": 50,
+         "top_p": 0.9,
+         "stream": True,
+         "context_length": 4096,
+         "gpu_layers": 0,
+         "threads": int(os.cpu_count()),
+     }
+
+     llm = CTransformers(
+         model=model_id,
+         model_file=model_file,
+         model_type=model_type,
+         config=config,
+         lib="avx2",
+     )
+
+     chat_history = StreamlitChatMessageHistory()
+
+     # init chat history memory
+     memory = ConversationBufferMemory(
+         memory_key="chat_history", chat_memory=chat_history, return_messages=True
+     )
+
+     chain = ConversationalRetrievalChain.from_llm(
+         llm, retriever=_retriever, memory=memory, verbose=False
+     )
+
+     return chain, chat_history
+
+
+ def main():
+     """main func"""
+
+     st.set_page_config(
+         page_title="Talk to PDF using Zephyr-7B-Beta",
+         page_icon="📰",
+         layout="centered",
+         initial_sidebar_state="expanded",
+     )
+
+     st.header("Talk to PDF files 📰", divider="rainbow")
+     st.subheader(
+         "Enjoy :red[talking] with :green[PDF] files using :sunglasses: Zephyr-7B-Beta"
+     )
+     st.markdown(
+         """
+         * Used the [zephyr-7b-beta.Q4_K_S.gguf](https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_S.gguf) quantised
+         version of the [Zephyr-7B Beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) model
+         from the [TheBloke/zephyr-7B-beta-GGUF](https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF) repository.
+         ___
+         """
+     )
+
+     st.sidebar.title("Talk to PDF 📰")
+     st.sidebar.markdown(
+         "[Checkout the repository](https://github.com/ThivaV/chat_with_pdf_using_zephyr-7b)"
+     )
+     st.sidebar.markdown(
+         """
+         ### This is an LLM-powered chatbot, built using:
+
+         * [Streamlit](https://streamlit.io)
+         * [LangChain](https://python.langchain.com/)
+         * [HuggingFaceH4/zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)
+         * [TheBloke/zephyr-7B-beta-GGUF](https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF)
+         * [CTransformers](https://github.com/marella/ctransformers)
+         * [Embeddings](https://huggingface.co/BAAI/bge-base-en-v1.5)
+         * [Chroma](https://docs.trychroma.com/?lang=py)
+         ___
+         """
+     )
+
+     add_vertical_space(2)
+
+     upload_pdf_files = st.sidebar.file_uploader(
+         "Upload PDF files 📤", type="pdf", accept_multiple_files=True
+     )
+
+     if not upload_pdf_files:
+         st.info("👈 :red[Please upload PDF files] ⛔")
+         st.stop()
+
+     retriever = get_retriever(upload_pdf_files)
+
+     chain, chat_history = initialize_llm(retriever)
+
+     # load previous chat history
+     # re-draw the chat history in the chat window
+     for message in chat_history.messages:
+         st.chat_message(message.type).write(message.content)
+
+     if prompt := st.chat_input("Ask questions"):
+         with st.chat_message("human"):
+             st.markdown(prompt)
+
+         response = chain.invoke(prompt)
+
+         with st.chat_message("ai"):
+             st.write(response["answer"])
+
+
+ if __name__ == "__main__":
+     # init main func
+     main()
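For a quick smoke test of the quantised model alone (no Streamlit, no retrieval), the CTransformers wrapper from `app.py` can be driven directly. A minimal sketch with a trimmed-down config; the prompt string follows the Zephyr chat format used in the notebooks:

```python
# Load only the GGUF model app.py uses (downloaded from the Hub on first run)
# and generate one completion on CPU.
from langchain_community.llms import CTransformers

llm = CTransformers(
    model="TheBloke/zephyr-7B-beta-GGUF",
    model_file="zephyr-7b-beta.Q4_K_S.gguf",
    model_type="mistral",
    config={"max_new_tokens": 128, "temperature": 0.7, "context_length": 4096},
)

prompt = "<|system|>\nYou are a helpful assistant.</s>\n<|user|>\nSay hello.</s>\n<|assistant|>\n"
print(llm.invoke(prompt))
```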
data/.gitkeep ADDED
File without changes
doc/zephyr-7b.tar.gz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:29b840eef6b47880bff422f5c6cea2fbe19fcd6c3831e78a0e56ec669a8654b0
+ size 3402315
img/Zephyr-7b.png ADDED
models/.gitkeep ADDED
File without changes
notebooks/chat_with_pdf_using_zephyr-7b_v1.ipynb ADDED
@@ -0,0 +1,329 @@
+ {
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# HuggingFaceHub API method"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain_community.document_loaders import PyPDFLoader\n",
+ "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
+ "from langchain.vectorstores import Chroma\n",
+ "from langchain.retrievers import BM25Retriever, EnsembleRetriever\n",
+ "\n",
+ "from langchain_core.prompts import ChatPromptTemplate\n",
+ "from langchain_core.output_parsers import StrOutputParser\n",
+ "from langchain_core.runnables import RunnablePassthrough\n",
+ "\n",
+ "from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings\n",
+ "from langchain_community.llms import HuggingFaceHub"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Load Data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "file_path = \"../data/Orca Progressive Learning from Complex.pdf\"\n",
+ "data_file = PyPDFLoader(file_path)\n",
+ "docs = data_file.load()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Split & Chunk Docs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# create chunks\n",
+ "splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)\n",
+ "chunks = splitter.split_documents(docs)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Load Embedder"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "HF_TOKEN = input(\"Enter your HuggingFace Token\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# https://huggingface.co/BAAI/bge-base-en-v1.5\n",
+ "embeddings = HuggingFaceInferenceAPIEmbeddings(\n",
+ "    api_key=HF_TOKEN, model_name=\"BAAI/bge-base-en-v1.5\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# retrieve k\n",
+ "k = 5"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Vector Retriever"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "vector_store = Chroma.from_documents(chunks, embeddings)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "vector_retriever = vector_store.as_retriever(search_kwargs={\"k\": k})"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Semantic Retriever"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "semantic_retriever = BM25Retriever.from_documents(chunks)\n",
+ "semantic_retriever.k = k"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Ensemble Retriever"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ensemble_retriever = EnsembleRetriever(\n",
+ "    retrievers=[vector_retriever, semantic_retriever],\n",
+ "    weights=[0.5, 0.5]\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### LLM"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/mnt/d/repo/experiments/chat_with_pdf_using_zephyr-7b/venv/lib/python3.9/site-packages/langchain_core/_api/deprecation.py:117: LangChainDeprecationWarning: The class `langchain_community.llms.huggingface_hub.HuggingFaceHub` was deprecated in langchain-community 0.0.21 and will be removed in 0.2.0. Use HuggingFaceEndpoint instead.\n",
+ "  warn_deprecated(\n"
+ ]
+ }
+ ],
+ "source": [
+ "# HuggingFaceH4/zephyr-7b-beta\n",
+ "llm = HuggingFaceHub(\n",
+ "    repo_id=\"HuggingFaceH4/zephyr-7b-beta\",\n",
+ "    model_kwargs={\"temperature\": 0.1, \"max_new_tokens\": 1024},\n",
+ "    huggingfacehub_api_token=HF_TOKEN\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Prompting"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "template = \"\"\"\n",
+ "<|system|>\n",
+ "You are a helpful AI Assistant that follows instructions extremely well.\n",
+ "Use the following context to answer the user question.\n",
+ "\n",
+ "Think step by step before answering the question.\n",
+ "You will get a $100 tip if you provide a correct answer.\n",
+ "\n",
+ "CONTEXT: {context}\n",
+ "</s>\n",
+ "<|user|>\n",
+ "{query}\n",
+ "</s>\n",
+ "<|assistant|>\n",
+ "\"\"\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prompt = ChatPromptTemplate.from_template(template)\n",
+ "output_parser = StrOutputParser()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "chain = (\n",
+ "    {\"context\": ensemble_retriever, \"query\": RunnablePassthrough()}\n",
+ "    | prompt\n",
+ "    | llm\n",
+ "    | output_parser\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "HfHubHTTPError",
+ "evalue": "429 Client Error: Too Many Requests for url: https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta (Request ID: MB9PO09bJU8rFNr1BbEry)\n\nRate limit reached. You reached free usage limit (reset hourly). Please subscribe to a plan at https://huggingface.co/pricing to use the API at this rate",
+ "output_type": "error",
+ "traceback": [
+ "HfHubHTTPError: 429 Client Error: Too Many Requests for url: https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta (rate limit reached on the free Inference API tier; ANSI-escaped traceback trimmed)"
+ ]
+ }
+ ],
+ "source": [
+ "print(chain.invoke(\"What is instruction tuning?\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(chain.invoke(\"How does Orca compare to ChatGPT?\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "_______________________________________________________"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+ }
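The v1 run surfaces two actionable signals: the deprecation warning on `HuggingFaceHub` (cell 11) and the 429 rate limit on the free Inference API (cell 15). As the warning itself suggests, `HuggingFaceEndpoint` is the replacement; a hedged drop-in sketch for the `llm = HuggingFaceHub(...)` cell, under the same assumptions (`HF_TOKEN` already collected by the `input()` cell):

```python
# Replacement for the deprecated HuggingFaceHub wrapper used in v1.
from langchain_community.llms import HuggingFaceEndpoint

llm = HuggingFaceEndpoint(
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    temperature=0.1,
    max_new_tokens=1024,
    huggingfacehub_api_token=HF_TOKEN,  # token from the input() cell above
)
```

The 429 itself is a quota issue rather than a code bug; the quantised local model used in v4 and `app.py` sidesteps it entirely.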
notebooks/chat_with_pdf_using_zephyr-7b_v2.ipynb ADDED
@@ -0,0 +1,222 @@
+ {
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Loading the model directly with HuggingFace\n",
+ "\n",
+ "* AutoModelForCausalLM"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain_community.document_loaders import PyPDFLoader\n",
+ "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
+ "from langchain.vectorstores import Chroma\n",
+ "from langchain.retrievers import BM25Retriever, EnsembleRetriever\n",
+ "\n",
+ "from langchain_core.prompts import ChatPromptTemplate\n",
+ "from langchain_core.output_parsers import StrOutputParser\n",
+ "from langchain_core.runnables import RunnablePassthrough\n",
+ "\n",
+ "from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "from transformers import AutoModelForCausalLM"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "file_path = \"../data/Orca Progressive Learning from Complex.pdf\"\n",
+ "data_file = PyPDFLoader(file_path)\n",
+ "docs = data_file.load()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# create chunks\n",
+ "splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)\n",
+ "chunks = splitter.split_documents(docs)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "HF_TOKEN = input(\"Enter your HuggingFace Token\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# https://huggingface.co/BAAI/bge-base-en-v1.5\n",
+ "embeddings = HuggingFaceInferenceAPIEmbeddings(\n",
+ "    api_key=HF_TOKEN, model_name=\"BAAI/bge-base-en-v1.5\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# retrieve k\n",
+ "k = 5"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "vector_store = Chroma.from_documents(chunks, embeddings)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "vector_retriever = vector_store.as_retriever(search_kwargs={\"k\": k})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "semantic_retriever = BM25Retriever.from_documents(chunks)\n",
+ "semantic_retriever.k = k"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ensemble_retriever = EnsembleRetriever(\n",
+ "    retrievers=[vector_retriever, semantic_retriever], weights=[0.5, 0.5]\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# tokenizer = AutoTokenizer.from_pretrained(\"HuggingFaceH4/zephyr-7b-beta\")\n",
+ "llm = AutoModelForCausalLM.from_pretrained(\n",
+ "    \"HuggingFaceH4/zephyr-7b-beta\", torch_dtype=torch.bfloat16, low_cpu_mem_usage=True\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "template = \"\"\"\n",
+ "<|system|>\n",
+ "You are a helpful AI Assistant that follows instructions extremely well.\n",
+ "Use the following context to answer the user question.\n",
+ "\n",
+ "Think step by step before answering the question.\n",
+ "You will get a $100 tip if you provide a correct answer.\n",
+ "\n",
+ "CONTEXT: {context}\n",
+ "</s>\n",
+ "<|user|>\n",
+ "{query}\n",
+ "</s>\n",
+ "<|assistant|>\n",
+ "\"\"\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prompt = ChatPromptTemplate.from_template(template)\n",
+ "output_parser = StrOutputParser()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "chain = (\n",
+ "    {\"context\": ensemble_retriever, \"query\": RunnablePassthrough()}\n",
+ "    | prompt\n",
+ "    | llm\n",
+ "    | output_parser\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(chain.invoke(\"What is instruction tuning?\"))"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+ }
notebooks/chat_with_pdf_using_zephyr-7b_v3.ipynb ADDED
@@ -0,0 +1,314 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# Using HuggingFace use a pipeline as a high-level helper method\n",
8
+ "\n",
9
+ "* from transformers import pipeline"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": 2,
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "from langchain_community.document_loaders import PyPDFLoader\n",
19
+ "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
20
+ "from langchain.vectorstores import Chroma\n",
21
+ "from langchain.retrievers import BM25Retriever, EnsembleRetriever\n",
22
+ "\n",
23
+ "from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "code",
28
+ "execution_count": 3,
29
+ "metadata": {},
30
+ "outputs": [],
31
+ "source": [
32
+ "import torch\n",
33
+ "from transformers import pipeline"
34
+ ]
35
+ },
36
+ {
37
+ "cell_type": "code",
38
+ "execution_count": 4,
39
+ "metadata": {},
40
+ "outputs": [],
41
+ "source": [
42
+ "file_path = \"../data/Orca Progressive Learning from Complex.pdf\"\n",
43
+ "data_file = PyPDFLoader(file_path)\n",
44
+ "docs = data_file.load()"
45
+ ]
46
+ },
47
+ {
48
+ "cell_type": "code",
49
+ "execution_count": 5,
50
+ "metadata": {},
51
+ "outputs": [],
52
+ "source": [
53
+ "# create chunks\n",
54
+ "splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)\n",
55
+ "chunks = splitter.split_documents(docs)"
56
+ ]
57
+ },
58
+ {
59
+ "cell_type": "code",
60
+ "execution_count": 6,
61
+ "metadata": {},
62
+ "outputs": [],
63
+ "source": [
64
+ "HF_TOKEN = input(\"Enter your HuggingFace Token\")"
65
+ ]
66
+ },
67
+ {
68
+ "cell_type": "code",
69
+ "execution_count": 7,
70
+ "metadata": {},
71
+ "outputs": [],
72
+ "source": [
73
+ "# https://huggingface.co/BAAI/bge-base-en-v1.5\n",
74
+ "embeddings = HuggingFaceInferenceAPIEmbeddings(\n",
75
+ " api_key=HF_TOKEN, model_name=\"BAAI/bge-base-en-v1.5\"\n",
76
+ ")"
77
+ ]
78
+ },
79
+ {
80
+ "cell_type": "code",
81
+ "execution_count": 8,
82
+ "metadata": {},
83
+ "outputs": [],
84
+ "source": [
85
+ "# retrieve k\n",
86
+ "k = 5"
87
+ ]
88
+ },
89
+ {
90
+ "cell_type": "code",
91
+ "execution_count": 9,
92
+ "metadata": {},
93
+ "outputs": [],
94
+ "source": [
95
+ "vector_store = Chroma.from_documents(chunks, embeddings)\n",
96
+ "vector_retriever = vector_store.as_retriever(search_kwargs={\"k\": k})"
97
+ ]
98
+ },
99
+ {
100
+ "cell_type": "code",
101
+ "execution_count": 10,
102
+ "metadata": {},
103
+ "outputs": [],
104
+ "source": [
105
+ "semantic_retriever = BM25Retriever.from_documents(chunks)\n",
106
+ "semantic_retriever.k = k"
107
+ ]
108
+ },
109
+ {
110
+ "cell_type": "code",
111
+ "execution_count": 11,
112
+ "metadata": {},
113
+ "outputs": [],
114
+ "source": [
115
+ "ensemble_retriever = EnsembleRetriever(\n",
116
+ " retrievers=[vector_retriever, semantic_retriever], weights=[0.5, 0.5]\n",
117
+ ")"
118
+ ]
119
+ },
120
+ {
121
+ "cell_type": "code",
122
+ "execution_count": 13,
123
+ "metadata": {},
124
+ "outputs": [
125
+ {
126
+ "data": {
127
+ "application/vnd.jupyter.widget-view+json": {
128
+ "model_id": "e7be2fbc6d0b4866b0ec4605ab2919eb",
129
+ "version_major": 2,
130
+ "version_minor": 0
131
+ },
132
+ "text/plain": [
133
+ "Loading checkpoint shards: 0%| | 0/8 [00:00<?, ?it/s]"
134
+ ]
135
+ },
136
+ "metadata": {},
137
+ "output_type": "display_data"
138
+ },
139
+ {
140
+ "name": "stderr",
141
+ "output_type": "stream",
142
+ "text": [
143
+ "WARNING:root:Some parameters are on the meta device device because they were offloaded to the disk and cpu.\n"
144
+ ]
145
+ }
146
+ ],
147
+ "source": [
148
+ "pipe = pipeline(\n",
149
+ " \"text-generation\",\n",
150
+ " model=\"HuggingFaceH4/zephyr-7b-beta\",\n",
151
+ " torch_dtype=torch.bfloat16,\n",
152
+ " device_map=\"auto\",\n",
153
+ ")"
154
+ ]
155
+ },
156
+ {
157
+ "cell_type": "code",
158
+ "execution_count": 14,
159
+ "metadata": {},
160
+ "outputs": [
161
+ {
162
+ "name": "stderr",
163
+ "output_type": "stream",
164
+ "text": [
165
+ "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n"
166
+ ]
167
+ },
168
+ {
169
+ "name": "stdout",
170
+ "output_type": "stream",
171
+ "text": [
172
+ "<|system|>\n",
173
+ "You are a friendly chatbot who always responds in the style of a pirate</s>\n",
174
+ "<|user|>\n",
175
+ "How many helicopters can a human eat in one sitting?</s>\n",
176
+ "<|assistant|>\n",
177
+ "Matey, I'm afraid no human can eat a helicopter, as it's not food. Helicopters are machines used for transportation and other purposes, not a source of nourishment. I'd suggest you stick to eating hearty meals of grog, seafood, and maybe some plundered booty if ya fancy it! Arrrr!\n"
178
+ ]
179
+ }
180
+ ],
181
+ "source": [
182
+ "# We use the tokenizer's chat template to format each message - see https://huggingface.co/docs/transformers/main/en/chat_templating\n",
183
+ "messages = [\n",
184
+ " {\n",
185
+ " \"role\": \"system\",\n",
186
+ " \"content\": \"You are a friendly chatbot who always responds in the style of a pirate\",\n",
187
+ " },\n",
188
+ " {\"role\": \"user\", \"content\": \"How many helicopters can a human eat in one sitting?\"},\n",
189
+ "]\n",
190
+ "\n",
191
+ "\n",
192
+ "prompt = pipe.tokenizer.apply_chat_template(\n",
193
+ " messages, tokenize=False, add_generation_prompt=True\n",
194
+ ")\n",
195
+ "\n",
196
+ "\n",
197
+ "outputs = pipe(\n",
198
+ " prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95\n",
199
+ ")\n",
200
+ "\n",
201
+ "print(outputs[0][\"generated_text\"])"
202
+ ]
203
+ },
204
+ {
205
+ "cell_type": "code",
206
+ "execution_count": null,
207
+ "metadata": {},
208
+ "outputs": [],
209
+ "source": []
210
+ },
211
+ {
212
+ "cell_type": "markdown",
213
+ "metadata": {},
214
+ "source": [
215
+ "_____________________"
216
+ ]
217
+ },
218
+ {
219
+ "cell_type": "code",
220
+ "execution_count": 15,
221
+ "metadata": {},
222
+ "outputs": [],
223
+ "source": [
224
+ "import textwrap\n",
225
+ "\n",
226
+ "def wrap_text(text, width=90): # preserve_newlines\n",
227
+ " # Split the input text into lines based on newline characters\n",
228
+ " lines = text.split(\"\\n\")\n",
229
+ "\n",
230
+ " # Wrap each line individually\n",
231
+ " wrapped_lines = [textwrap.fill(line, width=width) for line in lines]\n",
232
+ "\n",
233
+ " # Join the wrapped lines back together using newline characters\n",
234
+ " wrapped_text = \"\\n\".join(wrapped_lines)\n",
235
+ "\n",
236
+ " return wrapped_text"
237
+ ]
238
+ },
239
+ {
240
+ "cell_type": "code",
241
+ "execution_count": 16,
242
+ "metadata": {},
243
+ "outputs": [],
244
+ "source": [
245
+ "def generate(input_text, system_prompt=\"\", max_length=512):\n",
246
+ " if system_prompt != \"\":\n",
247
+ " system_prompt = system_prompt\n",
248
+ " else:\n",
249
+ " system_prompt = (\n",
250
+ " \"You are a friendly chatbot who always responds in the style of a pirate\"\n",
251
+ " )\n",
252
+ " messages = [\n",
253
+ " {\n",
254
+ " \"role\": \"system\",\n",
255
+ " \"content\": system_prompt,\n",
256
+ " },\n",
257
+ " {\"role\": \"user\", \"content\": input_text},\n",
258
+ " ]\n",
259
+ "\n",
260
+ " prompt = pipe.tokenizer.apply_chat_template(\n",
261
+ " messages, tokenize=False, add_generation_prompt=True\n",
262
+ " )\n",
263
+ "\n",
264
+ " outputs = pipe(\n",
265
+ " prompt,\n",
266
+ " max_new_tokens=max_length,\n",
267
+ " do_sample=True,\n",
268
+ " temperature=0.7,\n",
269
+ " top_k=50,\n",
270
+ " top_p=0.95,\n",
271
+ " )\n",
272
+ " text = outputs[0][\"generated_text\"]\n",
273
+ " text = text.replace(prompt, \"\", 1)\n",
274
+ " wrapped_text = wrap_text(text)\n",
275
+ " \n",
276
+ " print(wrapped_text)"
277
+ ]
278
+ },
279
+ {
280
+ "cell_type": "code",
281
+ "execution_count": null,
282
+ "metadata": {},
283
+ "outputs": [],
284
+ "source": [
285
+ "generate(\n",
286
+ " \"\"\"Alice: I don't know why, I'm struggling to maintain focus while studying. Any suggestion? \\n\\n Bob:\"\"\",\n",
287
+ " system_prompt=\"You are Zephyr, a LLM that generates great conversations. continue as Bob here\",\n",
288
+ " max_length=512,\n",
289
+ ")"
290
+ ]
291
+ }
292
+ ],
293
+ "metadata": {
294
+ "kernelspec": {
295
+ "display_name": "Python 3",
296
+ "language": "python",
297
+ "name": "python3"
298
+ },
299
+ "language_info": {
300
+ "codemirror_mode": {
301
+ "name": "ipython",
302
+ "version": 3
303
+ },
304
+ "file_extension": ".py",
305
+ "mimetype": "text/x-python",
306
+ "name": "python",
307
+ "nbconvert_exporter": "python",
308
+ "pygments_lexer": "ipython3",
309
+ "version": "3.9.0"
310
+ }
311
+ },
312
+ "nbformat": 4,
313
+ "nbformat_minor": 2
314
+ }
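
The cells above call pipe.tokenizer.apply_chat_template on a pipe object constructed earlier in the notebook. For reference, here is a minimal sketch of how such a text-generation pipeline is typically built with transformers; the checkpoint id and dtype are assumptions rather than values taken from this commit:

    import torch
    from transformers import pipeline

    # Assumed checkpoint: the full-precision Zephyr 7B Beta model on the Hub.
    pipe = pipeline(
        "text-generation",
        model="HuggingFaceH4/zephyr-7b-beta",
        torch_dtype=torch.bfloat16,  # assumed; float16 also works on most GPUs
        device_map="auto",           # requires the accelerate package
    )

Any pipeline whose tokenizer defines a chat template will work with the apply_chat_template call used above.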
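
With wrap_text and generate defined as in the cells above, typical calls look like the following; both prompts are illustrative only:

    # Uses the default pirate system prompt hard-coded in generate().
    generate("Give me three tips for staying focused while studying.")

    # Override the system prompt and shorten the generation budget
    # (max_length is forwarded to max_new_tokens inside generate()).
    generate(
        "Summarise the plot of Treasure Island in two sentences.",
        system_prompt="You are a concise assistant.",
        max_length=128,
    )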
notebooks/chat_with_pdf_using_zephyr-7b_v4.ipynb ADDED
@@ -0,0 +1,662 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# Using Zephyr 7B Beta Quantised Model\n",
8
+ "\n",
9
+ "* [TheBloke/zephyr-7B-beta-GGUF](https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF)\n",
10
+ "* Used CTransformers wrapper"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": null,
16
+ "metadata": {},
17
+ "outputs": [],
18
+ "source": [
19
+ "%pip install torch==2.2.1\n",
20
+ "%pip install langchain==0.1.9\n",
21
+ "%pip install langchain-community==0.0.24\n",
22
+ "%pip install ctransformers==0.2.27\n",
23
+ "%pip install streamlit==1.31.1\n",
24
+ "%pip install streamlit-extras==0.4.0\n",
26
+ "%pip install rank_bm25==0.2.2\n",
27
+ "%pip install pypdf==4.0.2\n",
28
+ "%pip install chromadb==0.4.24\n",
29
+ "%pip install tiktoken==0.6.0"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "code",
34
+ "execution_count": 1,
35
+ "metadata": {},
36
+ "outputs": [],
37
+ "source": [
38
+ "import os\n",
39
+ "from langchain_community.llms import CTransformers\n",
40
+ "from langchain import PromptTemplate, LLMChain"
41
+ ]
42
+ },
43
+ {
44
+ "cell_type": "code",
45
+ "execution_count": 2,
46
+ "metadata": {},
47
+ "outputs": [],
48
+ "source": [
49
+ "model_type = \"mistral\"\n",
50
+ "model_id = \"TheBloke/zephyr-7B-beta-GGUF\"\n",
51
+ "model_file = \"zephyr-7b-beta.Q4_K_S.gguf\""
52
+ ]
53
+ },
54
+ {
55
+ "cell_type": "code",
56
+ "execution_count": 3,
57
+ "metadata": {},
58
+ "outputs": [],
59
+ "source": [
60
+ "config = {\n",
61
+ " \"max_new_tokens\": 1024,\n",
62
+ " \"repetition_penalty\": 1.1,\n",
63
+ " \"temperature\": 1,\n",
64
+ " \"top_k\": 50,\n",
65
+ " \"top_p\": 0.9,\n",
66
+ " \"stream\": True,\n",
67
+ " \"threads\": int(os.cpu_count() / 2),\n",
68
+ "}"
69
+ ]
70
+ },
71
+ {
72
+ "cell_type": "code",
73
+ "execution_count": 4,
74
+ "metadata": {},
75
+ "outputs": [
76
+ {
77
+ "data": {
78
+ "application/vnd.jupyter.widget-view+json": {
79
+ "model_id": "6469959ce27843a6b808f7c92e6b6a74",
80
+ "version_major": 2,
81
+ "version_minor": 0
82
+ },
83
+ "text/plain": [
84
+ "Fetching 1 files: 0%| | 0/1 [00:00<?, ?it/s]"
85
+ ]
86
+ },
87
+ "metadata": {},
88
+ "output_type": "display_data"
89
+ },
90
+ {
91
+ "data": {
92
+ "application/vnd.jupyter.widget-view+json": {
93
+ "model_id": "4dd3533c42c94adcb46b36bfbe2748a3",
94
+ "version_major": 2,
95
+ "version_minor": 0
96
+ },
97
+ "text/plain": [
98
+ "Fetching 1 files: 0%| | 0/1 [00:00<?, ?it/s]"
99
+ ]
100
+ },
101
+ "metadata": {},
102
+ "output_type": "display_data"
103
+ },
104
+ {
105
+ "data": {
106
+ "application/vnd.jupyter.widget-view+json": {
107
+ "model_id": "76fda961588e489e8d5c749ccb426596",
108
+ "version_major": 2,
109
+ "version_minor": 0
110
+ },
111
+ "text/plain": [
112
+ "zephyr-7b-beta.Q4_K_S.gguf: 0%| | 0.00/4.14G [00:00<?, ?B/s]"
113
+ ]
114
+ },
115
+ "metadata": {},
116
+ "output_type": "display_data"
117
+ },
118
+ {
119
+ "name": "stderr",
120
+ "output_type": "stream",
121
+ "text": [
122
+ "Error while downloading from https://cdn-lfs-us-1.huggingface.co/repos/fe/17/fe17596731f84a0d03bece77489780bc7e068323c0aeca88b6393d3e9e65dd49/cafa0b85b2efc15ca33023f3b87f8d0c44ddcace16b3fb608280e0eb8f425cb1?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27zephyr-7b-beta.Q4_K_S.gguf%3B+filename%3D%22zephyr-7b-beta.Q4_K_S.gguf%22%3B&Expires=1709696299&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcwOTY5NjI5OX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2ZlLzE3L2ZlMTc1OTY3MzFmODRhMGQwM2JlY2U3NzQ4OTc4MGJjN2UwNjgzMjNjMGFlY2E4OGI2MzkzZDNlOWU2NWRkNDkvY2FmYTBiODViMmVmYzE1Y2EzMzAyM2YzYjg3ZjhkMGM0NGRkY2FjZTE2YjNmYjYwODI4MGUwZWI4ZjQyNWNiMT9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=JJymveuF19P%7EYsnDVvRKvHbsUjRrNso4dCIEhQ591C6Ponli%7EvQZXE3jKIWNH0ZG%7El1ERzgSns5Qdhx9ImLRLyCtq0szMjeb7eycm%7E8BBpBH3%7EUle4RQoGm1056cJbbOqbiCyTQpFsoRe6N3ivAxTn11BjMY1b-dAmZnWbL%7E%7EyyY3Og7h9YVXX3g%7E-3I5FaWIwv-GTwPPtGiYJGAP23wYFY%7Eax59dAkwC38V9qOwYGTwm1knXNIQhWVxrcykflJos57vJESMntXRc9PFn0BNu0ZXu%7EYd7nBcyk3%7ELOJjsTKHwP76D3guyIuXduUbpBRVGi1kTnjVfdyEvtDRwSIr3Q__&Key-Pair-Id=KCD77M1F0VK2B: HTTPSConnectionPool(host='cdn-lfs-us-1.huggingface.co', port=443): Read timed out.\n",
123
+ "Trying to resume download...\n"
124
+ ]
125
+ },
126
+ {
127
+ "data": {
128
+ "application/vnd.jupyter.widget-view+json": {
129
+ "model_id": "c44f1b49b4434d71a9630f5f451be6d5",
130
+ "version_major": 2,
131
+ "version_minor": 0
132
+ },
133
+ "text/plain": [
134
+ "zephyr-7b-beta.Q4_K_S.gguf: 0%| | 0.00/4.14G [00:00<?, ?B/s]"
135
+ ]
136
+ },
137
+ "metadata": {},
138
+ "output_type": "display_data"
139
+ },
140
+ {
141
+ "name": "stderr",
142
+ "output_type": "stream",
143
+ "text": [
144
+ "Error while downloading from https://cdn-lfs-us-1.huggingface.co/repos/fe/17/fe17596731f84a0d03bece77489780bc7e068323c0aeca88b6393d3e9e65dd49/cafa0b85b2efc15ca33023f3b87f8d0c44ddcace16b3fb608280e0eb8f425cb1?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27zephyr-7b-beta.Q4_K_S.gguf%3B+filename%3D%22zephyr-7b-beta.Q4_K_S.gguf%22%3B&Expires=1709696299&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcwOTY5NjI5OX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2ZlLzE3L2ZlMTc1OTY3MzFmODRhMGQwM2JlY2U3NzQ4OTc4MGJjN2UwNjgzMjNjMGFlY2E4OGI2MzkzZDNlOWU2NWRkNDkvY2FmYTBiODViMmVmYzE1Y2EzMzAyM2YzYjg3ZjhkMGM0NGRkY2FjZTE2YjNmYjYwODI4MGUwZWI4ZjQyNWNiMT9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=JJymveuF19P%7EYsnDVvRKvHbsUjRrNso4dCIEhQ591C6Ponli%7EvQZXE3jKIWNH0ZG%7El1ERzgSns5Qdhx9ImLRLyCtq0szMjeb7eycm%7E8BBpBH3%7EUle4RQoGm1056cJbbOqbiCyTQpFsoRe6N3ivAxTn11BjMY1b-dAmZnWbL%7E%7EyyY3Og7h9YVXX3g%7E-3I5FaWIwv-GTwPPtGiYJGAP23wYFY%7Eax59dAkwC38V9qOwYGTwm1knXNIQhWVxrcykflJos57vJESMntXRc9PFn0BNu0ZXu%7EYd7nBcyk3%7ELOJjsTKHwP76D3guyIuXduUbpBRVGi1kTnjVfdyEvtDRwSIr3Q__&Key-Pair-Id=KCD77M1F0VK2B: HTTPSConnectionPool(host='cdn-lfs-us-1.huggingface.co', port=443): Read timed out.\n",
145
+ "Trying to resume download...\n"
146
+ ]
147
+ },
148
+ {
149
+ "data": {
150
+ "application/vnd.jupyter.widget-view+json": {
151
+ "model_id": "6de1cf2f043e4af48b96f6efbbdc7eae",
152
+ "version_major": 2,
153
+ "version_minor": 0
154
+ },
155
+ "text/plain": [
156
+ "zephyr-7b-beta.Q4_K_S.gguf: 0%| | 0.00/4.14G [00:00<?, ?B/s]"
157
+ ]
158
+ },
159
+ "metadata": {},
160
+ "output_type": "display_data"
161
+ }
162
+ ],
163
+ "source": [
164
+ "init_model = CTransformers(model=model_id, model_file=model_file, model_type=model_type, **config, lib=\"avx2\")"
165
+ ]
166
+ },
167
+ {
168
+ "cell_type": "markdown",
169
+ "metadata": {},
170
+ "source": [
171
+ "## Without Prompt Template"
172
+ ]
173
+ },
174
+ {
175
+ "cell_type": "code",
176
+ "execution_count": 5,
177
+ "metadata": {},
178
+ "outputs": [],
179
+ "source": [
180
+ "query = \"what is the meaning of the life ?\""
181
+ ]
182
+ },
183
+ {
184
+ "cell_type": "code",
185
+ "execution_count": 6,
186
+ "metadata": {},
187
+ "outputs": [
188
+ {
189
+ "name": "stderr",
190
+ "output_type": "stream",
191
+ "text": [
192
+ "/mnt/d/repo/experiments/chat_with_pdf_using_zephyr-7b/venv/lib/python3.9/site-packages/langchain_core/_api/deprecation.py:117: LangChainDeprecationWarning: The function `__call__` was deprecated in LangChain 0.1.7 and will be removed in 0.2.0. Use invoke instead.\n",
193
+ " warn_deprecated(\n"
194
+ ]
195
+ },
196
+ {
197
+ "name": "stdout",
198
+ "output_type": "stream",
199
+ "text": [
200
+ "\n",
201
+ "\n",
202
+ "what happens after we die ?\n",
203
+ "\n",
204
+ "is there any god or creator ?\n",
205
+ "\n",
206
+ "who am I really ?\n",
207
+ "\n",
208
+ "these are the questions that have always fascinated human mind and kept us thinking for ages. These questions are so profound, yet simple and so personal. We all have our own answers to these questions, whether in form of religion, spirituality or philosophy, which become a part of our life philosophy as we grow up.\n",
209
+ "\n",
210
+ "But there is another dimension where people look beyond the boundaries of these religions and philosophies. They go into a quest for truth that goes deeper than what they have been taught by their religion or philosophy. They start looking within themselves to find the answers. This quest takes them on a journey of self-discovery, which is often referred to as Spirituality.\n",
211
+ "\n",
212
+ "Spirituality, at its core, is an intense thirst to know the truth about life and ourselves. It is a longing for connection with something greater than oneself – God or the Universe. The spiritual quest takes us on a journey of self-reflection and discovery where we learn to observe ourselves in our daily lives and situations as they arise. This brings deep insights into our own nature and enables us to let go\n"
213
+ ]
214
+ }
215
+ ],
216
+ "source": [
217
+ "result = init_model(query)\n",
218
+ "print(result)"
219
+ ]
220
+ },
221
+ {
222
+ "cell_type": "markdown",
223
+ "metadata": {},
224
+ "source": [
225
+ "## With Prompt Template"
226
+ ]
227
+ },
228
+ {
229
+ "cell_type": "code",
230
+ "execution_count": 7,
231
+ "metadata": {},
232
+ "outputs": [],
233
+ "source": [
234
+ "template = \"\"\"You are a helpful AI Assistant that follows instructions extremely well.\n",
235
+ "Question: {question}\n",
236
+ "\n",
237
+ "Answer: Let's think step by step and answer it faithfully.\n",
238
+ "\"\"\""
239
+ ]
240
+ },
241
+ {
242
+ "cell_type": "code",
243
+ "execution_count": 8,
244
+ "metadata": {},
245
+ "outputs": [],
246
+ "source": [
247
+ "prompt = PromptTemplate(template=template, input_variables=[\"question\"])"
248
+ ]
249
+ },
250
+ {
251
+ "cell_type": "code",
252
+ "execution_count": 9,
253
+ "metadata": {},
254
+ "outputs": [],
255
+ "source": [
256
+ "chain = LLMChain(prompt=prompt, llm=init_model, verbose=True)"
257
+ ]
258
+ },
259
+ {
260
+ "cell_type": "code",
261
+ "execution_count": 10,
262
+ "metadata": {},
263
+ "outputs": [],
264
+ "source": [
265
+ "query = \"What is LLM ?\""
266
+ ]
267
+ },
268
+ {
269
+ "cell_type": "code",
270
+ "execution_count": 11,
271
+ "metadata": {},
272
+ "outputs": [
273
+ {
274
+ "name": "stderr",
275
+ "output_type": "stream",
276
+ "text": [
277
+ "/mnt/d/repo/experiments/chat_with_pdf_using_zephyr-7b/venv/lib/python3.9/site-packages/langchain_core/_api/deprecation.py:117: LangChainDeprecationWarning: The function `run` was deprecated in LangChain 0.1.0 and will be removed in 0.2.0. Use invoke instead.\n",
278
+ " warn_deprecated(\n"
279
+ ]
280
+ },
281
+ {
282
+ "name": "stdout",
283
+ "output_type": "stream",
284
+ "text": [
285
+ "\n",
286
+ "\n",
287
+ "\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
288
+ "Prompt after formatting:\n",
289
+ "\u001b[32;1m\u001b[1;3mYou are a helpful AI Assistant that follows instructions extremely well.\n",
290
+ "Question: What is LLM ?\n",
291
+ "\n",
292
+ "Answer: Let's think step by step and answer it faithfully.\n",
293
+ "\u001b[0m\n",
294
+ "\n",
295
+ "\u001b[1m> Finished chain.\u001b[0m\n"
296
+ ]
297
+ }
298
+ ],
299
+ "source": [
300
+ "result = chain.run(query)"
301
+ ]
302
+ },
303
+ {
304
+ "cell_type": "code",
305
+ "execution_count": 13,
306
+ "metadata": {},
307
+ "outputs": [
308
+ {
309
+ "name": "stdout",
310
+ "output_type": "stream",
311
+ "text": [
312
+ "\n",
313
+ "LLM stands for Large Language Model. It refers to a type of machine learning algorithm specifically designed to process and generate human-like language, typically in the form of text or speech. These models are called \"large\" because they require vast amounts of training data to learn the complex patterns and relationships within language. The ultimate goal of LLMs is to enable more natural and intuitive interactions between humans and machines through enhanced communication capabilities.\n"
314
+ ]
315
+ }
316
+ ],
317
+ "source": [
318
+ "print(result)"
319
+ ]
320
+ },
321
+ {
322
+ "cell_type": "code",
323
+ "execution_count": null,
324
+ "metadata": {},
325
+ "outputs": [],
326
+ "source": []
327
+ },
328
+ {
329
+ "cell_type": "markdown",
330
+ "metadata": {},
331
+ "source": [
332
+ "## RAG - Talk to PDF"
333
+ ]
334
+ },
335
+ {
336
+ "cell_type": "code",
337
+ "execution_count": 14,
338
+ "metadata": {},
339
+ "outputs": [],
340
+ "source": [
341
+ "import os\n",
342
+ "from langchain_community.llms import CTransformers\n",
343
+ "from langchain_community.document_loaders import PyPDFLoader\n",
344
+ "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
345
+ "from langchain.vectorstores import Chroma\n",
346
+ "from langchain.retrievers import BM25Retriever, EnsembleRetriever\n",
347
+ "\n",
348
+ "from langchain_core.prompts import ChatPromptTemplate\n",
349
+ "from langchain_core.output_parsers import StrOutputParser\n",
350
+ "from langchain_core.runnables import RunnablePassthrough\n",
351
+ "\n",
352
+ "from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings"
353
+ ]
354
+ },
355
+ {
356
+ "cell_type": "markdown",
357
+ "metadata": {},
358
+ "source": [
359
+ "### Load Data"
360
+ ]
361
+ },
362
+ {
363
+ "cell_type": "code",
364
+ "execution_count": 21,
365
+ "metadata": {},
366
+ "outputs": [],
367
+ "source": [
368
+ "file_path = \"../data/Orca Progressive Learning from Complex.pdf\"\n",
369
+ "data_file = PyPDFLoader(file_path)\n",
370
+ "docs = data_file.load()"
371
+ ]
372
+ },
373
+ {
374
+ "cell_type": "markdown",
375
+ "metadata": {},
376
+ "source": [
377
+ "### Split & Chunk Docs"
378
+ ]
379
+ },
380
+ {
381
+ "cell_type": "code",
382
+ "execution_count": 22,
383
+ "metadata": {},
384
+ "outputs": [],
385
+ "source": [
386
+ "# create chunks\n",
387
+ "splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)\n",
388
+ "chunks = splitter.split_documents(docs)"
389
+ ]
390
+ },
391
+ {
392
+ "cell_type": "markdown",
393
+ "metadata": {},
394
+ "source": [
395
+ "### Load Embedder"
396
+ ]
397
+ },
398
+ {
399
+ "cell_type": "code",
400
+ "execution_count": 23,
401
+ "metadata": {},
402
+ "outputs": [],
403
+ "source": [
404
+ "HF_TOKEN = input(\"Enter your HuggingFace Token\")"
405
+ ]
406
+ },
407
+ {
408
+ "cell_type": "code",
409
+ "execution_count": 24,
410
+ "metadata": {},
411
+ "outputs": [],
412
+ "source": [
413
+ "# https://huggingface.co/BAAI/bge-base-en-v1.5\n",
414
+ "embeddings = HuggingFaceInferenceAPIEmbeddings(\n",
415
+ " api_key=HF_TOKEN, model_name=\"BAAI/bge-base-en-v1.5\"\n",
416
+ ")"
417
+ ]
418
+ },
419
+ {
420
+ "cell_type": "markdown",
421
+ "metadata": {},
422
+ "source": [
423
+ "### Retrievers"
424
+ ]
425
+ },
426
+ {
427
+ "cell_type": "code",
428
+ "execution_count": 25,
429
+ "metadata": {},
430
+ "outputs": [],
431
+ "source": [
432
+ "# retrieve k\n",
433
+ "k = 5"
434
+ ]
435
+ },
436
+ {
437
+ "cell_type": "markdown",
438
+ "metadata": {},
439
+ "source": [
440
+ "#### Vector Retriever"
441
+ ]
442
+ },
443
+ {
444
+ "cell_type": "code",
445
+ "execution_count": 26,
446
+ "metadata": {},
447
+ "outputs": [],
448
+ "source": [
449
+ "vector_store = Chroma.from_documents(chunks, embeddings)\n",
450
+ "vector_retriever = vector_store.as_retriever(search_kwargs={\"k\": k})"
451
+ ]
452
+ },
453
+ {
454
+ "cell_type": "markdown",
455
+ "metadata": {},
456
+ "source": [
457
+ "#### Semantic Retriever"
458
+ ]
459
+ },
460
+ {
461
+ "cell_type": "code",
462
+ "execution_count": 27,
463
+ "metadata": {},
464
+ "outputs": [],
465
+ "source": [
466
+ "semantic_retriever = BM25Retriever.from_documents(chunks)\n",
467
+ "semantic_retriever.k = k"
468
+ ]
469
+ },
470
+ {
471
+ "cell_type": "markdown",
472
+ "metadata": {},
473
+ "source": [
474
+ "#### Ensemble Retriever"
475
+ ]
476
+ },
477
+ {
478
+ "cell_type": "code",
479
+ "execution_count": 28,
480
+ "metadata": {},
481
+ "outputs": [],
482
+ "source": [
483
+ "ensemble_retriever = EnsembleRetriever(\n",
484
+ " retrievers=[vector_retriever, semantic_retriever], weights=[0.5, 0.5]\n",
485
+ ")"
486
+ ]
487
+ },
488
+ {
489
+ "cell_type": "markdown",
490
+ "metadata": {},
491
+ "source": [
492
+ "### Init LLM Model"
493
+ ]
494
+ },
495
+ {
496
+ "cell_type": "code",
497
+ "execution_count": 29,
498
+ "metadata": {},
499
+ "outputs": [],
500
+ "source": [
501
+ "model_type = \"mistral\"\n",
502
+ "model_id = \"TheBloke/zephyr-7B-beta-GGUF\"\n",
503
+ "model_file = \"zephyr-7b-beta.Q4_K_S.gguf\""
504
+ ]
505
+ },
506
+ {
507
+ "cell_type": "code",
508
+ "execution_count": 49,
509
+ "metadata": {},
510
+ "outputs": [],
511
+ "source": [
512
+ "config = {\n",
513
+ " \"max_new_tokens\": 2048,\n",
514
+ " \"repetition_penalty\": 1.1,\n",
515
+ " \"temperature\": 1,\n",
516
+ " \"top_k\": 50,\n",
517
+ " \"top_p\": 0.9,\n",
518
+ " \"stream\": True,\n",
519
+ " \"context_length\": 4096,\n",
520
+ " \"gpu_layers\": 0,\n",
521
+ " \"threads\": int(os.cpu_count() / 2),\n",
522
+ "}"
523
+ ]
524
+ },
525
+ {
526
+ "cell_type": "code",
527
+ "execution_count": 50,
528
+ "metadata": {},
529
+ "outputs": [
530
+ {
531
+ "data": {
532
+ "application/vnd.jupyter.widget-view+json": {
533
+ "model_id": "c0281307720f46be8386fb08c0d655ad",
534
+ "version_major": 2,
535
+ "version_minor": 0
536
+ },
537
+ "text/plain": [
538
+ "Fetching 1 files: 0%| | 0/1 [00:00<?, ?it/s]"
539
+ ]
540
+ },
541
+ "metadata": {},
542
+ "output_type": "display_data"
543
+ },
544
+ {
545
+ "data": {
546
+ "application/vnd.jupyter.widget-view+json": {
547
+ "model_id": "2974900a3e474614b538b006079881fd",
548
+ "version_major": 2,
549
+ "version_minor": 0
550
+ },
551
+ "text/plain": [
552
+ "Fetching 1 files: 0%| | 0/1 [00:00<?, ?it/s]"
553
+ ]
554
+ },
555
+ "metadata": {},
556
+ "output_type": "display_data"
557
+ }
558
+ ],
559
+ "source": [
560
+ "llm = CTransformers(\n",
561
+ " model=model_id, model_file=model_file, model_type=model_type, config=config, lib=\"avx2\"\n",
562
+ ")"
563
+ ]
564
+ },
565
+ {
566
+ "cell_type": "markdown",
567
+ "metadata": {},
568
+ "source": [
569
+ "### Prompting"
570
+ ]
571
+ },
572
+ {
573
+ "cell_type": "code",
574
+ "execution_count": 51,
575
+ "metadata": {},
576
+ "outputs": [],
577
+ "source": [
578
+ "template = \"\"\"You are a helpful AI Assistant that follows instructions extremely well.\n",
579
+ "Use the following context to answer user question.\n",
580
+ "\n",
581
+ "Think step by step before answering the question. \n",
582
+ "You will get a $100 tip if you provide correct answer.\n",
583
+ "\n",
584
+ "Context: {context}\n",
585
+ "\n",
586
+ "Question: {question}\n",
587
+ "\n",
588
+ "Answer: Let's think step by step and answer it faithfully.\n",
589
+ "\"\"\""
590
+ ]
591
+ },
592
+ {
593
+ "cell_type": "code",
594
+ "execution_count": 52,
595
+ "metadata": {},
596
+ "outputs": [],
597
+ "source": [
598
+ "prompt = ChatPromptTemplate.from_template(template)\n",
599
+ "output_parser = StrOutputParser()"
600
+ ]
601
+ },
602
+ {
603
+ "cell_type": "code",
604
+ "execution_count": 53,
605
+ "metadata": {},
606
+ "outputs": [],
607
+ "source": [
608
+ "chain = (\n",
609
+ " {\"context\": ensemble_retriever, \"question\": RunnablePassthrough()}\n",
610
+ " | prompt\n",
611
+ " | llm\n",
612
+ " | output_parser\n",
613
+ ")"
614
+ ]
615
+ },
616
+ {
617
+ "cell_type": "code",
618
+ "execution_count": 54,
619
+ "metadata": {},
620
+ "outputs": [
621
+ {
622
+ "name": "stdout",
623
+ "output_type": "stream",
624
+ "text": [
625
+ "\n",
626
+ "Instruction tuning is a technique that allows pre-trained language models to learn from input (natural language descriptions of the task) and response pairs, for example, \"{\\\"instruction\\\": \\\"Arrange the words in the given sentence to form a grammatically\\ncorrect sentence.\\\", \\\"input\\\": \\\"the quickly brown fox jumped\\\", \\\"output\\\": \\\"the brown\\nfox jumped quickly\\\"} .\". It is commonly used for both language-only and multimodal tasks, such as image captioning and visual question answering. In recent times, many works have adopted instruction tuning to train smaller language models with outputs generated from large foundation models like GPT family. However, these approaches face several challenges, including limited task diversity, query complexity, and small-scale training data that understate the benefits of such methods. The Orca model presented in this thesis addresses these limitations by combining self-supervised learning, reinforcement learning, and instruction tuning to achieve competitive performance on multiple zero-shot benchmarks, reducing the gap with proprietary LLMs like ChatGPT and GPT-4.\n"
627
+ ]
628
+ }
629
+ ],
630
+ "source": [
631
+ "print(chain.invoke(\"What is instruction tuning?\"))"
632
+ ]
633
+ },
634
+ {
635
+ "cell_type": "code",
636
+ "execution_count": null,
637
+ "metadata": {},
638
+ "outputs": [],
639
+ "source": []
640
+ }
641
+ ],
642
+ "metadata": {
643
+ "kernelspec": {
644
+ "display_name": "Python 3",
645
+ "language": "python",
646
+ "name": "python3"
647
+ },
648
+ "language_info": {
649
+ "codemirror_mode": {
650
+ "name": "ipython",
651
+ "version": 3
652
+ },
653
+ "file_extension": ".py",
654
+ "mimetype": "text/x-python",
655
+ "name": "python",
656
+ "nbconvert_exporter": "python",
657
+ "pygments_lexer": "ipython3",
658
+ "version": "3.9.0"
659
+ }
660
+ },
661
+ "nbformat": 4,
662
+ "nbformat_minor": 2
663
+ }
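
The stderr output above shows LangChain 0.1.x warning that __call__ and run are deprecated and will be removed in 0.2.0 in favour of invoke. A minimal sketch of the equivalent calls for the objects built in this notebook; note that LLMChain.invoke takes and returns a dict:

    # Plain LLM: string in, string out (replaces init_model(query)).
    result = init_model.invoke(query)
    print(result)

    # LLMChain: dict in, dict out (replaces chain.run(query));
    # the completion is stored under the "text" key.
    out = chain.invoke({"question": query})
    print(out["text"])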
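
Because the ensemble retriever does the heavy lifting in the RAG chain, it is worth querying it in isolation before wiring it into the prompt. A small sketch against the ensemble_retriever built above; the query string is just an example:

    # Inspect what the hybrid (Chroma + BM25) retriever returns.
    docs = ensemble_retriever.get_relevant_documents("What is instruction tuning?")
    for i, doc in enumerate(docs):
        # PyPDFLoader records the source page number in metadata["page"].
        print(f"--- chunk {i} (page {doc.metadata.get('page')}) ---")
        print(doc.page_content[:200])

The equal weights=[0.5, 0.5] passed to EnsembleRetriever mean the dense (Chroma) and keyword (BM25) rankings contribute equally when the two result lists are fused; shifting weight toward BM25 tends to help queries dominated by exact terms from the document.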
notebooks/reference/YT_Mistral_7B_Zephyr_ɒ_Testing.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/reference/zephyr_7b_beta.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,10 @@
1
+ torch==2.2.1
2
+ langchain==0.1.9
3
+ langchain-community==0.0.24
4
+ ctransformers==0.2.27
5
+ streamlit==1.31.1
6
+ streamlit-extras==0.4.0
7
+ tiktoken==0.6.0
8
+ rank_bm25==0.2.2
9
+ pypdf==4.0.2
10
+ chromadb==0.4.24
requirements_local.txt ADDED
@@ -0,0 +1,12 @@
1
+ ipykernel
2
+ ipywidgets
3
+ torch==2.2.1
4
+ langchain==0.1.9
5
+ langchain-community==0.0.24
6
+ ctransformers==0.2.27
7
+ streamlit==1.31.1
8
+ streamlit-extras==0.4.0
9
+ tiktoken==0.6.0
10
+ rank_bm25==0.2.2
11
+ pypdf==4.0.2
12
+ chromadb==0.4.24
runtime.txt ADDED
@@ -0,0 +1 @@
1
+ python-3.9.0
src/.gitkeep ADDED
File without changes