init commit
- .github/workflows/sync_to_huggingface_space.yml +20 -0
- .gitignore +160 -0
- README.md +18 -6
- app.py +182 -0
- data/.gitkeep +0 -0
- doc/zephyr-7b.tar.gz +3 -0
- img/Zephyr-7b.png +0 -0
- models/.gitkeep +0 -0
- notebooks/chat_with_pdf_using_zephyr-7b_v1.ipynb +329 -0
- notebooks/chat_with_pdf_using_zephyr-7b_v2.ipynb +222 -0
- notebooks/chat_with_pdf_using_zephyr-7b_v3.ipynb +314 -0
- notebooks/chat_with_pdf_using_zephyr-7b_v4.ipynb +663 -0
- notebooks/reference/YT_Mistral_7B_Zephyr_ɒ_Testing.ipynb +0 -0
- notebooks/reference/zephyr_7b_beta.ipynb +0 -0
- requirements.txt +11 -0
- requirements_local.txt +13 -0
- runtime.txt +1 -0
- src/.gitkeep +0 -0
.github/workflows/sync_to_huggingface_space.yml
ADDED
@@ -0,0 +1,20 @@
name: Sync to HuggingFace Space
on:
  push:
    branches: [main]

  # to run this workflow manually from the Actions tab
  workflow_dispatch:

jobs:
  sync-to-hub:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
          lfs: true
      - name: Push to hub
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: git push --force https://thivav:[email protected]/spaces/thivav/chat_with_pdf_using_zephyr-7b-beta main
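The workflow reads HF_TOKEN from the repository's Actions secrets; registering that secret is a one-time step. A minimal sketch, assuming the GitHub CLI (gh) is installed and authenticated (the token value below is a placeholder, not from this commit):

    # store a HuggingFace write-access token as the HF_TOKEN Actions secret
    gh secret set HF_TOKEN --body "hf_xxxxxxxxxxxxxxxxxxxx"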
.gitignore
ADDED
@@ -0,0 +1,160 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
README.md
CHANGED
@@ -1,12 +1,24 @@
 ---
-title: Chat With Pdf Using Zephyr-7b-
-emoji:
-colorFrom:
-colorTo:
+title: Chat With Pdf Using Zephyr-7b-Beta
+emoji: 🗣📢
+colorFrom: red
+colorTo: green
 sdk: streamlit
 sdk_version: 1.31.1
 app_file: app.py
-pinned:
+pinned: true
 ---
 
-
+![Zephyr-7b-beta](/img/Zephyr-7b.png)
+
+# Chat with PDF using Zephyr-7b 🗣📢
+
+#RAG | #Semantic | #Embedding | #HybridSearch | #EnsembleRetriever | #BAAI-Embeddings
+
+Chat with PDF using the [Zephyr-7b LLM](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)
+
+- [HuggingFaceH4/zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)
+- Zephyr-7b is fine-tuned from [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)
+- [Embeddings](https://huggingface.co/BAAI/bge-base-en-v1.5)
+
+[Chat with PDF using Zephyr-7b Beta - Playground](https://huggingface.co/spaces/thivav/chat_with_pdf_using_zephyr-7b-beta)
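The #HybridSearch advertised in the README merges the vector retriever's and the BM25 retriever's ranked lists. LangChain's EnsembleRetriever does this with weighted Reciprocal Rank Fusion; a minimal sketch of that fusion rule (the function name and the damping constant are illustrative, not the library's API):

# weighted reciprocal rank fusion over several ranked result lists
def weighted_rrf(rankings, weights, c=60):
    scores = {}
    for ranking, weight in zip(rankings, weights):
        for rank, doc_id in enumerate(ranking):
            # earlier ranks contribute more; c damps the tail
            scores[doc_id] = scores.get(doc_id, 0.0) + weight / (c + rank + 1)
    return sorted(scores, key=scores.get, reverse=True)

# e.g. weighted_rrf([["a", "b", "c"], ["b", "a", "d"]], weights=[0.5, 0.5])
# ranks "a" and "b" ahead of "c" and "d", because both retrievers rank them highly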
app.py
ADDED
@@ -0,0 +1,182 @@
import os
import tempfile

import streamlit as st
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.retrievers import EnsembleRetriever
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.chat_message_histories import StreamlitChatMessageHistory
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain_community.llms import CTransformers
from langchain_community.retrievers import BM25Retriever
from langchain_community.vectorstores import Chroma
from streamlit_extras.add_vertical_space import add_vertical_space


@st.cache_resource(ttl="1h")
def get_retriever(pdf_files):
    """Build an ensemble (hybrid) retriever from the uploaded PDF files."""

    docs = []
    temp_dir = tempfile.TemporaryDirectory()
    for pdf_file in pdf_files:
        temp_pdf_file_path = os.path.join(temp_dir.name, pdf_file.name)

        with open(temp_pdf_file_path, "wb") as f:
            f.write(pdf_file.getvalue())

        loader = PyPDFLoader(temp_pdf_file_path)
        docs.extend(loader.load())

    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=1500, chunk_overlap=200
    )
    chunks = text_splitter.split_documents(docs)

    # get huggingface token from env secret
    HF_TOKEN = os.environ.get("HF_TOKEN")

    # embeddings
    embeddings = HuggingFaceInferenceAPIEmbeddings(
        api_key=HF_TOKEN,
        model_name="BAAI/bge-base-en-v1.5",
    )

    # number of documents to retrieve
    k = 5

    # vector retriever
    vector_store = Chroma.from_documents(chunks, embeddings)
    vector_retriever = vector_store.as_retriever(search_kwargs={"k": k})

    # keyword (BM25) retriever
    semantic_retriever = BM25Retriever.from_documents(chunks)
    semantic_retriever.k = k

    # ensemble retriever
    ensemble_retriever = EnsembleRetriever(
        retrievers=[vector_retriever, semantic_retriever], weights=[0.5, 0.5]
    )

    return ensemble_retriever


@st.cache_resource(ttl="1h")
def initialize_llm(_retriever):
    """Load the quantised Zephyr-7B model and wire it into a conversational chain."""
    # the leading underscore on _retriever keeps st.cache_resource from hashing it

    # load llm model
    model_type = "mistral"
    model_id = "TheBloke/zephyr-7B-beta-GGUF"
    model_file = "zephyr-7b-beta.Q4_K_S.gguf"

    config = {
        "max_new_tokens": 2048,
        "repetition_penalty": 1.1,
        "temperature": 1,
        "top_k": 50,
        "top_p": 0.9,
        "stream": True,
        "context_length": 4096,
        "gpu_layers": 0,  # CPU-only inference
        "threads": int(os.cpu_count()),
    }

    llm = CTransformers(
        model=model_id,
        model_file=model_file,
        model_type=model_type,
        config=config,
        lib="avx2",
    )

    chat_history = StreamlitChatMessageHistory()

    # init chat history memory
    memory = ConversationBufferMemory(
        memory_key="chat_history", chat_memory=chat_history, return_messages=True
    )

    chain = ConversationalRetrievalChain.from_llm(
        llm, retriever=_retriever, memory=memory, verbose=False
    )

    return chain, chat_history


def main():
    """main func"""

    st.set_page_config(
        page_title="Talk to PDF using Zephyr-7B-Beta",
        page_icon="📰",
        layout="centered",
        initial_sidebar_state="expanded",
    )

    st.header("Talk to PDF files 📰", divider="rainbow")
    st.subheader(
        "Enjoy :red[talking] with :green[PDF] files using :sunglasses: Zephyr-7B-Beta"
    )
    st.markdown(
        """
        * Uses the [zephyr-7b-beta.Q4_K_S.gguf](https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_S.gguf) quantised
        version of the [Zephyr-7B Beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) model
        from the [TheBloke/zephyr-7B-beta-GGUF](https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF) repository.
        ___
        """
    )

    st.sidebar.title("Talk to PDF 📰")
    st.sidebar.markdown(
        "[Check out the repository](https://github.com/ThivaV/chat_with_pdf_using_zephyr-7b)"
    )
    st.sidebar.markdown(
        """
        ### This is an LLM-powered chatbot, built using:

        * [Streamlit](https://streamlit.io)
        * [LangChain](https://python.langchain.com/)
        * [HuggingFaceH4/zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)
        * [TheBloke/zephyr-7B-beta-GGUF](https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF)
        * [CTransformers](https://github.com/marella/ctransformers)
        * [Embeddings](https://huggingface.co/BAAI/bge-base-en-v1.5)
        * [Chroma](https://docs.trychroma.com/?lang=py)
        ___
        """
    )

    add_vertical_space(2)

    upload_pdf_files = st.sidebar.file_uploader(
        "Upload PDF files 📤", type="pdf", accept_multiple_files=True
    )

    if not upload_pdf_files:
        st.info("👈 :red[Please upload PDF files] ⛔")
        st.stop()

    retriever = get_retriever(upload_pdf_files)

    chain, chat_history = initialize_llm(retriever)

    # load previous chat history and
    # re-draw it in the chat window
    for message in chat_history.messages:
        st.chat_message(message.type).write(message.content)

    if prompt := st.chat_input("Ask questions"):
        with st.chat_message("human"):
            st.markdown(prompt)

        response = chain.invoke(prompt)

        with st.chat_message("ai"):
            st.write(response["answer"])


if __name__ == "__main__":
    # init main func
    main()
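To run the app outside the Space, the pieces above suggest the following steps (a sketch, not documented in this commit; the token value is a placeholder):

    # install the local dependency set committed alongside the app
    pip install -r requirements_local.txt

    # get_retriever() reads the embeddings token from the environment
    export HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxx

    # start the Streamlit app
    streamlit run app.py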
data/.gitkeep
ADDED
File without changes
doc/zephyr-7b.tar.gz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:29b840eef6b47880bff422f5c6cea2fbe19fcd6c3831e78a0e56ec669a8654b0
size 3402315
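What is committed here is only a Git LFS pointer (spec version, SHA-256 object id, byte size), not the 3.4 MB archive itself. After cloning, the real file can be fetched with standard Git LFS commands:

    git lfs install
    git lfs pull --include="doc/zephyr-7b.tar.gz"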
img/Zephyr-7b.png
ADDED
models/.gitkeep
ADDED
File without changes
notebooks/chat_with_pdf_using_zephyr-7b_v1.ipynb
ADDED
@@ -0,0 +1,329 @@
# HuggingFaceHub API method

[1]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.retrievers import BM25Retriever, EnsembleRetriever

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain_community.llms import HuggingFaceHub

## Load Data

[2]:
file_path = "../data/Orca Progressive Learning from Complex.pdf"
data_file = PyPDFLoader(file_path)
docs = data_file.load()

## Split & Chunk Docs

[3]:
# create chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
chunks = splitter.split_documents(docs)

## Load Embedder

[4]:
HF_TOKEN = input("Enter your HuggingFace Token")

[5]:
# https://huggingface.co/BAAI/bge-base-en-v1.5
embeddings = HuggingFaceInferenceAPIEmbeddings(
    api_key=HF_TOKEN, model_name="BAAI/bge-base-en-v1.5"
)

[6]:
# number of documents to retrieve
k = 5

## Vector Retriever

[7]:
vector_store = Chroma.from_documents(chunks, embeddings)

[8]:
vector_retriever = vector_store.as_retriever(search_kwargs={"k": k})

## Semantic Retriever

[9]:
semantic_retriever = BM25Retriever.from_documents(chunks)
semantic_retriever.k = k

## Ensemble Retriever

[10]:
ensemble_retriever = EnsembleRetriever(
    retrievers=[vector_retriever, semantic_retriever],
    weights=[0.5, 0.5]
)

### LLM

[11]:
# HuggingFaceH4/zephyr-7b-beta
llm = HuggingFaceHub(
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    model_kwargs={"temperature": 0.1, "max_new_tokens": 1024},
    huggingfacehub_api_token=HF_TOKEN,
)

(stderr) LangChainDeprecationWarning: The class `langchain_community.llms.huggingface_hub.HuggingFaceHub` was deprecated in langchain-community 0.0.21 and will be removed in 0.2.0. Use HuggingFaceEndpoint instead.

### Prompting

[12]:
template = """
<|system|>
You are a helpful AI Assistant that follows instructions extremely well.
Use the following context to answer the user question.

Think step by step before answering the question.
You will get a $100 tip if you provide a correct answer.

CONTEXT: {context}
</s>
<|user|>
{query}
</s>
<|assistant|>
"""

[13]:
prompt = ChatPromptTemplate.from_template(template)
output_parser = StrOutputParser()

[14]:
chain = (
    {"context": ensemble_retriever, "query": RunnablePassthrough()}
    | prompt
    | llm
    | output_parser
)

[15]:
print(chain.invoke("What is instruction tuning?"))

HfHubHTTPError: 429 Client Error: Too Many Requests for url: https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta (Request ID: MB9PO09bJU8rFNr1BbEry)
Rate limit reached. You reached free usage limit (reset hourly). Please subscribe to a plan at https://huggingface.co/pricing to use the API at this rate

[ ]:
print(chain.invoke("How does Orca compare to ChatGPT?"))

_______________________________________________________
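The run above died on a 429 rate limit from the free Inference API. One way to make the notebook resilient to transient 429s is exponential backoff; a minimal sketch, assuming the tenacity package is installed (it is not among this repo's pinned dependencies):

from tenacity import retry, stop_after_attempt, wait_exponential

# retry with exponential backoff (2s .. 60s), giving up after 5 attempts
@retry(wait=wait_exponential(multiplier=2, min=2, max=60), stop=stop_after_attempt(5))
def ask(question: str) -> str:
    return chain.invoke(question)

print(ask("What is instruction tuning?"))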
notebooks/chat_with_pdf_using_zephyr-7b_v2.ipynb
ADDED
@@ -0,0 +1,222 @@
# Using HuggingFace's "Load model directly" method

* AutoModelForCausalLM

[ ]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.retrievers import BM25Retriever, EnsembleRetriever

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings

[ ]:
import torch
from transformers import AutoModelForCausalLM

[ ]:
file_path = "../data/Orca Progressive Learning from Complex.pdf"
data_file = PyPDFLoader(file_path)
docs = data_file.load()

[ ]:
# create chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
chunks = splitter.split_documents(docs)

[ ]:
HF_TOKEN = input("Enter your HuggingFace Token")

[ ]:
# https://huggingface.co/BAAI/bge-base-en-v1.5
embeddings = HuggingFaceInferenceAPIEmbeddings(
    api_key=HF_TOKEN, model_name="BAAI/bge-base-en-v1.5"
)

[ ]:
# number of documents to retrieve
k = 5

[ ]:
vector_store = Chroma.from_documents(chunks, embeddings)

[ ]:
vector_retriever = vector_store.as_retriever(search_kwargs={"k": k})

[ ]:
semantic_retriever = BM25Retriever.from_documents(chunks)
semantic_retriever.k = k

[ ]:
ensemble_retriever = EnsembleRetriever(
    retrievers=[vector_retriever, semantic_retriever], weights=[0.5, 0.5]
)

[ ]:
# tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
llm = AutoModelForCausalLM.from_pretrained(
    "HuggingFaceH4/zephyr-7b-beta", torch_dtype=torch.bfloat16, low_cpu_mem_usage=True
)

[ ]:
template = """
<|system|>
You are a helpful AI Assistant that follows instructions extremely well.
Use the following context to answer the user question.

Think step by step before answering the question.
You will get a $100 tip if you provide a correct answer.

CONTEXT: {context}
</s>
<|user|>
{query}
</s>
<|assistant|>
"""

[ ]:
prompt = ChatPromptTemplate.from_template(template)
output_parser = StrOutputParser()

[ ]:
chain = (
    {"context": ensemble_retriever, "query": RunnablePassthrough()}
    | prompt
    | llm
    | output_parser
)

[ ]:
print(chain.invoke("What is instruction tuning?"))
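As written, the last cells pipe a raw AutoModelForCausalLM into an LCEL chain, which will fail: a bare transformers model is not a LangChain runnable and carries no tokenizer. The usual bridge is a text-generation pipeline wrapped in langchain_community's HuggingFacePipeline; a minimal sketch (max_new_tokens here is illustrative):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_community.llms import HuggingFacePipeline

tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
model = AutoModelForCausalLM.from_pretrained(
    "HuggingFaceH4/zephyr-7b-beta", torch_dtype=torch.bfloat16, low_cpu_mem_usage=True
)

# wrap model + tokenizer in a pipeline that LangChain can invoke
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=1024)
llm = HuggingFacePipeline(pipeline=pipe)  # drop-in replacement for `llm` in the chain above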
notebooks/chat_with_pdf_using_zephyr-7b_v3.ipynb
ADDED
@@ -0,0 +1,314 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Using a HuggingFace pipeline as a high-level helper

* from transformers import pipeline

```python
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.retrievers import BM25Retriever, EnsembleRetriever

from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
```

```python
import torch
from transformers import pipeline
```

```python
file_path = "../data/Orca Progressive Learning from Complex.pdf"
data_file = PyPDFLoader(file_path)
docs = data_file.load()
```

```python
# create chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
chunks = splitter.split_documents(docs)
```

```python
HF_TOKEN = input("Enter your HuggingFace Token")
```

```python
# https://huggingface.co/BAAI/bge-base-en-v1.5
embeddings = HuggingFaceInferenceAPIEmbeddings(
    api_key=HF_TOKEN, model_name="BAAI/bge-base-en-v1.5"
)
```

```python
# number of chunks each retriever returns
k = 5
```

```python
vector_store = Chroma.from_documents(chunks, embeddings)
vector_retriever = vector_store.as_retriever(search_kwargs={"k": k})
```

```python
# BM25 is a lexical (keyword) retriever; the dense vector retriever above is the semantic one
keyword_retriever = BM25Retriever.from_documents(chunks)
keyword_retriever.k = k
```

```python
ensemble_retriever = EnsembleRetriever(
    retrievers=[vector_retriever, keyword_retriever], weights=[0.5, 0.5]
)
```
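`EnsembleRetriever` merges the two ranked lists with weighted Reciprocal Rank Fusion. A minimal sketch of that fusion, assuming the common RRF constant c=60 (not a value recorded in this notebook):

```python
# Weighted Reciprocal Rank Fusion: each retriever contributes weight / (rank + c)
# per document, so documents ranked highly by either list float to the top.
def rrf_fuse(rankings, weights, c=60):
    scores = {}
    for ranking, weight in zip(rankings, weights):
        for rank, doc_id in enumerate(ranking):
            scores[doc_id] = scores.get(doc_id, 0.0) + weight / (rank + c)
    return sorted(scores, key=scores.get, reverse=True)

# rrf_fuse([["d1", "d2"], ["d2", "d3"]], [0.5, 0.5]) -> ["d2", "d1", "d3"]
```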
```python
pipe = pipeline(
    "text-generation",
    model="HuggingFaceH4/zephyr-7b-beta",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
```

Output:

    Loading checkpoint shards: 0/8 [00:00<?, ?it/s]
    WARNING:root:Some parameters are on the meta device device because they were offloaded to the disk and cpu.
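For context on the warning above: zephyr-7b has roughly 7 billion parameters, so the bfloat16 weights alone take about 7e9 × 2 bytes ≈ 14 GB. With `device_map="auto"`, any layers that do not fit in GPU memory are offloaded to CPU RAM and then disk, which is why the generations below still work but can be slow on smaller GPUs.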
```python
# We use the tokenizer's chat template to format each message - see
# https://huggingface.co/docs/transformers/main/en/chat_templating
messages = [
    {
        "role": "system",
        "content": "You are a friendly chatbot who always responds in the style of a pirate",
    },
    {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
]

prompt = pipe.tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

outputs = pipe(
    prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95
)

print(outputs[0]["generated_text"])
```

Output:

    Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
    <|system|>
    You are a friendly chatbot who always responds in the style of a pirate</s>
    <|user|>
    How many helicopters can a human eat in one sitting?</s>
    <|assistant|>
    Matey, I'm afraid no human can eat a helicopter, as it's not food. Helicopters are machines used for transportation and other purposes, not a source of nourishment. I'd suggest you stick to eating hearty meals of grog, seafood, and maybe some plundered booty if ya fancy it! Arrrr!
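The printed text above includes the prompt because text-generation pipelines echo the full sequence by default. A small variation (a sketch, not recorded output) asks the pipeline to return only the continuation, which would make the manual prompt-stripping in `generate()` below unnecessary:

```python
# return_full_text=False keeps only the newly generated tokens
outputs = pipe(
    prompt,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
    top_k=50,
    top_p=0.95,
    return_full_text=False,
)
print(outputs[0]["generated_text"])
```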
---

```python
import textwrap


def wrap_text(text, width=90):
    """Wrap text to the given width while preserving existing newlines."""
    # Split the input text into lines based on newline characters
    lines = text.split("\n")

    # Wrap each line individually
    wrapped_lines = [textwrap.fill(line, width=width) for line in lines]

    # Join the wrapped lines back together using newline characters
    return "\n".join(wrapped_lines)
```

```python
def generate(input_text, system_prompt="", max_length=512):
    # fall back to the default persona when no system prompt is given
    if system_prompt == "":
        system_prompt = (
            "You are a friendly chatbot who always responds in the style of a pirate"
        )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": input_text},
    ]

    prompt = pipe.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    outputs = pipe(
        prompt,
        max_new_tokens=max_length,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
    )
    text = outputs[0]["generated_text"]
    # drop the echoed prompt, keeping only the newly generated continuation
    text = text.replace(prompt, "", 1)
    print(wrap_text(text))
```
```python
generate(
    """Alice: I don't know why, I'm struggling to maintain focus while studying. Any suggestion? \n\n Bob:""",
    system_prompt="You are Zephyr, an LLM that generates great conversations. Continue as Bob here.",
    max_length=512,
)
```

Notebook metadata: Python 3 (ipython) kernel, Python 3.9.0, nbformat 4.
notebooks/chat_with_pdf_using_zephyr-7b_v4.ipynb
ADDED
@@ -0,0 +1,663 @@
# Using the Zephyr 7B Beta Quantised Model

* [TheBloke/zephyr-7B-beta-GGUF](https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF)
* Uses the CTransformers wrapper

```python
%pip install torch==2.2.1
%pip install langchain==0.1.9
%pip install langchain-community==0.0.24
%pip install ctransformers==0.2.27
%pip install streamlit==1.31.1
%pip install streamlit-extras==0.4.0
%pip install rank_bm25==0.2.2
%pip install pypdf==4.0.2
%pip install chromadb==0.4.24
%pip install tiktoken==0.6.0
```
```python
import os
from langchain_community.llms import CTransformers
from langchain import PromptTemplate, LLMChain
```

```python
model_type = "mistral"
model_id = "TheBloke/zephyr-7B-beta-GGUF"
model_file = "zephyr-7b-beta.Q4_K_S.gguf"
```

```python
config = {
    "max_new_tokens": 1024,
    "repetition_penalty": 1.1,
    "temperature": 1,
    "top_k": 50,
    "top_p": 0.9,
    "stream": True,
    "threads": int(os.cpu_count() / 2),
}
```
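Q4_K_S is only one of the quantisation variants published in this repo. A quick way to see the alternatives before committing to a multi-gigabyte download, sketched here assuming `huggingface_hub` is available (ctransformers itself uses it for downloads):

```python
from huggingface_hub import list_repo_files

# List the GGUF files in the repo; each Q* suffix trades file size for quality.
for name in list_repo_files("TheBloke/zephyr-7B-beta-GGUF"):
    if name.endswith(".gguf"):
        print(name)
```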
```python
init_model = CTransformers(
    model=model_id, model_file=model_file, model_type=model_type, **config, lib="avx2"
)
```

Output:

    Fetching 1 files: 0/1
    zephyr-7b-beta.Q4_K_S.gguf: 0.00/4.14G
    Error while downloading from https://cdn-lfs-us-1.huggingface.co/... (signed CDN URL elided): Read timed out.
    Trying to resume download...
    Error while downloading from https://cdn-lfs-us-1.huggingface.co/... (signed CDN URL elided): Read timed out.
    Trying to resume download...
    zephyr-7b-beta.Q4_K_S.gguf: 0.00/4.14G
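The timeouts above are a download problem, not a model problem. One way to make the 4.14 GB fetch resumable and cached, a sketch assuming `huggingface_hub` is available and that CTransformers accepts a local file path as `model`:

```python
from huggingface_hub import hf_hub_download

# Resumable, cached download of the GGUF file
local_gguf = hf_hub_download(
    repo_id="TheBloke/zephyr-7B-beta-GGUF",
    filename="zephyr-7b-beta.Q4_K_S.gguf",
)

init_model = CTransformers(model=local_gguf, model_type=model_type, **config, lib="avx2")
```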
## Without Prompt Template

```python
query = "what is the meaning of the life ?"
```

```python
result = init_model(query)
print(result)
```

Output:

    LangChainDeprecationWarning: The function `__call__` was deprecated in LangChain 0.1.7 and will be removed in 0.2.0. Use invoke instead.

    what happens after we die ?

    is there any god or creator ?

    who am I really ?

    these are the questions that have always fascinated human mind and kept us thinking for ages. These questions are so profound, yet simple and so personal. We all have our own answers to these questions, whether in form of religion, spirituality or philosophy, which become a part of our life philosophy as we grow up.

    But there is another dimension where people look beyond the boundaries of these religions and philosophies. They go into a quest for truth that goes deeper than what they have been taught by their religion or philosophy. They start looking within themselves to find the answers. This quest takes them on a journey of self-discovery, which is often referred to as Spirituality.

    Spirituality, at its core, is an intense thirst to know the truth about life and ourselves. It is a longing for connection with something greater than oneself – God or the Universe. The spiritual quest takes us on a journey of self-reflection and discovery where we learn to observe ourselves in our daily lives and situations as they arise. This brings deep insights into our own nature and enables us to let go
## With Prompt Template

```python
template = """You are a helpful AI Assistant that follows instructions extremely well.
Question: {question}

Answer: Let's think step by step and answer it faithfully.
"""
```

```python
prompt = PromptTemplate(template=template, input_variables=["question"])
```

```python
chain = LLMChain(prompt=prompt, llm=init_model, verbose=True)
```

```python
query = "What is LLM ?"
```

```python
result = chain.run(query)
```

Output:

    LangChainDeprecationWarning: The function `run` was deprecated in LangChain 0.1.0 and will be removed in 0.2.0. Use invoke instead.

    > Entering new LLMChain chain...
    Prompt after formatting:
    You are a helpful AI Assistant that follows instructions extremely well.
    Question: What is LLM ?

    Answer: Let's think step by step and answer it faithfully.

    > Finished chain.

```python
print(result)
```

Output:

    LLM stands for Large Language Model. It refers to a type of machine learning algorithm specifically designed to process and generate human-like language, typically in the form of text or speech. These models are called "large" because they require vast amounts of training data to learn the complex patterns and relationships within language. The ultimate goal of LLMs is to enable more natural and intuitive interactions between humans and machines through enhanced communication capabilities.
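The deprecation warnings above name their replacement. A sketch of the same call through the non-deprecated API, relying on the fact that `LLMChain` returns a dict keyed by its output key, `"text"` by default:

```python
# Same chain, non-deprecated entry point
result = chain.invoke({"question": query})["text"]
print(result)
```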
## RAG - Talk to PDF

```python
import os
from langchain_community.llms import CTransformers
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.retrievers import BM25Retriever, EnsembleRetriever

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
```

### Load Data

```python
file_path = "../data/Orca Progressive Learning from Complex.pdf"
data_file = PyPDFLoader(file_path)
docs = data_file.load()
```

### Split & Chunk Docs

```python
# create chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
chunks = splitter.split_documents(docs)
```

### Load Embedder

```python
HF_TOKEN = input("Enter your HuggingFace Token")
```

```python
# https://huggingface.co/BAAI/bge-base-en-v1.5
embeddings = HuggingFaceInferenceAPIEmbeddings(
    api_key=HF_TOKEN, model_name="BAAI/bge-base-en-v1.5"
)
```

### Retrievers

```python
# number of chunks each retriever returns
k = 5
```

#### Vector Retriever

```python
vector_store = Chroma.from_documents(chunks, embeddings)
vector_retriever = vector_store.as_retriever(search_kwargs={"k": k})
```

#### Keyword (BM25) Retriever

```python
# BM25 is a lexical (keyword) retriever; the dense vector retriever above is the semantic one
keyword_retriever = BM25Retriever.from_documents(chunks)
keyword_retriever.k = k
```

#### Ensemble Retriever

```python
ensemble_retriever = EnsembleRetriever(
    retrievers=[vector_retriever, keyword_retriever], weights=[0.5, 0.5]
)
```

### Init LLM Model

```python
model_type = "mistral"
model_id = "TheBloke/zephyr-7B-beta-GGUF"
model_file = "zephyr-7b-beta.Q4_K_S.gguf"
```

```python
config = {
    "max_new_tokens": 2048,
    "repetition_penalty": 1.1,
    "temperature": 1,
    "top_k": 50,
    "top_p": 0.9,
    "stream": True,
    "context_length": 4096,
    "gpu_layers": 0,
    "threads": int(os.cpu_count() / 2),
}
```
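A quick budget check on `context_length=4096`: the ensemble can return up to 2 × k = 10 distinct chunks of at most 800 characters, i.e. roughly 8,000 characters, or on the order of 2,000 tokens under the common 4-characters-per-token heuristic. Together with the template, the question, and up to `max_new_tokens=2048` of answer, that sits close to the 4,096-token window, so raising `k` or `chunk_size` risks overflowing it.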
```python
llm = CTransformers(
    model=model_id, model_file=model_file, model_type=model_type, config=config, lib="avx2"
)
```

Output:

    Fetching 1 files: 0/1
### Prompting

```python
template = """You are a helpful AI Assistant that follows instructions extremely well.
Use the following context to answer user question.

Think step by step before answering the question.
You will get a $100 tip if you provide correct answer.

Context: {context}

Question: {question}

Answer: Let's think step by step and answer it faithfully.
"""
```

```python
prompt = ChatPromptTemplate.from_template(template)
output_parser = StrOutputParser()
```

```python
chain = (
    {"context": ensemble_retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | output_parser
)
```
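One detail hidden in the chain above: `ensemble_retriever` emits a `List[Document]`, which the prompt renders with Python's default `str()`, metadata included. A sketch of a small formatter that keeps only the page text; the `format_docs` name is illustrative, not from the notebook:

```python
from langchain_core.runnables import RunnableLambda

def format_docs(docs):
    # keep just the chunk text; drop Document metadata from the prompt
    return "\n\n".join(doc.page_content for doc in docs)

chain = (
    {"context": ensemble_retriever | RunnableLambda(format_docs),
     "question": RunnablePassthrough()}
    | prompt
    | llm
    | output_parser
)
```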
```python
print(chain.invoke("What is instruction tuning?"))
```

Output:

    Instruction tuning is a technique that allows pre-trained language models to learn from input (natural language descriptions of the task) and response pairs, for example, {"instruction": "Arrange the words in the given sentence to form a grammatically correct sentence.", "input": "the quickly brown fox jumped", "output": "the brown fox jumped quickly"}. It is commonly used for both language-only and multimodal tasks, such as image captioning and visual question answering. In recent times, many works have adopted instruction tuning to train smaller language models with outputs generated from large foundation models like GPT family. However, these approaches face several challenges, including limited task diversity, query complexity, and small-scale training data that understate the benefits of such methods. The Orca model presented in this thesis addresses these limitations by combining self-supervised learning, reinforcement learning, and instruction tuning to achieve competitive performance on multiple zero-shot benchmarks, reducing the gap with proprietary LLMs like ChatGPT and GPT-4.

Notebook metadata: Python 3 (ipython) kernel, Python 3.9.0, nbformat 4.
notebooks/reference/YT_Mistral_7B_Zephyr_ɒ_Testing.ipynb
ADDED
The diff for this file is too large to render; see the raw diff.

notebooks/reference/zephyr_7b_beta.ipynb
ADDED
The diff for this file is too large to render; see the raw diff.
requirements.txt
ADDED
@@ -0,0 +1,10 @@
+torch==2.2.1
+langchain==0.1.9
+langchain-community==0.0.24
+ctransformers==0.2.27
+streamlit==1.31.1
+streamlit-extras==0.4.0
+tiktoken==0.6.0
+rank_bm25==0.2.2
+pypdf==4.0.2
+chromadb==0.4.24
requirements_local.txt
ADDED
@@ -0,0 +1,12 @@
+ipykernel
+ipywidgets
+torch==2.2.1
+langchain==0.1.9
+langchain-community==0.0.24
+ctransformers==0.2.27
+streamlit==1.31.1
+streamlit-extras==0.4.0
+tiktoken==0.6.0
+rank_bm25==0.2.2
+pypdf==4.0.2
+chromadb==0.4.24
runtime.txt
ADDED
@@ -0,0 +1 @@
+python-3.9.0
src/.gitkeep
ADDED
File without changes