Spaces:

thivav
/

chat_with_pdf_using_zephyr-7b-beta

Sleeping

File size: 5,576 Bytes

e88c82c

import os
import tempfile

import streamlit as st
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.retrievers import EnsembleRetriever
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.chat_message_histories import StreamlitChatMessageHistory
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain_community.llms import CTransformers
from langchain_community.retrievers import BM25Retriever
from langchain_community.vectorstores import Chroma
from streamlit_extras.add_vertical_space import add_vertical_space


@st.cache_resource(ttl="1h")
def get_retriever(pdf_files):
    """Build a hybrid (dense + BM25) retriever over the uploaded PDF files.

    Each upload is written to a scratch directory so ``PyPDFLoader`` can read
    it from disk. The extracted pages are split into token-sized chunks and
    indexed twice: in a Chroma vector store (embedding similarity) and in a
    BM25 index (keyword matching). Both retrievers are combined with equal
    weights in an ``EnsembleRetriever``.

    Args:
        pdf_files: Iterable of uploaded file objects exposing ``.name`` and
            ``.getvalue()`` (the objects returned by ``st.file_uploader``).

    Returns:
        An ``EnsembleRetriever`` combining the vector and BM25 retrievers,
        each configured to return the top 5 chunks.

    Raises:
        ValueError: If no text chunks could be produced from the uploads
            (for example, image-only PDFs without a text layer).
    """

    docs = []
    # The scratch directory is only needed while the loader reads the files;
    # the context manager guarantees it is removed even if loading fails.
    with tempfile.TemporaryDirectory() as temp_dir:
        for pdf_file in pdf_files:
            # Keep only the final path component so an unusual upload name
            # cannot place the file outside the scratch directory.
            file_name = os.path.basename(pdf_file.name)
            temp_pdf_file_path = os.path.join(temp_dir, file_name)

            with open(temp_pdf_file_path, "wb") as f:
                f.write(pdf_file.getvalue())

            # load() reads the pages eagerly, so the file is no longer needed
            # once this call returns.
            docs.extend(PyPDFLoader(temp_pdf_file_path).load())

    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=1500, chunk_overlap=200
    )
    chunks = text_splitter.split_documents(docs)

    if not chunks:
        # Fail with an actionable message instead of an obscure error from
        # the index builders when there is nothing to index.
        raise ValueError("No text could be extracted from the uploaded PDF files.")

    # get huggingface token from env secret
    hf_token = os.environ.get("HF_TOKEN")

    # embeddings (computed remotely through the Hugging Face Inference API)
    embeddings = HuggingFaceInferenceAPIEmbeddings(
        api_key=hf_token,
        model_name="BAAI/bge-base-en-v1.5",
    )

    # number of chunks each retriever returns
    k = 5

    # dense (embedding-similarity) retriever
    vector_store = Chroma.from_documents(chunks, embeddings)
    vector_retriever = vector_store.as_retriever(search_kwargs={"k": k})

    # sparse (BM25 keyword) retriever
    keyword_retriever = BM25Retriever.from_documents(chunks)
    keyword_retriever.k = k

    # ensemble retriever: equal weight for dense and keyword results
    ensemble_retriever = EnsembleRetriever(
        retrievers=[vector_retriever, keyword_retriever], weights=[0.5, 0.5]
    )

    return ensemble_retriever


@st.cache_resource(ttl="1h")
def _load_llm():
    """Load the quantised Zephyr-7B-Beta model; cached because loading is slow."""

    # load llm model
    model_type = "mistral"
    model_id = "TheBloke/zephyr-7B-beta-GGUF"
    model_file = "zephyr-7b-beta.Q4_K_S.gguf"

    config = {
        "max_new_tokens": 2048,
        "repetition_penalty": 1.1,
        "temperature": 1,
        "top_k": 50,
        "top_p": 0.9,
        "stream": True,
        "context_length": 4096,
        "gpu_layers": 0,
        # os.cpu_count() may return None when the count is undetermined.
        "threads": os.cpu_count() or 1,
    }

    return CTransformers(
        model=model_id,
        model_file=model_file,
        model_type=model_type,
        config=config,
        lib="avx2",
    )


def initialize_llm(_retriever):
    """Create the conversational retrieval chain for the given retriever.

    Only the model load is cached (in ``_load_llm``). The chain itself is
    rebuilt on every call: Streamlit leaves underscore-prefixed parameters
    out of the cache key, so caching this function would keep returning the
    chain built for the first retriever even after different PDF files are
    uploaded. Rebuilding also gives each call its own memory and chat-history
    objects instead of sharing cached ones.

    Args:
        _retriever: Retriever used by the chain to fetch context documents.

    Returns:
        A ``(chain, chat_history)`` tuple: the ``ConversationalRetrievalChain``
        and the ``StreamlitChatMessageHistory`` backing its memory.
    """

    llm = _load_llm()

    chat_history = StreamlitChatMessageHistory()

    # init chat history memory
    memory = ConversationBufferMemory(
        memory_key="chat_history", chat_memory=chat_history, return_messages=True
    )

    chain = ConversationalRetrievalChain.from_llm(
        llm, retriever=_retriever, memory=memory, verbose=False
    )

    return chain, chat_history


def main():
    """Render the Streamlit page and run the PDF chat loop.

    Draws the page header and sidebar, waits for one or more PDF uploads,
    builds the retriever and conversational chain for them, replays the
    stored chat history, and answers a newly submitted question.
    """

    st.set_page_config(
        page_title="Talk to PDF using Zephyr-7B-Beta",
        page_icon="📰",
        layout="centered",
        initial_sidebar_state="expanded",
    )

    st.header("Talk to PDF files 📰", divider="rainbow")
    st.subheader(
        "Enjoy :red[talking] with :green[PDF] files using :sunglasses: Zephyr-7B-Beta"
    )
    # The model-file link targets the beta repository, matching its label and
    # the model that is actually loaded.
    st.markdown(
        """
            * Used the [zephyr-7b-beta.Q4_K_S.gguf](https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_S.gguf) quantised 
            version of [Zephyr-7B Beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) model 
            from the [TheBloke/zephyr-7B-beta-GGUF](https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF) repository.
            ___
        """
    )

    st.sidebar.title("Talk to PDF 📰")
    st.sidebar.markdown(
        "[Checkout the repository](https://github.com/ThivaV/chat_with_pdf_using_zephyr-7b)"
    )
    st.sidebar.markdown(
        """
            ### This is a LLM powered chatbot, built using:
                
            * [Streamlit](https://streamlit.io)
            * [LangChain](https://python.langchain.com/)
            * [HuggingFaceH4/zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)
            * [TheBloke/zephyr-7B-beta-GGUF](https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF)
            * [CTransformers](https://github.com/marella/ctransformers)
            * [Embeddings](https://huggingface.co/BAAI/bge-base-en-v1.5)
            * [Chroma](https://docs.trychroma.com/?lang=py)
            ___
            """
    )

    add_vertical_space(2)

    upload_pdf_files = st.sidebar.file_uploader(
        "Upload a pdf files 📤", type="pdf", accept_multiple_files=True
    )

    # Nothing to chat about until at least one PDF has been uploaded.
    if not upload_pdf_files:
        st.info("👈 :red[Please upload pdf files] ⛔")
        st.stop()

    retriever = get_retriever(upload_pdf_files)

    chain, chat_history = initialize_llm(retriever)

    # load previous chat history
    # re-draw the chat history in the chat window
    for message in chat_history.messages:
        st.chat_message(message.type).write(message.content)

    if prompt := st.chat_input("Ask questions"):
        with st.chat_message("human"):
            st.markdown(prompt)

        # Name the input key explicitly rather than relying on the chain to
        # infer its single non-memory input.
        response = chain.invoke({"question": prompt})

        with st.chat_message("ai"):
            st.write(response["answer"])


if __name__ == "__main__":
    # Start the app only when this file is run as a script, not on import.
    main()