"""Streamlit app: upload PDFs, index them in FAISS, and answer questions with RetrievalQA."""

import os
import tempfile
from typing import List

import streamlit as st
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load environment variables (expects OPENAI_API_KEY in .env or the environment).
load_dotenv()

openai_api_key = os.getenv("OPENAI_API_KEY")

# Embedding model used both for indexing and for query-time retrieval.
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key, model="text-embedding-3-small")

# Where the FAISS index is persisted between sessions.
FAISS_INDEX_PATH = "faiss_index"
FAISS_INDEX_FILE = os.path.join(FAISS_INDEX_PATH, "index.faiss")

# Streamlit re-executes this script top-to-bottom on every interaction, so plain
# module globals are wiped each rerun. st.session_state survives reruns and is
# the supported place for per-session mutable state.
if "vector_store" not in st.session_state:
    st.session_state.vector_store = None
if "pdf_files" not in st.session_state:
    # Maps uploaded file name -> raw PDF bytes, so download buttons keep
    # working after the temporary on-disk copy has been deleted.
    st.session_state.pdf_files = {}

# Reload an index persisted by a previous session, if one exists. The file is
# produced locally by save_local below, so deserializing it is trusted here.
if st.session_state.vector_store is None and os.path.exists(FAISS_INDEX_FILE):
    st.session_state.vector_store = FAISS.load_local(
        FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True
    )


def process_pdf(uploaded_file):
    """Split an uploaded PDF into chunks and add them to the session's FAISS index.

    Args:
        uploaded_file: a Streamlit ``UploadedFile`` holding the PDF contents.

    Side effects:
        Updates ``st.session_state.vector_store`` and ``st.session_state.pdf_files``,
        and persists the index to ``FAISS_INDEX_PATH``.
    """
    pdf_bytes = uploaded_file.getvalue()

    # PyPDFLoader requires a real file path, so stage the upload in a temp file.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(pdf_bytes)
        tmp_file_path = tmp_file.name

    try:
        documents = PyPDFLoader(tmp_file_path).load()
    finally:
        # Always remove the temp copy, even if PDF parsing fails.
        os.unlink(tmp_file_path)

    # The loader records the temp-file path as each document's source; replace
    # it with the user-visible file name so source lookups in main() match.
    for doc in documents:
        doc.metadata["source"] = uploaded_file.name

    # Keep the raw bytes for the download button (the temp file is gone).
    st.session_state.pdf_files[uploaded_file.name] = pdf_bytes

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    texts = splitter.split_documents(documents)

    if st.session_state.vector_store is None:
        st.session_state.vector_store = FAISS.from_documents(texts, embeddings)
    else:
        st.session_state.vector_store.add_documents(texts)

    # Persist the updated index for future sessions.
    os.makedirs(FAISS_INDEX_PATH, exist_ok=True)
    st.session_state.vector_store.save_local(FAISS_INDEX_PATH)


def main():
    """Render the UI: PDF upload, question box, answer, and source downloads."""
    st.title("PDF Question Answering System")

    uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
    if uploaded_file is not None:
        # Only index each file once per session; reruns skip re-processing.
        if uploaded_file.name not in st.session_state.pdf_files:
            process_pdf(uploaded_file)
        st.success(f"PDF '{uploaded_file.name}' processed. You can now ask questions!")

    user_question = st.text_input("Ask a question about the PDFs:")
    if not user_question:
        return

    vector_store = st.session_state.vector_store
    if vector_store is None:
        st.error("Error: No PDFs have been uploaded yet.")
        return

    retriever = vector_store.as_retriever(search_kwargs={"k": 3})

    llm = ChatOpenAI(openai_api_key=openai_api_key, model="gpt-4o-mini", temperature=0)
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
    )

    # .invoke replaces the deprecated direct call syntax on LangChain chains.
    result = qa_chain.invoke({"query": user_question})
    st.write("Answer:", result["result"])

    source_docs = result["source_documents"]
    if not source_docs:
        return

    st.subheader("Sources:")
    seen = set()
    other_sources = []
    for doc in source_docs:
        source = doc.metadata["source"]
        file_name = os.path.basename(source)
        if file_name in seen:
            continue
        seen.add(file_name)

        data = st.session_state.pdf_files.get(file_name)
        if data is None:
            # Chunks restored from a persisted index may reference files whose
            # bytes are not held in this session; list them without a download.
            other_sources.append(source)
            continue

        st.write(f"Source: {file_name}")
        st.download_button(
            label=f"Download {file_name}",
            data=data,
            file_name=file_name,
            mime="application/pdf",
        )

    if other_sources:
        st.subheader("Other Sources:")
        for source in other_sources:
            st.write(f"- {source}")


if __name__ == "__main__":
    main()