import streamlit as st
import os
from langchain_groq import ChatGroq
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
# from langchain.vectorstores.cassandra import Cassandra
from langchain_community.vectorstores import Cassandra
from langchain_community.llms import Ollama
from cassandra.auth import PlainTextAuthProvider
import tempfile
import cassio
from PyPDF2 import PdfReader
from cassandra.cluster import Cluster
import warnings
# from langchain.vectorstores import Pinecone
from langchain_community.vectorstores import Pinecone
import pinecone
# NOTE: this import rebinds the name `Pinecone` to the Pinecone client class,
# shadowing the LangChain vector store imported just above. `doc_loader`
# relies on the client class, so the import order here matters.
from pinecone import Pinecone, ServerlessSpec
from pinecone.exceptions import NotFoundException
from langchain_pinecone import PineconeVectorStore
warnings.filterwarnings("ignore")
from dotenv import load_dotenv
import time

load_dotenv()

ASTRA_DB_SECURE_BUNDLE_PATH ="G:/GENAI/Medical_chat_bot/src/secure-connect-medical-bot.zip"
# NOTE(review): the LANGCHAIN_* names below are plain module variables; nothing
# in this file exports them to os.environ. Confirm whether tracing is expected
# to pick them up from here or from the .env file loaded above.
LANGCHAIN_TRACING_V2="true"
LANGCHAIN_API_KEY=os.getenv('LANGCHAIN_API_KEY')
LANGCHAIN_PROJECT="Medical_chatbot"
LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"

# Pinecone index and namespace that hold the embedded PDF chunks.
PINECONE_INDEX_NAME = "pdf-query-index"
PINECONE_NAMESPACE = "pdf_query_medical"


def _build_embeddings():
    """Return the CPU-based BGE embedding model with normalized vectors."""
    return HuggingFaceBgeEmbeddings(
        model_name='BAAI/bge-small-en-v1.5',
        # model_name='sentence-transformers/all-MiniLM-16-v2',
        model_kwargs={'device': 'cpu'},
        encode_kwargs={'normalize_embeddings': True},
    )


def _namespace_exists(index, namespace):
    """Return True if `namespace` is listed in the index statistics.

    A `NotFoundException` from the stats call is treated as "does not exist".
    """
    try:
        stats = index.describe_index_stats()
    except NotFoundException:
        return False
    return namespace in stats['namespaces']


def _load_chunks(pdf_reader):
    """Load the PDF and split it into 1000-character chunks (200 overlap)."""
    # `load()` rather than `load_and_split()`: the latter applies its own
    # default splitter first, so the documents would be chunked twice.
    documents = PyPDFLoader(pdf_reader).load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return text_splitter.split_documents(documents)


def doc_loader(pdf_reader):
    """Return a Pinecone vector store for the medical PDF namespace.

    If the namespace is already present in the index, the store is returned
    as-is and the PDF is not read. Otherwise the PDF is loaded, chunked,
    embedded and added to the namespace before the store is returned.

    Args:
        pdf_reader: Passed unchanged to `PyPDFLoader`; presumably the path of
            the PDF to index (verify against the caller).

    Returns:
        The `PineconeVectorStore` bound to the index and namespace named by
        `PINECONE_INDEX_NAME` and `PINECONE_NAMESPACE`.
    """
    embeddings = _build_embeddings()

    # os.environ['PINECONE_API_KEY'] = os.getenv('PINECONE_API_KEY')
    os.environ['PINECONE_API_ENV'] = "pdf_query_db"
    pc = Pinecone(api_key=os.getenv('PINECONE_API_KEY'))
    index = pc.Index(PINECONE_INDEX_NAME)

    # The store is configured identically whether or not the namespace exists;
    # only the upload step below is conditional.
    pinecone_vector_store = PineconeVectorStore(
        embedding=embeddings,
        index_name=PINECONE_INDEX_NAME,
        namespace=PINECONE_NAMESPACE,
    )

    if _namespace_exists(index, PINECONE_NAMESPACE):
        print(f"Namespace '{PINECONE_NAMESPACE}' exist.")
        return pinecone_vector_store

    print(f"Namespace '{PINECONE_NAMESPACE}' does not exist. It will be created upon upsertion.")
    # Parse the PDF only when its chunks are actually going to be uploaded.
    pinecone_vector_store.add_documents(_load_chunks(pdf_reader))
    return pinecone_vector_store


# Disabled Cassandra / Astra DB variant of doc_loader.
#
# def doc_loader(pdf_reader):
#     # print('im from doc_loc fn')
#     encode_kwargs = {'normalize_embeddings': True}
#     huggigface_embeddings=HuggingFaceBgeEmbeddings(
#         model_name='BAAI/bge-small-en-v1.5',
#         # model_name='sentence-transformers/all-MiniLM-16-v2',
#         model_kwargs={'device':'cpu'},
#         encode_kwargs=encode_kwargs)
#     loader=PyPDFLoader(pdf_reader)
#     documents=loader.load_and_split()
#     # print('iam after documents loader called')
#     text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
#     final_documents=text_splitter.split_documents(documents)
#     # print('iam after final_documents called',final_documents)
#     astrasession = Cluster(
#         cloud={"secure_connect_bundle": ASTRA_DB_SECURE_BUNDLE_PATH},
#         auth_provider=PlainTextAuthProvider("token", ASTRA_DB_APPLICATION_TOKEN),
#     ).connect()
#     check_table_query = f"""
#     SELECT table_name FROM system_schema.tables
#     WHERE keyspace_name='{ASTRA_DB_KEYSPACE}' AND table_name='{ASTRA_DB_TABLE}';
#     """
#     try:
#         result = astrasession.execute(check_table_query)
#         if result.one():
#             return_query=f""" select * from '{ASTRA_DB_KEYSPACE}'.'{ASTRA_DB_TABLE}'; """
#             astra_vector_store=astrasession.execute(return_query)
#             return astra_vector_store
#         else:
#             print(f"Table {ASTRA_DB_KEYSPACE}.{ASTRA_DB_TABLE} does not exist. Try to create table.")
#             astra_vector_store=Cassandra(
#                 embedding=huggigface_embeddings,
#                 table_name='medical_bot_demo',
#                 session=astrasession,
#                 keyspace=ASTRA_DB_KEYSPACE
#             )
#             astra_vector_store.add_documents(final_documents)
#             if astra_vector_store:
#                 print("Vector store created successfully")
#             return astra_vector_store
#     except Exception as e:
#         print(f"Error checking/creating keyspace: {e}")