Spaces:
Sleeping
Sleeping
import streamlit as st | |
import os | |
from langchain_groq import ChatGroq | |
from langchain_community.document_loaders import WebBaseLoader | |
from langchain_community.embeddings import OllamaEmbeddings | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.chains.combine_documents import create_stuff_documents_chain | |
from langchain_core.prompts import ChatPromptTemplate | |
from langchain.chains import create_retrieval_chain | |
from langchain_community.vectorstores import FAISS | |
from langchain_community.document_loaders import PyPDFLoader | |
from langchain_community.document_loaders import PyPDFDirectoryLoader | |
from langchain_community.embeddings import HuggingFaceBgeEmbeddings | |
# from langchain.vectorstores.cassandra import Cassandra | |
from langchain_community.vectorstores import Cassandra | |
from langchain_community.llms import Ollama | |
from cassandra.auth import PlainTextAuthProvider | |
import tempfile | |
import cassio | |
from PyPDF2 import PdfReader | |
from cassandra.cluster import Cluster | |
import warnings | |
# from langchain.vectorstores import Pinecone | |
from langchain_community.vectorstores import Pinecone | |
import pinecone | |
from pinecone import Pinecone, ServerlessSpec | |
from langchain_pinecone import PineconeVectorStore | |
warnings.filterwarnings("ignore") | |
from dotenv import load_dotenv | |
import time | |
load_dotenv() | |
# Location of the Astra DB secure-connect bundle. It can be overridden with
# the ASTRA_DB_SECURE_BUNDLE_PATH environment variable (for example from the
# .env file loaded above); the fallback is the original hard-coded path.
ASTRA_DB_SECURE_BUNDLE_PATH = os.getenv(
    "ASTRA_DB_SECURE_BUNDLE_PATH",
    "G:/GENAI/Medical_chat_bot/src/secure-connect-medical-bot.zip",
)

# LangSmith tracing settings.
# NOTE(review): these are plain module variables and are not written to
# os.environ here. If tracing is expected to be switched on by them, confirm
# that the same keys are also provided through the environment / .env file.
LANGCHAIN_TRACING_V2 = "true"
LANGCHAIN_API_KEY = os.getenv('LANGCHAIN_API_KEY')
LANGCHAIN_PROJECT = "Medical_chatbot"
LANGCHAIN_ENDPOINT = "https://api.smith.langchain.com"
def doc_loader(pdf_reader):
    """Return the Pinecone vector store for the medical PDF namespace.

    The PDF is loaded and cut into 1000-character chunks (200 overlap). If
    the namespace ``pdf_query_medical`` is already listed in the stats of
    the ``pdf-query-index`` index, the store is returned as-is; otherwise
    the chunks are added to it first.

    Side effects: sets ``os.environ['PINECONE_API_ENV']`` and prints which
    branch was taken.

    Args:
        pdf_reader: Handed straight to ``PyPDFLoader``. Despite the name it
            is presumably a path to a PDF file -- confirm against callers.

    Returns:
        The ``PineconeVectorStore`` bound to the index and namespace above.
    """
    index_name = "pdf-query-index"
    namespace = "pdf_query_medical"

    embeddings = HuggingFaceBgeEmbeddings(
        model_name='BAAI/bge-small-en-v1.5',
        # model_name='sentence-transformers/all-MiniLM-16-v2',
        model_kwargs={'device': 'cpu'},
        encode_kwargs={'normalize_embeddings': True},
    )

    # Load the pages and split them exactly once. The previous code called
    # load_and_split(), which applies its own default splitter, and then
    # split the result a second time with the splitter below.
    loader = PyPDFLoader(pdf_reader)
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    final_documents = text_splitter.split_documents(loader.load())

    os.environ['PINECONE_API_ENV'] = "pdf_query_db"
    pc = Pinecone(api_key=os.getenv('PINECONE_API_KEY'))
    index = pc.Index(index_name)

    def namespace_exists(index, namespace):
        """Report whether *namespace* appears in the index statistics."""
        try:
            stats = index.describe_index_stats()
        # NOTE(review): this exception path depends on the installed
        # pinecone client version -- confirm it resolves in production.
        except pinecone.core.client.exceptions.NotFoundException:
            return False
        return namespace in stats['namespaces']

    already_indexed = namespace_exists(index, namespace)
    if already_indexed:
        print(f"Namespace '{namespace}' exist.")
    else:
        print(f"Namespace '{namespace}' does not exist. It will be created upon upsertion.")

    # The store is constructed the same way in both cases; only the upload
    # differs.
    pinecone_vector_store = PineconeVectorStore(
        embedding=embeddings,
        index_name=index_name,
        namespace=namespace,
    )
    if not already_indexed:
        # NOTE(review): when the namespace already exists, the chunks of the
        # PDF passed in are not uploaded -- confirm this is intended.
        pinecone_vector_store.add_documents(final_documents)
    return pinecone_vector_store
# Earlier Astra DB (Cassandra) variant of doc_loader, kept commented out for reference.
# def doc_loader(pdf_reader):
#     # print('im from doc_loc fn')
#     encode_kwargs = {'normalize_embeddings': True}
#     huggigface_embeddings=HuggingFaceBgeEmbeddings(
#         model_name='BAAI/bge-small-en-v1.5',
#         # model_name='sentence-transformers/all-MiniLM-16-v2',
#         model_kwargs={'device':'cpu'},
#         encode_kwargs=encode_kwargs)
#     loader=PyPDFLoader(pdf_reader)
#     documents=loader.load_and_split()
#     # print('iam after documents loader called')
#     text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
#     final_documents=text_splitter.split_documents(documents)
#     # print('iam after final_documents called',final_documents)
#     astrasession = Cluster(
#         cloud={"secure_connect_bundle": ASTRA_DB_SECURE_BUNDLE_PATH},
#         auth_provider=PlainTextAuthProvider("token", ASTRA_DB_APPLICATION_TOKEN),
#     ).connect()
#     check_table_query = f"""
#     SELECT table_name FROM system_schema.tables
#     WHERE keyspace_name='{ASTRA_DB_KEYSPACE}' AND table_name='{ASTRA_DB_TABLE}';
#     """
#     try:
#         result = astrasession.execute(check_table_query)
#         if result.one():
#             return_query=f""" select * from '{ASTRA_DB_KEYSPACE}'.'{ASTRA_DB_TABLE}'; """
#             astra_vector_store=astrasession.execute(return_query)
#             return astra_vector_store
#         else:
#             print(f"Table {ASTRA_DB_KEYSPACE}.{ASTRA_DB_TABLE} does not exist. Try to create table.")
#             astra_vector_store=Cassandra(
#                 embedding=huggigface_embeddings,
#                 table_name='medical_bot_demo',
#                 session=astrasession,
#                 keyspace=ASTRA_DB_KEYSPACE
#             )
#             astra_vector_store.add_documents(final_documents)
#             if astra_vector_store:
#                 print("Vector store created successfully")
#                 return astra_vector_store
#     except Exception as e:
#         print(f"Error checking/creating keyspace: {e}")