Spaces:

Technocoloredgeek
/

AIE4_Midterm_V1

Sleeping

App Files Files Community

AIE4_Midterm_V1 / app.py

Technocoloredgeek

Update app.py

c8181a4 verified about 2 months ago

raw

history blame contribute delete

4.58 kB

	import streamlit as st
	import os
	from langchain_community.document_loaders import PyMuPDFLoader
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain_openai import OpenAIEmbeddings, ChatOpenAI
	from langchain_qdrant import QdrantVectorStore
	from langchain.prompts import ChatPromptTemplate
	from langchain_core.output_parsers import StrOutputParser
	from langchain_core.runnables import RunnablePassthrough
	from qdrant_client import QdrantClient
	from qdrant_client.http.models import Distance, VectorParams
	from operator import itemgetter
	from langchain_community.embeddings import HuggingFaceEmbeddings # Add this line

	from sentence_transformers import SentenceTransformer

	@st.cache_resource
	def _load_embedding_model():
	    """Load the fine-tuned SentenceTransformer model through Streamlit's resource cache."""
	    return SentenceTransformer("Technocoloredgeek/midterm-finetuned-embedding")


	# Streamlit re-executes this script from the top on every user interaction.
	# Going through the cached loader keeps the module-level `model` name
	# available without re-instantiating the model on each rerun.
	model = _load_embedding_model()

	# Set up API keys: copy the key from Streamlit secrets into the environment
	# variable the OpenAI client reads.
	os.environ["OPENAI_API_KEY"] = st.secrets["OPENAI_API_KEY"]

	# PDF links: source documents that are loaded, chunked and indexed.
	pdf_links = [
	    "https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf",
	    "https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf",
	]

	@st.cache_resource
	def load_and_process_pdfs(pdf_links):
	    """Load every PDF in ``pdf_links`` and split the pages into text chunks.

	    Args:
	        pdf_links: Iterable of PDF locations handed to ``PyMuPDFLoader``
	            as ``file_path``.

	    Returns:
	        The result of ``RecursiveCharacterTextSplitter.split_documents`` over
	        all loaded documents, using 500-character chunks (measured with
	        ``len``) and a 40-character overlap.
	    """
	    # Load the PDFs in the order given and flatten their documents into one list.
	    loaded_pages = [
	        page
	        for url in pdf_links
	        for page in PyMuPDFLoader(file_path=url).load()
	    ]

	    chunker = RecursiveCharacterTextSplitter(
	        chunk_size=500,
	        chunk_overlap=40,
	        length_function=len,
	        is_separator_regex=False,
	    )
	    return chunker.split_documents(loaded_pages)

	@st.cache_resource
	def setup_vectorstore():
	    """Build the in-memory Qdrant vector store and fill it with the PDF chunks.

	    Reads the module-level ``pdf_links`` list and calls
	    ``load_and_process_pdfs`` to obtain the documents to index.

	    Returns:
	        A ``QdrantVectorStore`` backed by an in-memory Qdrant client, with
	        the chunked documents already added.
	    """
	    collection = "AI_Ethics_Framework"

	    client = QdrantClient(location=":memory:")

	    # Fine-tuned embedding model, loaded by name through the LangChain wrapper.
	    embedder = HuggingFaceEmbeddings(
	        model_name="Technocoloredgeek/midterm-finetuned-embedding"
	    )

	    # Embed a probe string to discover the vector dimension rather than
	    # hard-coding it.
	    embedding_dim = len(embedder.embed_query("test"))

	    client.create_collection(
	        collection_name=collection,
	        vectors_config=VectorParams(size=embedding_dim, distance=Distance.COSINE),
	    )

	    store = QdrantVectorStore(
	        client=client,
	        collection_name=collection,
	        embedding=embedder,
	    )

	    # Load, chunk and index the source PDFs.
	    store.add_documents(load_and_process_pdfs(pdf_links))

	    return store

	@st.cache_resource
	def create_rag_pipeline(_vector_store):
	    """Build the retrieval-augmented question-answering chain.

	    Args:
	        _vector_store: Vector store exposing ``as_retriever()``. The leading
	            underscore asks ``st.cache_resource`` not to hash this argument.

	    Returns:
	        A runnable chain that is invoked with ``{"question": <text>}`` and
	        yields a dict with two keys: ``"response"`` (the chat model output
	        for the filled-in prompt) and ``"context"`` (the documents the
	        retriever returned for the question).
	    """
	    retriever = _vector_store.as_retriever()

	    template = """
	You are an expert AI assistant with deep knowledge of business, technology, and entrepreneurship. Your task is to provide accurate, insightful answers based solely on the given context. Follow these guidelines:
	1. Analyze the question carefully to understand the core information being sought.
	2. Thoroughly examine the provided context, identifying key relevant information.
	3. Formulate a clear, concise answer that directly addresses the question.
	4. Use specific details and examples from the context to support your answer.
	5. If the context doesn't contain sufficient information to fully answer the question, state this clearly and say,'I don't know'.
	6. Do not introduce any information not present in the context.
	7. If asked for an opinion or recommendation, base it strictly on insights from the context.
	8. Use a confident, authoritative tone while maintaining accuracy.
	9. If you cannot provide a clear answer to the question, reply with "I don't know".
	Question:
	{question}
	Context:
	{context}
	Answer:
	"""

	    prompt = ChatPromptTemplate.from_template(template)
	    primary_qa_llm = ChatOpenAI(model_name="gpt-4", temperature=0)

	    # Stages are composed with the plain `|` operator (a backslash-escaped
	    # pipe is not valid Python):
	    #   1. retrieve context for the question and carry the question along,
	    #   2. re-assign "context" to itself (kept from the original chain),
	    #   3. produce the model response while also returning the context.
	    retrieval_augmented_qa_chain = (
	        {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
	        | RunnablePassthrough.assign(context=itemgetter("context"))
	        | {"response": prompt | primary_qa_llm, "context": itemgetter("context")}
	    )

	    return retrieval_augmented_qa_chain

	# Streamlit UI
	st.title("Ask About AI Ethics!")

	# Build the vector store and the RAG chain; both builders are decorated with
	# st.cache_resource.
	vector_store = setup_vectorstore()
	rag_pipeline = create_rag_pipeline(vector_store)

	user_query = st.text_input("Enter your question about AI Ethics:")

	# Only query the pipeline once the user has typed something.
	if user_query:
	    with st.spinner("Generating response..."):
	        result = rag_pipeline.invoke({"question": user_query})

	    # Model answer first, then the retrieved passages it was given.
	    st.write("Response:")
	    answer_text = result["response"].content
	    st.write(answer_text)

	    st.write("Context Used:")
	    retrieved_context = result["context"]
	    st.write(retrieved_context)