Asaad Almutareb
fetch vectorstore
eb3ce64
raw
history blame
3.63 kB
from langchain.tools import BaseTool, StructuredTool, tool
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
#from langchain.tools import Tool
from langchain_google_community import GoogleSearchAPIWrapper
from langchain_community.embeddings.sentence_transformer import (
SentenceTransformerEmbeddings,
)
from langchain_community.vectorstores import Chroma
import ast
from rag_app.loading_data.load_S3_vector_stores import get_chroma_vs
import chromadb
from rag_app.utils.utils import (
parse_list_to_dicts, format_search_results
)
from rag_app.database.db_handler import (
add_many
)
import os
# from innovation_pathfinder_ai.utils import create_wikipedia_urls_from_text
# Runtime configuration, read once from the environment when the module is imported.
# Both values are used by the tool functions defined below.
persist_directory = os.environ.get('VECTOR_DATABASE_LOCATION')
embedding_model = os.environ.get("EMBEDDING_MODEL")

# Bootstrap: when the vector-store location is not on disk yet, call the loader first.
# NOTE(review): judging by its module name, get_chroma_vs presumably pulls the store
# from S3 — confirm. An unset VECTOR_DATABASE_LOCATION is not handled here.
if not os.path.exists(persist_directory):
    get_chroma_vs()
@tool
def memory_search(query:str) -> str:
    """Search the memory vector store for existing knowledge and relevent pervious researches. \
This is your primary source to start your search with checking what you already have learned from the past, before going online."""
    # NOTE(review): the docstring above is left byte-for-byte untouched — `@tool` is
    # expected to use it as the tool description handed to the agent, so it is runtime text.
    # TODO: several collections exist, so this tool deserves a more specific name.

    # Open the on-disk Chroma database at the module-level persist directory.
    chroma_client = chromadb.PersistentClient(path=persist_directory)

    # The collection to query is named by an environment variable.
    conversation_collection = os.getenv('CONVERSATION_COLLECTION_NAME')

    # The embedding model name also comes from the module-level configuration.
    embedder = SentenceTransformerEmbeddings(model_name=embedding_model)

    store = Chroma(
        client=chroma_client,
        collection_name=conversation_collection,
        embedding_function=embedder,
    )

    # Query with the retriever's default settings and return the hits in string form.
    hits = store.as_retriever().invoke(query)
    return str(hits)
@tool
def knowledgeBase_search(query:str) -> str:
    """Suche die interne Datenbank nach passenden Versicherungsprodukten und Informationen zu den Versicherungen"""
    # NOTE(review): the German docstring is left verbatim — `@tool` is expected to use it
    # as the runtime tool description. English gloss: "Search the internal database for
    # matching insurance products and information about the insurances."
    # TODO: more than one collection exists, so this tool should get a more specific name.

    embedder = SentenceTransformerEmbeddings(model_name=embedding_model)

    # Open the store directly from the persist directory; no explicit client or
    # collection name is passed here.
    store = Chroma(embedding_function=embedder, persist_directory=persist_directory)

    # Retriever configured for "mmr" search with k=5 and fetch_k=10.
    mmr_retriever = store.as_retriever(
        search_type="mmr",
        search_kwargs={'k': 5, 'fetch_k': 10},
    )

    # `invoke` is used because `BaseRetriever.get_relevant_documents` is deprecated
    # (langchain-core 0.1.46, removal announced for 0.3.0).
    hits = mmr_retriever.invoke(query)

    # Echo every retrieved document to stdout before returning.
    for hit in hits:
        print(hit)
    return str(hits)
@tool
def google_search(query: str) -> str:
    """Verbessere die Ergebnisse durch eine Suche über die Webseite der Versicherung. Erstelle eine neue Suchanfrage, um die Erfolgschancen zu verbesseren."""
    # NOTE(review): the German docstring is left verbatim — `@tool` is expected to use it
    # as the runtime tool description. English gloss: "Improve the results by searching
    # the insurer's website. Create a new search query to improve the chances of success."
    #
    # Runs a Google search (up to 3 results), stores the formatted hits through
    # `add_many`, and returns the formatted hits as a string. When the search yields
    # no real hits, the raw wrapper output is returned unchanged and nothing is stored.
    websearch = GoogleSearchAPIWrapper()

    # `results` returns a list with one dict per hit (not a dict).
    search_results: list = websearch.results(query, 3)
    print(search_results)

    # When nothing is found, the wrapper returns a single placeholder entry
    # ({"Result": "..."}) instead of an empty list. Checking `len(...) > 1` to detect
    # that case also discarded a single genuine hit, so look for the `link` key that
    # every real hit carries instead.
    has_hits = any("link" in entry for entry in search_results)
    if has_hits:
        cleaner_sources = format_search_results(search_results)
        parsed_csources = parse_list_to_dicts(cleaner_sources)
        add_many(parsed_csources)
    else:
        cleaner_sources = search_results
    return str(cleaner_sources)