# Spaces: Runtime error
from langchain_community.document_loaders import PyPDFLoader | |
from langchain.text_splitter import CharacterTextSplitter | |
from langchain_community.embeddings import BedrockEmbeddings | |
from langchain_aws import ChatBedrock | |
from langchain_community.vectorstores import Chroma | |
# The AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_DEFAULT_REGION environment
# variables must be set from the operating system's command line.
def initLLM():
    """Create the Bedrock chat model object used by this script.

    Returns:
        A ``ChatBedrock`` instance built with the model id
        ``anthropic.claude-3-sonnet-20240229-v1:0``.
    """
    llm = ChatBedrock(model_id="anthropic.claude-3-sonnet-20240229-v1:0")
    return llm
def initEmbedder():
    """Create the Bedrock embeddings object used by this script.

    Returns:
        A ``BedrockEmbeddings`` instance built with the model id
        ``amazon.titan-embed-text-v1``.
    """
    embedder = BedrockEmbeddings(model_id='amazon.titan-embed-text-v1')
    return embedder
def initChromaDB(document_chunks, embbeder):
    """Build a Chroma vector store from the given document chunks.

    Args:
        document_chunks: Documents passed as the first argument to
            ``Chroma.from_documents``.
        embbeder: Embedding object passed as the ``embedding`` keyword
            (the parameter name is kept as-is for existing callers).

    Returns:
        Whatever ``Chroma.from_documents`` returns, created with
        ``persist_directory='./data'``.
    """
    store = Chroma.from_documents(
        document_chunks,
        embedding=embbeder,
        persist_directory='./data',
    )
    return store
def embedding(thePathFile, embedder):
    """Load a PDF, split it into chunks and index them in Chroma.

    Args:
        thePathFile: Path of the PDF file handed to ``PyPDFLoader``.
        embedder: Embedding object forwarded to ``initChromaDB``. When it is
            ``None`` the vector-store step is skipped.

    Returns:
        The value returned by ``initChromaDB``, or ``None`` when ``embedder``
        is ``None``.
    """
    # Load the PDF file.
    loader = PyPDFLoader(thePathFile)
    pages = loader.load()
    print(len(pages))

    # Split on newlines into chunks of 500 characters with an overlap of 100.
    document_splitter = CharacterTextSplitter(
        separator='\n', chunk_size=500, chunk_overlap=100
    )
    document_chunks = document_splitter.split_documents(pages)
    print(len(document_chunks))
    print(embedder)

    # Bind the result up front so the final return is valid even when no
    # embedder was supplied (previously this path hit an unbound local).
    vectorDB = None
    if embedder is not None:
        print("Cargando a la base vectorial...")
        vectorDB = initChromaDB(document_chunks, embedder)
        print("Fin carga")
    return vectorDB
# Run the application.
if __name__ == "__main__":
    # The chat model is created here but not used further in this section.
    bedrock_llm = initLLM()
    bedrock_embedder = initEmbedder()

    # Ingest the PDF and show the resulting vector store object.
    chromaDB = embedding("el principito.pdf", bedrock_embedder)
    print(chromaDB)