JobGPT / loaders.py
Aditya Patkar
Added cover letter generator module
6c6956f
raw
history blame
1.7 kB
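'''
This module contains the loaders: helpers that load a PDF, split it into
chunks, index the chunks in a Chroma vector store and build a RetrievalQA
chain on top of that store.
'''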
import os
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
# Read once at import time; importing this module raises KeyError when the
# OPENAI_API_KEY environment variable is not set.
openai_api_key=os.environ['OPENAI_API_KEY']
def load_pdf(path: str = "resume.pdf"):
    '''
    Load the PDF file located at `path` and return its documents.

    The file is read with PyPDFLoader; whatever its `load()` method
    returns is handed back to the caller unchanged.
    '''
    return PyPDFLoader(path).load()
def get_embeddings(documents, chunk_size: int = 1000, chunk_overlap: int = 0):
    '''
    Split documents into chunks and create the embeddings object for them.

    Despite the name, this function only constructs the OpenAIEmbeddings
    object; it does not call any of its embedding methods itself.

    Args:
        documents: documents to split (e.g. the result of `load_pdf`).
        chunk_size: `chunk_size` passed to CharacterTextSplitter.
            Defaults to 1000, the value that used to be hard-coded.
        chunk_overlap: `chunk_overlap` passed to CharacterTextSplitter.
            Defaults to 0, the value that used to be hard-coded.

    Returns:
        A `(texts, embeddings)` tuple: the split documents and the
        OpenAIEmbeddings instance created with the module-level API key.
    '''
    splitter = CharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    texts = splitter.split_documents(documents)
    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
    return texts, embeddings
def get_db(texts, embeddings):
    '''
    Build a Chroma vector store from split documents and an embeddings object.

    Both arguments are forwarded positionally to `Chroma.from_documents`
    and the resulting store is returned.
    '''
    return Chroma.from_documents(texts, embeddings)
def get_retriever(db, k: int = 1):
    '''
    Get a similarity-search retriever from a vector store.

    Args:
        db: object exposing `as_retriever(search_type=..., search_kwargs=...)`,
            such as the store returned by `get_db`.
        k: value placed under the "k" key of `search_kwargs`. Defaults to 1,
            the value that used to be hard-coded.

    Returns:
        Whatever `db.as_retriever` returns.
    '''
    return db.as_retriever(search_type="similarity", search_kwargs={"k": k})
def get_chain_for_pdf(path):
    '''
    Build a RetrievalQA chain over the PDF found at `path`.

    Steps: load the PDF, split it and create the embeddings object, index
    the chunks in a vector store, wrap the store in a retriever, and pass
    that retriever to a "stuff" RetrievalQA chain driven by ChatOpenAI at
    temperature 0. The chain is created with return_source_documents=True.
    '''
    pages = load_pdf(path)
    chunks, embedding_model = get_embeddings(pages)
    vector_store = get_db(chunks, embedding_model)
    pdf_retriever = get_retriever(vector_store)
    llm = ChatOpenAI(temperature=0, openai_api_key=openai_api_key)
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=pdf_retriever,
        return_source_documents=True,
    )