Spaces:

LeoWalker
/

ResumeParser

Running

App Files Files Community

ResumeParser / app.py

LeoWalker

updated how to write the fields out

a7ddb9f 10 months ago

raw

history blame

4.18 kB

	from dotenv import load_dotenv
	import io
	import streamlit as st
	from langchain.prompts import PromptTemplate
	from langchain.output_parsers import PydanticOutputParser
	from langchain_community.chat_models import ChatAnthropic
	from langchain_openai import ChatOpenAI
	from pydantic import ValidationError
	from resume_template import Resume
	from json import JSONDecodeError
	import PyPDF2
	import json
	import time

	# Load environment variables from a .env file before any model client is
	# built (presumably the API keys the chat model clients read — confirm).
	load_dotenv()

	def pdf_to_string(file):
	    """
	    Convert a PDF file to a string.

	    The text of every page is extracted in page order and concatenated
	    without any separator. The file object is closed before this function
	    returns, including when reading or text extraction raises.

	    Parameters:
	    file (io.BytesIO): A file-like object representing the PDF file.

	    Returns:
	    str: The extracted text from the PDF.
	    """
	    try:
	        pdf_reader = PyPDF2.PdfReader(file)
	        # `or ""` keeps the join safe should extract_text() yield None or
	        # an empty value for a page with no extractable text.
	        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
	    finally:
	        # Release the file even when the PDF is malformed and parsing fails.
	        file.close()

	def extract_resume_fields(full_text, model):
	    """
	    Analyze a resume text and extract structured information using a specified language model.

	    The prompt is read from prompts/resume_extraction.prompt, filled with the
	    resume text and the parser's format instructions, and sent to the model.

	    Parameters:
	    full_text (str): The text content of the resume.
	    model (str): Name of the language model to use. A name present in the
	        module-level llm_dict reuses the client stored there; any other
	        name is passed to ChatOpenAI as the model identifier.

	    Returns:
	    str: The parsed Resume serialized as a JSON string when the model's
	        reply can be parsed; otherwise the model's raw reply content.
	    """
	    with open("prompts/resume_extraction.prompt", "r", encoding="utf-8") as f:
	        template = f.read()

	    # The Resume object is imported from the local resume_template file
	    parser = PydanticOutputParser(pydantic_object=Resume)

	    prompt_template = PromptTemplate(
	        template=template,
	        input_variables=["resume"],
	        partial_variables={"response_template": parser.get_format_instructions()},
	    )
	    formatted_input = prompt_template.format_prompt(resume=full_text)

	    # dict.get(key, default) evaluates its default eagerly, which would build
	    # a ChatOpenAI client on every call even for models already in llm_dict.
	    # Construct the fallback only when the lookup misses.
	    llm = llm_dict.get(model)
	    if llm is None:
	        llm = ChatOpenAI(temperature=0, model=model)

	    # Invoke the language model and process the resume
	    output = llm.invoke(formatted_input.to_string())

	    try:
	        parsed_output = parser.parse(output.content)
	    except (ValidationError, JSONDecodeError, ValueError) as e:
	        # ValueError is included because the parser may wrap JSON/validation
	        # failures in its own ValueError-derived exception instead of
	        # letting the two specific types propagate.
	        print(f"Failed to parse model output ({type(e).__name__}): {e}")
	        print(output)
	        # Fall back to the raw reply so the caller can still show something.
	        return output.content

	    json_output = parsed_output.json()
	    print(json_output)
	    return json_output

	st.title("Resume Parser")

	# Set up the LLM dictionary: maps the model name shown in the dropdown to a
	# ready-to-use chat model client.
	llm_dict = {
	    "gpt-4-1106-preview": ChatOpenAI(temperature=0, model="gpt-4-1106-preview"),
	    "gpt-4": ChatOpenAI(temperature=0, model="gpt-4"),
	    "gpt-3.5-turbo-1106": ChatOpenAI(temperature=0, model="gpt-3.5-turbo-1106"),
	    "claude-2": ChatAnthropic(model="claude-2", max_tokens=20_000),
	    "claude-instant-1": ChatAnthropic(model="claude-instant-1", max_tokens=20_000),
	}

	# Add a Streamlit dropdown menu for model selection
	selected_model = st.selectbox("Select a model", list(llm_dict))

	# Add a file uploader
	uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")

	# The button is only rendered once a file has been uploaded; clicking it
	# triggers the conversion and extraction.
	if uploaded_file is not None and st.button("Convert PDF to Text"):
	    # perf_counter is monotonic, so the measured duration cannot be skewed
	    # by system clock adjustments.
	    start_time = time.perf_counter()

	    # Convert the uploaded file to a string
	    text = pdf_to_string(uploaded_file)

	    # Extract resume fields using the selected model
	    extracted_fields = extract_resume_fields(text, selected_model)

	    elapsed_time = time.perf_counter() - start_time

	    # Display the elapsed time
	    st.write(f"Extraction completed in {elapsed_time:.2f} seconds")

	    # extract_resume_fields returns a JSON string on success but the model's
	    # raw reply when parsing failed, so decoding here may legitimately fail.
	    if isinstance(extracted_fields, str):
	        try:
	            extracted_fields = json.loads(extracted_fields)
	        except JSONDecodeError:
	            st.error("The model output could not be parsed as JSON; showing the raw output.")

	    if isinstance(extracted_fields, dict):
	        for key, value in extracted_fields.items():
	            st.write(f"{key}: {value}")
	    else:
	        # Raw text (or any non-object JSON value) is shown as-is.
	        st.write(extracted_fields)