"""Helpers for a Streamlit resume parser.

Covers PDF text extraction, LLM-based extraction of structured resume fields,
rendering of the extracted fields, and building a JSON download link.
"""

import base64
import html
import io
import json
import os
import time
from json import JSONDecodeError

import PyPDF2
import streamlit as st
import streamlit.components.v1 as components
from dotenv import load_dotenv
from langchain.prompts import PromptTemplate
from langchain_anthropic import ChatAnthropic
from langchain_core.exceptions import OutputParserException
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_groq import ChatGroq
from langchain_openai import ChatOpenAI
from pydantic import ValidationError

from resume_template import Resume

# Set the LANGCHAIN_TRACING_V2 environment variable to 'true'
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
# Set the LANGCHAIN_PROJECT environment variable to the desired project name
os.environ['LANGCHAIN_PROJECT'] = 'Resume_Project'

load_dotenv()

# Maps the display name of each supported model to a ready-to-use chat client.
llm_dict = {
    "GPT 3.5 turbo": ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-0125"),
    "GPT 4o": ChatOpenAI(temperature=0, model_name="gpt-4o"),
    "Anthropic 3.5 Sonnet": ChatAnthropic(model="claude-3-5-sonnet-20240620"),
    "Llama 3 8b": ChatGroq(model_name="llama3-8b-8192"),
    "Llama 3 70b": ChatGroq(model_name="llama3-70b-8192"),
    "Gemma 7b": ChatGroq(model_name="gemma-7b-it"),
    "Mixtral 8x7b": ChatGroq(model_name="mixtral-8x7b-32768"),
    "Gemini 1.5 Pro": ChatGoogleGenerativeAI(model="gemini-1.5-pro"),
    "Gemini 1.5 Flash": ChatGoogleGenerativeAI(model="gemini-1.5-flash"),
}

# Field names that display_extracted_fields renders as bold section headers.
_SECTION_FIELDS = frozenset({
    "personal_details",
    "education",
    "work_experience",
    "projects",
    "skills",
    "certifications",
    "publications",
    "awards",
    "additional_sections",
})


def pdf_to_string(file):
    """
    Convert a PDF file to a string.

    The file object is closed before returning, even if extraction fails.

    Parameters:
        file (io.BytesIO): A file-like object representing the PDF file.

    Returns:
        str: The concatenated text of every page, in page order.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(file)
        # `or ""` guards against a page for which no text could be extracted.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
    finally:
        file.close()


class CustomOutputParserException(Exception):
    """Parsing error type kept for callers that raise or catch it."""


def extract_resume_fields(full_text, model):
    """
    Analyze a resume text and extract structured information using a language model.

    The chain is invoked up to three times; a parsing/validation failure on the
    last attempt is re-raised to the caller.

    Parameters:
        full_text (str): The text content of the resume.
        model (str): A key of ``llm_dict``; any other value is passed to
            ``ChatOpenAI`` as the model name.

    Returns:
        Resume: The parsed resume, as produced by the Pydantic output parser.

    Raises:
        OutputParserException, CustomOutputParserException, ValidationError:
            If the model output still cannot be parsed on the final attempt.
    """
    # The Resume object is imported from the local resume_template file
    with open("prompts/resume_extraction.prompt", "r", encoding="utf-8") as f:
        template = f.read()

    parser = PydanticOutputParser(pydantic_object=Resume)
    prompt_template = PromptTemplate(
        template=template,
        input_variables=["resume"],
        partial_variables={"response_template": parser.get_format_instructions()},
    )

    # Build the fallback client only when the name is not a known entry.
    if model in llm_dict:
        llm = llm_dict[model]
    else:
        llm = ChatOpenAI(temperature=0, model=model)

    chain = prompt_template | llm | parser

    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        try:
            output = chain.invoke(full_text)
        except (OutputParserException, CustomOutputParserException, ValidationError):
            # OutputParserException is what the LangChain parser raises on
            # malformed output, so it must be retried as well.
            if attempt == max_attempts:
                raise
            print(f"Parsing error occurred. Retrying (attempt {attempt + 1}/{max_attempts})...")
        else:
            print(output)
            return output


def display_extracted_fields(obj, section_title=None, indent=0):
    """
    Render the fields of a model object in the Streamlit app.

    Parameters:
        obj: An iterable of ``(field_name, field_value)`` pairs, such as a
            pydantic model instance.
        section_title (str | None): Optional subheader written before the fields.
        indent (int): Number of leading spaces prefixed to each written line.
    """
    if section_title:
        st.subheader(section_title)

    pad = " " * indent
    child_pad = " " * (indent + 1)

    for field_name, field_value in obj:
        label = field_name.replace('_', ' ').title()

        if field_name not in _SECTION_FIELDS:
            st.write(pad + f"{label}: " + str(field_value))
            continue

        st.write(pad + f"**{label}**:")
        if isinstance(field_value, BaseModel):
            display_extracted_fields(field_value, None, indent + 1)
        elif isinstance(field_value, list):
            for item in field_value:
                if isinstance(item, BaseModel):
                    display_extracted_fields(item, None, indent + 1)
                else:
                    st.write(child_pad + "- " + str(item))
        else:
            st.write(child_pad + str(field_value))


def get_json_download_link(json_str, download_name):
    """
    Build an HTML link that downloads the given JSON, pretty-printed.

    Parameters:
        json_str (str): A valid JSON document.
        download_name (str): File name suggested to the browser for the download.

    Returns:
        str: An ``<a>`` element whose href is a base64 data URI holding the
        JSON re-serialized with 4-space indentation.

    Raises:
        json.JSONDecodeError: If ``json_str`` is not valid JSON.
    """
    # Round-trip through json to normalize the formatting to 4-space indentation.
    data = json.loads(json_str)
    json_str_formatted = json.dumps(data, indent=4)
    b64 = base64.b64encode(json_str_formatted.encode()).decode()
    # Escape the file name so quotes or angle brackets cannot break the attribute.
    safe_name = html.escape(str(download_name), quote=True)
    href = (
        f'<a href="data:application/json;base64,{b64}" download="{safe_name}">'
        'Click here to download the JSON file</a>'
    )
    return href