Spaces:

LeoWalker
/

ResumeParser

Running

App Files Files Community

LeoWalker committed on Apr 30

Commit

828df56

•

1 Parent(s): 024bd40

Updated the Streamlit app and the Resume object to use newer libraries, and updated requirements.txt accordingly. Works with OpenAI and Anthropic.

Browse files

Files changed (3) hide show

app.py +55 -45
requirements.txt +82 -10
resume_template.py +3 -3

app.py CHANGED Viewed

@@ -2,15 +2,22 @@ from dotenv import load_dotenv
 import io
 import streamlit as st
 from langchain.prompts import PromptTemplate
-from langchain.output_parsers import PydanticOutputParser
-from langchain_community.chat_models import ChatAnthropic
 from langchain_openai import ChatOpenAI
 from pydantic import ValidationError
 from resume_template import Resume
 from json import JSONDecodeError
 import PyPDF2
 import json
 import time
 load_dotenv()
@@ -33,14 +40,13 @@ def pdf_to_string(file):
     file.close()
     return text
 def extract_resume_fields(full_text, model):
     """
     Analyze a resume text and extract structured information using a specified language model.
     Parameters:
     full_text (str): The text content of the resume.
     model (str): The language model object to use for processing the text.
     Returns:
     dict: A dictionary containing structured information extracted from the resume.
     """
@@ -57,71 +63,75 @@ def extract_resume_fields(full_text, model):
         partial_variables={"response_template": parser.get_format_instructions()},
     )
     # Invoke the language model and process the resume
-    formatted_input = prompt_template.format_prompt(resume=full_text)
     llm = llm_dict.get(model, ChatOpenAI(temperature=0, model=model))
     # print("llm", llm)
-    output = llm.invoke(formatted_input.to_string())
     # print(output)  # Print the output object for debugging
-    try:
-        parsed_output = parser.parse(output.content)
-        json_output = parsed_output.json()
-        print(json_output)
-        return json_output
-    except ValidationError as e:
-        print(f"Validation error: {e}")
-        print(output)
-        return output.content
-    except JSONDecodeError as e:
-        print(f"JSONDecodeError error: {e}")
-        print(output)
-        return output.content
 st.title("Resume Parser")
-# Set up the LLM dictionary
 llm_dict = {
-    # "gpt-4-1106-preview": ChatOpenAI(temperature=0, model="gpt-4-1106-preview"),
-    # "gpt-4": ChatOpenAI(temperature=0, model="gpt-4"),
-    "gpt-3.5-turbo-1106": ChatOpenAI(temperature=0, model="gpt-3.5-turbo-1106"),
-    # "claude-2": ChatAnthropic(model="claude-2", max_tokens=20_000),
-    "claude-instant-1": ChatAnthropic(model="claude-instant-1", max_tokens=20_000)
 }
-# Add a Streamlit dropdown menu for model selection
 selected_model = st.selectbox("Select a model", list(llm_dict.keys()))
-# Add a file uploader
 uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
-# Check if a file is uploaded
 if uploaded_file is not None:
-    # Add a button to trigger the conversion
     if st.button("Convert PDF to Text"):
-        start_time = time.time()  # Start the timer
-        # Convert the uploaded file to a string
         text = pdf_to_string(uploaded_file)
-        # Extract resume fields using the selected model
         extracted_fields = extract_resume_fields(text, selected_model)
-        end_time = time.time()  # Stop the timer
-        elapsed_time = end_time - start_time  # Calculate the elapsed time
-        # Display the elapsed time
         st.write(f"Extraction completed in {elapsed_time:.2f} seconds")
-        # # Display the extracted fields on the Streamlit app
-        # st.json(extracted_fields)
-        # If extracted_fields is a JSON string, convert it to a dictionary
-        if isinstance(extracted_fields, str):
-            extracted_fields = json.loads(extracted_fields)
-        for key, value in extracted_fields.items():
-            st.write(f"{key}: {value}")

 import io
 import streamlit as st
 from langchain.prompts import PromptTemplate
+from langchain_core.output_parsers import PydanticOutputParser
+from langchain_anthropic import ChatAnthropic
 from langchain_openai import ChatOpenAI
 from pydantic import ValidationError
+from langchain_core.pydantic_v1 import BaseModel, Field
 from resume_template import Resume
 from json import JSONDecodeError
 import PyPDF2
 import json
 import time
+import os
+# Set the LANGCHAIN_TRACING_V2 environment variable to 'true'
+os.environ['LANGCHAIN_TRACING_V2'] = 'true'
+# Set the LANGCHAIN_PROJECT environment variable to the desired project name
+os.environ['LANGCHAIN_PROJECT'] = 'Resume_Project'
 load_dotenv()
     file.close()
     return text
 def extract_resume_fields(full_text, model):
     """
     Analyze a resume text and extract structured information using a specified language model.
     Parameters:
     full_text (str): The text content of the resume.
     model (str): The language model object to use for processing the text.
     Returns:
     dict: A dictionary containing structured information extracted from the resume.
     """
         partial_variables={"response_template": parser.get_format_instructions()},
     )
     # Invoke the language model and process the resume
+    # formatted_input = prompt_template.format_prompt(resume=full_text)
     llm = llm_dict.get(model, ChatOpenAI(temperature=0, model=model))
     # print("llm", llm)
+    # output = llm.invoke(formatted_input.to_string())
+    chain = prompt_template | llm | parser
+    output = chain.invoke(full_text)
     # print(output)  # Print the output object for debugging
+    print(output)
+    return output
+    # try:
+    #     parsed_output = parser.parse(output.content)
+    #     json_output = parsed_output.json()
+    #     print(json_output)
+    #     return json_output
+    # except ValidationError as e:
+    #     print(f"Validation error: {e}")
+    #     print(output)
+    #     return output.content
+    # except JSONDecodeError as e:
+    #     print(f"JSONDecodeError error: {e}")
+    #     print(output)
+    #     return output.content
+def display_extracted_fields(obj, section_title=None, indent=0):
+    """
+    Recursively write the fields of an extracted object to the Streamlit page.
+    Parameters:
+    obj: An iterable of (field_name, field_value) pairs. Presumably the model
+        object returned by extract_resume_fields -- confirm against the parser setup.
+    section_title (str, optional): When truthy, rendered with st.subheader before the fields.
+    indent (int): Number of leading spaces prepended to each written line; it is
+        increased by one for every level of nesting.
+    Returns:
+    None. All output goes through st.subheader / st.write.
+    """
+    if section_title:
+        st.subheader(section_title)
+    for field_name, field_value in obj:
+        if isinstance(field_value, BaseModel):
+            # Nested model: recurse and use the field name as the subsection heading.
+            display_extracted_fields(field_value, field_name, indent + 1)
+        elif isinstance(field_value, list):
+            # List field: write the field name once, then one entry per item.
+            st.write(" " * indent + field_name + ":")
+            for item in field_value:
+                if isinstance(item, BaseModel):
+                    # Model items are expanded field by field, without a heading of their own.
+                    display_extracted_fields(item, None, indent + 1)
+                else:
+                    st.write(" " * (indent + 1) + "- " + str(item))
+        else:
+            # Any other value (including None) is shown through str().
+            # NOTE(review): the indentation is plain leading spaces; st.write presumably
+            # renders strings as Markdown, where leading spaces may not show up as
+            # visual nesting -- confirm in the running app.
+            st.write(" " * indent + field_name + ": " + str(field_value))
+# --- Streamlit page: pick a model, upload a PDF, run the extraction, show the result ---
 st.title("Resume Parser")
+# Chat models offered in the dropdown, keyed by the label shown to the user.
+# extract_resume_fields looks the selected label up in this dict.
 llm_dict = {
+    "gpt-3.5-turbo": ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo"),
+    "sonnet": ChatAnthropic(model_name="claude-3-sonnet-20240229"),
 }
 selected_model = st.selectbox("Select a model", list(llm_dict.keys()))
 uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
 if uploaded_file is not None:
+    # Nothing runs until the user presses the button.
     if st.button("Convert PDF to Text"):
+        # The timer covers both the PDF-to-text step and the model call.
+        start_time = time.time()
         text = pdf_to_string(uploaded_file)
         extracted_fields = extract_resume_fields(text, selected_model)
+        end_time = time.time()
+        elapsed_time = end_time - start_time
         st.write(f"Extraction completed in {elapsed_time:.2f} seconds")
+        # Render the extracted object field by field under a single heading.
+        display_extracted_fields(extracted_fields, "Extracted Resume Fields")
+        # for key, value in extracted_fields.items():
+        #     st.write(f"{key}: {value}")

requirements.txt CHANGED Viewed

@@ -1,10 +1,82 @@
-streamlit
-python-dotenv
-pydantic
-PyPDF2
-openai
-anthropic
-langchain
-langchain-community
-langchain_openai

+aiohttp==3.9.5
+aiosignal==1.3.1
+altair==5.3.0
+annotated-types==0.6.0
+anthropic==0.25.7
+anyio==4.3.0
+attrs==23.2.0
+blinker==1.8.1
+cachetools==5.3.3
+certifi==2024.2.2
+charset-normalizer==3.3.2
+click==8.1.7
+dataclasses-json==0.6.5
+defusedxml==0.7.1
+distro==1.9.0
+filelock==3.14.0
+frozenlist==1.4.1
+fsspec==2024.3.1
+gitdb==4.0.11
+GitPython==3.1.43
+h11==0.14.0
+httpcore==1.0.5
+httpx==0.27.0
+huggingface-hub==0.22.2
+idna==3.7
+Jinja2==3.1.3
+jsonpatch==1.33
+jsonpointer==2.4
+jsonschema==4.21.1
+jsonschema-specifications==2023.12.1
+langchain==0.1.16
+langchain-anthropic==0.1.11
+langchain-community==0.0.34
+langchain-core==0.1.46
+langchain-openai==0.1.4
+langchain-text-splitters==0.0.1
+langsmith==0.1.52
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+marshmallow==3.21.1
+mdurl==0.1.2
+multidict==6.0.5
+mypy-extensions==1.0.0
+numpy==1.26.4
+openai==1.24.0
+orjson==3.10.1
+packaging==23.2
+pandas==2.2.2
+pillow==10.3.0
+protobuf==4.25.3
+pyarrow==16.0.0
+pydantic==2.7.1
+pydantic_core==2.18.2
+pydeck==0.9.0
+Pygments==2.17.2
+PyPDF2==3.0.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+pytz==2024.1
+PyYAML==6.0.1
+referencing==0.35.0
+regex==2024.4.28
+requests==2.31.0
+rich==13.7.1
+rpds-py==0.18.0
+six==1.16.0
+smmap==5.0.1
+sniffio==1.3.1
+SQLAlchemy==2.0.29
+streamlit==1.33.0
+tenacity==8.2.3
+tiktoken==0.6.0
+tokenizers==0.19.1
+toml==0.10.2
+toolz==0.12.1
+tornado==6.4
+tqdm==4.66.2
+typing-inspect==0.9.0
+typing_extensions==4.11.0
+tzdata==2024.1
+urllib3==2.2.1
+yarl==1.9.4

resume_template.py CHANGED Viewed

@@ -1,5 +1,5 @@
-from pydantic import BaseModel, Field, ValidationError
 from typing import List, Optional, Dict
 # The following classes are for the resume template
@@ -9,8 +9,8 @@ class ContactInfo(BaseModel):
     linkedin: Optional[str] = None
 class PersonalDetails(BaseModel):
-    full_name: str
-    contact_info: ContactInfo
     professional_summary: Optional[str] = None
 class Education(BaseModel):

 from typing import List, Optional, Dict
+from langchain_core.pydantic_v1 import BaseModel, Field
 # The following classes are for the resume template
     linkedin: Optional[str] = None
 class PersonalDetails(BaseModel):
+    full_name: str = None
+    contact_info: ContactInfo
     professional_summary: Optional[str] = None
 class Education(BaseModel):