LeoWalker committed
Commit
d5e81f7
1 Parent(s): 47faf2e

app.py: Extended the available models to Llama 3, Gemma, and Mistral. Gemini still isn't working.

Files changed (1)
  1. app.py +54 -36
app.py CHANGED
@@ -1,10 +1,16 @@
 from dotenv import load_dotenv
 import io
 import streamlit as st
+import streamlit.components.v1 as components
+import base64
+
 from langchain.prompts import PromptTemplate
 from langchain_core.output_parsers import PydanticOutputParser
 from langchain_anthropic import ChatAnthropic
 from langchain_openai import ChatOpenAI
+from langchain_groq import ChatGroq
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_core.exceptions import OutputParserException
 from pydantic import ValidationError
 from langchain_core.pydantic_v1 import BaseModel, Field
 from resume_template import Resume
@@ -13,6 +19,8 @@ import PyPDF2
 import json
 import time
 import os
+
+
 # Set the LANGCHAIN_TRACING_V2 environment variable to 'true'
 os.environ['LANGCHAIN_TRACING_V2'] = 'true'
 
@@ -73,31 +81,15 @@ def extract_resume_fields(full_text, model):
             output = chain.invoke(full_text)
             print(output)
             return output
-        except ValidationError as e:
+        except (OutputParserException, ValidationError) as e:
             if attempt == max_attempts:
                 raise e
             else:
-                print(f"Validation error occurred. Retrying (attempt {attempt + 1}/{max_attempts})...")
+                print(f"Parsing error occurred. Retrying (attempt {attempt + 1}/{max_attempts})...")
                 attempt += 1
 
     return None
 
-    # try:
-    #     parsed_output = parser.parse(output.content)
-    #     json_output = parsed_output.json()
-    #     print(json_output)
-    #     return json_output
-
-    # except ValidationError as e:
-    #     print(f"Validation error: {e}")
-    #     print(output)
-    #     return output.content
-
-    # except JSONDecodeError as e:
-    #     print(f"JSONDecodeError error: {e}")
-    #     print(output)
-    #     return output.content
-
 def display_extracted_fields(obj, section_title=None, indent=0):
     if section_title:
         st.subheader(section_title)
@@ -117,33 +109,59 @@ def display_extracted_fields(obj, section_title=None, indent=0):
         else:
            st.write(" " * indent + f"{field_name.replace('_', ' ').title()}: " + str(field_value))
 
+def get_json_download_link(json_str, download_name):
+    # Convert the JSON string back to a dictionary
+    data = json.loads(json_str)
+
+    # Convert the dictionary back to a JSON string with 4 spaces indentation
+    json_str_formatted = json.dumps(data, indent=4)
+
+    b64 = base64.b64encode(json_str_formatted.encode()).decode()
+    href = f'<a href="data:file/json;base64,{b64}" download="{download_name}.json">Click here to download the JSON file</a>'
+    return href
+
+st.set_page_config(layout="wide")
 
 st.title("Resume Parser")
 
 llm_dict = {
     "GPT 3.5 turbo": ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo"),
     "Anthropic Sonnet": ChatAnthropic(model_name="claude-3-sonnet-20240229"),
+    "Llama 3": ChatGroq(model_name="llama3-70b-8192"),
+    "Gemma": ChatGroq(model_name="gemma-7b-it"),
+    "Mistral": ChatGroq(model_name="mixtral-8x7b-32768"),
+    # "Gemini 1.5 Pro": ChatGoogleGenerativeAI(model_name="gemini-1.5-pro-latest"),
 }
 
-selected_model = st.selectbox("Select a model", list(llm_dict.keys()))
+
 
 uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
+col1, col2 = st.columns(2)
+
+with col1:
+    selected_model1 = st.selectbox("Select Model 1", list(llm_dict.keys()), index=list(llm_dict.keys()).index("Llama 3"))
+
+with col2:
+    selected_model2 = st.selectbox("Select Model 2", list(llm_dict.keys()), index=list(llm_dict.keys()).index("Mistral"))
 
 if uploaded_file is not None:
-    if st.button("Convert PDF to Text"):
-        start_time = time.time()
-
-        text = pdf_to_string(uploaded_file)
-
-        extracted_fields = extract_resume_fields(text, selected_model)
-
-        end_time = time.time()
-        elapsed_time = end_time - start_time
-
-        st.write(f"Extraction completed in {elapsed_time:.2f} seconds")
-
-        display_extracted_fields(extracted_fields, "Extracted Resume Fields")
-
-        # for key, value in extracted_fields.items():
-        #     st.write(f"{key}: {value}")
-
+    text = pdf_to_string(uploaded_file)
+
+    if st.button("Extract Resume Fields"):
+        col1, col2 = st.columns(2)
+
+        with col1:
+            start_time = time.time()
+            extracted_fields1 = extract_resume_fields(text, selected_model1)
+            end_time = time.time()
+            elapsed_time = end_time - start_time
+            st.write(f"Extraction completed in {elapsed_time:.2f} seconds")
+            display_extracted_fields(extracted_fields1, "Extracted Resume Fields (Model 1)")
+
+        with col2:
+            start_time = time.time()
+            extracted_fields2 = extract_resume_fields(text, selected_model2)
+            end_time = time.time()
+            elapsed_time = end_time - start_time
+            st.write(f"Extraction completed in {elapsed_time:.2f} seconds")
+            display_extracted_fields(extracted_fields2, "Extracted Resume Fields (Model 2)")
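
Note on the commented-out Gemini entry: the commit message says Gemini isn't working yet. One possible cause, an assumption not confirmed by this commit, is the constructor keyword: unlike ChatOpenAI, ChatAnthropic, and ChatGroq, ChatGoogleGenerativeAI expects model rather than model_name, and it reads GOOGLE_API_KEY from the environment (which load_dotenv() can supply). A minimal sketch of how the entry might be enabled:

# Hypothetical fix, not part of this commit: pass `model`, not `model_name`,
# and make sure GOOGLE_API_KEY is set before constructing the client.
gemini_llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest", temperature=0)
# llm_dict["Gemini 1.5 Pro"] = gemini_llm

The new get_json_download_link helper is defined but not yet called anywhere in this diff. A possible way to wire it into each results column, assuming the objects returned by extract_resume_fields expose .json() (they are langchain_core.pydantic_v1 models, as the earlier commented-out code suggests):

# Hypothetical usage, not in this commit: render a download link under each result.
st.markdown(get_json_download_link(extracted_fields1.json(), "resume_model1"),
            unsafe_allow_html=True)

Streamlit's built-in st.download_button could also serve this purpose without the base64 data-URI workaround.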