awinml committed on
Commit
6966109
1 Parent(s): ac5b87a

Upload 2 files

Browse files
Files changed (1) hide show
  1. utils.py +25 -18
utils.py CHANGED
@@ -64,13 +64,12 @@ def save_key(api_key):
64
  def query_pinecone(
65
  query, top_k, model, index, year, quarter, ticker, participant_type, threshold=0.25
66
  ):
67
- # generate embeddings for the query
68
- xq = model.encode([query]).tolist()
69
-
70
  if participant_type == "Company Speaker":
71
- participant = "Speaker"
72
  else:
73
- participant = participant_type
 
 
74
 
75
  if year == "All":
76
  if quarter == "All":
@@ -126,7 +125,6 @@ def query_pinecone(
126
  },
127
  include_metadata=True,
128
  )
129
-
130
  # filter the context passages based on the score threshold
131
  filtered_matches = []
132
  for match in xc["matches"]:
@@ -167,7 +165,7 @@ def text_lookup(data, sentence_ids):
167
 
168
  def generate_prompt(query_text, context_list):
169
  context = " ".join(context_list)
170
- prompt = f"""Answer the question as accurately as possible using the provided context. Try to include as many key details as possible.
171
  Context: {context}
172
  Question: {query_text}
173
  Answer:"""
@@ -204,17 +202,26 @@ def gpt_model(prompt):
204
 
205
 
206
  def retrieve_transcript(data, year, quarter, ticker):
207
- row = (
208
- data.loc[
209
- (data.Year == int(year))
210
- & (data.Quarter == quarter)
211
- & (data.Ticker == ticker),
212
- ["File_Name"],
213
- ]
214
- .drop_duplicates()
215
- .iloc[0, 0]
216
- )
217
- print(row)
 
 
 
 
 
 
 
 
 
218
  # convert row to a string and join values with "-"
219
  # row_str = "-".join(row.astype(str)) + ".txt"
220
  open_file = open(
 
64
  def query_pinecone(
65
  query, top_k, model, index, year, quarter, ticker, participant_type, threshold=0.25
66
  ):
 
 
 
67
  if participant_type == "Company Speaker":
68
+ participant = "Answer"
69
  else:
70
+ participant = "Question"
71
+ # generate embeddings for the query
72
+ xq = model.encode([query]).tolist()
73
 
74
  if year == "All":
75
  if quarter == "All":
 
125
  },
126
  include_metadata=True,
127
  )
 
128
  # filter the context passages based on the score threshold
129
  filtered_matches = []
130
  for match in xc["matches"]:
 
165
 
166
  def generate_prompt(query_text, context_list):
167
  context = " ".join(context_list)
168
+ prompt = f"""Answer the question in 5 detailed points as accurately as possible using the provided context. Try to include as many key details as possible.
169
  Context: {context}
170
  Question: {query_text}
171
  Answer:"""
 
202
 
203
 
204
  def retrieve_transcript(data, year, quarter, ticker):
205
+ if year == "All" or quarter == "All":
206
+ row = (
207
+ data.loc[
208
+ (data.Ticker == ticker),
209
+ ["File_Name"],
210
+ ]
211
+ .drop_duplicates()
212
+ .iloc[0, 0]
213
+ )
214
+ else:
215
+ row = (
216
+ data.loc[
217
+ (data.Year == int(year))
218
+ & (data.Quarter == quarter)
219
+ & (data.Ticker == ticker),
220
+ ["File_Name"],
221
+ ]
222
+ .drop_duplicates()
223
+ .iloc[0, 0]
224
+ )
225
  # convert row to a string and join values with "-"
226
  # row_str = "-".join(row.astype(str)) + ".txt"
227
  open_file = open(