awinml committed on
Commit
dfc16db
1 Parent(s): 868b330

Upload 16 files

Browse files
app.py CHANGED
@@ -24,6 +24,7 @@ from utils.models import (
24
  get_flan_t5_model,
25
  get_mpnet_embedding_model,
26
  get_sgpt_embedding_model,
 
27
  get_spacy_model,
28
  get_splade_sparse_embedding_model,
29
  get_t5_model,
@@ -247,7 +248,7 @@ with st.sidebar:
247
 
248
  # Choose encoder model
249
 
250
- encoder_models_choice = ["MPNET", "SGPT", "Hybrid MPNET - SPLADE"]
251
  with st.sidebar:
252
  encoder_model = st.selectbox("Select Encoder Model", encoder_models_choice)
253
 
@@ -281,6 +282,15 @@ elif encoder_model == "SGPT":
281
  pinecone_index = pinecone.Index(pinecone_index_name)
282
  retriever_model = get_sgpt_embedding_model()
283
 
 
 
 
 
 
 
 
 
 
284
  elif encoder_model == "Hybrid MPNET - SPLADE":
285
  pinecone.init(
286
  api_key=st.secrets["pinecone_hybrid_splade_mpnet"],
 
24
  get_flan_t5_model,
25
  get_mpnet_embedding_model,
26
  get_sgpt_embedding_model,
27
+ get_instructor_embedding_model,
28
  get_spacy_model,
29
  get_splade_sparse_embedding_model,
30
  get_t5_model,
 
248
 
249
  # Choose encoder model
250
 
251
+ encoder_models_choice = ["MPNET", "Instructor", "SGPT", "Hybrid MPNET - SPLADE"]
252
  with st.sidebar:
253
  encoder_model = st.selectbox("Select Encoder Model", encoder_models_choice)
254
 
 
282
  pinecone_index = pinecone.Index(pinecone_index_name)
283
  retriever_model = get_sgpt_embedding_model()
284
 
285
+ elif encoder_model == "Instructor":
286
+ # Connect to pinecone environment
287
+ pinecone.init(
288
+ api_key=st.secrets["pinecone_instructor"], environment="us-west4-gcp-free"
289
+ )
290
+ pinecone_index_name = "week13-instructor-xl"
291
+ pinecone_index = pinecone.Index(pinecone_index_name)
292
+ retriever_model = get_instructor_embedding_model()
293
+
294
  elif encoder_model == "Hybrid MPNET - SPLADE":
295
  pinecone.init(
296
  api_key=st.secrets["pinecone_hybrid_splade_mpnet"],
requirements.txt CHANGED
@@ -11,3 +11,4 @@ transformers
11
  streamlit
12
  streamlit-scrollable-textbox
13
  openai
 
 
11
  streamlit
12
  streamlit-scrollable-textbox
13
  openai
14
+ InstructorEmbedding
utils/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (181 Bytes). View file
 
utils/__pycache__/entity_extraction.cpython-38.pyc ADDED
Binary file (4.04 kB). View file
 
utils/__pycache__/models.cpython-38.pyc ADDED
Binary file (4.28 kB). View file
 
utils/__pycache__/prompts.cpython-38.pyc ADDED
Binary file (16.1 kB). View file
 
utils/__pycache__/retriever.cpython-38.pyc ADDED
Binary file (4.27 kB). View file
 
utils/__pycache__/transcript_retrieval.cpython-38.pyc ADDED
Binary file (658 Bytes). View file
 
utils/__pycache__/vector_index.cpython-38.pyc ADDED
Binary file (1.77 kB). View file
 
utils/models.py CHANGED
@@ -9,6 +9,7 @@ import spacy_transformers
9
  import streamlit_scrollable_textbox as stx
10
  import torch
11
  from sentence_transformers import SentenceTransformer
 
12
  from tqdm import tqdm
13
  from transformers import (
14
  AutoModelForMaskedLM,
@@ -95,6 +96,13 @@ def get_sgpt_embedding_model():
95
  return model
96
 
97
 
 
 
 
 
 
 
 
98
  @st.experimental_memo
99
  def save_key(api_key):
100
  return api_key
 
9
  import streamlit_scrollable_textbox as stx
10
  import torch
11
  from sentence_transformers import SentenceTransformer
12
+ from InstructorEmbedding import INSTRUCTOR
13
  from tqdm import tqdm
14
  from transformers import (
15
  AutoModelForMaskedLM,
 
96
  return model
97
 
98
 
99
@st.experimental_singleton
def get_instructor_embedding_model():
    """Load and cache the Instructor embedding model.

    Returns:
        An ``INSTRUCTOR`` ("hkunlp/instructor-large") sentence-embedding
        model, cached for the Streamlit session via
        ``st.experimental_singleton`` so it is only loaded once.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # Bug fix: `device` was computed but never passed to the constructor,
    # so the model always loaded on CPU even when a GPU was available.
    model = INSTRUCTOR("hkunlp/instructor-large", device=device)
    return model
104
+
105
+
106
  @st.experimental_memo
107
  def save_key(api_key):
108
  return api_key