Showing banner when issue with Weaviate

Files changed:
- app.py  +81 -62
- backend.py  +0 -1

app.py  CHANGED

@@ -9,7 +9,7 @@ from app_features import (convert_seconds, generate_prompt_series, search_result
 expand_content)
 from retrieval_evaluation import execute_evaluation, calc_hit_rate_scores
 from llama_index.finetuning import EmbeddingQAFinetuneDataset
-
+
 from openai import BadRequestError
 from reranker import ReRanker
 from loguru import logger
@@ -147,7 +147,7 @@ def download_model(model_name_or_path, model_local_path):

 #%%
 # for streamlit online, we must download the model from google drive
-
+# because github LFS doesn't work on forked repos
 def check_model(model_name_or_path):

     model_name = model_name_or_path.split('/')[-1] # remove 'sentence-transformers'
@@ -163,18 +163,27 @@ def check_model(model_name_or_path):

 #%% instantiate Weaviate client
 def get_weaviate_client(api_key, url, model_name_or_path, openai_api_key):
-
-
-
-
-
-
-
-
-
-
-
-
+    try:
+        client = WeaviateClient(api_key, url,
+                                model_name_or_path=model_name_or_path,
+                                openai_api_key=openai_api_key)
+    except Exception:
+        # client not available, wrong key, expired free sandbox etc
+        return None, None
+
+    try:
+        client.display_properties.append('summary')
+        # available_classes = sorted(client.show_classes()) # doesn't work anymore
+        # print(available_classes)
+        available_classes = sorted([c['class'] for c in client.schema.get()['classes']])
+        # print(available_classes)
+        # st.write(f"Available classes: {available_classes}")
+        # st.write(f"Available classes type: {type(available_classes)}")
+        logger.info(available_classes)
+        return client, available_classes
+
+    except Exception:
+        return client, []


 ##############
@@ -260,56 +269,60 @@ def main():
     st.write("Finetuning not available on Streamlit online because of space limitations")

     check_model(model_name_or_path)
-
-
-        print(available_classes)
-
-    except Exception as e:
-        # Weaviate doesn't know this model, maybe we're just finetuning a model
-        st.sidebar.write(f"Model unknown to Weaviate")
-        st.stop()
-
-    start_class = 'Impact_theory_all_mpnet_base_v2_finetuned'
-
-    class_name = st.selectbox(
-        label='Class Name:',
-        options=available_classes,
-        index=available_classes.index(start_class),
-        placeholder='Select Class Name'
-    )
+    client, available_classes = get_weaviate_client(Wapi_key, url, model_name_or_path, openai_api_key)
+    print("Available classes:", available_classes)

-
-
-
-
-    with c1:
-        show_metrics = st.button('Show Metrics on Golden set')
-        if show_metrics:
-            # we must add it because the hybrid search toggle could hide it
-            alpha_input2 = st.slider(label='Alpha',min_value=0.00, max_value=1.00, value=0.40, step=0.05, key=2)
-
-            # _, center, _ = st.columns([3, 5, 3])
-            # with center:
-            #     st.text("Metrics")
-    with c2:
-        with st.spinner(''):
-            metrics = execute_evaluation(golden_dataset, class_name, client, alpha=alpha_input2)
-    with c1:
-        kw_hit_rate = metrics['kw_hit_rate']
-        kw_mrr = metrics['kw_mrr']
-        hybrid_hit_rate = metrics['hybrid_hit_rate']
-        vector_hit_rate = metrics['vector_hit_rate']
-        vector_mrr = metrics['vector_mrr']
-        total_misses = metrics['total_misses']
-
-        st.text(f"KW hit rate: {kw_hit_rate}")
-        st.text(f"Vector hit rate: {vector_hit_rate}")
-        st.text(f"Hybrid hit rate: {hybrid_hit_rate}")
-        st.text(f"Hybrid MRR: {vector_mrr}")
-        st.text(f"Total misses: {total_misses}")
+    if client is None:
+        # maybe the free sandbox has expired, or the api key is wrong
+        st.sidebar.write(f"Weaviate sandbox not accessible or expired")
+        # st.stop()

-
+    elif available_classes:
+        start_class = 'Impact_theory_all_mpnet_base_v2_finetuned'

+        class_name = st.selectbox(
+            label='Class Name:',
+            options=available_classes,
+            index=available_classes.index(start_class),
+            placeholder='Select Class Name'
+        )
+
+        st.write("----------")
+
+        if we_are_not_online:
+            c1,c2 = st.columns([8,1])
+            with c1:
+                show_metrics = st.button('Show Metrics on Golden set')
+                if show_metrics:
+                    # we must add it because the hybrid search toggle could hide it
+                    alpha_input2 = st.slider(label='Alpha',min_value=0.00, max_value=1.00, value=0.40, step=0.05, key=2)
+
+                    # _, center, _ = st.columns([3, 5, 3])
+                    # with center:
+                    #     st.text("Metrics")
+            with c2:
+                with st.spinner(''):
+                    metrics = execute_evaluation(golden_dataset, class_name, client, alpha=alpha_input2)
+            with c1:
+                kw_hit_rate = metrics['kw_hit_rate']
+                kw_mrr = metrics['kw_mrr']
+                hybrid_hit_rate = metrics['hybrid_hit_rate']
+                vector_hit_rate = metrics['vector_hit_rate']
+                vector_mrr = metrics['vector_mrr']
+                total_misses = metrics['total_misses']
+
+                st.text(f"KW hit rate: {kw_hit_rate}")
+                st.text(f"Vector hit rate: {vector_hit_rate}")
+                st.text(f"Hybrid hit rate: {hybrid_hit_rate}")
+                st.text(f"Hybrid MRR: {vector_mrr}")
+                st.text(f"Total misses: {total_misses}")
+
+        st.write("----------")
+    else:
+        # Weaviate doesn't know this model, maybe we're just finetuning a model
+        st.sidebar.write(f"Model Unknown to Weaviate")
+
+
     st.title("Chat with the Impact Theory podcasts!")
     # st.image('./assets/impact-theory-logo.png', width=400)
     st.image('assets/it_tom_bilyeu.png', use_column_width=True)
@@ -317,9 +330,15 @@ def main():
     st.write('\n')
     # st.stop()

-
     st.write("\u21D0 Open the sidebar to change Search settings \n ") # https://home.unicode.org also 21E0, 21B0 B2 D0

+    if client is None:
+        st.write("Weaviate sandbox not accessible or expired!!! Stopping execution!")
+        st.stop()
+    elif not available_classes:
+        # we have to stop here, to exit the 'with st.sidebar' block and display the banner at least
+        st.stop()
+
     if not hybrid_search:
         st.stop()

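For context on the change above: the commented-out `client.show_classes()` call is noted as no longer working, so the new code reads class names from the schema instead. The sketch below shows the same connect-then-list contract against the raw weaviate-client v3 API (where `client.schema.get()` returns a dict with a `classes` list); it assumes the repo's `WeaviateClient` wrapper exposes the same `.schema` attribute, and the helper name `connect_and_list_classes` is illustrative, not part of the commit.

import weaviate


def connect_and_list_classes(api_key: str, url: str):
    """Return (client, class_names); (None, None) when the sandbox is unreachable."""
    try:
        # AuthApiKey is the v3 client's way to pass a Weaviate Cloud/sandbox API key
        client = weaviate.Client(
            url=url,
            auth_client_secret=weaviate.AuthApiKey(api_key=api_key),
        )
    except Exception:
        # wrong key, expired free sandbox, network error, ...
        return None, None

    try:
        # schema.get() -> {"classes": [{"class": "Impact_theory_...", ...}, ...]}
        class_names = sorted(c["class"] for c in client.schema.get()["classes"])
        return client, class_names
    except Exception:
        # reachable, but the schema lookup failed (empty or misconfigured sandbox)
        return client, []

main() then branches on the same return shape as in the diff: (None, None) triggers the sidebar banner and, outside the sidebar block, the "Stopping execution" message plus st.stop(); (client, []) falls through to the "Model Unknown to Weaviate" message; a non-empty class list enables the class selector and the metrics panel.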
|
backend.py
CHANGED
@@ -9,7 +9,6 @@ from typing import List, Dict, Tuple, Union, Callable
|
|
9 |
# remote_path="./data",
|
10 |
# )
|
11 |
|
12 |
-
|
13 |
stub = modal.Stub("vector-search-project")
|
14 |
vector_search = modal.Image.debian_slim().pip_install(
|
15 |
"sentence_transformers==2.2.2", "llama_index==0.9.6.post1", "angle_emb==0.1.5"
|
|
|
9 |
# remote_path="./data",
|
10 |
# )
|
11 |
|
|
|
12 |
stub = modal.Stub("vector-search-project")
|
13 |
vector_search = modal.Image.debian_slim().pip_install(
|
14 |
"sentence_transformers==2.2.2", "llama_index==0.9.6.post1", "angle_emb==0.1.5"
|
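backend.py only loses a stray blank line here, but the `stub`/`vector_search` pair it keeps is what pins the heavy dependencies into a container image. A hypothetical attachment of that image to a remote function is sketched below, written against the older Modal API that still exposes `modal.Stub` (matching the code above); the `embed` function and the model name are illustrative only.

import modal

stub = modal.Stub("vector-search-project")
vector_search = modal.Image.debian_slim().pip_install(
    "sentence_transformers==2.2.2", "llama_index==0.9.6.post1", "angle_emb==0.1.5"
)


@stub.function(image=vector_search)
def embed(texts: list[str]) -> list[list[float]]:
    # heavy imports run inside the container that has the pinned packages
    from sentence_transformers import SentenceTransformer

    model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
    return model.encode(texts).tolist()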