# Spaces: Runtime error
import streamlit as st | |
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModel | |
import os | |
import pandas as pd | |
import numpy as np | |
from transformers import pipeline | |
from sklearn.metrics.pairwise import cosine_similarity, manhattan_distances, euclidean_distances | |
def load_model():
    """Fetch the pretrained tokenizer and base model for PubMedGPT.

    Both objects are created with ``from_pretrained`` from the
    ``stanford-crfm/pubmedgpt`` checkpoint identifier.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    # NOTE(review): Streamlit re-executes the script on each interaction, so
    # this load presumably repeats on every rerun -- consider caching it with
    # whatever resource cache the deployed Streamlit version offers.
    checkpoint = "stanford-crfm/pubmedgpt"
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModel.from_pretrained(checkpoint),
    )
# Module-level setup: load the model once per script run and wrap it in a
# feature-extraction pipeline that get_embedding() reads as a global.
tokenizer, model = load_model()
pipe = pipeline(
    task='feature-extraction',
    tokenizer=tokenizer,
    model=model,
)
def get_embedding(desc):
    """Embed a text description as the mean of its per-token feature vectors.

    Args:
        desc: Text handed to the module-level feature-extraction ``pipe``.

    Returns:
        A 1-D NumPy array holding the token features averaged over every
        token position (assumes ``pipe`` returns a regular nested list of
        numbers whose last axis is the feature axis -- TODO confirm).
    """
    features = np.asarray(pipe(desc))
    # Collapse all leading axes (batch, tokens) into one instead of calling
    # np.squeeze: squeeze also drops the token axis when the text yields a
    # single token, and the mean would then run over the feature axis and
    # return a scalar instead of a vector.
    token_vectors = features.reshape(-1, features.shape[-1])
    return token_vectors.mean(axis=0)
# Page-level configuration for the Streamlit app.
# NOTE(review): the page_icon literal looks mis-encoded (probably an emoji
# that lost its original bytes) -- confirm against the original file.
st.set_page_config(
    layout="wide",
    page_icon="๐งโ๐ป",
    page_title="Clinical Trials Best Match [Eye Diseases]",
)

# Constants
embs = []

# Heading
st.title('Clinical Trials Search')

# Precomputed embeddings that get_sim() compares the query against.
# Original note: "Gene File, 128 dim embeddings" -- TODO confirm this still
# describes data.npy.
# NOTE(review): get_sim() indexes each entry by 'data' and 'ids'; if the file
# stores pickled objects, np.load needs allow_pickle=True -- verify the format.
data = np.load(file="data.npy")
def get_sim(emb_desc, data):
    """Rank stored entries by cosine similarity to a query embedding.

    Args:
        emb_desc: Query embedding; flattened to a single row vector.
        data: Iterable of records, each indexable by ``'data'`` (the stored
            embedding) and ``'ids'`` (the value shown in the ``url`` column).
            Assumes one embedding vector per record with the same length as
            ``emb_desc`` -- TODO confirm against the data file.

    Returns:
        A ``pandas.DataFrame`` with columns ``url`` and ``scores``, sorted so
        the most similar entries come first.
    """
    # sklearn's cosine_similarity only accepts 2-D (n_samples, n_features)
    # input, so 1-D embeddings are promoted to a single-row matrix.
    query = np.asarray(emb_desc, dtype=float).reshape(1, -1)

    ids = []
    scores = []
    for record in data:
        candidate = np.asarray(record['data'], dtype=float).reshape(1, -1)
        similarity = cosine_similarity(query, candidate)
        ids.append(record['ids'])
        # The result is a 1x1 matrix; store the plain float so the scores
        # column is numeric and sortable.
        scores.append(float(similarity[0, 0]))

    ranked = pd.DataFrame(data={"url": ids, "scores": scores})
    # Higher cosine similarity means a closer match, so sort descending to
    # put the best matches at the top.
    return ranked.sort_values(by='scores', ascending=False)
# Search UI: collect a study description, then embed it and rank the stored
# entries only when the user asks for it.
st.subheader("๐ฎ Enter your clinical trial study description")
text = st.text_area('Example')

st.subheader("๐ป Hit Search")
if st.button("Compute"):
    if not text.strip():
        # The text area starts out empty; there is nothing to embed, so ask
        # for input instead of sending blank text to the model.
        st.warning("Please enter a study description before searching.")
    else:
        with st.spinner('Searching...'):
            # Embedding is done here rather than on every script rerun so the
            # model only runs when a search was actually requested.
            emb = get_embedding(text)
            df = get_sim(emb, data=data)
        st.dataframe(df)