# NOTE: the next lines are page-status text captured with this listing, not program code.
# Spaces:
# Sleeping
# Sleeping
import streamlit as st | |
import pandas as pd | |
import torch | |
import numpy as np | |
from transformers import AutoTokenizer, AutoModel | |
import faiss | |
from streamlit.errors import StreamlitAPIException | |
import urllib.parse | |
import os | |
# Presumably tolerates duplicate OpenMP runtimes loaded by torch and faiss
# -- TODO confirm this is still required on the deployment target.
os.environ.update({'KMP_DUPLICATE_LIB_OK': 'TRUE'})

# Maximum number of tokens passed to the encoder per query.
MAX_LEN = 300

# Sentence-embedding checkpoint shared by the tokenizer and the encoder.
model_name = "sentence-transformers/msmarco-distilbert-base-v3"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# Book metadata; the UI reads the image_url, title, author and annotation columns.
# NOTE(review): these loads run at module level -- consider caching them if
# start-up time becomes a problem.
books = pd.read_csv(filepath_or_buffer='data_final_version.csv')
def embed_bert_cls(text, model=model, tokenizer=tokenizer):
    """Return the L2-normalised first-token embedding of ``text``.

    The text is tokenized (padded, truncated to ``MAX_LEN`` tokens), run
    through ``model`` without gradient tracking, and the hidden state at
    position 0 of the last layer is taken as the sentence vector.

    Args:
        text: Query string (or batch of strings) to embed.
        model: Encoder exposing ``device`` and returning ``last_hidden_state``.
        tokenizer: Tokenizer matching ``model``.

    Returns:
        A CPU tensor holding the normalised embedding of the first input.
    """
    encoded = tokenizer(
        text,
        padding=True,
        truncation=True,
        return_tensors='pt',
        max_length=MAX_LEN,
    )
    # Move every tokenizer output onto the same device as the model weights.
    target_device = model.device
    model_inputs = {name: tensor.to(target_device) for name, tensor in encoded.items()}

    with torch.no_grad():
        output = model(**model_inputs)

    # Position 0 of the sequence axis is used as the pooled representation.
    first_token_states = output.last_hidden_state[:, 0, :]
    unit_vectors = torch.nn.functional.normalize(first_token_states)
    return unit_vectors[0].cpu().squeeze()
# Load the precomputed book embeddings: one vector per row of the text file.
# ndmin=2 keeps the array two-dimensional even if the file holds a single row.
embeddings = np.loadtxt('embeddings.txt', ndmin=2)

# Per-book tensors, read back by the UI when it computes the similarity score.
embeddings_tensor = [torch.tensor(embedding) for embedding in embeddings]

# Build the FAISS inner-product index.
# np.loadtxt yields float64, while FAISS works on C-contiguous float32
# matrices, so convert explicitly instead of relying on an implicit cast.
embeddings_matrix = np.ascontiguousarray(embeddings, dtype='float32')
index = faiss.IndexFlatIP(embeddings_matrix.shape[1])
index.add(embeddings_matrix)
# CSS for the page background image.
background_image = """
<style>
.stApp {
background-image: url("https://img.freepik.com/premium-photo/blur-image-book_9563-1100.jpg");
background-size: cover;
background-position: center;
background-repeat: no-repeat;
color: black;
}
</style>
"""
# Inject the background styles into the Streamlit app.
st.markdown(background_image, unsafe_allow_html=True)

# CSS for the input widgets, the button, and text inside result cards.
custom_css = """
<style>
/* Стиль для текстового ввода */
.stTextInput input {
color: black !important;
}
/* Стиль для числового ввода */
.stNumberInput input {
color: black !important;
}
/* Стиль для кнопки */
.stButton button {
color: black !important;
}
/* Стиль для текста в прозрачном блоке */
.transparent-box p {
color: black !important;
}
</style>
"""

# CSS for the semi-transparent title banner.
transparent_title = """
<style>
.transparent-title {
background-color: rgba(255, 255, 255, 0.7);
padding: 10px;
border-radius: 5px;
box-shadow: 0px 0px 10px rgba(0, 0, 0, 0.1);
color: black;
}
</style>
"""

# CSS for the semi-transparent result cards.
transparent_box = """
<style>
.transparent-box {
background-color: rgba(255, 255, 255, 0.7);
padding: 10px;
border-radius: 5px;
box-shadow: 0px 0px 10px rgba(0, 0, 0, 0.1);
}
</style>
"""

# Inject the remaining style sheets. custom_css was previously defined but
# never passed to st.markdown, so the widget/card text rules had no effect.
st.markdown(custom_css, unsafe_allow_html=True)
st.markdown(transparent_title, unsafe_allow_html=True)
st.markdown(transparent_box, unsafe_allow_html=True)
# Streamlit interface: title banner, query inputs, and the recommendation list.
st.markdown('<h1 class="transparent-title">🎓📚Приложение для рекомендаций книг📚🎓</h1>', unsafe_allow_html=True)

text = st.text_input('Введите ваш запрос для поиска книг:')
num_results = st.number_input('Количество результатов:', min_value=1, max_value=20, value=3)
recommend_button = st.button('Получить рекомендации')

# Search only once the user has entered a query and pressed the button.
if text and recommend_button:
    # Embed the query and look up its nearest neighbours in the FAISS index.
    query_embedding = embed_bert_cls(text)
    query_embedding = query_embedding.numpy().astype('float32')
    _, indices = index.search(np.expand_dims(query_embedding, axis=0), num_results)
    # Loop-invariant part of the cosine-similarity denominator.
    query_norm = np.linalg.norm(query_embedding)

    st.subheader('Рекомендации по вашему запросу:')
    for i in indices[0]:
        # FAISS fills missing results with -1 when the index holds fewer
        # vectors than requested; a negative index would otherwise silently
        # select the last book.
        if i < 0:
            continue

        # Cosine similarity between the query and the stored book vector.
        recommended_embedding = embeddings_tensor[i].numpy()
        similarity = np.dot(query_embedding, recommended_embedding) / (
            query_norm * np.linalg.norm(recommended_embedding)
        )
        similarity_percent = similarity * 100

        col1, col2 = st.columns([1, 3])
        with col1:
            image_url = books['image_url'][i]
            # Missing (NaN), empty, or whitespace-only URLs have no cover;
            # str() keeps the check from failing on non-string cell values.
            if pd.isna(image_url) or not str(image_url).strip():
                st.write("Обложка не найдена")
            else:
                try:
                    st.image(image_url, use_column_width=True)
                except Exception as e:
                    # Best effort: a broken cover must not abort the results.
                    st.write("Обложка не найдена")
                    st.write(e)
        with col2:
            # Book details on a semi-transparent card. The markup is built
            # without leading indentation so Markdown cannot treat it as a
            # code block, and the description line is now a well-formed <p>
            # (it previously ended in a stray '")' with no closing tag).
            # NOTE(review): field values are inserted into raw HTML unescaped
            # -- confirm the CSV content is trusted.
            card_html = (
                '<div class="transparent-box">'
                f"<p><b>Название книги:</b> {books['title'][i]}</p>"
                f"<p><b>Автор:</b> {books['author'][i]}</p>"
                f"<p><b>Описание:</b> {books['annotation'][i]}</p>"
                f"<p><b>Оценка сходства:</b> {similarity_percent:.2f}%</p>"
                '</div>'
            )
            st.markdown(card_html, unsafe_allow_html=True)
        # Horizontal rule between consecutive recommendations.
        st.write("---")