import streamlit as st import pandas as pd import torch import numpy as np from transformers import AutoTokenizer, AutoModel import faiss from streamlit.errors import StreamlitAPIException import urllib.parse import os os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE' # Load model and tokenizer model_name = "sentence-transformers/msmarco-distilbert-base-v3" tokenizer = AutoTokenizer.from_pretrained(model_name) model = AutoModel.from_pretrained(model_name) # Load data books = pd.read_csv('data_final_version.csv') MAX_LEN = 300 def embed_bert_cls(text, model=model, tokenizer=tokenizer): t = tokenizer(text, padding=True, truncation=True, return_tensors='pt', max_length=MAX_LEN) with torch.no_grad(): model_output = model(**{k: v.to(model.device) for k, v in t.items()}) embeddings = model_output.last_hidden_state[:, 0, :] embeddings = torch.nn.functional.normalize(embeddings) return embeddings[0].cpu().squeeze() # Load embeddings embeddings = np.loadtxt('embeddings.txt') embeddings_tensor = [torch.tensor(embedding) for embedding in embeddings] # Create Faiss index embeddings_matrix = np.stack(embeddings) index = faiss.IndexFlatIP(embeddings_matrix.shape[1]) index.add(embeddings_matrix) # CSS стили для заднего фона background_image = """ """ # Вставляем CSS стили в приложение Streamlit st.markdown(background_image, unsafe_allow_html=True) # CSS стили для виджетов custom_css = """ """ # Вставляем CSS стили для окошка с прозрачным фоном transparent_title = """ """ transparent_box = """ """ # Вставляем CSS стили в приложение Streamlit st.markdown(transparent_title, unsafe_allow_html=True) st.markdown(transparent_box, unsafe_allow_html=True) # Streamlit interface st.markdown('
Название книги: {books['title'][i]}
Автор: {books['author'][i]}
Описание:{books['annotation'][i]}")
Оценка сходства: {similarity_percent:.2f}%