File size: 4,498 Bytes
44de051 4692739 44de051 4692739 44de051 4692739 44de051 4692739 44de051 4692739 44de051 4692739 44de051 4692739 44de051 4692739 44de051 4692739 44de051 4692739 44de051 4692739 44de051 4692739 44de051 4692739 44de051 4692739 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
import streamlit as st
from fastai.collab import *
import torch
from torch import nn
import pickle
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import sentencepiece
import string
import requests
def load_stuff():
    """Load every artifact the app needs and return them together.

    Reads the pickled data loaders, rebuilds the collaborative-filtering
    learner and restores its trained weights, reads the books table, and
    loads the summarization tokenizer and model.

    Returns:
        tuple: ``(dls, learn, books, tokenizer, model)`` in that order.
    """
    # Load the data loader
    # NOTE(review): unpickling can execute arbitrary code; "dataloader.pkl"
    # must come from a trusted source.
    dls = pd.read_pickle("dataloader.pkl")
    # Create an instance of the model
    learn = collab_learner(dls, use_nn=True, layers=[20, 10], y_range=(0, 10.5))
    # Load the saved state dictionary
    state_dict = torch.load("myModel.pth", map_location=torch.device("cpu"))
    # Apply the loaded weights; without this call the learner keeps its
    # freshly initialised (untrained) parameters.
    # Assumes "myModel.pth" holds the state dict of ``learn.model`` -- TODO confirm.
    learn.model.load_state_dict(state_dict)
    # load books dataframe
    books = pd.read_csv("./data/BX_Books.csv", sep=";", encoding="latin-1")
    # load tokenizer
    tokenizer = AutoTokenizer.from_pretrained("pszemraj/pegasus-x-large-book-summary")
    # load model
    # NOTE(review): the checkpoint argument was missing from this call; it is
    # assumed to be the same checkpoint as the tokenizer -- confirm.
    model = AutoModelForSeq2SeqLM.from_pretrained(
        "pszemraj/pegasus-x-large-book-summary"
    )
    return dls, learn, books, tokenizer, model
# Load all artifacts when the script runs; get_3_recs, summarize and
# get_covers read these names as module-level globals.
dls, learn, books, tokenizer, model = load_stuff()
# function to get recommendations
def get_3_recs(book):
    """Return three titles whose embedding vectors are most similar to ``book``.

    Similarity is the cosine similarity between the row for ``book`` and
    every row of ``learn.model.embeds[1].weight`` (presumably the title
    embedding matrix -- confirm against the model definition).

    Args:
        book: A title to look up in ``dls.classes["title"].o2i``.

    Returns:
        list: The entries of ``dls.classes["title"]`` at ranks 1-3 of the
        descending similarity ordering.
    """
    embeddings = learn.model.embeds[1].weight
    title_vocab = dls.classes["title"]
    # Keep the query row 2-D so it is compared against every embedding row.
    query = embeddings[title_vocab.o2i[book]][None]
    similarity = nn.CosineSimilarity(dim=1)
    scores = similarity(embeddings, query)
    ranked = scores.argsort(descending=True)
    # Rank 0 is skipped -- presumably the queried title itself.
    return [title_vocab[i] for i in ranked[1:4]]
# function to get descriptions from Google Books
def search_book_description(title):
    """Look up ``title`` on Google Books and return the first hit's description.

    Args:
        title: Text sent as the ``q`` search parameter.

    Returns:
        str | None: The description of the first result, the literal
        ``"No description available."`` when that result has no description,
        or ``None`` when the request fails or nothing is found (a message is
        printed in those cases).
    """
    # Google Books API endpoint for book search
    url = "https://www.googleapis.com/books/v1/volumes"
    # Parameters for the book search
    params = {"q": title, "maxResults": 1}
    # Send GET request to Google Books API. A timeout keeps a stalled
    # connection from hanging the app; connection errors are reported the
    # same way as HTTP errors instead of propagating.
    try:
        response = requests.get(url, params=params, timeout=10)
    except requests.RequestException as err:
        print("Error:", err)
        return None
    # If the request failed, print the error message
    if response.status_code != 200:
        print("Error:", response.status_code, response.text)
        return None
    # Parse the JSON response to extract the book description
    data = response.json()
    items = data.get("items")
    if not items:
        print("No book found with the given title.")
        return None
    return items[0].get("volumeInfo", {}).get(
        "description", "No description available."
    )
# function to ensure summaries end with punctuation
def cut(sum, punctuation=string.punctuation):
    """Trim ``sum`` so that it ends at its last punctuation character.

    Args:
        sum: The text to trim. (The name shadows the builtin; it is kept so
            existing keyword callers keep working.)
        punctuation: Characters accepted as an ending. Defaults to
            ``string.punctuation``.

    Returns:
        str: ``sum`` up to and including the last occurrence of any character
        in ``punctuation``; ``sum`` unchanged when none of them occurs.
    """
    last_punc_idx = max((sum.rfind(p) for p in punctuation), default=-1)
    if last_punc_idx == -1:
        # No punctuation at all: keep the text rather than slicing it down
        # to an empty string.
        return sum
    return sum[: last_punc_idx + 1]
# function to summarize
def summarize(des_list):
    """Summarize a list of book descriptions, keeping positions aligned.

    Entries equal to ``"No description available."`` (or ``None``) are not
    sent to the model; the placeholder string is returned in their position.
    All remaining descriptions are tokenized and summarized in one batch
    using the module-level ``tokenizer`` and ``model``, and each summary is
    passed through ``cut``.

    Args:
        des_list: List of description strings (entries may be ``None``).

    Returns:
        list: One string per input entry, in the same order.
    """
    placeholder = "No description available."
    outputs = [placeholder] * len(des_list)
    # Positions that hold real text to summarize.
    real_positions = [
        i for i, des in enumerate(des_list) if des is not None and des != placeholder
    ]
    if not real_positions:
        # Nothing to summarize; avoid calling the tokenizer on an empty batch.
        return outputs
    texts = [des_list[i] for i in real_positions]
    # Tokenize all the descriptions in the list
    encoded_inputs = tokenizer(
        texts, truncation=True, padding="longest", return_tensors="pt"
    )
    # Generate summaries for all the inputs
    summaries = model.generate(**encoded_inputs, max_new_tokens=100)
    # Decode the summaries and process them
    decoded = tokenizer.batch_decode(summaries, skip_special_tokens=True)
    for position, text in zip(real_positions, decoded):
        outputs[position] = cut(text)
    return outputs
# function to get cover images
def get_covers(recs):
    """Return the ``Image-URL-L`` value of the first matching row per title.

    Args:
        recs: Iterable of titles compared against ``books["Book-Title"]``.

    Returns:
        list: One ``Image-URL-L`` value per entry of ``recs``, in order.

    Raises:
        IndexError: If a title has no matching row in ``books``.
    """
    covers = []
    for title in recs:
        matching_rows = books[books["Book-Title"] == title]
        urls = matching_rows["Image-URL-L"].tolist()
        covers.append(urls[0])
    return covers
# streamlit app construction
# Page heading.
st.title("Your digital librarian")
# Bare string expression with no assignment -- presumably displayed through
# Streamlit's rendering of standalone expressions ("magic"); TODO confirm.
"Hi there! I recommend you books based on one you love (which might not be in the same genre because that's boring) and give you my own synopsis of each book. Enjoy!"
# Offer every value of the "Book-Title" column as a choice.
options = books["Book-Title"].tolist()
# NOTE(review): `input` shadows the Python builtin of the same name.
input = st.selectbox("Select your favorite book", options)
# NOTE(review): the statements after this `if` are not indented in this copy
# of the file; they presumably form its body -- confirm against the original.
if st.button("Get recommendations"):
# Titles recommended for the selected book.
recs = get_3_recs(input)
# One description lookup per recommended title.
descriptions = list(map(search_book_description, recs))
# Summaries and cover image URLs, aligned with `recs`.
des_sums = summarize(descriptions)
imgs = get_covers(recs)
# Three layout columns; how they are filled is not visible in this section.
col1, col2, col3 = st.columns(3)