import gradio as gr
import pandas as pd
from thirdai import bolt, licensing
import os
import time
# Point the ThirdAI licensing module at the serialized license file; this
# runs before the model checkpoint is loaded below.
licensing.set_path("license.serialized")

# How many of the highest-scoring rows `search` keeps for display.
max_posts = 5

# Recipe table and the trained model checkpoint, both loaded once at import.
df = pd.read_csv("processed_recipes_3.csv")
model = bolt.UniversalDeepTransformer.load("1bn_name_ctg_keywords_4gram.bolt")

# Map each value in the table's first column to its row position.
# (Presumably the first column is the recipe id -- confirm against the CSV.)
# The column is walked once instead of doing a scalar `.iloc[i, 0]` lookup per
# row; `.to_numpy()` keeps the key types identical to what `.iloc` returned,
# and a later duplicate still overwrites an earlier one.
recipe_id_to_row_num = {
    recipe_id: row_num
    for row_num, recipe_id in enumerate(df.iloc[:, 0].to_numpy())
}
# Markdown heading text for the demo (a level-1 heading plus trailing newline).
INTRO_MARKDOWN = (
    "# A billion parameter model, trained on a single CPU, "
    "in just 90 mins, on 522K recipes from food.com !!\n"
)

# Captions for the per-result buttons. LIKE_TEXT and SHOW_MORE are the initial
# values `search` assigns; the other two are presumably swapped in by click
# handlers defined elsewhere in the file.
LIKE_TEXT = "👍 update LLM"
FEEDBACK_RECEIVED_TEXT = "👌 Click search for updated results"
SHOW_MORE = "Show more"
SHOW_LESS = "Show less"
def retrain(query, doc_id):
    """Fine-tune the global ``model`` until ``query`` is classified as ``doc_id``.

    The query is lower-cased, its newlines are replaced by spaces, and the
    result is rewritten as space-separated overlapping character 4-grams (the
    same 4-gram encoding ``search`` applies before predicting). A one-row CSV
    with ``Name`` (the 4-gram text) and ``RecipeId`` (``str(doc_id)``) is
    written to the working directory, and the model is trained on it one
    epoch at a time until its predicted class equals ``doc_id``. The CSV is
    removed afterwards, including when training or prediction raises.

    Args:
        query: Raw query text to associate with ``doc_id``.
        doc_id: Target class for the query. It is compared to the model's
            predicted class with ``!=``, so it must be of a type that can
            compare equal to that prediction (presumably an int -- confirm
            against the caller).

    Returns:
        None. The global ``model`` is updated in place.
    """
    # str.replace returns a new string; the result has to be kept, otherwise
    # raw newlines leak into the 4-grams and into the training CSV row while
    # the prediction input has them stripped.
    normalized = query.lower().replace('\n', ' ')
    # NOTE(review): queries shorter than 4 characters yield an empty string
    # here (as they do in `search`); confirm that is acceptable to train on.
    ngram_text = ' '.join(
        normalized[i:i + 4] for i in range(len(normalized) - 3)
    )

    # Named `sample` so the module-level recipe table `df` is not shadowed.
    sample = pd.DataFrame({
        "Name": [ngram_text],
        "RecipeId": [str(doc_id)],
    })
    # Hashes plus a timestamp keep concurrent feedback clicks from colliding
    # on the same temporary file name.
    filename = f"temptrain{hash(ngram_text)}{hash(doc_id)}{time.time()}.csv"
    sample.to_csv(filename)

    try:
        prediction = None
        # NOTE(review): there is no iteration cap; this spins forever if the
        # prediction can never compare equal to doc_id (e.g. a type mismatch).
        while prediction != doc_id:
            model.train(filename, epochs=1)
            prediction = model.predict(
                {"Name": ngram_text},
                return_predicted_class=True,
            )
    finally:
        # Always clean up the temporary training file, even on failure.
        os.remove(filename)
# sample = {"query": query.replace('\n', ' '), "id": str(doc_id)}
# batch = [sample]
# prediction = None
# while prediction != doc_id:
# model.train_batch(batch, metrics=["categorical_accuracy"])
# prediction = model.predict(sample, return_predicted_class=True)
def search(query):
query = query.lower()
query = ' '.join([query[i:i+4] for i in range(len(query)-3)])
scores = model.predict({"Name": query})
####
sorted_ids = scores.argsort()[-max_posts:][::-1]
relevant_posts = [
df.iloc[pid] for pid in sorted_ids
]
####
# K = min(2*max_posts, len(scores) - 1)
# sorted_post_ids = scores.argsort()[-K:][::-1]
# print(sorted_post_ids)
# sorted_ids = []
# relevant_posts = []
# count = 0
# for pid in sorted_post_ids:
# if pid in recipe_id_to_row_num:
# relevant_posts.append(df.iloc[recipe_id_to_row_num[pid]])
# sorted_ids.append(pid)
# count += 1
# if count==max_posts:
# break
####
header = [gr.Markdown.update(visible=True)]
boxes = [
gr.Box.update(visible=True)
for _ in relevant_posts
]
titles = [
gr.Markdown.update(f"## {post['Name']}")
for post in relevant_posts
]
toggles = [
gr.Button.update(
visible=True,
value=SHOW_MORE,
interactive=True,
)
for _ in relevant_posts
]
matches = [
gr.Button.update(
value=LIKE_TEXT,
interactive=True,
)
for _ in relevant_posts
]
bodies = [
gr.HTML.update(
visible=False,
value=f"
"
f"