File size: 14,962 Bytes
a618fc8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75d1bdf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7d9242a
a618fc8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6f540cc
a618fc8
 
7d9242a
a618fc8
7d9242a
a618fc8
 
 
 
28f7cf3
a618fc8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7d9242a
a618fc8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b77f2ea
a618fc8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8693708
a618fc8
9b7fe1b
a618fc8
 
 
 
7d9242a
 
 
a618fc8
0c43b6e
a618fc8
7d9242a
 
 
a618fc8
b77f2ea
a618fc8
7d9242a
 
 
8693708
 
7d248e1
a618fc8
7d9242a
b3bde46
a618fc8
75d1bdf
 
 
 
 
 
 
 
 
 
 
 
 
 
a618fc8
8693708
 
a618fc8
 
 
b5c4f59
 
7d9242a
b5c4f59
 
 
 
7d9242a
b5c4f59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7d9242a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75d1bdf
7d9242a
75d1bdf
 
7d9242a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75d1bdf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
import requests
from io import BytesIO
import numpy as np
from gensim.models.fasttext import FastText
from scipy import spatial
import itertools
import gdown
import warnings
import nltk
# warnings.filterwarnings('ignore')

import pickle
import pdb
from concurrent.futures import ProcessPoolExecutor

import matplotlib.pyplot as plt
import streamlit as st
import argparse
import logging
from pyunsplash import PyUnsplash
import blacklists
# SECURITY NOTE(review): this Unsplash access key is hardcoded and committed to
# source control -- rotate it and load it from an env var or st.secrets instead.
api_key = 'hzcKZ0e4we95wSd8_ip2zTB3m2DrOMWehAxrYjqjwg0'

# instantiate PyUnsplash object (used below to fetch one photo per recommendation)
py_un = PyUnsplash(api_key=api_key)

# pyunsplash logger defaults to level logging.ERROR
# If you need to change that, use getLogger/setLevel
# on the module logger, like this:
logging.getLogger("pyunsplash").setLevel(logging.DEBUG)

# TODO: 
# Image search: Option 1 -> google image search api || Option 2 -> open ai clip search
from PIL import Image


# NLTK datasets, fetched at import time; nltk.download is a no-op when already cached.
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

# Average embedding β†’ Compare
# Average embedding β†’ Compare
def recommend_ingredients(yum, leftovers, n=10):
  '''
  Recommend complementary ingredients by mean-aggregating leftover embeddings.

  :params
  yum -> FastText Word2Vec Obj (gensim KeyedVectors)
  leftovers -> list of str model keys (underscore-separated tokens)
  n -> int top_n to return

  :returns
  output -> list of (ingredient, similarity) tuples, at most n entries;
            empty list when no leftovers are given
  '''
  # Guard: the original divided by len(leftovers) and crashed on empty input.
  if not leftovers:
    return []
  # Take the embedding dimension from the model instead of hard-coding 32,
  # so the function works with any vector_size the model was trained with.
  leftovers_embedding_sum = np.zeros(yum.vector_size)
  for ingredient in leftovers:
    # norm=True -> unit-normalized vector (gensim 4.x API)
    leftovers_embedding_sum += yum.get_vector(ingredient, norm=True)
  leftovers_embedding = leftovers_embedding_sum / len(leftovers)  # Embedding for leftovers
  top_matches = yum.similar_by_vector(leftovers_embedding, topn=100)
  top_matches = [(name.replace('_', ' '), score) for name, score in top_matches]
  leftovers_spaced = [x.replace('_', ' ') for x in leftovers]
  # Remove boring same item matches, e.g. "romaine lettuce" if leftovers already contain "lettuce".
  output = [m for m in top_matches if not any(ignore in m[0] for ignore in leftovers_spaced)]
  return output[:n]

# Compare β†’ Find intersection
# Compare β†’ Find intersection
def recommend_ingredients_intersect(yum, leftovers, n=10):
  '''
  Recommend ingredients that rank highly for EVERY leftover (intersection
  of each ingredient's top matches).

  :params
  yum -> FastText Word2Vec Obj (gensim KeyedVectors)
  leftovers -> list of str model keys (underscore-separated tokens)
  n -> int top_n to return

  :returns
  output -> list of (ingredient, similarity) tuples, at most n entries;
            empty list when no leftovers are given
  '''
  # The original left `output` unbound (NameError) when leftovers was empty.
  output = None
  # Filter against the space form so multi-word leftovers like "bread_crumbs"
  # are matched consistently (same convention as recommend_ingredients).
  leftovers_spaced = [x.replace('_', ' ') for x in leftovers]
  for ingredient in leftovers:
    ingredient_embedding = yum.get_vector(ingredient, norm=True)
    matches = yum.similar_by_vector(ingredient_embedding, topn=10000)
    matches = [(name.replace('_', ' '), score) for name, score in matches]
    # Remove boring same item matches, e.g. "romaine lettuce" if leftovers already contain "lettuce".
    candidates = [m for m in matches if not any(ignore in m[0] for ignore in leftovers_spaced)]
    if output is None:
      output = candidates
    else:
      # Set-based intersection on the match name: O(n) instead of the
      # original nested-comprehension O(n^2). Scores kept from the first
      # leftover's ranking, as before.
      keep = {name for name, _ in candidates}
      output = [m for m in output if m[0] in keep]
  return (output or [])[:n]

def recommend_ingredients_subsets(model, yum, leftovers, subset_size):
  '''
  Recommend ingredients for every subset of the leftovers of the given size.

  :params
  model -> FastText Obj (unused; kept for backward compatibility with callers)
  yum -> FastText Word2Vec Obj (gensim KeyedVectors)
  leftovers -> list of str model keys (underscore-separated tokens)
  subset_size -> int size of each leftover combination

  :returns
  all_outputs -> dict mapping each subset tuple to its top-10
                 (ingredient, similarity) recommendations
  '''
  all_outputs = {}
  for leftovers_subset in itertools.combinations(leftovers, subset_size):
    # np.zeros, NOT np.empty: np.empty leaves uninitialized garbage that the
    # original summed into every embedding. Dimension comes from the model
    # (the original hard-coded 100 while the model is trained with 32).
    subset_sum = np.zeros(yum.vector_size)
    for ingredient in leftovers_subset:
      # gensim 4.x API: get_vector(..., norm=True) replaces the removed
      # word_vec(..., use_norm=True).
      subset_sum += yum.get_vector(ingredient, norm=True)
    subset_embedding = subset_sum / len(leftovers_subset)  # Embedding for this subset
    # similar_by_vector lives on the KeyedVectors (yum), not the FastText
    # model object, in gensim 4.x.
    top_matches = yum.similar_by_vector(subset_embedding, topn=100)
    top_matches = [(name.replace('_', ' '), score) for name, score in top_matches]
    # Remove boring same item matches, e.g. "romaine lettuce" if the subset already contains "lettuce".
    output = [m for m in top_matches if not any(ignore in m[0] for ignore in leftovers_subset)]
    all_outputs[leftovers_subset] = output[:10]
  return all_outputs



def filter_adjectives(data):
    '''
    Drop single-word entries that are not nouns (e.g. a stray adjective),
    keeping every multi-word entry as-is.

    :params
    data -> list of str ingredient phrases

    :returns
    list of str, the entries that survived the POS filter
    '''
    tokenized = [nltk.word_tokenize(item) for item in data]
    kept = []
    for item, tokens in zip(data, tokenized):
        tags = [tag for _, tag in nltk.pos_tag(tokens)]
        # Multi-token phrases always pass; single tokens must be a noun
        # (singular NN or plural NNS) to be kept.
        if len(tags) > 1 or 'NN' in tags or 'NNS' in tags:
            kept.append(item)
    return kept

def plural_to_singular(lemma, recipe):
  '''
  Lemmatize every ingredient in a single recipe (plural -> singular).

  :params
  lemma -> nltk lemma Obj (anything exposing .lemmatize(str) -> str)
  recipe -> list of str

  :returns
  list of str with each ingredient reduced to its lemma
  '''
  return list(map(lemma.lemmatize, recipe))

def filter_lemma(data):
    '''
    Reduce every ingredient of every recipe to its lemma (plural -> singular).

    :params
    data -> list of lists of str (one inner list per recipe)

    :returns
    list of lists of str, lemmatized recipes in the same order
    '''
    # Lemmatizer used to collapse plurals down to their stems.
    lemma = nltk.wordnet.WordNetLemmatizer()

    # NOTE: fans the work out across every available CPU core.
    with ProcessPoolExecutor() as pool:
        lemmatized = list(pool.map(plural_to_singular, itertools.repeat(lemma), data))

    return lemmatized


def train_model(data):
    '''
    Train the FastText "fastfood" embedding model.
    NOTE: gensim==4.1.2

    :params
    data -> list of lists of all recipes

    :returns
    model -> trained FastText model obj
    '''
    # vector_size=32 is the embedding dimension assumed by the recommenders;
    # sg=1 selects skip-gram training.
    hyperparams = dict(vector_size=32, window=99, min_count=5, workers=40, sg=1)
    return FastText(data, **hyperparams)

@st.cache(allow_output_mutation=True)
def load_model(filename):
  '''
  Load a trained FastText model from disk (cached across Streamlit reruns).

  :params:
  filename -> path to the saved model file

  :returns
  model -> the full FastText obj
  yum -> the model's KeyedVectors (model.wv)
  '''
  loaded = FastText.load(filename)
  return loaded, loaded.wv

@st.cache(allow_output_mutation=True)
def load_data(filename='data/all_recipes_ingredients_lemma.pkl'):
  '''
  Load the pickled recipe dataset (cached across Streamlit reruns).

  :params:
  filename -> path to dataset pickle

  :return
  data -> list of all recipes
  '''
  # Context manager closes the file deterministically; the original passed
  # open(...) straight into pickle.load and leaked the handle.
  with open(filename, 'rb') as f:
    return pickle.load(f)

def plot_results(names, probs, n=5):
  '''
  Plots a bar chart of the names of the items vs. probability of similarity 

  NOTE: draws on matplotlib's implicit current figure/axes, and reads
  st.session_state.leftovers for the title -- only call from the Streamlit app
  after the leftovers multiselect has populated session state.

  :params:
  names -> list of str 
  probs -> list of float values
  n -> int of how many bars to show NOTE: Max = 100
  
  :return
  fig -> return figure for plotting 
  '''
  plt.bar(range(len(names)), probs, align='center')
  ax = plt.gca()

  # Fixed tick per bar, labeled with the ingredient name.
  ax.xaxis.set_major_locator(plt.FixedLocator(range(len(names))))
  ax.xaxis.set_major_formatter(plt.FixedFormatter(names))
  ax.set_ylabel('Probability',fontsize='large', fontweight='bold')
  ax.set_xlabel('Ingredients', fontsize='large', fontweight='bold')
  ax.xaxis.labelpad = 10
  ax.set_title(f'FoodNet Top {n} Predictions = {st.session_state.leftovers}')
  # mpld3.show()
  # Slant the labels so long ingredient names do not overlap.
  plt.xticks(rotation=45, ha='right')
  fig = plt.gcf()

  return fig

def load_image(image_file):
    '''Open an uploaded file (path or file-like object) as a PIL Image.'''
    return Image.open(image_file)

# Page config must be the first Streamlit call of the script run.
st.set_page_config(page_title="FoodNet", page_icon = "πŸ”", layout = "centered", initial_sidebar_state = "auto")

##### UI/UX #####
## Sidebar ##
# Page dispatcher: the selected value drives the if/elif chain below.
add_selectbox = st.sidebar.selectbox("Pages", ("FoodNet Recommender", "Food Donation Resources", "Contact Team"))

# Cached model load (see load_model); yum is the KeyedVectors used for lookups.
model, yum = load_model('fastfood.pth')

## Page: recommender -- pick leftovers, get complementary ingredients + photos ##
if add_selectbox == "FoodNet Recommender":
    st.title("FoodNet πŸ”")
    st.write("Search for similar food ingredients. Select two or more ingredients to find complementary ingredients.")
    # Model vocabulary, displayed with spaces instead of the key underscores.
    ingredients = list(yum.key_to_index.keys())
    ingredients = [x.replace('_',' ') for x in ingredients]
    st.multiselect("Type or select food ingredients", ingredients, default=['bread', 'lettuce'], key="leftovers")

    ## Slider ##
    st.slider("Select number of recommendations to show", min_value=1, max_value=10, value=3, step=1, key='top_n')

    ## Show Images ## 
    # search = py_un.search(type_="photos", query="cookie")
    # py_un.photos(type_="single", photo_id='l0_kVknpO2g')

    # st.image(search)
    ## Images
    # for leftover in st.session_state.leftovers:
    #   search = py_un.search(type_='photos', query=leftover)
    #   for photo in search.entries:
    #       # print(photo.id, photo.link_download)
    #     st.image(photo.link_download, caption=leftover, width=200)
    #     break
    # (f"![Alt Text]({search.link_next})")

    ## Get food recommendation ##
    # Selections carry spaces; the model keys use underscores, so convert back.
    ingredients_no_space = [x.replace(' ','_') for x in st.session_state.get('leftovers')]
    out = recommend_ingredients(yum, ingredients_no_space, n=st.session_state.top_n)
    names = [o[0] for o in out]
    probs = [o[1] for o in out]

    # if 'probs' not in st.session_state:
    #     st.session_state['probs'] = False
    
    # if st.session_state.probs:
    #     st.table(data=out)
    # else:
    #     st.table(data=names)
        
    # st.checkbox(label="Show model scores", value=False, key="probs")
    # ## Plot Results ##
    # st.checkbox(label="Show results bar chart", value=False, key="plot")
    # if st.session_state.plot:
    #     fig = plot_results(names, probs, st.session_state.top_n)

    #     ## Show Plot ##
    #     st.pyplot(fig)
    # Post-filter recommendations against the selected dietary blacklist
    # (substring match against the lists in blacklists.py).
    st.selectbox(label="Select dietary restriction", options=('None', 'Kosher', 'Vegetarian'), key="diet")
    if st.session_state.diet != 'None':
      if st.session_state.diet == 'Vegetarian':
        out = [o for o in out if not any(ignore in o[0] for ignore in blacklists.vegitarian)]
      if st.session_state.diet == 'Kosher': 
        out = [o for o in out if not any(ignore in o[0] for ignore in blacklists.kosher)]
      names = [o[0] for o in out]
      probs = [o[1] for o in out]

    col1, col2, col3 = st.columns(3)

    # One Unsplash photo per recommendation, laid out round-robin across the
    # three columns; break keeps only the first search hit per ingredient.
    for i, name in enumerate(names): 
      search = py_un.search(type_='photos', query=name)
      for photo in search.entries:
        col_id = i % 3
        if col_id == 0:
          col1.image(photo.link_download, caption=name, use_column_width=True)
        elif col_id == 1:
          col2.image(photo.link_download, caption=name, use_column_width=True)
        elif col_id == 2: 
          col3.image(photo.link_download, caption=name, use_column_width=True)
        break

## Page: static food-donation resource links ##
elif add_selectbox == "Food Donation Resources":
    st.title('Food Donation Resources')
    st.subheader('Pittsburgh Food Bank:')
    st.write("In 2000, the Food Bank opened the doors on its facility in Duquesne."
              "This facility was the first LEED-certified building in Pittsburgh and the first LEED-certified "
              "food bank in the nation. Learn more about that facility here. "
              "Today, we work with a network of more than 850 partners across the 11 counties we serve. "
              "In addition to sourcing, warehousing and distributing food, the Food Bank is actively engaged in "
              "stabilizing lives and confronting issues of chronic hunger, poor nutrition and health. "
              "And, through our advocacy efforts, we have become a primary driver in comprehensive anti-hunger "
              "endeavors regionally, statewide and at the national level."
              )
    st.write("Check out this [link](https://pittsburghfoodbank.org/)πŸ‘ˆ")
    st.subheader('412 Food Rescue:')
    st.write("412 Food Rescue is a nonprofit organization dedicated to ending hunger by organizing "
              "volunteers to deliver surplus food to insecure communities instead of landfills."
              "Since its creation in 2015, the organization has redistributed over three million pounds of food through "
              "the use of its mobile application, Food Rescue Hero. They are currently rolling out the app nationwide."
              )
    st.write("Check out this [link](https://412foodrescue.org/)πŸ‘ˆ")

    # st.subheader('Image')
    # st.multiselect("Select leftovers:", list(yum.key_to_index.keys()), key="leftovers")
    # image_file = st.file_uploader("Upload Food Image:", type=["png", "jpg", "jpeg"])
    # if image_file is not None:
    #     # To See details
    #     file_details = {"filename": image_file.name, "filetype": image_file.type,
    #                     "filesize": image_file.size}
    #     st.write(file_details)
    #
    #     # To View Uploaded Image
    #     st.image(load_image(image_file), width=250)
if add_selectbox == "Contact Team":
    st.title('Contact Team')
    st.subheader('David Chuan-En Lin')
    col1, mid, col2 = st.columns([20, 2, 10])
    with col1:
        st.write('Pronouns: he/him/his')
        st.write(
            'Research/career interests: Human-AI Co-Design by (1) building ML-infused creativity support tools and '
            '(2) investigating how such tools augment design processes')
        st.write('Favorite Food: Ice cream sandwich')
        st.write('A painfully boring fact: Second-year PhD at HCII SCS')
        st.write('Hobbies: Making travel videos, graphic design, music')
        st.write('Email: [email protected]')
    with col2:
        st.image('https://chuanenlin.com/images/me.jpg', width=300)

    st.subheader('Mitchell Fogelson')
    col1, mid, col2 = st.columns([20, 2, 10])
    with col1:
        st.write('Pronouns: he/him/his')
        st.write('Research/career interests: Robotics, AI')
        st.write('Favorite Food: Deep Dish Pizza')
        st.write('A painfully boring fact: Am a middle child')
        st.write('Hobbies: Golf, Traveling, Games')
        st.write('Email: [email protected]')
    with col2:
        st.image('https://images.squarespace-cdn.com/content/v1/562661f3e4b0ae7c10f0a2cc/1590528961389-2142HA48O7LRZ9FWGP0F/about_image.jpg?format=2500w', width=300)

    st.subheader('Sunny Yang')
    col1, mid, col2 = st.columns([20, 2, 10])
    with col1:
        st.write('Pronouns: She/Her/Hers')
        st.write('Research/career interests: Product Manager')
        st.write('Favorite Food: Sushi')
        st.write('A painfully boring fact: I do not like rainy:(')
        st.write('Hobbies: Viola, Basketball')
        st.write('Email: [email protected]')
    with col2:
        st.image('https://media-exp1.licdn.com/dms/image/C4D03AQF37KjK_GYwzA/profile-displayphoto-shrink_400_400/0/1638326708803?e=1643846400&v=beta&t=q10CTNCG6h5guez1YT0j4j_oLlrGJB_8NugaBOUSAGg', width=300)

    st.subheader('Shihao Xu')
    col1, mid, col2 = st.columns([20, 2, 10])
    with col1:
        st.write('Pronouns: he/him/his')
        st.write('Research/career interests: Autonomous Vehicle')
        st.write('Favorite Food: Dumplings')
        st.write('A painfully boring fact:  Covid is still not gone')
        st.write('Hobbies: photography')
        st.write('Email: [email protected]')
    with col2:
        st.image('https://scontent-ort2-1.xx.fbcdn.net/v/t39.30808-6/261420667_131245119324840_3342182275866550937_n.jpg?_nc_cat=100&ccb=1-5&_nc_sid=730e14&_nc_ohc=IP7khn2w6cwAX_wC85x&_nc_ht=scontent-ort2-1.xx&oh=063c2b6b0ed5e9fc10adb2c391c471cf&oe=61AA72C1', width=300)