File size: 14,962 Bytes
a618fc8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75d1bdf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7d9242a
a618fc8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6f540cc
a618fc8
 
7d9242a
a618fc8
7d9242a
a618fc8
 
 
 
28f7cf3
a618fc8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7d9242a
a618fc8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b77f2ea
a618fc8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8693708
a618fc8
9b7fe1b
a618fc8
 
 
 
7d9242a
 
 
a618fc8
0c43b6e
a618fc8
7d9242a
 
 
a618fc8
b77f2ea
a618fc8
7d9242a
 
 
8693708
 
7d248e1
a618fc8
7d9242a
b3bde46
a618fc8
75d1bdf
 
 
 
 
 
 
 
 
 
 
 
 
 
a618fc8
8693708
 
a618fc8
 
 
b5c4f59
 
7d9242a
b5c4f59
 
 
 
7d9242a
b5c4f59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7d9242a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75d1bdf
7d9242a
75d1bdf
 
7d9242a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75d1bdf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
import requests
from io import BytesIO
import numpy as np
from gensim.models.fasttext import FastText
from scipy import spatial
import itertools
import gdown
import warnings
import nltk
# warnings.filterwarnings('ignore')

import pickle
import pdb
from concurrent.futures import ProcessPoolExecutor

import matplotlib.pyplot as plt
import streamlit as st
import argparse
import logging
from pyunsplash import PyUnsplash
import blacklists
# SECURITY NOTE(review): this Unsplash access key is hardcoded and committed to
# source control -- rotate it and load it from an env var or st.secrets instead.
api_key = 'hzcKZ0e4we95wSd8_ip2zTB3m2DrOMWehAxrYjqjwg0'

# instantiate PyUnsplash object (used below to fetch one photo per recommendation)
py_un = PyUnsplash(api_key=api_key)

# pyunsplash logger defaults to level logging.ERROR
# If you need to change that, use getLogger/setLevel
# on the module logger, like this:
logging.getLogger("pyunsplash").setLevel(logging.DEBUG)

# TODO: 
# Image search: Option 1 -> google image search api || Option 2 -> open ai clip search
from PIL import Image


# NLTK datasets, fetched at import time; nltk.download is a no-op when already cached.
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

# Average embedding β†’ Compare
# Average embedding β†’ Compare
def recommend_ingredients(yum, leftovers, n=10):
  '''
  Recommend complementary ingredients by mean-aggregating leftover embeddings.

  :params
  yum -> FastText Word2Vec Obj (gensim KeyedVectors)
  leftovers -> list of str model keys (underscore-separated tokens)
  n -> int top_n to return

  :returns
  output -> list of (ingredient, similarity) tuples, at most n entries;
            empty list when no leftovers are given
  '''
  # Guard: the original divided by len(leftovers) and crashed on empty input.
  if not leftovers:
    return []
  # Take the embedding dimension from the model instead of hard-coding 32,
  # so the function works with any vector_size the model was trained with.
  leftovers_embedding_sum = np.zeros(yum.vector_size)
  for ingredient in leftovers:
    # norm=True -> unit-normalized vector (gensim 4.x API)
    leftovers_embedding_sum += yum.get_vector(ingredient, norm=True)
  leftovers_embedding = leftovers_embedding_sum / len(leftovers)  # Embedding for leftovers
  top_matches = yum.similar_by_vector(leftovers_embedding, topn=100)
  top_matches = [(name.replace('_', ' '), score) for name, score in top_matches]
  leftovers_spaced = [x.replace('_', ' ') for x in leftovers]
  # Remove boring same item matches, e.g. "romaine lettuce" if leftovers already contain "lettuce".
  output = [m for m in top_matches if not any(ignore in m[0] for ignore in leftovers_spaced)]
  return output[:n]

# Compare β†’ Find intersection
# Compare β†’ Find intersection
def recommend_ingredients_intersect(yum, leftovers, n=10):
  '''
  Recommend ingredients that rank highly for EVERY leftover (intersection
  of each ingredient's top matches).

  :params
  yum -> FastText Word2Vec Obj (gensim KeyedVectors)
  leftovers -> list of str model keys (underscore-separated tokens)
  n -> int top_n to return

  :returns
  output -> list of (ingredient, similarity) tuples, at most n entries;
            empty list when no leftovers are given
  '''
  # The original left `output` unbound (NameError) when leftovers was empty.
  output = None
  # Filter against the space form so multi-word leftovers like "bread_crumbs"
  # are matched consistently (same convention as recommend_ingredients).
  leftovers_spaced = [x.replace('_', ' ') for x in leftovers]
  for ingredient in leftovers:
    ingredient_embedding = yum.get_vector(ingredient, norm=True)
    matches = yum.similar_by_vector(ingredient_embedding, topn=10000)
    matches = [(name.replace('_', ' '), score) for name, score in matches]
    # Remove boring same item matches, e.g. "romaine lettuce" if leftovers already contain "lettuce".
    candidates = [m for m in matches if not any(ignore in m[0] for ignore in leftovers_spaced)]
    if output is None:
      output = candidates
    else:
      # Set-based intersection on the match name: O(n) instead of the
      # original nested-comprehension O(n^2). Scores kept from the first
      # leftover's ranking, as before.
      keep = {name for name, _ in candidates}
      output = [m for m in output if m[0] in keep]
  return (output or [])[:n]

def recommend_ingredients_subsets(model, yum, leftovers, subset_size):
  '''
  Recommend ingredients for every subset of the leftovers of the given size.

  :params
  model -> FastText Obj (unused; kept for backward compatibility with callers)
  yum -> FastText Word2Vec Obj (gensim KeyedVectors)
  leftovers -> list of str model keys (underscore-separated tokens)
  subset_size -> int size of each leftover combination

  :returns
  all_outputs -> dict mapping each subset tuple to its top-10
                 (ingredient, similarity) recommendations
  '''
  all_outputs = {}
  for leftovers_subset in itertools.combinations(leftovers, subset_size):
    # np.zeros, NOT np.empty: np.empty leaves uninitialized garbage that the
    # original summed into every embedding. Dimension comes from the model
    # (the original hard-coded 100 while the model is trained with 32).
    subset_sum = np.zeros(yum.vector_size)
    for ingredient in leftovers_subset:
      # gensim 4.x API: get_vector(..., norm=True) replaces the removed
      # word_vec(..., use_norm=True).
      subset_sum += yum.get_vector(ingredient, norm=True)
    subset_embedding = subset_sum / len(leftovers_subset)  # Embedding for this subset
    # similar_by_vector lives on the KeyedVectors (yum), not the FastText
    # model object, in gensim 4.x.
    top_matches = yum.similar_by_vector(subset_embedding, topn=100)
    top_matches = [(name.replace('_', ' '), score) for name, score in top_matches]
    # Remove boring same item matches, e.g. "romaine lettuce" if the subset already contains "lettuce".
    output = [m for m in top_matches if not any(ignore in m[0] for ignore in leftovers_subset)]
    all_outputs[leftovers_subset] = output[:10]
  return all_outputs



def filter_adjectives(data):
    '''
    Drop single-word entries that are not nouns (e.g. a stray adjective),
    keeping every multi-word entry as-is.

    :params
    data -> list of str ingredient phrases

    :returns
    list of str, the entries that survived the POS filter
    '''
    tokenized = [nltk.word_tokenize(item) for item in data]
    kept = []
    for item, tokens in zip(data, tokenized):
        tags = [tag for _, tag in nltk.pos_tag(tokens)]
        # Multi-token phrases always pass; single tokens must be a noun
        # (singular NN or plural NNS) to be kept.
        if len(tags) > 1 or 'NN' in tags or 'NNS' in tags:
            kept.append(item)
    return kept

def plural_to_singular(lemma, recipe):
  '''
  Lemmatize every ingredient in a single recipe (plural -> singular).

  :params
  lemma -> nltk lemma Obj (anything exposing .lemmatize(str) -> str)
  recipe -> list of str

  :returns
  list of str with each ingredient reduced to its lemma
  '''
  return list(map(lemma.lemmatize, recipe))

def filter_lemma(data):
    '''
    Reduce every ingredient of every recipe to its lemma (plural -> singular).

    :params
    data -> list of lists of str (one inner list per recipe)

    :returns
    list of lists of str, lemmatized recipes in the same order
    '''
    # Lemmatizer used to collapse plurals down to their stems.
    lemma = nltk.wordnet.WordNetLemmatizer()

    # NOTE: fans the work out across every available CPU core.
    with ProcessPoolExecutor() as pool:
        lemmatized = list(pool.map(plural_to_singular, itertools.repeat(lemma), data))

    return lemmatized


def train_model(data):
    '''
    Train the FastText "fastfood" embedding model.
    NOTE: gensim==4.1.2

    :params
    data -> list of lists of all recipes

    :returns
    model -> trained FastText model obj
    '''
    # vector_size=32 is the embedding dimension assumed by the recommenders;
    # sg=1 selects skip-gram training.
    hyperparams = dict(vector_size=32, window=99, min_count=5, workers=40, sg=1)
    return FastText(data, **hyperparams)

@st.cache(allow_output_mutation=True)
def load_model(filename):
  '''
  Load a trained FastText model from disk (cached across Streamlit reruns).

  :params:
  filename -> path to the saved model file

  :returns
  model -> the full FastText obj
  yum -> the model's KeyedVectors (model.wv)
  '''
  loaded = FastText.load(filename)
  return loaded, loaded.wv

@st.cache(allow_output_mutation=True)
def load_data(filename='data/all_recipes_ingredients_lemma.pkl'):
  '''
  Load the pickled recipe dataset (cached across Streamlit reruns).

  :params:
  filename -> path to dataset pickle

  :return
  data -> list of all recipes
  '''
  # Context manager closes the file deterministically; the original passed
  # open(...) straight into pickle.load and leaked the handle.
  with open(filename, 'rb') as f:
    return pickle.load(f)

def plot_results(names, probs, n=5):
  '''
  Plots a bar chart of the names of the items vs. probability of similarity 

  NOTE: draws on matplotlib's implicit current figure/axes, and reads
  st.session_state.leftovers for the title -- only call from the Streamlit app
  after the leftovers multiselect has populated session state.

  :params:
  names -> list of str 
  probs -> list of float values
  n -> int of how many bars to show NOTE: Max = 100
  
  :return
  fig -> return figure for plotting 
  '''
  plt.bar(range(len(names)), probs, align='center')
  ax = plt.gca()

  # Fixed tick per bar, labeled with the ingredient name.
  ax.xaxis.set_major_locator(plt.FixedLocator(range(len(names))))
  ax.xaxis.set_major_formatter(plt.FixedFormatter(names))
  ax.set_ylabel('Probability',fontsize='large', fontweight='bold')
  ax.set_xlabel('Ingredients', fontsize='large', fontweight='bold')
  ax.xaxis.labelpad = 10
  ax.set_title(f'FoodNet Top {n} Predictions = {st.session_state.leftovers}')
  # mpld3.show()
  # Slant the labels so long ingredient names do not overlap.
  plt.xticks(rotation=45, ha='right')
  fig = plt.gcf()

  return fig

def load_image(image_file):
    '''Open an uploaded file (path or file-like object) as a PIL Image.'''
    return Image.open(image_file)

# Page config must be the first Streamlit call of the script run.
st.set_page_config(page_title="FoodNet", page_icon = "πŸ”", layout = "centered", initial_sidebar_state = "auto")

##### UI/UX #####
## Sidebar ##
# Page dispatcher: the selected value drives the if/elif chain below.
add_selectbox = st.sidebar.selectbox("Pages", ("FoodNet Recommender", "Food Donation Resources", "Contact Team"))

# Cached model load (see load_model); yum is the KeyedVectors used for lookups.
model, yum = load_model('fastfood.pth')

## Page: recommender -- pick leftovers, get complementary ingredients + photos ##
if add_selectbox == "FoodNet Recommender":
    st.title("FoodNet πŸ”")
    st.write("Search for similar food ingredients. Select two or more ingredients to find complementary ingredients.")
    # Model vocabulary, displayed with spaces instead of the key underscores.
    ingredients = list(yum.key_to_index.keys())
    ingredients = [x.replace('_',' ') for x in ingredients]
    st.multiselect("Type or select food ingredients", ingredients, default=['bread', 'lettuce'], key="leftovers")

    ## Slider ##
    st.slider("Select number of recommendations to show", min_value=1, max_value=10, value=3, step=1, key='top_n')

    ## Show Images ## 
    # search = py_un.search(type_="photos", query="cookie")
    # py_un.photos(type_="single", photo_id='l0_kVknpO2g')

    # st.image(search)
    ## Images
    # for leftover in st.session_state.leftovers:
    #   search = py_un.search(type_='photos', query=leftover)
    #   for photo in search.entries:
    #       # print(photo.id, photo.link_download)
    #     st.image(photo.link_download, caption=leftover, width=200)
    #     break
    # (f"![Alt Text]({search.link_next})")

    ## Get food recommendation ##
    # Selections carry spaces; the model keys use underscores, so convert back.
    ingredients_no_space = [x.replace(' ','_') for x in st.session_state.get('leftovers')]
    out = recommend_ingredients(yum, ingredients_no_space, n=st.session_state.top_n)
    names = [o[0] for o in out]
    probs = [o[1] for o in out]

    # if 'probs' not in st.session_state:
    #     st.session_state['probs'] = False
    
    # if st.session_state.probs:
    #     st.table(data=out)
    # else:
    #     st.table(data=names)
        
    # st.checkbox(label="Show model scores", value=False, key="probs")
    # ## Plot Results ##
    # st.checkbox(label="Show results bar chart", value=False, key="plot")
    # if st.session_state.plot:
    #     fig = plot_results(names, probs, st.session_state.top_n)

    #     ## Show Plot ##
    #     st.pyplot(fig)
    # Post-filter recommendations against the selected dietary blacklist
    # (substring match against the lists in blacklists.py).
    st.selectbox(label="Select dietary restriction", options=('None', 'Kosher', 'Vegetarian'), key="diet")
    if st.session_state.diet != 'None':
      if st.session_state.diet == 'Vegetarian':
        out = [o for o in out if not any(ignore in o[0] for ignore in blacklists.vegitarian)]
      if st.session_state.diet == 'Kosher': 
        out = [o for o in out if not any(ignore in o[0] for ignore in blacklists.kosher)]
      names = [o[0] for o in out]
      probs = [o[1] for o in out]

    col1, col2, col3 = st.columns(3)

    # One Unsplash photo per recommendation, laid out round-robin across the
    # three columns; break keeps only the first search hit per ingredient.
    for i, name in enumerate(names): 
      search = py_un.search(type_='photos', query=name)
      for photo in search.entries:
        col_id = i % 3
        if col_id == 0:
          col1.image(photo.link_download, caption=name, use_column_width=True)
        elif col_id == 1:
          col2.image(photo.link_download, caption=name, use_column_width=True)
        elif col_id == 2: 
          col3.image(photo.link_download, caption=name, use_column_width=True)
        break

## Page: static food-donation resource links ##
elif add_selectbox == "Food Donation Resources":
    st.title('Food Donation Resources')
    st.subheader('Pittsburgh Food Bank:')
    st.write("In 2000, the Food Bank opened the doors on its facility in Duquesne."
              "This facility was the first LEED-certified building in Pittsburgh and the first LEED-certified "
              "food bank in the nation. Learn more about that facility here. "
              "Today, we work with a network of more than 850 partners across the 11 counties we serve. "
              "In addition to sourcing, warehousing and distributing food, the Food Bank is actively engaged in "
              "stabilizing lives and confronting issues of chronic hunger, poor nutrition and health. "
              "And, through our advocacy efforts, we have become a primary driver in comprehensive anti-hunger "
              "endeavors regionally, statewide and at the national level."
              )
    st.write("Check out this [link](https://pittsburghfoodbank.org/)πŸ‘ˆ")
    st.subheader('412 Food Rescue:')
    st.write("412 Food Rescue is a nonprofit organization dedicated to ending hunger by organizing "
              "volunteers to deliver surplus food to insecure communities instead of landfills."
              "Since its creation in 2015, the organization has redistributed over three million pounds of food through "
              "the use of its mobile application, Food Rescue Hero. They are currently rolling out the app nationwide."
              )
    st.write("Check out this [link](https://412foodrescue.org/)πŸ‘ˆ")

    # st.subheader('Image')
    # st.multiselect("Select leftovers:", list(yum.key_to_index.keys()), key="leftovers")
    # image_file = st.file_uploader("Upload Food Image:", type=["png", "jpg", "jpeg"])
    # if image_file is not None:
    #     # To See details
    #     file_details = {"filename": image_file.name, "filetype": image_file.type,
    #                     "filesize": image_file.size}
    #     st.write(file_details)
    #
    #     # To View Uploaded Image
    #     st.image(load_image(image_file), width=250)
if add_selectbox == "Contact Team":
    st.title('Contact Team')
    st.subheader('David Chuan-En Lin')
    col1, mid, col2 = st.columns([20, 2, 10])
    with col1:
        st.write('Pronouns: he/him/his')
        st.write(
            'Research/career interests: Human-AI Co-Design by (1) building ML-infused creativity support tools and '
            '(2) investigating how such tools augment design processes')
        st.write('Favorite Food: Ice cream sandwich')
        st.write('A painfully boring fact: Second-year PhD at HCII SCS')
        st.write('Hobbies: Making travel videos, graphic design, music')
        st.write('Email: [email protected]')
    with col2:
        st.image('https://chuanenlin.com/images/me.jpg', width=300)

    st.subheader('Mitchell Fogelson')
    col1, mid, col2 = st.columns([20, 2, 10])
    with col1:
        st.write('Pronouns: he/him/his')
        st.write('Research/career interests: Robotics, AI')
        st.write('Favorite Food: Deep Dish Pizza')
        st.write('A painfully boring fact: Am a middle child')
        st.write('Hobbies: Golf, Traveling, Games')
        st.write('Email: [email protected]')
    with col2:
        st.image('https://images.squarespace-cdn.com/content/v1/562661f3e4b0ae7c10f0a2cc/1590528961389-2142HA48O7LRZ9FWGP0F/about_image.jpg?format=2500w', width=300)

    st.subheader('Sunny Yang')
    col1, mid, col2 = st.columns([20, 2, 10])
    with col1:
        st.write('Pronouns: She/Her/Hers')
        st.write('Research/career interests: Product Manager')
        st.write('Favorite Food: Sushi')
        st.write('A painfully boring fact: I do not like rainy:(')
        st.write('Hobbies: Viola, Basketball')
        st.write('Email: [email protected]')
    with col2:
        st.image('https://media-exp1.licdn.com/dms/image/C4D03AQF37KjK_GYwzA/profile-displayphoto-shrink_400_400/0/1638326708803?e=1643846400&v=beta&t=q10CTNCG6h5guez1YT0j4j_oLlrGJB_8NugaBOUSAGg', width=300)

    st.subheader('Shihao Xu')
    col1, mid, col2 = st.columns([20, 2, 10])
    with col1:
        st.write('Pronouns: he/him/his')
        st.write('Research/career interests: Autonomous Vehicle')
        st.write('Favorite Food: Dumplings')
        st.write('A painfully boring fact:  Covid is still not gone')
        st.write('Hobbies: photography')
        st.write('Email: [email protected]')
    with col2:
        st.image('https://scontent-ort2-1.xx.fbcdn.net/v/t39.30808-6/261420667_131245119324840_3342182275866550937_n.jpg?_nc_cat=100&ccb=1-5&_nc_sid=730e14&_nc_ohc=IP7khn2w6cwAX_wC85x&_nc_ht=scontent-ort2-1.xx&oh=063c2b6b0ed5e9fc10adb2c391c471cf&oe=61AA72C1', width=300)