import asyncio
import tempfile as tfile
from collections import OrderedDict
from datetime import datetime
from io import BytesIO
from urllib.request import urlopen

import keras
import numpy as np
import requests
from keras.applications.imagenet_utils import preprocess_input
from keras.models import Model
from keras.utils import img_to_array
from lxml import etree
from PIL import Image
from scipy.spatial import distance
from sklearn.decomposition import PCA

# from remove import remove_files
# from generate_csv_file import generate_csv_files
# from load_data import load_data, get_shops
from schemas import Shop


def get_ids_from_feed(feed_url):
    """Download the Google XML feed and return the design ids plus the shop base url."""
    # write the feed to a temporary xml file so lxml can parse it from disk
    temp_file = tfile.NamedTemporaryFile(mode="w", suffix=".xml", prefix="feed")
    temp_file.write(urlopen(feed_url).read().decode("utf-8"))
    temp_file.flush()  # ensure the content is on disk before parsing

    tree = etree.parse(temp_file.name)
    temp_file.close()
    root = tree.getroot()

    # the shop base url is the second child of the channel; the ids are the g:mpn entries
    list_ids = []
    shop_url = root[0][1].text
    for item in root.findall(".//g:mpn", root.nsmap):
        list_ids.append(item.text)
    return list_ids, shop_url


def get_image(url):
    """Fetch an image, resize it to 224x224 and return it with a preprocessed batch tensor."""
    res = requests.get(url)
    im = Image.open(BytesIO(res.content)).convert("RGB").resize((224, 224))
    img = img_to_array(im)
    x = np.expand_dims(img, axis=0)
    x = preprocess_input(x)
    return img, x


def load_image(url, img_id):
    print('get image url', img_id)
    request_url = '{}/flat_thumb/{}/1/224'.format(url, img_id)
    print('get image', request_url)
    img, x = get_image(request_url)
    return img, x


async def create_feature_files(shop: Shop):
    # use the fc2 layer of VGG16 as a 4096-dimensional feature extractor
    model = keras.applications.VGG16(weights='imagenet', include_top=True)
    feat_extractor = Model(inputs=model.input, outputs=model.get_layer("fc2").output)
    await calculate_shop(shop, feat_extractor)


async def calculate_shop(shop: Shop, feat_extractor) -> None:
    start = datetime.today()
    if shop.id:  # temp
        print(shop.id, shop.base_url)

    google_xml_feed_url = '{}/google_xml_feed'.format(shop.base_url)
    try:
        list_ids, shop_url = get_ids_from_feed(google_xml_feed_url)
    except Exception as e:
        list_ids = []
        print('could not get images from', shop.id, e)

    features = []
    list_of_fitted_designs = []
    design_json = {}
    if len(list_ids) > 0:
        # extract a feature vector for (at most) the first 100 designs
        for design_id in list_ids[:100]:
            try:
                img, x = load_image(shop_url, design_id)
                feat = feat_extractor.predict(x)[0]
                features.append(feat)
                list_of_fitted_designs.append(design_id)
            except Exception as e:
                print(design_id, 'failed loading feature extraction', e)

        try:
            # reduce the fc2 features with PCA (at most 300 components)
            features = np.array(features)
            components = len(features) if len(features) < 300 else 300
            pca = PCA(n_components=components)
            pca.fit(features)
            pca_features = pca.transform(features)
        except Exception as e:
            print('pca too small?', e)

        # keep at most 80 look-a-likes per design
        max_list_per_design = min(len(list_of_fitted_designs), 80)

        try:
            for im in list_of_fitted_designs:
                # cosine distance between the query design and every other design
                query_image_idx = list_of_fitted_designs.index(im)
                similar_idx = [distance.cosine(pca_features[query_image_idx], feat) for feat in pca_features]

                filtered_idx = dict()
                for i in range(len(similar_idx)):
                    filtered_idx[i] = {"dist": similar_idx[i], "id": list_of_fitted_designs[i]}

                # sort by distance and drop the first entry (the query design itself)
                sorted_dict = dict(
                    OrderedDict(sorted(filtered_idx.items(), key=lambda i: i[1]['dist'])[1:max_list_per_design]))

                design_list = []
                for k, v in sorted_dict.items():
                    design_list.append(v)
                design_dict = {"shop_id": shop.id, "design": im, "results": design_list}
                print(design_dict)

                # push the look-a-like list for this design to the shop's webhook
                try:
                    response = requests.post(shop.webhook_url, json=design_dict)
                    if response.status_code == 200:
                        print(f"result for {im} is sent to {shop.webhook_url}")
                    else:
                        print(f"Error sending data to {shop.webhook_url} for result {im}: {response.status_code}")
                except Exception as e:
                    print(f"Error sending data to {shop.webhook_url} for result {im}:", e)
                # idx_closest = sorted(range(len(similar_idx)), key=lambda k: similar_idx[k])
                # design_json.update(design_dict)
                # print(idx_closest)
        except Exception as e:
            print("could not create json with look-a-like for shop:", shop.id, e)

    end = datetime.today()
    # return {'shop_id': shop.id, 'start_time': start, 'end_time': end, 'designs': design_json}