Spaces:
Running
on
T4
Running
on
T4
koenverhagen
commited on
Commit
•
0b89ddd
1
Parent(s):
79f0df2
int
Browse files- .gitignore +4 -0
- Dockerfile +12 -0
- createlookalike.py +160 -0
- requirements.txt +38 -0
- server.py +17 -0
- state.json +1 -0
.gitignore
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
venv
|
2 |
+
*.iml
|
3 |
+
*.xml
|
4 |
+
.idea
|
Dockerfile
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# GPU TensorFlow base image matching the keras 2.10 pins in requirements.txt.
FROM tensorflow/tensorflow:2.10.0-gpu

WORKDIR /app

# Working directory for downloaded shop feeds / generated files.
RUN mkdir -p shops

COPY ./requirements.txt /requirements.txt

RUN pip install --no-cache-dir --upgrade -r /requirements.txt

# NOTE(review): no COPY of the application source appears in this file —
# presumably the hosting platform injects it; otherwise server.py would be
# missing at runtime. Also "app.server:app" implies a package at /app/app/
# while WORKDIR is /app — confirm whether "server:app" was intended.
CMD ["uvicorn", "app.server:app", "--host", "0.0.0.0", "--port", "7860"]
|
12 |
+
|
createlookalike.py
ADDED
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import tempfile as tfile
|
2 |
+
from datetime import datetime
|
3 |
+
from urllib.request import urlopen
|
4 |
+
|
5 |
+
import requests
|
6 |
+
from keras.utils import img_to_array
|
7 |
+
from lxml import etree
|
8 |
+
import keras
|
9 |
+
|
10 |
+
from keras.applications.imagenet_utils import decode_predictions, preprocess_input
|
11 |
+
from keras.models import Model
|
12 |
+
from PIL import Image
|
13 |
+
from io import BytesIO
|
14 |
+
|
15 |
+
import numpy as np
|
16 |
+
|
17 |
+
from sklearn.decomposition import PCA
|
18 |
+
from scipy.spatial import distance
|
19 |
+
from collections import OrderedDict
|
20 |
+
from remove import remove_files
|
21 |
+
|
22 |
+
from generate_csv_file import generate_csv_files
|
23 |
+
from load_data import load_data, get_shops
|
24 |
+
|
25 |
+
|
26 |
+
def get_ids_from_feed(feed_url):
    """Download a Google shopping XML feed and extract the product ids.

    Parameters
    ----------
    feed_url : str
        URL of a shop's ``google_xml_feed`` endpoint.

    Returns
    -------
    tuple[list, str]
        ``(list_ids, shop_url)`` — the text of every ``g:mpn`` element in
        the feed, and the shop base URL read from the feed header.
    """
    # Spool the feed to a named temp file so lxml can parse it from disk.
    with tfile.NamedTemporaryFile(mode="w", suffix=".xml", prefix="feed") as temp_file:
        temp_file.write(urlopen(feed_url).read().decode('utf-8'))
        # Bug fix: flush before parsing — without it the buffered tail of
        # the document may not yet be on disk when etree.parse() reads it.
        temp_file.flush()
        tree = etree.parse(temp_file.name)

    root = tree.getroot()

    # NOTE(review): positional access assumes a fixed feed layout
    # (rss/channel with the link as the second child) — verify against
    # the actual feed schema.
    shop_url = root[0][1].text

    # Every g:mpn element holds one design/product id.
    list_ids = [item.text for item in root.findall(".//g:mpn", root.nsmap)]

    return list_ids, shop_url
|
50 |
+
|
51 |
+
|
52 |
+
def get_image(url):
    """Fetch an image over HTTP and prepare it for VGG16 inference.

    Parameters
    ----------
    url : str
        Direct URL of the image.

    Returns
    -------
    tuple
        ``(img, x)`` — the raw 224x224 RGB image as a float array, and a
        batched (1, 224, 224, 3), ``preprocess_input``-normalised copy.
    """
    # NOTE(review): no timeout — a stalled server hangs the whole batch;
    # consider requests.get(url, timeout=...).
    res = requests.get(url)
    im = Image.open(BytesIO(res.content)).convert("RGB").resize((224, 224))
    img = img_to_array(im)
    # Bug fix: the original called img_to_array() a second time on the
    # array, which can return the SAME object; caffe-mode preprocess_input
    # then subtracts the channel means in place, corrupting the returned
    # img. Copy before preprocessing so img stays untouched.
    x = np.expand_dims(img.copy(), axis=0)
    x = preprocess_input(x)
    return img, x
|
60 |
+
|
61 |
+
|
62 |
+
def load_image(url, img_id):
    """Fetch the 224px flat thumbnail for one design id.

    Parameters
    ----------
    url : str
        Shop base URL (from the feed).
    img_id :
        Design/product id to fetch.

    Returns
    -------
    tuple
        ``(img, x)`` as produced by :func:`get_image`.
    """
    # Bug fix: the original printed the *builtin* ``id`` instead of the
    # ``img_id`` argument.
    print('get image url', img_id)
    request_url = '{}/flat_thumb/{}/1/224'.format(url, img_id)
    print('get image', request_url)
    img, x = get_image(request_url)
    return img, x
|
68 |
+
|
69 |
+
|
70 |
+
def create_feature_files():
    """Compute look-a-like data for every shop and persist it.

    Builds a VGG16 ``fc2`` feature extractor, runs :func:`calculate_shop`
    for each shop returned by ``get_shops()``, loads the generated CSV
    files via ``load_data()`` and finally removes the temporary files.
    """
    base_model = keras.applications.VGG16(weights='imagenet', include_top=True)
    extractor = Model(inputs=base_model.input, outputs=base_model.get_layer("fc2").output)

    shops = get_shops()
    results = [calculate_shop(shop, extractor) for shop in shops] if shops else []

    load_data(generate_csv_files(results))
    remove_files()
|
84 |
+
|
85 |
+
|
86 |
+
def calculate_shop(shop, feat_extractor):
    """Compute the ranked look-a-like designs for every design of one shop.

    Parameters
    ----------
    shop : dict
        Must contain ``'id'`` and ``'base_url'``.
    feat_extractor :
        Keras model mapping a preprocessed image batch to fc2 features.

    Returns
    -------
    dict
        ``{'shop_id', 'start_time', 'end_time', 'designs'}`` where
        ``designs`` maps each design id to a distance-sorted list of
        ``{'dist', 'id'}`` neighbour entries (the design itself excluded).
    """
    start = datetime.today()

    list_ids = []
    shop_url = None
    design_json = {}

    if shop['id'] not in ['']:  # temp filter kept from the original
        print(shop['id'], shop['base_url'])
        google_xml_feed_url = '{}/google_xml_feed'.format(shop['base_url'])
        try:
            list_ids, shop_url = get_ids_from_feed(google_xml_feed_url)
        except Exception as e:
            print('could not get images from ', shop['id'], e)

    features = []
    list_of_fitted_designs = []

    # Extract an fc2 feature vector per design; skip designs that fail.
    for design_id in list_ids:
        try:
            img, x = load_image(shop_url, design_id)
            feat = feat_extractor.predict(x)[0]
            features.append(feat)
            list_of_fitted_designs.append(design_id)
        except Exception as e:
            print(design_id, ' failed loading feature extraction', e)

    # Bug fix: initialise pca_features so a PCA failure (e.g. too few
    # samples) no longer leaves it undefined and crashes the loop below
    # with a misleading NameError.
    pca_features = None
    try:
        features = np.array(features)
        # PCA cannot use more components than samples; cap at 300.
        components = min(len(features), 300)
        pca = PCA(n_components=components)
        pca.fit(features)
        pca_features = pca.transform(features)
    except Exception as e:
        print('pca too small?', e)

    # At most 80 neighbours per design (the [1:...] slice below also
    # drops the query design itself, which sorts first with distance 0).
    max_list_per_design = min(len(list_of_fitted_designs), 80)

    if pca_features is not None:
        try:
            for im in list_of_fitted_designs:
                query_image_idx = list_of_fitted_designs.index(im)

                # Cosine distance from this design to every fitted design.
                similar_idx = [distance.cosine(pca_features[query_image_idx], feat)
                               for feat in pca_features]

                filterd_idx = {i: {"dist": similar_idx[i], "id": list_of_fitted_designs[i]}
                               for i in range(len(similar_idx))}

                sorted_dict = dict(sorted(filterd_idx.items(),
                                          key=lambda kv: kv[1]['dist'])[1:max_list_per_design])

                design_json[im] = list(sorted_dict.values())
        except Exception as e:
            print("could not create json with look-a-like for shop:", shop['id'], e)

    end = datetime.today()

    return {'shop_id': shop['id'], 'start_time': start, 'end_time': end, 'designs': design_json}
|
requirements.txt
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
joblib==1.2.0
|
2 |
+
keras==2.10.0
|
3 |
+
lxml==4.9.1
|
4 |
+
numpy==1.23.4
|
5 |
+
scikit-learn==1.1.3
|
6 |
+
scipy==1.9.3
|
7 |
+
|
8 |
+
# keras==2.10.0  (duplicate of the keras pin above)
|
9 |
+
Pillow==9.2.0
|
10 |
+
pandas==1.5.1
|
11 |
+
requests==2.28.1
|
12 |
+
|
13 |
+
python-multipart==0.0.5
|
14 |
+
matplotlib==3.6.1
|
15 |
+
scikit-image==0.19.3
|
16 |
+
six==1.16.0
|
17 |
+
termcolor==2.0.1
|
18 |
+
tensorboard==2.12.1
|
19 |
+
tensorboard-data-server==0.6.1
|
20 |
+
tensorboard-plugin-wit==1.8.1
|
21 |
+
tensorflow-estimator==2.10.0  # match the keras==2.10.0 / TF 2.10 stack
|
22 |
+
|
23 |
+
threadpoolctl==3.1.0
|
24 |
+
tifffile==2022.10.10
|
25 |
+
typing_extensions==4.4.0
|
26 |
+
|
27 |
+
|
28 |
+
google-auth==2.13.0
|
29 |
+
google-auth-oauthlib==0.4.6
|
30 |
+
google-pasta==0.2.0
|
31 |
+
|
32 |
+
|
33 |
+
|
34 |
+
fastapi==0.66.0
|
35 |
+
python-multipart==0.0.5
|
36 |
+
uvicorn==0.14.0
|
37 |
+
|
38 |
+
tensorflow==2.10.0  # must match keras==2.10.0 and the tensorflow:2.10.0-gpu base image
|
server.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
import os

from fastapi import FastAPI, Response, Request, Header, Form, UploadFile, Body
from starlette.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from load_data import get_design_data, get_design_resolutions_for_shop

app = FastAPI()
# CORS is wide open; acceptable for a public read-only API, otherwise
# restrict allow_origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allows all origins
    allow_credentials=True,
    allow_methods=["*"],  # Allows all methods
    allow_headers=["*"],  # Allows all headers
)

# SECURITY fix: the token was hard-coded in source (and the env lookup was
# commented out). Prefer the environment variable; keep the old literal
# only as a fallback so existing deployments keep working. Rotate the
# leaked token and remove the fallback once the env var is set everywhere.
API_TOKEN = os.environ.get("API_TOKEN", '34dsadfF$$%#$TGREGEFGE%Q*)(*&%')
|
state.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"state": 0}
|