Spaces:
Sleeping
Sleeping
testing minimum version
Browse files- app.py +366 -243
- requirements.txt +2 -6
- tools/__init__.py +0 -0
- tools/annotation.py +0 -107
- tools/face_detection.py +0 -481
- tools/face_recognition.py +0 -114
- tools/gallery.py +0 -37
- tools/nametypes.py +0 -33
- tools/pca.py +0 -59
- tools/utils.py +0 -164
app.py
CHANGED
@@ -2,291 +2,414 @@ import streamlit as st
|
|
2 |
import time
|
3 |
from typing import List
|
4 |
from streamlit_webrtc import webrtc_streamer, WebRtcMode
|
5 |
-
import logging
|
6 |
import av
|
7 |
-
import
|
8 |
-
|
9 |
-
import
|
10 |
-
|
11 |
-
|
12 |
-
from
|
13 |
-
|
14 |
-
from
|
15 |
-
from
|
16 |
-
from
|
17 |
|
18 |
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
|
23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
# Set page layout for streamlit to wide
|
25 |
-
st.set_page_config(layout="wide", page_title="
|
26 |
-
with st.sidebar:
|
27 |
-
st.markdown("# Settings")
|
28 |
-
face_rec_on = st_toggle_switch(
|
29 |
-
"Live Face Recognition",
|
30 |
-
key="activate_face_rec",
|
31 |
-
default_value=True,
|
32 |
-
active_color=rgb(255, 75, 75),
|
33 |
-
track_color=rgb(50, 50, 50),
|
34 |
-
label_after=True,
|
35 |
-
)
|
36 |
|
37 |
-
|
38 |
-
|
39 |
-
resolution = st.selectbox(
|
40 |
-
"Webcam Resolution",
|
41 |
-
[(1920, 1080), (1280, 720), (640, 360)],
|
42 |
-
index=2,
|
43 |
-
)
|
44 |
-
st.markdown("Note: To change the resolution, you have to restart the stream.")
|
45 |
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
)
|
73 |
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
|
96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
|
99 |
-
|
100 |
-
|
101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
|
103 |
-
|
104 |
-
stats = Stats()
|
105 |
|
106 |
-
# Start timer for FPS calculation
|
107 |
-
frame_start = time.time()
|
108 |
|
|
|
109 |
# Convert frame to numpy array
|
110 |
frame = frame.to_ndarray(format="rgb24")
|
111 |
|
112 |
-
#
|
113 |
-
|
114 |
-
stats = stats._replace(resolution=resolution)
|
115 |
-
|
116 |
-
if face_rec_on:
|
117 |
-
# Run face detection
|
118 |
-
start = time.time()
|
119 |
-
frame, detections = face_detector(frame)
|
120 |
-
stats = stats._replace(num_faces=len(detections) if detections else 0)
|
121 |
-
stats = stats._replace(detection=(time.time() - start) * 1000)
|
122 |
|
123 |
-
|
124 |
-
|
125 |
-
identities = face_recognizer(frame, detections)
|
126 |
-
stats = stats._replace(recognition=(time.time() - start) * 1000)
|
127 |
|
128 |
-
|
129 |
-
|
130 |
-
matches = face_recognizer.find_matches(identities, gallery)
|
131 |
-
stats = stats._replace(matching=(time.time() - start) * 1000)
|
132 |
|
133 |
-
|
134 |
-
|
135 |
-
frame = annotator(frame, detections, identities, matches, gallery)
|
136 |
-
stats = stats._replace(annotation=(time.time() - start) * 1000)
|
137 |
|
138 |
# Convert frame back to av.VideoFrame
|
139 |
frame = av.VideoFrame.from_ndarray(frame, format="rgb24")
|
140 |
|
141 |
-
|
142 |
-
stats = stats._replace(fps=1 / (time.time() - frame_start))
|
143 |
|
144 |
-
# Send data to other thread
|
145 |
-
transfer_queue.put_nowait([stats, detections, identities, matches])
|
146 |
|
147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
|
|
|
|
|
|
|
|
|
|
|
149 |
|
150 |
-
#
|
151 |
-
|
152 |
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
ctx = webrtc_streamer(
|
157 |
-
key="FaceIDAppDemo",
|
158 |
-
mode=WebRtcMode.SENDRECV,
|
159 |
-
rtc_configuration={"iceServers": get_ice_servers(name=ice_server)},
|
160 |
-
video_frame_callback=video_frame_callback,
|
161 |
-
media_stream_constraints={
|
162 |
-
"video": {
|
163 |
-
"width": {
|
164 |
-
"min": resolution[0],
|
165 |
-
"ideal": resolution[0],
|
166 |
-
"max": resolution[0],
|
167 |
-
},
|
168 |
-
"height": {
|
169 |
-
"min": resolution[1],
|
170 |
-
"ideal": resolution[1],
|
171 |
-
"max": resolution[1],
|
172 |
-
},
|
173 |
-
},
|
174 |
-
"audio": False,
|
175 |
-
},
|
176 |
-
async_processing=True,
|
177 |
-
)
|
178 |
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
with tab_metrics:
|
191 |
-
# Display Detections and Identities
|
192 |
-
st.markdown("**Detection Metrics**")
|
193 |
-
disp_detection_metrics = st.info("No detected faces yet ...")
|
194 |
-
|
195 |
-
# Display Recognition Metrics
|
196 |
-
st.markdown("**Recognition Metrics**")
|
197 |
-
disp_recognition_metrics = st.info("No recognized identities yet ...")
|
198 |
-
|
199 |
-
with tab_pca:
|
200 |
-
# Display 2D and 3D PCA
|
201 |
-
col1, col2 = st.columns(2)
|
202 |
-
col1.markdown("**PCA 2D**")
|
203 |
-
disp_pca3d = col1.info("Only available if more than 1 recognized face ...")
|
204 |
-
col2.markdown("**PCA 3D**")
|
205 |
-
disp_pca2d = col2.info("Only available if more than 1 recognized face ...")
|
206 |
-
freeze_pcas = st.button("Freeze PCAs for Interaction", key="reset_pca")
|
207 |
-
|
208 |
-
# Show PCAs
|
209 |
-
if freeze_pcas and gallery:
|
210 |
-
col1, col2 = st.columns(2)
|
211 |
-
if len(st.session_state.matches) > 1:
|
212 |
-
col1.plotly_chart(
|
213 |
-
pca(
|
214 |
-
st.session_state.matches,
|
215 |
-
st.session_state.identities,
|
216 |
-
gallery,
|
217 |
-
dim=3,
|
218 |
-
),
|
219 |
-
use_container_width=True,
|
220 |
-
)
|
221 |
-
col2.plotly_chart(
|
222 |
-
pca(
|
223 |
-
st.session_state.matches,
|
224 |
-
st.session_state.identities,
|
225 |
-
gallery,
|
226 |
-
dim=2,
|
227 |
-
),
|
228 |
-
use_container_width=True,
|
229 |
)
|
230 |
|
|
|
|
|
|
|
|
|
|
|
231 |
|
232 |
-
#
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
|
|
|
|
237 |
)
|
238 |
-
else:
|
239 |
-
disp_identities_gal.info("No gallery images uploaded yet ...")
|
240 |
|
|
|
|
|
241 |
|
242 |
-
# Display Live Stats
|
243 |
if ctx.state.playing:
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
# Save for PCA Snapshot
|
249 |
-
st.session_state.identities = identities
|
250 |
-
st.session_state.matches = matches
|
251 |
-
|
252 |
-
# Show Stats
|
253 |
-
disp_stats.dataframe(
|
254 |
-
pd.DataFrame([stats]).applymap(lambda x: (format_dflist(x))),
|
255 |
-
use_container_width=True,
|
256 |
-
)
|
257 |
-
|
258 |
-
# Show Detections Metrics
|
259 |
-
if detections:
|
260 |
-
disp_detection_metrics.dataframe(
|
261 |
-
pd.DataFrame(detections).applymap(lambda x: (format_dflist(x))),
|
262 |
-
use_container_width=True,
|
263 |
-
)
|
264 |
-
else:
|
265 |
-
disp_detection_metrics.info("No detected faces yet ...")
|
266 |
-
|
267 |
-
# Show Match Metrics
|
268 |
-
if matches:
|
269 |
-
disp_recognition_metrics.dataframe(
|
270 |
-
pd.DataFrame(matches).applymap(lambda x: (format_dflist(x))),
|
271 |
-
use_container_width=True,
|
272 |
-
)
|
273 |
-
else:
|
274 |
-
disp_recognition_metrics.info("No recognized identities yet ...")
|
275 |
-
|
276 |
-
if len(matches) > 1:
|
277 |
-
disp_pca3d.plotly_chart(pca(matches, identities, gallery, dim=3), use_container_width=True)
|
278 |
-
disp_pca2d.plotly_chart(pca(matches, identities, gallery, dim=2), use_container_width=True)
|
279 |
-
else:
|
280 |
-
disp_pca3d.info("Only available if more than 1 recognized face ...")
|
281 |
-
disp_pca2d.info("Only available if more than 1 recognized face ...")
|
282 |
-
|
283 |
-
# Show Recognized Identities
|
284 |
-
if matches:
|
285 |
-
disp_identities_rec.image(
|
286 |
-
image=[identities[match.identity_idx].face_aligned for match in matches],
|
287 |
-
caption=[gallery[match.gallery_idx].name for match in matches],
|
288 |
-
)
|
289 |
-
else:
|
290 |
-
disp_identities_rec.info("No recognized identities yet ...")
|
291 |
|
292 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
import time
|
3 |
from typing import List
|
4 |
from streamlit_webrtc import webrtc_streamer, WebRtcMode
|
|
|
5 |
import av
|
6 |
+
import numpy as np
|
7 |
+
import onnxruntime as rt
|
8 |
+
import threading
|
9 |
+
import mediapipe as mp
|
10 |
+
import os
|
11 |
+
from twilio.rest import Client
|
12 |
+
import cv2
|
13 |
+
from skimage.transform import SimilarityTransform
|
14 |
+
from types import SimpleNamespace
|
15 |
+
from sklearn.metrics.pairwise import cosine_distances
|
16 |
|
17 |
|
18 |
+
class Detection(SimpleNamespace):
    """Attribute container describing one detected face.

    Built on ``SimpleNamespace``: every keyword passed to the constructor
    becomes an instance attribute, and the class-level values below are the
    fallbacks for attributes that were not passed.
    """

    # Position of the face within the detections of one frame.
    idx: int = None
    # Two corner points, [[x_min, y_min], [x_max, y_max]], in pixels.
    bbox: List[List[float]] = None
    # [x, y] pixel coordinates of the landmarks used for face alignment.
    landmarks: List[List[float]] = None
    # Detector score; None when the detector does not report one.
    confidence: float = None
|
21 |
|
22 |
|
23 |
+
class Identity(SimpleNamespace):
    """Attribute container for one face: its detection, name, embedding and crop.

    Built on ``SimpleNamespace``; keywords passed to the constructor become
    instance attributes and the class-level values are the fallbacks.
    """

    # Kept for backward compatibility with code reading ``Identity.detection``;
    # instances no longer fall back to this shared object (see __init__).
    detection: Detection = Detection()
    name: str = None
    embedding: np.ndarray = None
    face: np.ndarray = None

    def __init__(self, *args, **kwargs) -> None:
        """Create the identity, giving it its own ``Detection`` if none is passed.

        Without this, every identity built without ``detection=`` would share
        the single class-level ``Detection`` object, so mutating it through one
        identity would silently change all the others.
        """
        if "detection" not in kwargs:
            kwargs["detection"] = Detection()
        super().__init__(*args, **kwargs)
|
28 |
+
|
29 |
+
|
30 |
+
class Match(SimpleNamespace):
    """Attribute container pairing a live subject with a gallery identity.

    Built on ``SimpleNamespace``; keywords passed to the constructor become
    instance attributes and the class-level values are the fallbacks.
    """

    # Kept for backward compatibility with code reading these on the class;
    # instances no longer fall back to these shared objects (see __init__).
    subject_id: Identity = Identity()
    gallery_id: Identity = Identity()
    distance: float = None
    name: str = None

    def __init__(self, *args, **kwargs) -> None:
        """Create the match with private placeholder identities where none is passed.

        Prevents instances from sharing the mutable class-level ``Identity``
        defaults.
        """
        for field in ("subject_id", "gallery_id"):
            if field not in kwargs:
                kwargs[field] = Identity()
        super().__init__(*args, **kwargs)
|
35 |
+
|
36 |
+
|
37 |
+
class Grabber(object):
    """Polls a video receiver on a background thread and keeps its latest frame.

    The thread is created here but not started; the owner calls
    ``grabber.thread.start()``.
    """

    def __init__(self, video_receiver) -> None:
        self.capture = video_receiver
        # Stays None until the background thread has fetched a first frame.
        self.currentFrame = None
        # Daemon thread, so it never keeps the interpreter alive on its own.
        self.thread = threading.Thread(target=self.update_frame, daemon=True)

    def update_frame(self) -> None:
        """Thread target: overwrite ``currentFrame`` with each frame received, forever."""
        while True:
            latest = self.capture.get_frame()
            self.currentFrame = latest

    def get_frame(self) -> av.VideoFrame:
        """Return the most recently stored frame (None if none was stored yet)."""
        return self.currentFrame
|
50 |
+
|
51 |
+
|
52 |
+
# Cosine-distance limit below which a subject counts as matching a gallery face
SIMILARITY_THRESHOLD = 1.2

# ICE server list for WebRTC, requested from Twilio with the credentials found in
# the TWILIO_ACCOUNT_SID / TWILIO_AUTH_TOKEN environment variables
# (set in streamlit secrets).
# Ref: https://www.twilio.com/docs/stun-turn/api
ICE_SERVERS = (
    Client(os.environ["TWILIO_ACCOUNT_SID"], os.environ["TWILIO_AUTH_TOKEN"])
    .tokens.create()
    .ice_servers
)

# Wide page layout, title and icon
st.set_page_config(
    layout="wide",
    page_title="Live Face Recognition",
    page_icon=":sunglasses:",
)

# Page skeleton: heading, live-stream area, matches area
st.title("Live Webcam Face Recognition")

st.markdown("**Live Stream**")
ctx_container = st.container()  # hosts the WebRTC component
stream_container = st.empty()  # placeholder for the annotated frames

st.markdown("**Matches**")
matches_container = st.info("No matches found yet ...")

# Face recognizer (ONNX model) and face detector (MediaPipe face mesh)
face_recognizer = rt.InferenceSession(
    "model.fixed.onnx",
    providers=rt.get_available_providers(),
)
face_detector = mp.solutions.face_mesh.FaceMesh(
    max_num_faces=5,
    refine_landmarks=True,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)
|
81 |
+
|
82 |
+
|
83 |
+
def detect_faces(frame: np.ndarray) -> List[Detection]:
    """Detect faces in *frame* with the module-level ``face_detector``.

    Args:
        frame: Image array; ``frame.shape[0]`` is used as the height and
            ``frame.shape[1]`` as the width when converting the detector's
            landmark coordinates to pixels.

    Returns:
        One ``Detection`` per face, carrying its index, a pixel bounding box
        spanning all landmarks, and five selected landmarks. Empty when the
        detector reports no faces.
    """
    result = face_detector.process(frame)

    # Nothing found: the detector leaves multi_face_landmarks empty/None
    if not result.multi_face_landmarks:
        return []

    height, width = frame.shape[0], frame.shape[1]
    # Landmark indices used for alignment
    # (presumably both eyes/irises, nose tip, mouth corners -- TODO confirm)
    alignment_indices = (470, 475, 1, 57, 287)

    found = []
    for face_idx, face in enumerate(result.multi_face_landmarks):
        points = face.landmark

        # The five alignment landmarks in pixel coordinates
        key_points = [[points[i].x * width, points[i].y * height] for i in alignment_indices]

        # Bounding box = extent of all landmarks, truncated to whole pixels
        xs = [point.x * width for point in points]
        ys = [point.y * height for point in points]
        box = [[int(min(xs)), int(min(ys))], [int(max(xs)), int(max(ys))]]

        found.append(
            Detection(
                idx=face_idx,
                bbox=box,
                landmarks=key_points,
                confidence=None,
            )
        )

    return found
|
122 |
+
|
123 |
+
|
124 |
+
def recognize_faces(frame: np.ndarray, detections: List[Detection]) -> List[Identity]:
    """Align each detected face and compute its embedding.

    Args:
        frame: Image the detections were found in.
        detections: Detections whose ``landmarks`` hold the five alignment
            points in pixel coordinates.

    Returns:
        One ``Identity`` per detection, in the same order, holding the
        detection, its embedding and the aligned 112x112 face crop. Empty when
        there are no detections.
    """
    if not detections:
        return []

    # Target landmark coordinates (as used in training). Constant for every
    # face, so it is built once instead of once per detection.
    landmarks_target = np.array(
        [
            [38.2946, 51.6963],
            [73.5318, 51.5014],
            [56.0252, 71.7366],
            [41.5493, 92.3655],
            [70.7299, 92.2041],
        ],
        dtype=np.float32,
    )

    identities = []
    for detection in detections:
        # ALIGNMENT -----------------------------------------------------------
        # Similarity transform that maps the detected landmarks onto the target
        # positions, applied to the frame to obtain a 112x112 crop.
        tform = SimilarityTransform()
        tform.estimate(detection.landmarks, landmarks_target)
        tmatrix = tform.params[0:2, :]
        face_aligned = cv2.warpAffine(frame, tmatrix, (112, 112), borderValue=0.0)
        # ---------------------------------------------------------------------

        # INFERENCE -----------------------------------------------------------
        # Batch of one image, scaled to [0, 1], fed to the ONNX session
        input_image = (np.asarray([face_aligned]).astype(np.float32) / 255.0).clip(0.0, 1.0)
        embedding = face_recognizer.run(None, {"input_image": input_image})[0][0]
        # ---------------------------------------------------------------------

        identities.append(Identity(detection=detection, embedding=embedding, face=face_aligned))

    return identities
|
158 |
+
|
159 |
+
|
160 |
+
def match_faces(subjects: List[Identity], gallery: List[Identity]) -> List[Match]:
    """Pair each subject with its nearest gallery identity by cosine distance.

    Args:
        subjects: Identities computed from the current frame.
        gallery: Reference identities to compare against.

    Returns:
        One ``Match`` for every subject whose smallest cosine distance to the
        gallery is below ``SIMILARITY_THRESHOLD``, ordered by the name of the
        matched gallery identity. Empty when either input is empty.
    """
    if not (len(gallery) and len(subjects)):
        return []

    # Stack the embeddings: rows of the distance matrix are subjects,
    # columns are gallery identities
    gallery_embeddings = np.asarray([entry.embedding for entry in gallery])
    subject_embeddings = np.asarray([entry.embedding for entry in subjects])
    distance_matrix = cosine_distances(subject_embeddings, gallery_embeddings)

    found = []
    for subject, row in zip(subjects, distance_matrix):
        nearest = np.argmin(row)
        if row[nearest] < SIMILARITY_THRESHOLD:
            found.append(
                Match(
                    subject_id=subject,
                    gallery_id=gallery[nearest],
                    distance=row[nearest],
                )
            )

    # Order by the matched gallery name
    found = sorted(found, key=lambda match: match.gallery_id.name)

    return found
|
189 |
+
|
190 |
+
|
191 |
+
def draw_annotations(frame: np.ndarray, detections: List[Detection], matches: List[Match]) -> np.ndarray:
    """Draw FPS, detections and matches onto a 1920x1080 version of *frame*.

    Reads and updates the module-level ``timestamp`` (time of the previous
    call) to compute the displayed FPS; it must be set before the first call.

    Args:
        frame: Image to annotate. It is resized to 1920x1080 first, and all
            detection coordinates are scaled accordingly.
        detections: Faces to outline, with landmarks and index.
        matches: Recognised faces; each gets a green box, a white banner, the
            gallery name and the match distance.

    Returns:
        The resized, annotated image.
    """
    global timestamp

    src_height, src_width = frame.shape[0], frame.shape[1]

    # Upscale frame to 1080p for better visualization of drawn annotations
    frame = cv2.resize(frame, (1920, 1080))
    scale_x, scale_y = 1920 / src_width, 1080 / src_height
    out_height, out_width = frame.shape[0], frame.shape[1]

    frame.flags.writeable = True

    def point(x, y):
        """Truncate a coordinate pair to plain ints, which every OpenCV build accepts."""
        return int(x), int(y)

    # FPS from the time since the previous call; guard against a zero interval
    # (two calls within the clock resolution) instead of dividing by zero.
    now = time.time()
    elapsed = now - timestamp
    fps = 1 / elapsed if elapsed > 0 else 0.0
    timestamp = now

    # Draw FPS
    cv2.putText(
        frame,
        f"FPS: {fps:.1f}",
        (20, 40),
        cv2.FONT_HERSHEY_SIMPLEX,
        1,
        (0, 255, 0),
        2,
    )

    # Draw Detections
    for detection in detections:
        (x_min, y_min), (x_max, y_max) = detection.bbox

        # Draw Landmarks
        for lm_x, lm_y in detection.landmarks:
            cv2.circle(frame, point(lm_x * scale_x, lm_y * scale_y), 2, (255, 255, 255), -1)

        # Draw Bounding Box
        cv2.rectangle(
            frame,
            point(x_min * scale_x, y_min * scale_y),
            point(x_max * scale_x, y_max * scale_y),
            (255, 0, 0),
            2,
        )

        # Draw Index just outside the bottom-right corner.
        # cv2.LINE_AA is a line type, not a font: it belongs in the last
        # argument, with a real font face in the fourth.
        cv2.putText(
            frame,
            str(detection.idx),
            point((x_max + 2) * scale_x, (y_max + 2) * scale_y),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (0, 0, 0),
            2,
            cv2.LINE_AA,
        )

    # Layout offsets derived from the output size
    banner_height = out_height // 25
    text_inset = out_width // 400
    name_offset = out_height // 50
    distance_offset = out_height // 350

    # Draw Matches
    for match in matches:
        detection = match.subject_id.detection
        name = match.gallery_id.name
        (x_min, y_min), (x_max, y_max) = detection.bbox

        # Draw Bounding Box in green
        cv2.rectangle(
            frame,
            point(x_min * scale_x, y_min * scale_y),
            point(x_max * scale_x, y_max * scale_y),
            (0, 255, 0),
            2,
        )

        # Draw Banner: filled white strip directly above the box
        cv2.rectangle(
            frame,
            point(x_min * scale_x, y_min * scale_y - banner_height),
            point(x_max * scale_x, y_min * scale_y),
            (255, 255, 255),
            -1,
        )

        # Draw Name
        cv2.putText(
            frame,
            name,
            point((x_min + text_inset) * scale_x, (y_min - name_offset) * scale_y),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            (0, 0, 0),
            2,
            cv2.LINE_AA,
        )

        # Draw Distance
        cv2.putText(
            frame,
            f" Distance: {match.distance:.2f}",
            point((x_min + text_inset) * scale_x, (y_min - distance_offset) * scale_y),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (0, 0, 0),
            2,
            cv2.LINE_AA,
        )

    return frame
|
|
|
310 |
|
|
|
|
|
311 |
|
312 |
+
def video_frame_callback(frame: av.VideoFrame) -> tuple:
    """Run detection, recognition, matching and annotation on one video frame.

    Args:
        frame: Incoming video frame.

    Returns:
        A 2-tuple ``(annotated_frame, matches)``: the annotated image as an
        ``av.VideoFrame`` and the list of matches found in it. (The previous
        annotation declared a bare ``av.VideoFrame``, which did not match the
        value actually returned.)
    """
    # Convert frame to numpy array
    image = frame.to_ndarray(format="rgb24")

    # Detect -> embed -> match against the module-level gallery
    detections = detect_faces(image)
    subjects = recognize_faces(image, detections)
    matches = match_faces(subjects, gallery)

    # Draw annotations
    image = draw_annotations(image, detections, matches)

    # Convert frame back to av.VideoFrame
    annotated = av.VideoFrame.from_ndarray(image, format="rgb24")

    return annotated, matches
|
|
|
332 |
|
|
|
|
|
333 |
|
334 |
+
# Sidebar for face gallery
|
335 |
+
# NOTE(review): indentation was lost in this copy of the file; the gallery
# loop and the image list are assumed to sit inside the sidebar -- confirm.
with st.sidebar:
    st.markdown("# Face Gallery")
    files = st.sidebar.file_uploader(
        "Upload images to gallery",
        type=["png", "jpg", "jpeg"],
        accept_multiple_files=True,
        label_visibility="collapsed",
    )

    # Build the gallery: one identity per uploaded image that contains a face
    gallery = []
    for file in files:
        # Read file bytes
        file_bytes = np.asarray(bytearray(file.read()), dtype=np.uint8)

        # Decode image. An empty upload makes imdecode raise and a corrupt one
        # makes it return None; skip such files instead of crashing the app.
        decoded = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR) if file_bytes.size else None
        if decoded is None:
            st.warning(f"Could not read image: {file.name}")
            continue

        # Convert from BGR to RGB
        img = cv2.cvtColor(decoded, cv2.COLOR_BGR2RGB)

        # Detect faces
        detections = detect_faces(img)

        if detections:
            # Only the first detected face of each image is used
            subjects = recognize_faces(img, detections[:1])

            # Add subject to gallery, named after the file (extension removed)
            gallery.append(
                Identity(
                    name=os.path.splitext(file.name)[0],
                    embedding=subjects[0].embedding,
                    face=subjects[0].face,
                )
            )

    # Show gallery images
    st.image(
        image=[identity.face for identity in gallery],
        caption=[identity.name for identity in gallery],
    )
|
374 |
|
375 |
+
# Start streaming component
|
376 |
+
with ctx_container:
    ctx = webrtc_streamer(
        key="LiveFaceRecognition",
        mode=WebRtcMode.SENDONLY,
        rtc_configuration={"iceServers": ICE_SERVERS},
        media_stream_constraints={"video": {"width": 1920}, "audio": False},
    )

# Initialize frame grabber
grabber = Grabber(ctx.video_receiver)

if ctx.state.playing:
    # Start frame grabber in background thread
    grabber.thread.start()
    # Reference time for the FPS value computed in draw_annotations
    timestamp = time.time()

    # Single placeholder for the frame timestamp: writing into it replaces the
    # previous value instead of appending a new page element on every frame.
    timestamp_container = st.empty()

    # Start main loop
    while True:
        frame = grabber.get_frame()
        if frame is None:
            # No frame received yet: wait briefly rather than spinning at full speed
            time.sleep(0.01)
            continue

        # Print frame timestamp to streamlit
        timestamp_container.write(f"Frame timestamp: {frame.time}")

        # Run face detection and recognition
        frame, matches = video_frame_callback(frame)

        # Convert frame to numpy array
        frame = frame.to_ndarray(format="rgb24")

        # Show Stream
        stream_container.image(frame, channels="RGB")

        # Show Matches
        if matches:
            matches_container.image(
                image=[match.subject_id.face for match in matches],
                caption=[match.gallery_id.name for match in matches],
            )
        else:
            matches_container.info("No matches found yet ...")
|
requirements.txt
CHANGED
@@ -1,13 +1,9 @@
|
|
1 |
streamlit
|
2 |
scikit-image
|
3 |
scikit-learn
|
4 |
-
mediapipe
|
5 |
opencv-python-headless
|
6 |
watchdog
|
7 |
streamlit-webrtc
|
8 |
-
matplotlib
|
9 |
-
streamlit-toggle-switch
|
10 |
-
tflite-runtime
|
11 |
twilio
|
12 |
-
|
13 |
-
|
|
|
1 |
streamlit
|
2 |
scikit-image
|
3 |
scikit-learn
|
|
|
4 |
opencv-python-headless
|
5 |
watchdog
|
6 |
streamlit-webrtc
|
|
|
|
|
|
|
7 |
twilio
|
8 |
+
onnxruntime
|
9 |
+
mediapipe
|
tools/__init__.py
DELETED
File without changes
|
tools/annotation.py
DELETED
@@ -1,107 +0,0 @@
|
|
1 |
-
import numpy as np
|
2 |
-
import cv2
|
3 |
-
|
4 |
-
|
5 |
-
class Annotation:
    """Callable that draws detections and recognised names onto a frame.

    NOTE(review): indentation was lost in this copy of the file; the nesting
    below (index label drawn for every detection, name loop after the
    detection loop) is reconstructed -- confirm against the original.
    """

    def __init__(self, draw_bbox=True, draw_landmarks=True, draw_name=True, upscale=True):
        """Store which elements to draw and whether to upscale to 1920x1080 first."""
        self.bbox = draw_bbox
        self.landmarks = draw_landmarks
        self.name = draw_name
        self.upscale = upscale

    def __call__(self, frame, detections, identities, matches, gallery):
        """Annotate *frame* and return it.

        Args:
            frame: Image array to draw on (resized first when ``upscale`` is set).
            detections: Faces with ``bbox``, ``landmarks`` and ``idx``.
            identities: Indexed by ``match.identity_idx``; each provides the
                ``detection_idx`` of its detection.
            matches: Matches providing ``identity_idx`` and ``gallery_idx``.
            gallery: Indexed by ``match.gallery_idx``; each provides ``name``.

        Returns:
            The annotated image.
        """
        shape = np.asarray(frame.shape[:2][::-1])
        if self.upscale:
            frame = cv2.resize(frame, (1920, 1080))
            upscale_factor = np.asarray([1920 / shape[0], 1080 / shape[1]])
            shape = np.asarray(frame.shape[:2][::-1])
        else:
            upscale_factor = np.asarray([1, 1])

        frame.flags.writeable = True

        for detection in detections:
            # Draw Landmarks
            if self.landmarks:
                for landmark in detection.landmarks:
                    cv2.circle(
                        frame,
                        (landmark * upscale_factor).astype(int),
                        2,
                        (255, 255, 255),
                        -1,
                    )

            # Draw Bounding Box
            if self.bbox:
                cv2.rectangle(
                    frame,
                    (detection.bbox[0] * upscale_factor).astype(int),
                    (detection.bbox[1] * upscale_factor).astype(int),
                    (255, 0, 0),
                    2,
                )

            # Draw Index. cv2.LINE_AA is a line type, not a font: it goes in
            # the last argument, with a real font face in the fourth.
            cv2.putText(
                frame,
                str(detection.idx),
                (
                    ((detection.bbox[1][0] + 2) * upscale_factor[0]).astype(int),
                    ((detection.bbox[1][1] + 2) * upscale_factor[1]).astype(int),
                ),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.5,
                (0, 0, 0),
                2,
                cv2.LINE_AA,
            )

        # Draw Name
        if self.name:
            for match in matches:
                try:
                    detection = detections[identities[match.identity_idx].detection_idx]
                except IndexError:
                    # The match refers to an identity/detection that is not in
                    # this frame's lists. Skip it: carrying on would draw the
                    # name on whatever detection the variable last held.
                    continue

                cv2.rectangle(
                    frame,
                    (detection.bbox[0] * upscale_factor).astype(int),
                    (detection.bbox[1] * upscale_factor).astype(int),
                    (0, 255, 0),
                    2,
                )

                # Filled white banner directly above the box
                cv2.rectangle(
                    frame,
                    (
                        (detection.bbox[0][0] * upscale_factor[0]).astype(int),
                        (detection.bbox[0][1] * upscale_factor[1] - (shape[1] // 25)).astype(int),
                    ),
                    (
                        (detection.bbox[1][0] * upscale_factor[0]).astype(int),
                        (detection.bbox[0][1] * upscale_factor[1]).astype(int),
                    ),
                    (255, 255, 255),
                    -1,
                )

                cv2.putText(
                    frame,
                    gallery[match.gallery_idx].name,
                    (
                        ((detection.bbox[0][0] + shape[0] // 400) * upscale_factor[0]).astype(int),
                        ((detection.bbox[0][1] - shape[1] // 100) * upscale_factor[1]).astype(int),
                    ),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.5,
                    (0, 0, 0),
                    2,
                    cv2.LINE_AA,
                )

        return frame
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tools/face_detection.py
DELETED
@@ -1,481 +0,0 @@
|
|
1 |
-
import tflite_runtime.interpreter as tflite
|
2 |
-
import cv2
|
3 |
-
import numpy as np
|
4 |
-
from .utils import tflite_inference
|
5 |
-
from .nametypes import Detection
|
6 |
-
from .utils import get_file
|
7 |
-
|
8 |
-
|
9 |
-
# Release location the model files are fetched from
BASE_URL = "https://github.com/Martlgap/FaceIDLight/releases/download/v.0.1/"

# Hash string per model name; passed to get_file together with the model URL
FILE_HASHES = dict(
    o_net="768385d570300648b7b881acbd418146522b79b4771029bb2e684bdd8c764b9f",
    p_net="530183192e24f7cc86b6706e1eb600482c4ed4306399ac939c472e3957bae15e",
    r_net="5ec33b065eb2802bc4c2575d21feff1a56958d854785bc3e2907d3b7ace861a2",
)
|
16 |
-
|
17 |
-
|
18 |
-
class StageStatus:
    """
    Keeps status between MTCNN stages
    """

    def __init__(self, pad_result: tuple = None, width=0, height=0):
        """Store the frame size and, when given, the ten values of *pad_result*.

        :param pad_result: optional 10-tuple assigned by :meth:`update`
        :param width: frame width
        :param height: frame height
        """
        self.width = width
        self.height = height
        # One separate empty list per attribute. A chained assignment
        # (a = b = ... = []) would bind all ten names to the same list, so
        # appending through one attribute would show up in every other one.
        self.dy, self.edy, self.dx, self.edx = [], [], [], []
        self.y, self.ey, self.x, self.ex = [], [], [], []
        self.tmp_w, self.tmp_h = [], []

        if pad_result is not None:
            self.update(pad_result)

    def update(self, pad_result: tuple):
        """Unpack *pad_result* into dy, edy, dx, edx, y, ey, x, ex, tmp_w, tmp_h (in that order)."""
        s = self
        s.dy, s.edy, s.dx, s.edx, s.y, s.ey, s.x, s.ex, s.tmp_w, s.tmp_h = pad_result
|
34 |
-
|
35 |
-
|
36 |
-
class FaceDetection:
|
37 |
-
"""
|
38 |
-
Allows to perform MTCNN Detection ->
|
39 |
-
a) Detection of faces (with the confidence probability)
|
40 |
-
b) Detection of keypoints (left eye, right eye, nose, mouth_left, mouth_right)
|
41 |
-
"""
|
42 |
-
|
43 |
-
def __init__(
|
44 |
-
self,
|
45 |
-
min_face_size: int = 40,
|
46 |
-
steps_threshold: list = None,
|
47 |
-
scale_factor: float = 0.7,
|
48 |
-
min_detections_conf: float = 0.9,
|
49 |
-
):
|
50 |
-
"""
|
51 |
-
Initializes the MTCNN.
|
52 |
-
:param min_face_size: minimum size of the face to detect
|
53 |
-
:param steps_threshold: step's thresholds values
|
54 |
-
:param scale_factor: scale factor
|
55 |
-
"""
|
56 |
-
if steps_threshold is None:
|
57 |
-
steps_threshold = [0.6, 0.7, 0.7] # original mtcnn values [0.6, 0.7, 0.7]
|
58 |
-
self._min_face_size = min_face_size
|
59 |
-
self._steps_threshold = steps_threshold
|
60 |
-
self._scale_factor = scale_factor
|
61 |
-
self.min_detections_conf = min_detections_conf
|
62 |
-
self.p_net = tflite.Interpreter(model_path=get_file(BASE_URL + "p_net.tflite", FILE_HASHES["p_net"]))
|
63 |
-
self.r_net = tflite.Interpreter(model_path=get_file(BASE_URL + "r_net.tflite", FILE_HASHES["r_net"]))
|
64 |
-
self.o_net = tflite.Interpreter(model_path=get_file(BASE_URL + "o_net.tflite", FILE_HASHES["o_net"]))
|
65 |
-
|
66 |
-
def __call__(self, frame):
|
67 |
-
"""
|
68 |
-
Detects bounding boxes from the specified image.
|
69 |
-
:param img: image to process
|
70 |
-
:return: list containing all the bounding boxes detected with their keypoints.
|
71 |
-
|
72 |
-
From MTCNN:
|
73 |
-
# Total boxes (bBoxes for faces)
|
74 |
-
# 1. dim -> Number of found Faces
|
75 |
-
# 2. dim -> x_min, y_min, x_max, y_max, score
|
76 |
-
|
77 |
-
# Points (Landmarks left eye, right eye, nose, left mouth, right mouth)
|
78 |
-
# 1. dim -> Number of found Faces
|
79 |
-
# 2. dim -> x1, x2, x3, x4, x5, y2, y2, y3, y4, y5 Coordinates
|
80 |
-
"""
|
81 |
-
|
82 |
-
height, width, _ = frame.shape
|
83 |
-
stage_status = StageStatus(width=width, height=height)
|
84 |
-
m = 12 / self._min_face_size
|
85 |
-
min_layer = np.amin([height, width]) * m
|
86 |
-
scales = self.__compute_scale_pyramid(m, min_layer)
|
87 |
-
|
88 |
-
# We pipe here each of the stages
|
89 |
-
total_boxes, stage_status = self.__stage1(frame, scales, stage_status)
|
90 |
-
total_boxes, stage_status = self.__stage2(frame, total_boxes, stage_status)
|
91 |
-
bboxes, points = self.__stage3(frame, total_boxes, stage_status)
|
92 |
-
|
93 |
-
# Sort by location (to prevent flickering)
|
94 |
-
sort_idx = np.argsort(bboxes[:, 0])
|
95 |
-
bboxes = bboxes[sort_idx]
|
96 |
-
points = points[sort_idx]
|
97 |
-
|
98 |
-
# Transform to better shape and points now inside bbox
|
99 |
-
detections = []
|
100 |
-
cnt = 0
|
101 |
-
for i in range(bboxes.shape[0]):
|
102 |
-
conf = bboxes[i, -1].astype(np.float32)
|
103 |
-
if conf > self.min_detections_conf:
|
104 |
-
bboxes_c = np.reshape(bboxes[i, :-1], [2, 2]).astype(np.float32)
|
105 |
-
points_c = np.reshape(points[i], [2, 5]).transpose().astype(np.float32)
|
106 |
-
detections.append(
|
107 |
-
Detection(
|
108 |
-
idx=cnt,
|
109 |
-
bbox=list(bboxes_c),
|
110 |
-
landmarks=list(points_c),
|
111 |
-
confidence=conf,
|
112 |
-
)
|
113 |
-
)
|
114 |
-
cnt += 1
|
115 |
-
return frame, detections
|
116 |
-
|
117 |
-
def __compute_scale_pyramid(self, m, min_layer):
    """
    Build the list of image scales used by the first stage.

    :param m: base scale; every entry is ``m`` times a power of ``self._scale_factor``.
    :param min_layer: starting layer size; it is multiplied by ``self._scale_factor``
        after each entry and the loop stops once it drops below 12.
    :return: list of scales, largest power first.
    """
    pyramid = []
    exponent = 0
    remaining = min_layer

    while remaining >= 12:
        pyramid.append(m * np.power(self._scale_factor, exponent))
        remaining = remaining * self._scale_factor
        exponent += 1

    return pyramid
|
127 |
-
|
128 |
-
@staticmethod
def __scale_image(image, scale: float):
    """
    Resize ``image`` by ``scale`` and normalise its pixel values.

    :param image: image array whose shape unpacks as (height, width, channels).
    :param scale: factor applied to both width and height (results are rounded up).
    :return: the resized image with every value mapped through ``(v - 127.5) * 0.0078125``.
    """
    rows, cols, _ = image.shape

    # cv2.resize expects the target size as (width, height)
    target_size = (int(np.ceil(cols * scale)), int(np.ceil(rows * scale)))
    resized = cv2.resize(image, target_size, interpolation=cv2.INTER_AREA)

    # Normalize the image's pixels
    return (resized - 127.5) * 0.0078125
|
147 |
-
|
148 |
-
@staticmethod
def __generate_bounding_box(imap, reg, scale, t):
    """
    Turn a score heatmap into candidate bounding boxes.

    :param imap: 2-D score map; cells with a score of at least ``t`` become candidates.
    :param reg: 3-D array whose last axis holds the four regression offsets per cell.
    :param scale: scale the heatmap was produced at; coordinates are divided by it.
    :param t: score threshold.
    :return: tuple ``(boundingbox, reg)``. ``boundingbox`` has nine columns
        (two corner pairs, score, four offsets) and ``reg`` holds the four offsets per
        candidate. With no candidates the shapes are ``(0, 9)`` and ``(0, 4)``.
    """
    # use heatmap to generate bounding boxes
    stride = 2
    cellsize = 12

    imap = np.transpose(imap)
    dx1 = np.transpose(reg[:, :, 0])
    dy1 = np.transpose(reg[:, :, 1])
    dx2 = np.transpose(reg[:, :, 2])
    dy2 = np.transpose(reg[:, :, 3])

    y, x = np.where(imap >= t)

    # Kept from the reference implementation: with exactly one hit the offset maps are flipped.
    if y.shape[0] == 1:
        dx1 = np.flipud(dx1)
        dy1 = np.flipud(dy1)
        dx2 = np.flipud(dx2)
        dy2 = np.flipud(dy2)

    score = imap[(y, x)]
    reg = np.transpose(np.vstack([dx1[(y, x)], dy1[(y, x)], dx2[(y, x)], dy2[(y, x)]]))

    if reg.size == 0:
        # Four offset columns, so the empty result keeps the nine-column layout
        # (the previous three-column placeholder produced an eight-column array).
        reg = np.empty(shape=(0, 4))

    bb = np.transpose(np.vstack([y, x]))

    q1 = np.fix((stride * bb + 1) / scale)
    q2 = np.fix((stride * bb + cellsize) / scale)
    boundingbox = np.hstack([q1, q2, np.expand_dims(score, 1), reg])

    return boundingbox, reg
|
181 |
-
|
182 |
-
@staticmethod
def __nms(boxes, threshold, method):
    """
    Non Maximum Suppression.

    :param boxes: np array with bounding boxes; columns 0-3 are the corners and column 4 the score.
    :param threshold: overlap ratio above which a lower-scored box is suppressed.
    :param method: NMS method to apply. Available values ('Min', 'Union'); anything other
        than 'Min' is treated as 'Union'.
    :return: integer index array of the boxes to keep, highest score first
        (empty when ``boxes`` is empty).
    """
    if boxes.size == 0:
        return np.empty((0,), dtype=np.intp)

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    s = boxes[:, 4]

    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    sorted_s = np.argsort(s)

    # np.intp is the native index type; the former int16 buffer could not hold
    # indices above 32767.
    pick = np.zeros_like(s, dtype=np.intp)
    counter = 0
    while sorted_s.size > 0:
        # Best remaining box is kept, the rest are compared against it.
        i = sorted_s[-1]
        pick[counter] = i
        counter += 1
        idx = sorted_s[0:-1]

        xx1 = np.maximum(x1[i], x1[idx])
        yy1 = np.maximum(y1[i], y1[idx])
        xx2 = np.minimum(x2[i], x2[idx])
        yy2 = np.minimum(y2[i], y2[idx])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)

        inter = w * h

        if method == "Min":
            o = inter / np.minimum(area[i], area[idx])
        else:
            o = inter / (area[i] + area[idx] - inter)

        sorted_s = idx[o <= threshold]

    return pick[0:counter]
|
232 |
-
|
233 |
-
@staticmethod
def __pad(total_boxes, w, h):
    """
    Compute the crop/paste coordinates needed to cut each box out of a ``w`` x ``h`` image.

    Coordinates are 1-based: callers slice with ``start - 1 : end``.

    :param total_boxes: array whose first four columns are the box corners.
    :param w: image width used as the upper bound for the right edge.
    :param h: image height used as the upper bound for the bottom edge.
    :return: tuple ``(dy, edy, dx, edx, y, ey, x, ex, tmp_w, tmp_h)`` of int32 arrays:
        ``d*``/``ed*`` are the start/end positions inside the box-sized patch,
        ``x``/``y``/``ex``/``ey`` the clipped positions inside the image and
        ``tmp_w``/``tmp_h`` the box sizes.
    """
    # compute the padding coordinates (pad the bounding boxes to square)
    tmp_w = (total_boxes[:, 2] - total_boxes[:, 0] + 1).astype(np.int32)
    tmp_h = (total_boxes[:, 3] - total_boxes[:, 1] + 1).astype(np.int32)
    numbox = total_boxes.shape[0]

    dx = np.ones(numbox, dtype=np.int32)
    dy = np.ones(numbox, dtype=np.int32)
    edx = tmp_w.copy()
    edy = tmp_h.copy()

    x = total_boxes[:, 0].astype(np.int32)
    y = total_boxes[:, 1].astype(np.int32)
    ex = total_boxes[:, 2].astype(np.int32)
    ey = total_boxes[:, 3].astype(np.int32)

    # Box sticks out on the right: shorten the patch span and clip to the image.
    beyond_right = ex > w
    edx[beyond_right] = -ex[beyond_right] + w + tmp_w[beyond_right]
    ex[beyond_right] = w

    # Box sticks out at the bottom.
    beyond_bottom = ey > h
    edy[beyond_bottom] = -ey[beyond_bottom] + h + tmp_h[beyond_bottom]
    ey[beyond_bottom] = h

    # Box starts left of the image: shift the patch start and clip to 1.
    before_left = x < 1
    dx[before_left] = 2 - x[before_left]
    x[before_left] = 1

    # Box starts above the image.
    before_top = y < 1
    dy[before_top] = 2 - y[before_top]
    y[before_top] = 1

    return dy, edy, dx, edx, y, ey, x, ex, tmp_w, tmp_h
|
267 |
-
|
268 |
-
@staticmethod
def __rerec(bbox):
    """
    Grow every box to a square around its centre.

    The array is modified in place and also returned.

    :param bbox: array whose first four columns are the box corners.
    :return: the same array with columns 0-3 replaced by the squared corners.
    """
    # convert bbox to square
    box_h = bbox[:, 3] - bbox[:, 1]
    box_w = bbox[:, 2] - bbox[:, 0]
    side = np.maximum(box_w, box_h)

    # Move the first corner so the centre stays put, then span `side` from it.
    bbox[:, 0] = bbox[:, 0] + box_w * 0.5 - side * 0.5
    bbox[:, 1] = bbox[:, 1] + box_h * 0.5 - side * 0.5
    bbox[:, 2:4] = bbox[:, 0:2] + side[:, np.newaxis]
    return bbox
|
278 |
-
|
279 |
-
@staticmethod
def __bbreg(boundingbox, reg):
    """
    Apply regression offsets to bounding boxes.

    Each corner is shifted by its offset multiplied by the box width (x values) or
    height (y values). ``boundingbox`` is modified in place and also returned.

    :param boundingbox: array whose first four columns are the box corners.
    :param reg: offsets, one row per box with four columns; an array whose second
        dimension is 1 is first reshaped to its last two dimensions.
    :return: the same ``boundingbox`` array with calibrated corners.
    """
    # calibrate bounding boxes
    if reg.shape[1] == 1:
        reg = np.reshape(reg, (reg.shape[2], reg.shape[3]))

    box_w = boundingbox[:, 2] - boundingbox[:, 0] + 1
    box_h = boundingbox[:, 3] - boundingbox[:, 1] + 1

    # Per-column multipliers: width for the x corners, height for the y corners.
    extents = np.transpose(np.vstack([box_w, box_h, box_w, box_h]))
    boundingbox[:, 0:4] = boundingbox[:, 0:4] + reg[:, 0:4] * extents
    return boundingbox
|
293 |
-
|
294 |
-
def __stage1(self, image, scales: list, stage_status: StageStatus):
    """
    First stage of the MTCNN: propose candidate boxes on every pyramid scale.

    :param image: frame to scan.
    :param scales: scale factors; the image is resized and run through ``self.p_net`` once per scale.
    :param stage_status: carries the frame width/height; returned unchanged when no box is found.
    :return: tuple ``(total_boxes, status)``. With candidates, ``total_boxes`` has five
        columns (squared corners truncated to whole numbers, score) and ``status`` is a new
        ``StageStatus`` holding the padding data for them; otherwise the empty nine-column
        array and the incoming ``stage_status`` are returned.
    """
    candidates = np.empty((0, 9))

    for scale in scales:
        resized = self.__scale_image(image, scale)

        # Add the batch axis and swap height/width before inference, swap back afterwards.
        net_input = np.transpose(np.expand_dims(resized, 0), (0, 2, 1, 3))
        net_out = tflite_inference(self.p_net, net_input)
        offsets = np.transpose(net_out[0], (0, 2, 1, 3))
        scores = np.transpose(net_out[1], (0, 2, 1, 3))

        boxes, _ = self.__generate_bounding_box(
            scores[0, :, :, 1].copy(),
            offsets[0, :, :, :].copy(),
            scale,
            self._steps_threshold[0],
        )

        # inter-scale nms
        keep = self.__nms(boxes.copy(), 0.5, "Union")
        if boxes.size > 0 and keep.size > 0:
            candidates = np.append(candidates, boxes[keep, :], axis=0)

    if candidates.shape[0] == 0:
        return candidates, stage_status

    keep = self.__nms(candidates.copy(), 0.7, "Union")
    candidates = candidates[keep, :]

    span_w = candidates[:, 2] - candidates[:, 0]
    span_h = candidates[:, 3] - candidates[:, 1]

    # Shift each corner by its regression offset (columns 5-8) times the box span.
    left = candidates[:, 0] + candidates[:, 5] * span_w
    top = candidates[:, 1] + candidates[:, 6] * span_h
    right = candidates[:, 2] + candidates[:, 7] * span_w
    bottom = candidates[:, 3] + candidates[:, 8] * span_h

    refined = np.transpose(np.vstack([left, top, right, bottom, candidates[:, 4]]))
    refined = self.__rerec(refined.copy())

    refined[:, 0:4] = np.fix(refined[:, 0:4]).astype(np.int32)
    status = StageStatus(
        self.__pad(refined.copy(), stage_status.width, stage_status.height),
        width=stage_status.width,
        height=stage_status.height,
    )

    return refined, status
|
354 |
-
|
355 |
-
def __stage2(self, img, total_boxes, stage_status: StageStatus):
    """
    Second stage of the MTCNN: re-score the stage-one boxes with ``self.r_net``.

    :param img: frame the boxes refer to.
    :param total_boxes: candidate boxes from stage one (first four columns are the corners).
    :param stage_status: padding data computed for ``total_boxes``; returned unchanged.
    :return: tuple ``(total_boxes, stage_status)`` where ``total_boxes`` has five columns
        (corners, score). A box with a non-positive patch size aborts the stage and yields
        an empty ``(0, 5)`` array.
    """
    num_boxes = total_boxes.shape[0]
    if num_boxes == 0:
        return total_boxes, stage_status

    # second stage
    tempimg = np.zeros(shape=(24, 24, 3, num_boxes))

    for k in range(0, num_boxes):
        patch_h = int(stage_status.tmp_h[k])
        patch_w = int(stage_status.tmp_w[k])

        # A degenerate patch cannot be resized (cv2.resize rejects empty input), so give
        # up with an empty result that still has the five-column layout callers index into.
        if patch_h <= 0 or patch_w <= 0:
            return np.empty(shape=(0, 5)), stage_status

        tmp = np.zeros((patch_h, patch_w, 3))

        # Copy the visible part of the box into the zero-padded patch (1-based bounds).
        tmp[
            stage_status.dy[k] - 1 : stage_status.edy[k],
            stage_status.dx[k] - 1 : stage_status.edx[k],
            :,
        ] = img[
            stage_status.y[k] - 1 : stage_status.ey[k],
            stage_status.x[k] - 1 : stage_status.ex[k],
            :,
        ]

        tempimg[:, :, :, k] = cv2.resize(tmp, (24, 24), interpolation=cv2.INTER_AREA)

    tempimg = (tempimg - 127.5) * 0.0078125
    tempimg1 = np.transpose(tempimg, (3, 1, 0, 2))

    out = tflite_inference(self.r_net, tempimg1)

    out0 = np.transpose(out[0])
    out1 = np.transpose(out[1])

    score = out1[1, :]

    ipass = np.where(score > self._steps_threshold[1])

    total_boxes = np.hstack([total_boxes[ipass[0], 0:4].copy(), np.expand_dims(score[ipass].copy(), 1)])

    mv = out0[:, ipass[0]]

    if total_boxes.shape[0] > 0:
        pick = self.__nms(total_boxes, 0.7, "Union")
        total_boxes = total_boxes[pick, :]
        total_boxes = self.__bbreg(total_boxes.copy(), np.transpose(mv[:, pick]))
        total_boxes = self.__rerec(total_boxes.copy())

    return total_boxes, stage_status
|
413 |
-
|
414 |
-
def __stage3(self, img, total_boxes, stage_status: StageStatus):
    """
    Third stage of the MTCNN: final scoring and landmark regression with ``self.o_net``.

    :param img: frame the boxes refer to.
    :param total_boxes: boxes surviving stage two.
    :param stage_status: supplies the frame width/height used to recompute the padding.
    :return: tuple ``(total_boxes, points)``; ``total_boxes`` has five columns (corners,
        score) and ``points`` has one row of ten landmark values per box. When nothing is
        left, empty ``(0, 5)`` and ``(0, 10)`` arrays are returned so callers can keep
        indexing columns.
    """
    num_boxes = total_boxes.shape[0]
    if num_boxes == 0:
        return np.empty(shape=(0, 5)), np.empty(shape=(0, 10))

    total_boxes = np.fix(total_boxes).astype(np.int32)

    status = StageStatus(
        self.__pad(total_boxes.copy(), stage_status.width, stage_status.height),
        width=stage_status.width,
        height=stage_status.height,
    )

    tempimg = np.zeros((48, 48, 3, num_boxes))

    for k in range(0, num_boxes):
        patch_h = int(status.tmp_h[k])
        patch_w = int(status.tmp_w[k])

        # A degenerate patch cannot be resized (cv2.resize rejects empty input).
        if patch_h <= 0 or patch_w <= 0:
            return np.empty(shape=(0, 5)), np.empty(shape=(0, 10))

        tmp = np.zeros((patch_h, patch_w, 3))

        # Copy the visible part of the box into the zero-padded patch (1-based bounds).
        tmp[status.dy[k] - 1 : status.edy[k], status.dx[k] - 1 : status.edx[k], :] = img[
            status.y[k] - 1 : status.ey[k], status.x[k] - 1 : status.ex[k], :
        ]

        tempimg[:, :, :, k] = cv2.resize(tmp, (48, 48), interpolation=cv2.INTER_AREA)

    tempimg = (tempimg - 127.5) * 0.0078125
    tempimg1 = np.transpose(tempimg, (3, 1, 0, 2))

    out = tflite_inference(self.o_net, tempimg1)
    out0 = np.transpose(out[0])
    out1 = np.transpose(out[1])
    out2 = np.transpose(out[2])

    score = out2[1, :]

    points = out1

    ipass = np.where(score > self._steps_threshold[2])

    points = points[:, ipass[0]]

    total_boxes = np.hstack([total_boxes[ipass[0], 0:4].copy(), np.expand_dims(score[ipass].copy(), 1)])

    mv = out0[:, ipass[0]]

    w = total_boxes[:, 2] - total_boxes[:, 0] + 1
    h = total_boxes[:, 3] - total_boxes[:, 1] + 1

    # Landmarks come out relative to the box; rows 0-4 are scaled by the width and
    # rows 5-9 by the height, then offset by the box origin.
    points[0:5, :] = np.tile(w, (5, 1)) * points[0:5, :] + np.tile(total_boxes[:, 0], (5, 1)) - 1
    points[5:10, :] = np.tile(h, (5, 1)) * points[5:10, :] + np.tile(total_boxes[:, 1], (5, 1)) - 1

    if total_boxes.shape[0] > 0:
        total_boxes = self.__bbreg(total_boxes.copy(), np.transpose(mv))
        pick = self.__nms(total_boxes.copy(), 0.7, "Min")
        total_boxes = total_boxes[pick, :]
        points = points[:, pick]

    return total_boxes, points.transpose()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tools/face_recognition.py
DELETED
@@ -1,114 +0,0 @@
|
|
1 |
-
from .utils import tflite_inference
|
2 |
-
from .nametypes import Identity, Match
|
3 |
-
from sklearn.metrics.pairwise import cosine_distances
|
4 |
-
import numpy as np
|
5 |
-
import cv2
|
6 |
-
from skimage.transform import SimilarityTransform
|
7 |
-
from .utils import get_file
|
8 |
-
import tflite_runtime.interpreter as tflite
|
9 |
-
from typing import Literal
|
10 |
-
|
11 |
-
|
12 |
-
# Location the "<model_name>.tflite" files are downloaded from.
BASE_URL = "https://github.com/Martlgap/FaceIDLight/releases/download/v.0.1/"

# Expected digest of each downloadable model, keyed by model name.
FILE_HASHES = dict(
    mobileNet="6c19b789f661caa8da735566490bfd8895beffb2a1ec97a56b126f0539991aa6",
    resNet="f4d8b0194957a3ad766135505fc70a91343660151a8103bbb6c3b8ac34dbb4e2",
)
|
18 |
-
|
19 |
-
|
20 |
-
class FaceRecognition:
    """Compute face embeddings with a TFLite model and match them against a gallery."""

    def __init__(
        self,
        min_similarity: float = 0.67,
        model_name: Literal["mobileNet", "resNet50"] = "mobileNet",
    ):
        """
        Load the recognition model.

        :param min_similarity: threshold used by ``find_matches``; despite the name it is
            compared against the cosine *distance*, so smaller values are stricter.
        :param model_name: name of the model file (without ``.tflite``) to download.
        """
        self.min_similarity = min_similarity
        # NOTE(review): FILE_HASHES is looked up with model_name, so a name without an
        # entry there (e.g. "resNet50" if only "resNet" is registered) raises KeyError.
        self.model = tflite.Interpreter(model_path=get_file(BASE_URL + f"{model_name}.tflite", FILE_HASHES[model_name]))

    def __call__(self, frame, detections):
        """
        Compute one identity (embedding plus aligned face crop) per detection.

        :param frame: image the detections were found in.
        :param detections: detections providing ``landmarks`` and ``idx``.
        :return: list of ``Identity`` entries in the order of ``detections``
            (empty when there are no detections).
        """
        # Align Faces
        faces_aligned = [self.align(frame, detection.landmarks) for detection in detections]

        # Do Inference
        if not faces_aligned:
            return []

        # Normalize images from [0, 255] to [0, 1]
        faces_aligned_norm = np.asarray(faces_aligned).astype(np.float32) / 255.0

        embs_det = tflite_inference(self.model, faces_aligned_norm)
        embs_det = np.asarray(embs_det[0])

        # Save Identities
        return [
            Identity(
                detection_idx=detection.idx,
                embedding=embedding,
                face_aligned=face_aligned,
            )
            for detection, embedding, face_aligned in zip(detections, embs_det, faces_aligned)
        ]

    def find_matches(self, identities, gallery):
        """
        Match every identity against its closest gallery entry.

        :param identities: identities produced by ``__call__``.
        :param gallery: gallery identities providing ``embedding`` and ``name``.
        :return: list of ``Match`` entries, sorted by gallery index, for the identities
            whose smallest cosine distance is below ``self.min_similarity``.
        """
        if len(gallery) == 0 or len(identities) == 0:
            return []

        # Get Embeddings
        embs_gal = np.asarray([identity.embedding for identity in gallery])
        embs_det = np.asarray([identity.embedding for identity in identities])

        # Calculate Cosine Distances
        cos_distances = cosine_distances(embs_det, embs_gal)

        # Find Matches
        matches = []
        for ident_idx, identity in enumerate(identities):
            dist_to_identity = cos_distances[ident_idx]
            idx_min = np.argmin(dist_to_identity)
            if dist_to_identity[idx_min] < self.min_similarity:
                matches.append(
                    Match(
                        identity_idx=identity.detection_idx,
                        gallery_idx=idx_min,
                        distance=dist_to_identity[idx_min],
                        name=gallery[idx_min].name,
                    )
                )

        # Sort Matches by gallery_idx
        matches = sorted(matches, key=lambda match: match.gallery_idx)

        return matches

    @staticmethod
    def align(img, landmarks_source, target_size=(112, 112)):
        """
        Warp ``img`` so the given landmarks land on fixed reference positions.

        :param img: source image.
        :param landmarks_source: five (x, y) landmark points found in ``img``.
        :param target_size: size of the output image passed to ``cv2.warpAffine``.
        :return: the warped face image.
        """
        # Reference landmark positions inside the aligned image.
        landmarks_target = np.array(
            [
                [38.2946, 51.6963],
                [73.5318, 51.5014],
                [56.0252, 71.7366],
                [41.5493, 92.3655],
                [70.7299, 92.2041],
            ],
            dtype=np.float32,
        )
        tform = SimilarityTransform()
        tform.estimate(np.asarray(landmarks_source), landmarks_target)
        # warpAffine takes the top two rows of the 3x3 transform.
        tmatrix = tform.params[0:2, :]
        face_aligned = cv2.warpAffine(img, tmatrix, target_size, borderValue=0.0)
        return face_aligned
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tools/gallery.py
DELETED
@@ -1,37 +0,0 @@
|
|
1 |
-
from .face_detection import FaceDetection
|
2 |
-
from .face_recognition import FaceRecognition
|
3 |
-
from .nametypes import Identity
|
4 |
-
import cv2
|
5 |
-
import os
|
6 |
-
import numpy as np
|
7 |
-
|
8 |
-
|
9 |
-
def init_gallery(files, min_detections_conf=0.8, min_similarity=0.67, model_name="mobileNet"):
    """
    Build the gallery of known identities from uploaded image files.

    Files that cannot be decoded as an image, or in which no face is detected, are
    skipped. When several faces are detected only the first detection is used.

    :param files: file-like objects providing ``read()`` and a ``name`` attribute.
    :param min_detections_conf: forwarded to ``FaceDetection``.
    :param min_similarity: forwarded to ``FaceRecognition``.
    :param model_name: forwarded to ``FaceRecognition``.
    :return: list of ``Identity`` entries named after the file name without its extension.
    """
    face_detector = FaceDetection(min_detections_conf=min_detections_conf)
    face_recognizer = FaceRecognition(model_name=model_name, min_similarity=min_similarity)

    gallery = []
    for file in files:
        file_bytes = np.asarray(bytearray(file.read()), dtype=np.uint8)
        decoded = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
        if decoded is None:
            # Not a readable image; cvtColor would raise on None.
            continue
        img = cv2.cvtColor(decoded, cv2.COLOR_BGR2RGB)

        # Face Detection
        img, detections = face_detector(img)
        if not detections:
            continue

        # Face Recognition (first detection only)
        identities = face_recognizer(img, detections[:1])

        # Add to gallery
        gallery.append(
            Identity(
                name=os.path.splitext(file.name)[0],
                embedding=identities[0].embedding,
                face_aligned=identities[0].face_aligned,
            )
        )

    return gallery
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tools/nametypes.py
DELETED
@@ -1,33 +0,0 @@
|
|
1 |
-
from typing import List, NamedTuple, Optional

import numpy as np
|
3 |
-
|
4 |
-
|
5 |
-
class Detection(NamedTuple):
    """One detected face. Every field defaults to ``None``."""

    # Running index of the detection.
    idx: Optional[int] = None
    # Bounding box as a list of coordinate pairs.
    bbox: Optional[List[List[float]]] = None
    # Landmark points as a list of coordinate pairs.
    landmarks: Optional[List[List[float]]] = None
    # Detection score.
    confidence: Optional[float] = None
|
10 |
-
|
11 |
-
|
12 |
-
class Identity(NamedTuple):
    """A face embedding together with its aligned crop. Every field defaults to ``None``."""

    # Index of the detection this identity was computed from.
    detection_idx: Optional[int] = None
    # Display name.
    name: Optional[str] = None
    # Embedding vector.
    embedding: Optional[np.ndarray] = None
    # Aligned face image.
    face_aligned: Optional[np.ndarray] = None
|
17 |
-
|
18 |
-
|
19 |
-
class Stats(NamedTuple):
    """Per-frame statistics."""

    fps: float = 0
    # NOTE: this default list is a single object shared by every instance that does not
    # pass its own value; replace it rather than mutating it in place.
    resolution: List[Optional[int]] = [None, None, None]
    num_faces: int = 0
    # Remaining fields default to None until a value is supplied.
    detection: Optional[float] = None
    recognition: Optional[float] = None
    matching: Optional[float] = None
    annotation: Optional[float] = None
|
27 |
-
|
28 |
-
|
29 |
-
class Match(NamedTuple):
    """Pairing of an identity with a gallery entry. Every field defaults to ``None``."""

    # Index of the matched identity.
    identity_idx: Optional[int] = None
    # Index of the matched entry in the gallery.
    gallery_idx: Optional[int] = None
    # Distance between the two embeddings.
    distance: Optional[float] = None
    # Name of the matched gallery entry.
    name: Optional[str] = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tools/pca.py
DELETED
@@ -1,59 +0,0 @@
|
|
1 |
-
from sklearn.decomposition import PCA
|
2 |
-
import numpy as np
|
3 |
-
import plotly.express as px
|
4 |
-
|
5 |
-
|
6 |
-
def pca(matches, identities, gallery, dim=3):
    """
    Project the embeddings of matched faces with PCA and plot them.

    For every match the gallery embedding and the detected embedding are placed next to
    each other and both points are coloured by the gallery name.

    Args:
        matches: matches providing ``gallery_idx`` and ``identity_idx``.
        identities: identities indexed by ``match.identity_idx``.
        gallery: gallery identities indexed by ``match.gallery_idx``.
        dim: number of PCA components; 2 or 3.
    Returns:
        A plotly figure: a 3D scatter for ``dim == 3``, a 2D scatter for ``dim == 2``.
    Raises:
        ValueError: if ``dim`` is neither 2 nor 3, or ``matches`` is empty.
    """
    # Fail fast before doing any numerical work.
    if dim not in (2, 3):
        raise ValueError("dim must be either 2 or 3")
    if len(matches) == 0:
        raise ValueError("matches must not be empty")

    # Get Gallery and Detection Embeddings and stitch them together in groups
    embeddings = np.concatenate(
        [[gallery[match.gallery_idx].embedding, identities[match.identity_idx].embedding] for match in matches],
        axis=0,
    )

    # Get Identity Names and stitch them together in groups
    identity_names = np.concatenate(
        [[gallery[match.gallery_idx].name, gallery[match.gallery_idx].name] for match in matches],
        axis=0,
    )

    # Do PCA
    model = PCA(n_components=dim)
    model.fit(embeddings)
    embeddings_pca = model.transform(embeddings)

    if dim == 3:
        fig = px.scatter_3d(
            embeddings_pca,
            x=0,
            y=1,
            z=2,
            opacity=0.7,
            color=identity_names,
            color_discrete_sequence=px.colors.qualitative.Vivid,
        )
        fig.update_traces(marker=dict(size=4))
    else:
        fig = px.scatter(
            embeddings_pca,
            x=0,
            y=1,
            opacity=0.7,
            color=identity_names,
            color_discrete_sequence=px.colors.qualitative.Vivid,
        )
        fig.update_traces(marker=dict(size=4))
        fig.update_xaxes(showgrid=True)
        fig.update_yaxes(showgrid=True)
    fig.update_layout(margin=dict(l=0, r=0, b=0, t=0))

    return fig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tools/utils.py
DELETED
@@ -1,164 +0,0 @@
|
|
1 |
-
import logging
|
2 |
-
import os
|
3 |
-
import streamlit as st
|
4 |
-
from twilio.rest import Client
|
5 |
-
import os
|
6 |
-
import numpy as np
|
7 |
-
import hashlib
|
8 |
-
import tempfile
|
9 |
-
import os
|
10 |
-
import hashlib
|
11 |
-
from tqdm import tqdm
|
12 |
-
from zipfile import ZipFile
|
13 |
-
from urllib.request import urlopen
|
14 |
-
|
15 |
-
|
16 |
-
logger = logging.getLogger(__name__)
|
17 |
-
|
18 |
-
|
19 |
-
@st.cache_data
def get_ice_servers(name="twilio"):
    """Return the ICE server configuration for the chosen provider.

    Args:
        name: "twilio" or "metered".
    Returns:
        List of ICE servers. When the provider's credentials are missing from the
        environment, a single Google STUN server is returned instead.
    Raises:
        ValueError: if ``name`` is not a known provider.
    """
    if name not in ("twilio", "metered"):
        raise ValueError(f"Unknown name: {name}")

    if name == "twilio":
        # Ref: https://www.twilio.com/docs/stun-turn/api
        try:
            account_sid = os.environ["TWILIO_ACCOUNT_SID"]
            auth_token = os.environ["TWILIO_AUTH_TOKEN"]
        except KeyError:
            logger.warning("Twilio credentials are not set. Fallback to a free STUN server from Google.")
            return [{"urls": ["stun:stun.l.google.com:19302"]}]

        return Client(account_sid, auth_token).tokens.create().ice_servers

    # name == "metered"
    try:
        username = os.environ["METERED_USERNAME"]
        credential = os.environ["METERED_CREDENTIAL"]
    except KeyError:
        logger.warning("Metered credentials are not set. Fallback to a free STUN server from Google.")
        return [{"urls": ["stun:stun.l.google.com:19302"]}]

    turn_addresses = (
        "turn:a.relay.metered.ca:80",
        "turn:a.relay.metered.ca:80?transport=tcp",
        "turn:a.relay.metered.ca:443",
        "turn:a.relay.metered.ca:443?transport=tcp",
    )
    # One unauthenticated STUN entry followed by the credentialed TURN entries.
    ice_servers = [{"url": "stun:a.relay.metered.ca:80", "urls": "stun:a.relay.metered.ca:80"}]
    for address in turn_addresses:
        ice_servers.append(
            {
                "url": address,
                "username": username,
                "urls": address,
                "credential": credential,
            }
        )
    return ice_servers
|
78 |
-
|
79 |
-
|
80 |
-
# Function to format floats within a list
|
81 |
-
def format_dflist(val):
    """Format floats as strings with two decimals, descending into lists and arrays.

    Lists come back as lists and numpy arrays as numpy arrays; ``np.float32`` and
    ``float`` values become ``"x.xx"`` strings and every other value is returned as is.
    """
    if isinstance(val, list):
        return list(map(format_dflist, val))
    if isinstance(val, np.ndarray):
        return np.asarray(list(map(format_dflist, val)))
    if isinstance(val, (np.float32, float)):
        return f"{val:.2f}"
    return val
|
92 |
-
|
93 |
-
|
94 |
-
def rgb(r, g, b):
    """Return the ``#rrggbb`` hex colour string for three integer channel values."""
    return f"#{r:02x}{g:02x}{b:02x}"
|
96 |
-
|
97 |
-
|
98 |
-
def tflite_inference(model, img):
    """Run ``img`` through a tflite interpreter and collect every output tensor.

    :param model: a tflite.Interpreter loaded with a model
    :param img: image or batch of images; non-array input is converted to an array and a
        3-dimensional input gets a leading batch axis
    :return: list of outputs of the model, in the order of its output details
    """
    batch = img if isinstance(img, np.ndarray) else np.asarray(img)

    # A single image becomes a batch of one.
    if batch.ndim == 3:
        batch = batch[np.newaxis, ...]

    input_details = model.get_input_details()
    output_details = model.get_output_details()
    input_index = input_details[0]["index"]

    # The input tensor is resized to the batch on every call, then re-allocated.
    model.resize_tensor_input(input_index, batch.shape)
    model.allocate_tensors()
    model.set_tensor(input_index, batch.astype(np.float32))
    model.invoke()

    outputs = []
    for detail in output_details:
        outputs.append(model.get_tensor(detail["index"]))
    return outputs
|
119 |
-
|
120 |
-
|
121 |
-
def get_file(origin, file_hash, is_zip=False):
    """Return a local copy of ``origin``, downloading it when needed.

    The file is cached under ``<tempdir>/FaceIDLight/<file name>``. It is downloaded when
    no cached copy exists or when the cached copy's SHA-256 digest differs from
    ``file_hash``.

    :param origin: URL of the file; the part after the last ``/`` is used as file name
    :param file_hash: expected SHA-256 hex digest of the file
    :param is_zip: if true, extract the archive next to it and return the extraction
        directory (the file path without its extension) instead of the file path
    :return: path of the cached file, or of the extraction directory for zip archives
    """
    file_name = origin.split("/")[-1]
    tmp_file = os.path.join(tempfile.gettempdir(), "FaceIDLight", file_name)
    os.makedirs(os.path.dirname(tmp_file), exist_ok=True)

    download = True
    if os.path.exists(tmp_file):
        hasher = hashlib.sha256()
        with open(tmp_file, "rb") as file:
            for chunk in iter(lambda: file.read(65535), b""):
                hasher.update(chunk)
        if hasher.hexdigest() == file_hash:
            download = False
        else:
            print(
                "A local file was found, but it seems to be incomplete or outdated because the file hash does not "
                "match the original value of " + file_hash + " so data will be downloaded."
            )

    if download:
        # Both the HTTP response and the output file are closed even if the transfer fails.
        with urlopen(origin) as response, open(tmp_file, "wb") as raw_file:
            with tqdm.wrapattr(
                raw_file,
                "write",
                miniters=1,
                desc="Downloading " + file_name + " to: " + tmp_file,
                total=getattr(response, "length", None),
            ) as file:
                for chunk in response:
                    file.write(chunk)

    if is_zip:
        # splitext only strips the extension; splitting the whole path on "." would cut
        # it at the first dot of any directory name.
        extract_dir = os.path.splitext(tmp_file)[0]
        with ZipFile(tmp_file, "r") as zipObj:
            zipObj.extractall(extract_dir)
        tmp_file = extract_dir
    return tmp_file
|
157 |
-
|
158 |
-
|
159 |
-
def get_hash(filepath):
    """Return the SHA-256 hex digest of the file at ``filepath``.

    The file is read in chunks of 65535 bytes, so it is never loaded as a whole.
    """
    digest = hashlib.sha256()
    with open(filepath, "rb") as stream:
        while True:
            block = stream.read(65535)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|