federico committed on
Commit
9d11120
•
1 Parent(s): 5beb0bf

Starting commit, requirements missing

ai/detection.py ADDED
@@ -0,0 +1,293 @@

from utils.my_utils import rescale_bb, rescale_key_points, delete_items_from_array_aux, enlarge_bb
from utils.labels import coco_category_index, face_category_index
import time
import numpy as np


def detect(model, image, min_score_thresh, new_old_shape):
    """
    Detect objects in the image by running the model

    Args:
        :model (tensorflow.python.saved_model): The TensorFlow object detection model
        :image (numpy.ndarray): The image that is given as input to the object detection model
        :min_score_thresh (float): The minimum score for the detections (detections with a score lower than this value will be discarded)
        :new_old_shape (tuple): The first element is the right padding (applied by the resize_preserving_ar() function),
            the second element is the bottom padding (applied by resize_preserving_ar()) and
            the third element is a tuple with the shape of the resized image without the padding (needed for the
            coordinate conversions that follow)

    Returns:
        :detections (dict): dictionary with detection scores, classes, centroids and bounding box coordinates ordered by score in descending order
        :inference_time (float): inference time for one image expressed in seconds
    """
    image = np.array(image).astype(np.uint8)
    input_tensor = np.expand_dims(image, axis=0)

    start_time = time.time()
    det = model(input_tensor)
    end_time = time.time()

    detections = filter_detections(det, min_score_thresh, image.shape, new_old_shape)
    inference_time = end_time - start_time
    return detections, inference_time


def filter_detections(detections, min_score_thresh, shape, new_old_shape=None):
    """
    Filter the detections based on a minimum threshold value and adjust the bounding box coordinates if the image was resized for the detection

    Args:
        :detections (dict): The dictionary output by the model
        :min_score_thresh (float): The minimum score for the detections (detections with a score lower than this value will be discarded)
        :shape (tuple): The shape of the image
        :new_old_shape (tuple): The first element is the right padding (applied by the resize_preserving_ar() function),
            the second element is the bottom padding (applied by resize_preserving_ar()) and
            the third element is a tuple with the shape of the resized image without the padding (needed for the
            coordinate conversions that follow)
            (default is None)

    Returns:
        :filtered_detections (dict): dictionary with detection scores, classes, centroids and bounding box coordinates ordered by score in descending order
    """
    allowed_categories = ["person"]
    # allowed_categories = ["Face"]  # if ssd face model

    im_height, im_width, _ = shape
    center_net = False

    classes = detections['detection_classes'][0].numpy().astype(np.int32)
    boxes = detections['detection_boxes'][0].numpy()
    scores = detections['detection_scores'][0].numpy()
    key_points_score = None
    key_points = None

    if 'detection_keypoint_scores' in detections:
        key_points_score = detections['detection_keypoint_scores'][0].numpy()
        key_points = detections['detection_keypoints'][0].numpy()
        center_net = True

    sorted_index = np.argsort(scores)[::-1]
    scores = scores[sorted_index]
    boxes = boxes[sorted_index]
    classes = classes[sorted_index]

    i = 0
    while i < len(scores):
        if scores[i] < min_score_thresh:  # scores are sorted
            break
        if coco_category_index[classes[i]]["name"] in allowed_categories:
            i += 1
        else:
            scores = np.delete(scores, i)
            boxes = delete_items_from_array_aux(boxes, i)
            classes = np.delete(classes, i)
            if center_net:
                key_points_score = delete_items_from_array_aux(key_points_score, i)
                key_points = delete_items_from_array_aux(key_points, i)

    filtered_detections = dict()
    filtered_detections['detection_classes'] = classes[:i]

    rescaled_boxes = boxes[:i]
    rescaled_key_points = key_points[:i] if center_net else None

    if new_old_shape:
        rescale_bb(rescaled_boxes, new_old_shape, im_width, im_height)
        if center_net:
            rescale_key_points(rescaled_key_points, new_old_shape, im_width, im_height)

    filtered_detections['detection_boxes'] = rescaled_boxes
    filtered_detections['detection_scores'] = scores[:i]

    if center_net:
        filtered_detections['detection_keypoint_scores'] = key_points_score[:i]
        filtered_detections['detection_keypoints'] = rescaled_key_points

    aux_centroids = []
    for bb in boxes[:i]:  # y_min, x_min, y_max, x_max
        centroid_x = (bb[1] + bb[3]) / 2.
        centroid_y = (bb[0] + bb[2]) / 2.
        aux_centroids.append([centroid_x, centroid_y])

    filtered_detections['detection_boxes_centroid'] = np.array(aux_centroids)

    return filtered_detections


# def detect_head_pose_ssd_face(image, detections, model, output_image):
#     """
#     Detect objects in the image running the model
#
#     Args:
#         :model (tensorflow.python.saved_model): The Tensorflow object detection model
#         :image (numpy.ndarray): The image that is given as input to the object detection model
#         :min_score_threshold (float): The minimum score for the detections (detections with a score lower than this value will be discarded)
#         :new_old_shape (tuple): The first element represents the right padding (applied by resize_preserving_ar() function);
#             the second element represents the bottom padding (applied by resize_preserving_ar() function) and
#             the third element is a tuple that is the shape of the image after resizing without the padding (this is useful for
#             the coordinates changes that we have to do)
#
#     Returns:
#         :detections (dict): dictionary with detection scores, classes, centroids and bounding box coordinates ordered by score in descending order
#         :inference_time (float): inference time for one image expressed in seconds
#     """
#
#     im_width, im_height = image.shape[1], image.shape[0]
#     classes = detections['detection_classes']
#     boxes = detections['detection_boxes']
#
#     i = 0
#     while i < len(classes):  # for each bb (person)
#         [y_min_perc, x_min_perc, y_max_perc, x_max_perc] = boxes[i]
#         (x_min, x_max, y_min, y_max) = (int(x_min_perc * im_width), int(x_max_perc * im_width), int(y_min_perc * im_height), int(y_max_perc * im_height))
#
#         y_min_face, x_min_face, y_max_face, x_max_face = enlarge_bb(y_min, x_min, y_max, x_max, im_width, im_height)
#         img_face = image[y_min_face:y_max_face, x_min_face:x_max_face]
#         img_face = cv2.cvtColor(img_face, cv2.COLOR_BGR2RGB)
#
#         # img_face, _ = resize_preserving_ar(img_face, (224, 224))
#         img_face = cv2.resize(img_face, (224, 224))
#
#         img_face = np.expand_dims(img_face, axis=0)
#         yaw, pitch, roll = model.get_angle(img_face)
#
#         cv2.rectangle(output_image, (x_min_face, y_min_face), (x_max_face, y_max_face), (0, 0, 0), 2)
#         # cv2.imshow("aa", output_image)
#         # cv2.waitKey(0)
#         # to original image coordinates
#         x_min_orig, x_max_orig, y_min_orig, y_max_orig = x_min_face, x_max_face, y_min_face, y_max_face  # x_min_face + x_min, x_max_face + x_min, y_min_face + y_min, y_max_face + y_min
#         draw_axis(output_image, yaw, pitch, roll, tdx=(x_min_orig + x_max_orig) / 2, tdy=(y_min_orig + y_max_orig) / 2,
#                   size=abs(x_max_face - x_min_face))
#
#         i += 1
#
#     return output_image
#
#
# def detect_head_pose(image, detections, model, detector, output_image):
#     """
#     Detect the pose of the head given an image and the person detected
#
#     Args:
#         :image (numpy.ndarray): The image that is given as input
#         :detections (dict): dictionary with detection scores, classes, centroids and bounding box coordinates ordered by score in descending order
#         :model (src.ai.whenet.WHENet): model to detect the pose of the head
#         :detector (_dlib_pybind11.cnn_face_detection_model_v1): model to detect the face
#         :output_image (numpy.ndarray): The output image where the drawings of the head pose will be done
#
#     Returns:
#         :output_image (numpy.ndarray): The output image with the drawings of the head pose
#     """
#
#     im_width, im_height = image.shape[1], image.shape[0]
#     classes = detections['detection_classes']
#     boxes = detections['detection_boxes']
#
#     i = 0
#     while i < len(classes):  # for each bb (person)
#         [y_min_perc, x_min_perc, y_max_perc, x_max_perc] = boxes[i]
#         (x_min, x_max, y_min, y_max) = (int(x_min_perc * im_width), int(x_max_perc * im_width), int(y_min_perc * im_height), int(y_max_perc * im_height))
#
#         img_person = image[y_min:y_max, x_min:x_max]
#
#         # start_time = time.time()
#         # img_face = img_person[:int(img_person.shape[0]/2), :]
#         rect_faces = detection_dlib_cnn_face(detector, img_person)
#         # rect_faces = detection_dlib_face(detector, img_person)
#         # end_time = time.time()
#         # print("Inference time dlib cnn: ", end_time - start_time)
#
#         if len(rect_faces) > 0:  # if the detector is able to find faces
#
#             x_min_face, y_min_face, x_max_face, y_max_face = rect_faces[0][0], rect_faces[0][1], rect_faces[0][2], rect_faces[0][3]  # rect_faces[0][1]
#             y_min_face, x_min_face, y_max_face, x_max_face = enlarge_bb(y_min_face, x_min_face, y_max_face, x_max_face, im_width, im_height)
#
#             img_face = img_person[y_min_face:y_max_face, x_min_face:x_max_face]
#
#             img_face = cv2.cvtColor(img_face, cv2.COLOR_BGR2RGB)
#
#             # img_face, _ = resize_preserving_ar(img_face, (224, 224))
#             img_face = cv2.resize(img_face, (224, 224))
#
#             img_face = np.expand_dims(img_face, axis=0)
#             # start_time = time.time()
#             yaw, pitch, roll = model.get_angle(img_face)
#             # end_time = time.time()
#             # print("Inference time whenet: ", end_time - start_time)
#
#             cv2.rectangle(output_image, (x_min_face + x_min, y_min_face + y_min), (x_max_face + x_min, y_max_face + y_min), (0, 0, 0), 2)
#             # to original image coordinates
#             x_min_orig, x_max_orig, y_min_orig, y_max_orig = x_min_face + x_min, x_max_face + x_min, y_min_face + y_min, y_max_face + y_min
#             draw_axis(output_image, yaw, pitch, roll, tdx=(x_min_orig + x_max_orig) / 2, tdy=(y_min_orig + y_max_orig) / 2,
#                       size=abs(x_max_face - x_min_face))
#             # draw_axis(image, yaw, pitch, roll, tdx=(x_min_face + x_max_face) / 2, tdy=(y_min_face + y_max_face) / 2,
#             #           size=abs(x_max_face - x_min_face))
#         else:  # otherwise
#             # print("SHAPE ", img_person.shape)
#             # x_min_face, y_min_face, x_max_face, y_max_face = int(img_person.shape[1]/8), 0, int(img_person.shape[1]-img_person.shape[1]/9), int(img_person.shape[0]/3)
#             # img_face = img_person[y_min_face:y_max_face, x_min_face:x_max_face]
#             # # img_face = resize_preserving_ar(img_face, (224, 224))
#             # img_face = cv2.resize(img_face, (224, 224))
#             # cv2.imshow("face_rsz", img_face)
#             # cv2.waitKey(0)
#             # img_face = np.expand_dims(img_face, axis=0)
#             # # cv2.rectangle(img_face, (x_min_face, y_min_face), (x_max_face, y_max_face), (0, 0, 0), 1)
#             # yaw, pitch, roll = model.get_angle(img_face)
#             # print("YPR", yaw, pitch, roll)
#             # draw_axis(img_person, yaw, pitch, roll, tdx=(x_min_face+x_max_face)/2, tdy=(y_min_face+y_max_face)/2, size=abs(x_max_face-x_min_face))
#             # cv2.imshow('output', img_person)
#             # cv2.waitKey(0)
#             i += 1
#             continue
#
#         i += 1
#
#     return output_image


# def detect_head_pose_whenet(model, person, image):
#
#     """
#     Detect the head pose using the WHENet model and draw it on the image
#
#     Args:
#         :model (): WHENet model
#         :person ():
#         :image (numpy.ndarray): The image that is given as input
#
#     Returns:
#         :
#     """
#
#     faces_coordinates = person.get_faces_coordinates()[-1]
#
#     y_min, x_min, y_max, x_max = faces_coordinates
#
#     image_face = image[y_min:y_max, x_min:x_max]
#     img_face = cv2.cvtColor(image_face, cv2.COLOR_BGR2RGB)
#
#     # img_face, _ = resize_preserving_ar(img_face, (224, 224))
#     img_face = cv2.resize(img_face, (224, 224))
#
#     img_face = np.expand_dims(img_face, axis=0)
#     # start_time = time.time()
#     yaw, pitch, roll = model.get_angle(img_face)
#
#     # end_time = time.time()
#     # print("Inference time whenet: ", end_time - start_time)
#     # cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 0, 0), 2)
#
#     # to original image coordinates
#     x_min_orig, x_max_orig, y_min_orig, y_max_orig = x_min, x_max, y_min, y_max
#     vector_norm = draw_axis(image, yaw, pitch, roll, tdx=(x_min_orig + x_max_orig) / 2, tdy=(y_min_orig + y_max_orig) / 2,
#                             size=abs(x_max - x_min))
#
#     visualize_vector(image, [int((x_min_orig + x_max_orig) / 2), int((y_min_orig + y_max_orig) / 2)], vector_norm)
#
#     person.update_poses_ypr([yaw, pitch, roll])
#     person.update_poses_vector_norm(vector_norm)

# cv2.imshow("", image)
# cv2.waitKey(0)
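
For reference, a minimal sketch of how detect() is wired up downstream (it mirrors the calls made in gradio_demo.py); the model and image paths are placeholders, and the CenterNet keypoint SavedModel used by the demo is assumed:

    import cv2
    import tensorflow as tf

    from ai.detection import detect
    from utils.img_util import resize_preserving_ar

    # placeholder paths, assuming the CenterNet keypoint model downloaded by gradio_demo.py
    model = tf.saved_model.load('data/keypoint_detector/centernet_hg104_512x512_kpts_coco17_tpu-32/saved_model')
    image = cv2.imread('sample.jpg')

    # resize with padding and keep the padding info so boxes can be rescaled back
    img_resized, new_old_shape = resize_preserving_ar(image, (512, 512))
    detections, inference_time = detect(model, img_resized, min_score_thresh=0.45, new_old_shape=new_old_shape)
    print(len(detections['detection_scores']), 'people detected in', round(inference_time, 3), 's')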
gradio_demo.py ADDED
@@ -0,0 +1,128 @@

import gdown
import gradio as gr

import logging
import os

import cv2
import numpy as np
import tensorflow as tf


from ai.detection import detect
from laeo_per_frame.interaction_per_frame_uncertainty import LAEO_computation
from utils.hpe import hpe, project_ypr_in2d
from utils.img_util import resize_preserving_ar, draw_detections, percentage_to_pixel, draw_key_points_pose, \
    visualize_vector


def load_image(camera):
    # Capture the video frame by frame
    try:
        ret, frame = camera.read()
        return True, frame
    except Exception:
        logging.error('Error reading frame')
        return False, None


def demo_play(img, laeo=True, rgb=False):
    # webcam in use

    # gpus = tf.config.list_physical_devices('GPU')

    # img = np.array(frame)
    if not rgb:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)

    img_resized, new_old_shape = resize_preserving_ar(img, input_shape_od_model)

    print('inference centernet')
    detections, elapsed_time = detect(model, img_resized, min_score_thresh,
                                      new_old_shape)  # detection classes, boxes and scores
    # probably to draw on the resized image
    img_with_detections = draw_detections(img_resized, detections, max_boxes_to_draw, None, None, None)
    # cv2.imshow("aa", img_with_detections)

    det, kpt = percentage_to_pixel(img.shape, detections['detection_boxes'], detections['detection_scores'],
                                   detections['detection_keypoints'], detections['detection_keypoint_scores'])

    # center_xy, yaw, pitch, roll = head_pose_estimation(kpt, 'centernet', gaze_model=gaze_model)

    # _________ extract hpe and print to img
    people_list = []

    print('inference hpe')

    for j, kpt_person in enumerate(kpt):
        yaw, pitch, roll, tdx, tdy = hpe(gaze_model, kpt_person, detector='centernet')

        # img = draw_axis_3d(yaw[0].numpy()[0], pitch[0].numpy()[0], roll[0].numpy()[0], image=img, tdx=tdx, tdy=tdy,
        #                    size=50)

        people_list.append({'yaw': yaw[0].numpy()[0],
                            'yaw_u': 0,
                            'pitch': pitch[0].numpy()[0],
                            'pitch_u': 0,
                            'roll': roll[0].numpy()[0],
                            'roll_u': 0,
                            'center_xy': [tdx, tdy]
                            })

    for i in range(len(det)):
        img = draw_key_points_pose(img, kpt[i])

    # call LAEO
    clip_uncertainty = 0.5
    binarize_uncertainty = False
    if laeo:
        interaction_matrix = LAEO_computation(people_list, clipping_value=clip_uncertainty,
                                              clip=binarize_uncertainty)
    else:
        interaction_matrix = np.zeros((len(people_list), len(people_list)))

    # TODO: coloured arrow per person
    for index, person in enumerate(people_list):
        green = round((max(interaction_matrix[index, :])) * 255)
        colour = (0, green, 0)
        if green < 40:
            colour = (0, 0, 255)
        vector = project_ypr_in2d(person['yaw'], person['pitch'], person['roll'])
        img = visualize_vector(img, person['center_xy'], vector, title="",
                               color=colour)
    return img


demo = gr.Interface(
    fn=demo_play,
    inputs=[gr.Image(source="webcam", streaming=True),
            gr.Checkbox(value=True, label="LAEO", info="Compute and display LAEO"),
            gr.Checkbox(value=True, label="rgb", info="If unchecked, the output is drawn on a grayscale image"),
            ],
    outputs="image",
    live=True
)

if __name__ == '__main__':
    if not os.path.exists("data"):
        gdown.download_folder("https://drive.google.com/drive/folders/1nQ1Cb_tBEhWxy183t-mIcVH7AhAfa6NO?usp=drive_link",
                              use_cookies=False)
    gaze_model_path = 'data/head_pose_estimation'
    gaze_model = tf.keras.models.load_model(gaze_model_path, custom_objects={"tf": tf})
    path_to_model = 'data/keypoint_detector/centernet_hg104_512x512_kpts_coco17_tpu-32'
    model = tf.saved_model.load(os.path.join(path_to_model, 'saved_model'))

    input_shape_od_model = (512, 512)
    # params
    min_score_thresh, max_boxes_to_draw, min_distance = .45, 50, 1.5

    print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

    demo.launch()
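
The Gradio interface streams webcam frames, but demo_play can also be exercised on a single still image for a quick offline check, assuming the globals from the __main__ block (models, thresholds, input shape) have been initialised first; the image path below is a placeholder:

    import cv2

    frame = cv2.imread('sample.jpg')               # placeholder input frame
    annotated = demo_play(frame, laeo=True, rgb=False)
    cv2.imwrite('sample_laeo.jpg', annotated)      # key points, gaze vectors and LAEO colouring drawn on the frame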
laeo_per_frame/__init__.py ADDED
File without changes
laeo_per_frame/interaction_per_frame_uncertainty.py ADDED
@@ -0,0 +1,166 @@

'''It calculates the interaction frame by frame, with no temporal consistency.
It also uses the uncertainty to enlarge the visual cone.'''
import re
from math import sin, cos

import numpy as np


def project_ypr_in2d(yaw, pitch, roll):
    """ Project yaw, pitch and roll on the image plane. The result is NOT normalised.

    :param yaw:
    :param pitch:
    :param roll:
    :return:
    """
    pitch = pitch * np.pi / 180
    yaw = -(yaw * np.pi / 180)
    roll = roll * np.pi / 180

    x3 = (sin(yaw))
    y3 = (-cos(yaw) * sin(pitch))

    # normalize the components
    length = np.sqrt(x3 ** 2 + y3 ** 2)

    # return [x3 / length, y3 / length]
    return [x3, y3]


def compute_interaction_cosine(head_position, gaze_direction, uncertainty, target, visual_cone=True):
    """Computes the interaction between two people using the angle of view.

    The interaction is measured as the cosine of the angle formed by the line from person A to B
    and the gaze direction of person A.
    Reference system of zero degree:

    :param head_position: position of the head of person A
    :param gaze_direction: gaze direction of the head of person A
    :param uncertainty: uncertainty of the head pose estimate of person A, in [0, 1]
    :param target: position of the head of person B
    :param visual_cone: (default) True; if False the gaze is a line, otherwise it is a cone (more human-like)
    :return: float or double describing the quantity of interaction
    """
    if np.array_equal(head_position, target):
        return 0  # or -1
    else:
        cone_aperture = None
        if 0 <= uncertainty < 0.4:
            cone_aperture = np.deg2rad(3)
        elif 0.4 <= uncertainty <= 0.6:
            cone_aperture = np.deg2rad(6)
        elif 0.6 < uncertainty <= 1:
            cone_aperture = np.deg2rad(9)
        # direction from observer to target
        _direction_ = np.arctan2((target[1] - head_position[1]), (target[0] - head_position[0]))
        _direction_gaze_ = np.arctan2(gaze_direction[1], gaze_direction[0])
        difference = _direction_ - _direction_gaze_  # radians
        if visual_cone and (0 < difference < cone_aperture):
            difference = 0
        # difference of the line joining observer -> target with the gazing direction

        val = np.cos(difference)
        if val < 0:
            return 0
        else:
            return val


def calculate_uncertainty(yaw_1, pitch_1, roll_1, clipping_value, clip=True):
    # res_1 = abs((pitch_1 + yaw_1 + roll_1) / 3)
    res_1 = abs((pitch_1 + yaw_1) / 2)
    if clip:
        # binarize the uncertainty
        if res_1 > clipping_value:
            res_1 = clipping_value
        else:
            res_1 = 0
    else:
        # leave the uncertainty untouched except for the upper bound
        if res_1 > clipping_value:
            res_1 = clipping_value
        elif res_1 < 0:
            res_1 = 0

    # normalize
    res_1 = res_1 / clipping_value
    # assert res_1 in [0, 1], 'uncertainty not binarized'
    return res_1


def atoi(text):
    return int(text) if text.isdigit() else text


def natural_keys(text):
    '''
    alist.sort(key=natural_keys) sorts in human order
    http://nedbatchelder.com/blog/200712/human_sorting.html
    (See Toothy's implementation in the comments)
    '''
    return [atoi(c) for c in re.split(r'(\d+)', text)]


def delete_file_if_exist(*file_path):
    for f in file_path:
        if f.is_file():  # if it exists already, replace it
            f.unlink(missing_ok=True)


def LAEO_computation(people_list, clipping_value, clip):
    """Compute the pairwise LAEO (Looking At Each Other) interaction matrix for the people in one frame."""
    # TODO here correct the average because -> 0 + 0.99 -> LAEO, already corrected a bit
    people_in_frame = len(people_list)

    # create empty matrices with one entry per pair of people in the frame
    matrix = np.empty((people_in_frame, people_in_frame))
    interaction_matrix = np.zeros((people_in_frame, people_in_frame))
    uncertainty_matrix = np.zeros((people_in_frame, people_in_frame))

    norm_xy_all = []  # it will contain the vectors for printing
    for subject in range(people_in_frame):
        norm_xy = project_ypr_in2d(people_list[subject]['yaw'], people_list[subject]['pitch'],
                                   people_list[subject]['roll'])
        norm_xy_all.append(norm_xy)
        uncertainty_1 = calculate_uncertainty(people_list[subject]['yaw_u'],
                                              people_list[subject]['pitch_u'],
                                              people_list[subject]['roll_u'], clipping_value=clipping_value,
                                              clip=clip)

        for object in range(people_in_frame):
            uncertainty_2 = calculate_uncertainty(people_list[object]['yaw_u'],
                                                  people_list[object]['pitch_u'],
                                                  people_list[object]['roll_u'], clipping_value=clipping_value,
                                                  clip=clip)
            v = compute_interaction_cosine(people_list[subject]['center_xy'], norm_xy, uncertainty_1,
                                           people_list[object]['center_xy'])
            matrix[subject][object] = v
            uncertainty_matrix[subject][object] = uncertainty_1
            # uncertainty_matrix[object][subject] = uncertainty_2

    # matrix is completed

    for subject in range(people_in_frame):
        for object in range(people_in_frame):
            # take the average of the previous matrix
            if matrix[subject][object] > 0.3 and matrix[object][subject] > 0.3:
                v = (matrix[subject][object] + matrix[object][subject]) / 2
                interaction_matrix[subject][object] = v
            else:
                interaction_matrix[subject][object] = 0

    return interaction_matrix


if __name__ == '__main__':
    clip_uncertainty = 0.5  # must be > 0, calculate_uncertainty divides by it
    binarize_uncertainty = True
    yaw, pitch, roll, tdx, tdy = 0, 0, 0, 0, 0
    my_list = [{'yaw': yaw,
                'yaw_u': 0,
                'pitch': pitch,
                'pitch_u': 0,
                'roll': roll,
                'roll_u': 0,
                'center_xy': [tdx, tdy]}]
    _ = LAEO_computation(my_list, clipping_value=clip_uncertainty, clip=binarize_uncertainty)
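
As a small sanity check of the expected people_list format (the same dictionaries built in gradio_demo.py), two synthetic people facing each other along the x axis should come out as a mutual LAEO pair:

    from laeo_per_frame.interaction_per_frame_uncertainty import LAEO_computation

    # two hypothetical people facing each other along the x axis (sign convention of project_ypr_in2d)
    people = [
        {'yaw': -90, 'yaw_u': 0, 'pitch': 0, 'pitch_u': 0, 'roll': 0, 'roll_u': 0, 'center_xy': [100, 200]},
        {'yaw': 90, 'yaw_u': 0, 'pitch': 0, 'pitch_u': 0, 'roll': 0, 'roll_u': 0, 'center_xy': [400, 200]},
    ]
    matrix = LAEO_computation(people, clipping_value=0.5, clip=False)
    # matrix[0][1] and matrix[1][0] are ~1.0 (looking at each other); the diagonal stays 0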
utils/__init__.py ADDED
File without changes
utils/hpe.py ADDED
@@ -0,0 +1,86 @@

import math
import os
import numpy as np
import tensorflow as tf

from utils.my_utils import normalize_wrt_maximum_distance_point, retrieve_interest_points


def head_pose_estimation(kpt, detector, gaze_model, id_list=None):
    fps, shape = 20, (1280, 720)

    yaw_list, pitch_list, roll_list, yaw_u_list, pitch_u_list, roll_u_list = [], [], [], [], [], []
    center_xy = []

    for j, kpt_person in enumerate(kpt):
        # TODO here change order if openpose
        face_kpt = retrieve_interest_points(kpt_person, detector=detector)

        tdx = np.mean([face_kpt[k] for k in range(0, 15, 3) if face_kpt[k] != 0.0])
        tdy = np.mean([face_kpt[k + 1] for k in range(0, 15, 3) if face_kpt[k + 1] != 0.0])
        if math.isnan(tdx) or math.isnan(tdy):
            tdx = -1
            tdy = -1

        center_xy.append([tdx, tdy])
        face_kpt_normalized = np.array(normalize_wrt_maximum_distance_point(face_kpt))
        # print(type(face_kpt_normalized), face_kpt_normalized)

        aux = tf.cast(np.expand_dims(face_kpt_normalized, 0), tf.float32)

        yaw, pitch, roll = gaze_model(aux, training=False)
        # print(yaw[0].numpy()[0], pitch, roll)
        yaw_list.append(yaw[0].numpy()[0])
        pitch_list.append(pitch[0].numpy()[0])
        roll_list.append(roll[0].numpy()[0])

        yaw_u_list.append(yaw[0].numpy()[1])
        pitch_u_list.append(pitch[0].numpy()[1])
        roll_u_list.append(roll[0].numpy()[1])
        # print(id_lists[j])
        # print('yaw: ', yaw[0].numpy()[0], 'yaw unc: ', yaw[0].numpy()[1], 'pitch: ', pitch[0].numpy()[0],
        #       'pitch unc: ', pitch[0].numpy()[1], 'roll: ', roll[0].numpy()[0], 'roll unc: ', roll[0].numpy()[1])
        # draw_axis(yaw.numpy(), pitch.numpy(), roll.numpy(), im_pose, tdx, tdy)
    return center_xy, yaw_list, pitch_list, roll_list


def hpe(gaze_model, kpt_person, detector):
    # TODO here change order if openpose
    face_kpt = retrieve_interest_points(kpt_person, detector=detector)

    tdx = np.mean([face_kpt[k] for k in range(0, 15, 3) if face_kpt[k] != 0.0])
    tdy = np.mean([face_kpt[k + 1] for k in range(0, 15, 3) if face_kpt[k + 1] != 0.0])
    if math.isnan(tdx) or math.isnan(tdy):
        tdx = -1
        tdy = -1

    # center_xy.append([tdx, tdy])
    face_kpt_normalized = np.array(normalize_wrt_maximum_distance_point(face_kpt))
    # print(type(face_kpt_normalized), face_kpt_normalized)

    aux = tf.cast(np.expand_dims(face_kpt_normalized, 0), tf.float32)

    yaw, pitch, roll = gaze_model(aux, training=False)

    return yaw, pitch, roll, tdx, tdy


def project_ypr_in2d(yaw, pitch, roll):
    """ Project yaw pitch roll on image plane. Result is NOT normalised.

    :param yaw:
    :param pitch:
    :param roll:
    :return:
    """
    pitch = pitch * np.pi / 180
    yaw = -(yaw * np.pi / 180)
    roll = roll * np.pi / 180

    x3 = (math.sin(yaw))
    y3 = (-math.cos(yaw) * math.sin(pitch))

    # normalize the components
    length = np.sqrt(x3 ** 2 + y3 ** 2)

    return [x3, y3]
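
A quick check of the sign convention used by project_ypr_in2d (values are approximate because of floating point):

    from utils.hpe import project_ypr_in2d

    print(project_ypr_in2d(0, 0, 0))    # [0.0, 0.0]   frontal face, no in-plane component
    print(project_ypr_in2d(-90, 0, 0))  # [1.0, ~0.0]  gaze projected towards +x in the image plane
    print(project_ypr_in2d(0, -90, 0))  # [~0.0, 1.0]  gaze projected towards +y (downwards in image coordinates)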
utils/img_util.py ADDED
@@ -0,0 +1,676 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import os
3
+ import json
4
+ import numpy as np
5
+ from math import cos, sin, pi
6
+ from utils.labels import coco_category_index, rgb_colors, color_pose, color_pose_normalized, pose_id_part, face_category_index, body_parts_openpose, body_parts, face_points, face_points_openpose, pose_id_part_zedcam, face_points_zedcam, body_parts_zedcam
7
+ # from src.utils.my_utils import fit_plane_least_square # , retrieve_line_from_two_points
8
+
9
+
10
+ def percentage_to_pixel(shape, bb_boxes, bb_boxes_scores, key_points=None, key_points_score=None):
11
+ """
12
+ Convert the detections from percentage to pixels coordinates; it works both for the bounding boxes and for the key points if passed
13
+
14
+ Args:
15
+ :img_shape (tuple): the shape of the image
16
+ :bb_boxes (numpy.ndarray): list of list each one representing the bounding box coordinates expressed in percentage [y_min_perc, x_min_perc, y_max_perc, x_max_perc]
17
+ :bb_boxes_scores (numpy.ndarray): list of score for each bounding box in range [0, 1]
18
+ :key_points (numpy.ndarray): list of list of list each one representing the key points coordinates expressed in percentage [y_perc, x_perc]
19
+ :key_points_score (numpy.ndarray): list of list each one representing the score associated to each key point in range [0, 1]
20
+
21
+ Returns:
22
+ :det (numpy.ndarray): list of lists each one representing the bounding box coordinates in pixels and the score associated to each bounding box [x_min, y_min, x_max, y_max, score]
23
+ :kpt (list): list of lists each one representing the key points detected in pixels and the score associated to each point [x, y, score]
24
+ """
25
+
26
+ im_width, im_height = shape[1], shape[0]
27
+ det, kpt = [], []
28
+
29
+ if key_points is not None:
30
+ key_points = key_points
31
+ key_points_score = key_points_score
32
+
33
+ for i, _ in enumerate(bb_boxes):
34
+ y_min, x_min, y_max, x_max = bb_boxes[i]
35
+ x_min_rescaled, x_max_rescaled, y_min_rescaled, y_max_rescaled = x_min * im_width, x_max * im_width, y_min * im_height, y_max * im_height
36
+ det.append([int(x_min_rescaled), int(y_min_rescaled), int(x_max_rescaled), int(y_max_rescaled), bb_boxes_scores[i]])
37
+
38
+ if key_points is not None:
39
+ aux_list = []
40
+ for n, key_point in enumerate(key_points[i]): # y x
41
+ aux = [int(key_point[0] * im_height), int(key_point[1] * im_width), key_points_score[i][n]]
42
+ aux_list.append(aux)
43
+ kpt.append(aux_list)
44
+
45
+ det = np.array(det)
46
+
47
+ return det, kpt
48
+
49
+
50
+ def draw_detections(image, detections, max_boxes_to_draw, violate=None, couple_points=None, draw_class_score=False):
51
+ """
52
+ Given an image and a dictionary of detections this function return the image with the drawings of the bounding boxes (with violations information if specified)
53
+
54
+ Args:
55
+ :img (numpy.ndarray): The image that is given as input to the object detection model
56
+ :detections (dict): The dictionary with the detections information (detection_classes, detection_boxes, detection_scores,
57
+ detection_keypoint_scores, detection_keypoints, detection_boxes_centroid)
58
+ :max_boxes_to_draw (int): The maximum number of bounding boxes to draw
59
+ :violate (set): The indexes of detections (sorted) that violate the minimum distance computed by my_utils.compute_distance function
60
+ (default is None)
61
+ :couple_points (list): A list of tuples each one containing the couple of indexes that violate the minimum distance (used to draw lines in
62
+ between to bounding boxes)
63
+ (default is None)
64
+ :draw_class_score (bool): If this value is set to True, in the returned image will be drawn the category and the score over each bounding box
65
+ (default is False)
66
+
67
+ Returns:
68
+ :img_with_drawings (numpy.ndarray): The image with the bounding boxes of each detected objects and optionally with the situations of violation
69
+ """
70
+
71
+ im_width, im_height = image.shape[1], image.shape[0]
72
+ img_with_drawings = image.copy()
73
+ classes = detections['detection_classes']
74
+ boxes = detections['detection_boxes']
75
+ scores = detections['detection_scores']
76
+ centroids = detections['detection_boxes_centroid']
77
+ red = (0, 0, 255)
78
+
79
+ i = 0
80
+ while i < max_boxes_to_draw and i < len(classes):
81
+ [y_min, x_min, y_max, x_max] = boxes[i]
82
+ (x_min_rescaled, x_max_rescaled, y_min_rescaled, y_max_rescaled) = (x_min * im_width, x_max * im_width, y_min * im_height, y_max * im_height)
83
+ start_point, end_point = (int(x_max_rescaled), int(y_max_rescaled)), (int(x_min_rescaled), int(y_min_rescaled))
84
+
85
+ # [cx, cy] = centroids[i]
86
+ # (cx_rescaled, cy_rescaled) = (int(cx * im_width), int(cy * im_height))
87
+
88
+ color = rgb_colors[classes[i]]
89
+ if violate:
90
+ if i in violate:
91
+ color = red
92
+
93
+ cv2.rectangle(img_with_drawings, start_point, end_point, color, 2)
94
+ # cv2.circle(img_with_drawings, (cx_rescaled, cy_rescaled), 2, color, 2)
95
+
96
+ if draw_class_score:
97
+ cv2.rectangle(img_with_drawings, end_point, (start_point[0], end_point[1] - 25), rgb_colors[classes[i]], -1)
98
+ text = face_category_index[classes[i]]['name'] + " {:.2f}".format(scores[i])
99
+ cv2.putText(img_with_drawings, text, end_point, cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2, cv2.LINE_AA)
100
+ i += 1
101
+
102
+ if couple_points and len(centroids) > 1:
103
+ for j in range(len(couple_points)):
104
+ pt1 = centroids[couple_points[j][0]][0], centroids[couple_points[j][0]][1]
105
+ pt2 = centroids[couple_points[j][1]][0], centroids[couple_points[j][1]][1]
106
+ cv2.line(img_with_drawings, pt1, pt2, red, 2)
107
+
108
+ text_location = (int(image.shape[1]-image.shape[1]/4), int(image.shape[0]/17))
109
+ font_scale = 0.8 * 1 / (640/image.shape[0])
110
+ thickness = int(2 * (image.shape[0]/640))
111
+ cv2.putText(img_with_drawings, "# of people : "+str(i), text_location, cv2.FONT_HERSHEY_SIMPLEX, font_scale, red, thickness, cv2.LINE_AA)
112
+
113
+ return img_with_drawings
114
+
115
+
116
+ def resize_preserving_ar(image, new_shape):
117
+ """
118
+ Resize and pad the input image in order to make it usable by an object detection model (e.g. mobilenet 640x640)
119
+
120
+ Args:
121
+ :image (numpy.ndarray): The image that will be resized and padded
122
+ :new_shape (tuple): The shape of the image output (height, width)
123
+
124
+ Returns:
125
+ :res_image (numpy.ndarray): The image modified to have the new shape
126
+ """
127
+ (old_height, old_width, _) = image.shape
128
+ (new_height, new_width) = new_shape
129
+
130
+ if old_height != old_width: # rectangle
131
+ ratio_h, ratio_w = new_height / old_height, new_width / old_width
132
+
133
+ if ratio_h > ratio_w:
134
+ dim = (new_width, int(old_height * ratio_w))
135
+ img = cv2.resize(image, dim, interpolation=cv2.INTER_CUBIC)
136
+ bottom_padding = int(new_height - int(old_height * ratio_w)) if int(new_height - int(old_height * ratio_w)) >= 0 else 0
137
+ img = cv2.copyMakeBorder(img, 0, bottom_padding, 0, 0, cv2.BORDER_CONSTANT)
138
+ pad = (0, bottom_padding, dim)
139
+
140
+ else:
141
+ dim = (int(old_width * ratio_h), new_height)
142
+ img = cv2.resize(image, dim, interpolation=cv2.INTER_CUBIC)
143
+ right_padding = int(new_width - int(old_width * ratio_h)) if int(new_width - int(old_width * ratio_h)) >= 0 else 0
144
+ img = cv2.copyMakeBorder(img, 0, 0, 0, right_padding, cv2.BORDER_CONSTANT)
145
+ pad = (right_padding, 0, dim)
146
+
147
+ else: # square
148
+ img = cv2.resize(image, new_shape, new_height, new_width)
149
+ pad = (0, 0, (new_height, new_width))
150
+
151
+ return img, pad
152
+
153
+
154
+ def resize_and_padding_preserving_ar(image, new_shape):
155
+ """ Resize and pad the input image in order to make it usable by a pose model (e.g. mobilenet-posenet takes as input 257x257 images)
156
+
157
+ Args:
158
+ :image (numpy.ndarray): The image that will be resized and padded
159
+ :new_shape (tuple): The shape of the image output
160
+
161
+ Returns:
162
+ :res_image (numpy.ndarray): The image modified to have the new shape
163
+ """
164
+
165
+ (old_height, old_width, _) = image.shape
166
+ (new_height, new_width) = new_shape
167
+
168
+ if old_height != old_width: # rectangle
169
+ ratio_h, ratio_w = new_height / old_height, new_width / old_width
170
+
171
+ # print(img.shape, "\nRATIO: ", ratio_h, ratio_w)
172
+ if ratio_h < ratio_w:
173
+ ratio = new_shape[0] / old_height
174
+ dim = (int(old_width * ratio), new_width)
175
+ img = cv2.resize(image, dim)
176
+ right_padding = int(new_width - img.shape[1]) if int(new_width - img.shape[1]) >= 0 else 0
177
+ img = cv2.copyMakeBorder(img, 0, 0, 0, right_padding, cv2.BORDER_CONSTANT)
178
+ else:
179
+ ratio = new_shape[1] / old_width
180
+ dim = (new_height, int(old_height * ratio))
181
+ img = cv2.resize(image, dim)
182
+ bottom_padding = int(new_height - img.shape[0]) if int(new_width - img.shape[0]) >= 0 else 0
183
+ img = cv2.copyMakeBorder(img, 0, bottom_padding, 0, 0, cv2.BORDER_CONSTANT)
184
+
185
+ else: # square
186
+ img = cv2.resize(image, new_shape)
187
+
188
+ img = img.astype(np.float32) / 255.
189
+ res_image = np.expand_dims(img, 0)
190
+
191
+ return res_image
192
+
193
+
194
+ def draw_axis(yaw, pitch, roll, image=None, tdx=None, tdy=None, size=50):
195
+ """
196
+ Draw yaw pitch and roll axis on the image if passed as input and returns the vector containing the projection of the vector on the image plane
197
+
198
+ Args:
199
+ :yaw (float): value that represents the yaw rotation of the face
200
+ :pitch (float): value that represents the pitch rotation of the face
201
+ :roll (float): value that represents the roll rotation of the face
202
+ :image (numpy.ndarray): The image where the three vector will be printed
203
+ (default is None)
204
+ :tdx (float64): x coordinate from where the vector drawing start expressed in pixel coordinates
205
+ (default is None)
206
+ :tdy (float64): y coordinate from where the vector drawing start expressed in pixel coordinates
207
+ (default is None)
208
+ :size (int): value that will be multiplied to each x, y and z value that enlarge the "vector drawing"
209
+ (default is 50)
210
+
211
+ Returns:
212
+ :list_projection_xy (list): list containing the unit vector [x, y, z]
213
+ """
214
+
215
+ pitch = pitch * np.pi / 180
216
+ yaw = -(yaw * np.pi / 180)
217
+ roll = roll * np.pi / 180
218
+
219
+ if tdx != None and tdy != None:
220
+ tdx = tdx
221
+ tdy = tdy
222
+
223
+ else:
224
+ height, width = image.shape[:2]
225
+ tdx = width / 2
226
+ tdy = height / 2
227
+
228
+ # PROJECT 3D TO 2D XY plane (Z = 0)
229
+
230
+ # X-Axis pointing to right. drawn in red
231
+ x1 = size * (cos(yaw) * cos(roll)) + tdx
232
+ y1 = size * (cos(pitch) * sin(roll) + cos(roll) * sin(pitch) * sin(yaw)) + tdy
233
+
234
+ # Y-Axis | drawn in green
235
+ x2 = size * (-cos(yaw) * sin(roll)) + tdx
236
+ y2 = size * (cos(pitch) * cos(roll) - sin(pitch) * sin(yaw) * sin(roll)) + tdy
237
+
238
+ # Z-Axis (out of the screen) drawn in yellow #it was blue
239
+ x3 = size * (sin(yaw)) + tdx
240
+ y3 = size * (-cos(yaw) * sin(pitch)) + tdy
241
+ z3 = size * (cos(pitch) * cos(yaw)) + tdy
242
+
243
+ if image is not None:
244
+ cv2.line(image, (int(tdx), int(tdy)), (int(x1), int(y1)), (0, 0, 255), 2) # BGR->red
245
+ cv2.line(image, (int(tdx), int(tdy)), (int(x2), int(y2)), (0, 255, 0), 2) # BGR->green
246
+ cv2.line(image, (int(tdx), int(tdy)), (int(x3), int(y3)), (0, 255, 255), 2) # BGR->blue
247
+
248
+ list_projection_xy = [sin(yaw), -cos(yaw) * sin(pitch)]
249
+ return list_projection_xy
250
+
251
+
252
+ def visualize_vector(image, center, unit_vector, title="", color=(0, 0, 255)):
253
+ """
254
+ Draw the projected vector on the image plane and return the image
255
+
256
+ Args:
257
+ :image (numpy.ndarray): The image where the vector will be printed
258
+ :center (list): x, y coordinates in pixels of the starting point from where the vector is drawn
259
+ :unit_vector (list): vector of the gaze in the form [gx, gy]
260
+ :title (string): title displayed in the imshow function
261
+ (default is "")
262
+ :color (tuple): color value of the vector drawn on the image
263
+ (default is (0, 0, 255))
264
+
265
+ Returns:
266
+ :result (numpy.ndarray): The image with the vectors drawn
267
+ """
268
+ unit_vector_draw = [unit_vector[0] * image.shape[0]*0.15, unit_vector[1] * image.shape[0]*0.15]
269
+ point = [center[0] + unit_vector_draw[0], center[1] + unit_vector_draw[1]]
270
+
271
+ result = cv2.arrowedLine(image, (int(center[0]), int(center[1])), (int(point[0]), int(point[1])), color, thickness=4, tipLength=0.3)
272
+
273
+ return result
274
+
275
+
276
+ def draw_key_points_pose(image, kpt, openpose=False):
277
+ """
278
+ Draw the key points and the lines connecting them; it expects the output of CenterNet (not OpenPose format)
279
+
280
+ Args:
281
+ :image (numpy.ndarray): The image where the lines connecting the key points will be printed
282
+ :kpt (list): list of lists of points detected for each person [[x1, y1, c1], [x2, y2, c2],...] where x and y represent the coordinates of each
283
+ point while c represents the confidence
284
+
285
+ Returns:
286
+ :img (numpy.ndarray): The image with the drawings of lines and key points
287
+ """
288
+
289
+ parts = body_parts_openpose if openpose else body_parts
290
+ kpt_score = None
291
+ threshold = 0.4
292
+
293
+ overlay = image.copy()
294
+
295
+ face_pts = face_points_openpose if openpose else face_points
296
+
297
+ for j in range(len(kpt)):
298
+ # 0 nose, 1/2 left/right eye, 3/4 left/right ear
299
+ color = color_pose["blue"]
300
+ if j == face_pts[0]:
301
+ color = color_pose["purple"]# naso
302
+ if j == face_pts[1]:
303
+ color = color_pose["green"]#["light_pink"]#Leye
304
+ if j == face_pts[2]:
305
+ color = color_pose["dark_pink"]#Reye
306
+ if j == face_pts[3]:
307
+ color = color_pose["light_orange"]#LEar
308
+ if j == face_pts[4]:
309
+ color = color_pose["yellow"]# REar
310
+ if openpose:
311
+ cv2.circle(image, (int(kpt[j][0]), int(kpt[j][1])), 1, color, 2)
312
+ else:
313
+ cv2.circle(image, (int(kpt[j][1]), int(kpt[j][0])), 1, color, 2)
314
+ # cv2.putText(img, pose_id_part[i], (int(kpts[j][i, 1] * img.shape[1]), int(kpts[j][i, 0] * img.shape[0])), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 1, cv2.LINE_AA)
315
+
316
+ for part in parts:
317
+ if int(kpt[part[0]][1]) != 0 and int(kpt[part[0]][0]) != 0 and int(kpt[part[1]][1]) != 0 and int(
318
+ kpt[part[1]][0]) != 0:
319
+
320
+ if openpose:
321
+ cv2.line(overlay, (int(kpt[part[0]][0]), int(kpt[part[0]][1])), (int(kpt[part[1]][0]), int(kpt[part[1]][1])), (255, 255, 255), 2)
322
+ else:
323
+ cv2.line(overlay, (int(kpt[part[0]][1]), int(kpt[part[0]][0])),
324
+ (int(kpt[part[1]][1]), int(kpt[part[1]][0])), (255, 255, 255), 2)
325
+
326
+ alpha = 0.4
327
+ image = cv2.addWeighted(overlay, alpha, image, 1 - alpha, 0)
328
+
329
+ return image
330
+
331
+ def draw_key_points_pose_zedcam(image, kpt, openpose=True):
332
+ """
333
+ Draw the key points and the lines connecting them; it expects the output of CenterNet (not OpenPose format)
334
+
335
+ Args:
336
+ :image (numpy.ndarray): The image where the lines connecting the key points will be printed
337
+ :kpt (list): list of lists of points detected for each person [[x1, y1, c1], [x2, y2, c2],...] where x and y represent the coordinates of each
338
+ point while c represents the confidence
339
+
340
+ Returns:
341
+ :img (numpy.ndarray): The image with the drawings of lines and key points
342
+ """
343
+
344
+ parts = body_parts_zedcam
345
+ kpt_score = None
346
+ threshold = 0.4
347
+
348
+ overlay = image.copy()
349
+
350
+ face_pts = face_points_zedcam
351
+
352
+ for j in range(len(kpt)):
353
+ # 0 nose, 1/2 left/right eye, 3/4 left/right ear
354
+ color = color_pose["blue"]
355
+ if j == face_pts[0]: # naso
356
+ color = color_pose["purple"]
357
+ if j == face_pts[1]:
358
+ color = color_pose["light_pink"]
359
+ if j == face_pts[2]:
360
+ color = color_pose["dark_pink"]
361
+ if j == face_pts[3]:
362
+ color = color_pose["light_orange"]
363
+ if j == face_pts[4]:
364
+ color = color_pose["dark_orange"]
365
+ if openpose:
366
+ cv2.circle(image, (int(kpt[j][0]), int(kpt[j][1])), 1, color, 2)
367
+ else:
368
+ cv2.circle(image, (int(kpt[j][1]), int(kpt[j][0])), 1, color, 2)
369
+ # cv2.putText(img, pose_id_part[i], (int(kpts[j][i, 1] * img.shape[1]), int(kpts[j][i, 0] * img.shape[0])), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 1, cv2.LINE_AA)
370
+
371
+ for part in parts:
372
+ if int(kpt[part[0]][1]) != 0 and int(kpt[part[0]][0]) != 0 and int(kpt[part[1]][1]) != 0 and int(
373
+ kpt[part[1]][0]) != 0:
374
+
375
+ if openpose:
376
+ cv2.line(overlay, (int(kpt[part[0]][0]), int(kpt[part[0]][1])), (int(kpt[part[1]][0]), int(kpt[part[1]][1])), (255, 255, 255), 2)
377
+ else:
378
+ cv2.line(overlay, (int(kpt[part[0]][1]), int(kpt[part[0]][0])),
379
+ (int(kpt[part[1]][1]), int(kpt[part[1]][0])), (255, 255, 255), 2)
380
+
381
+ alpha = 0.4
382
+ image = cv2.addWeighted(overlay, alpha, image, 1 - alpha, 0)
383
+
384
+ return image
385
+
386
+ def plot_3d_points(list_points):
387
+ """
388
+ Plot points in 3D
389
+
390
+ Args:
391
+ :list_points: A list of lists representing the points; each point has (x, y, z) coordinates represented by the first, second and third element of each list
392
+
393
+ Returns:
394
+ """
395
+ if list_points == []:
396
+ return
397
+
398
+ import matplotlib.pyplot as plt
399
+
400
+ fig = plt.figure()
401
+ ax = fig.add_subplot(111, projection='3d')
402
+
403
+ for point in list_points:
404
+ ax.scatter(point[0], point[1], point[2], c=np.array(0), marker='o')
405
+
406
+ ax.set_xlabel('x')
407
+ ax.set_ylabel('y')
408
+ ax.set_zlabel('z')
409
+
410
+ plt.show()
411
+
412
+ return
413
+
414
+
415
+ def draw_on_img(image, center, id_, res):
416
+ """
417
+ Draw arrow illustrating gaze direction on the image
418
+
419
+ Args:
420
+ :image (numpy.ndarray): The image where the vector will be printed
421
+ :center (list): x, y coordinates in pixels of the starting point from where the vector is drawn
422
+ :id_ (string): title displayed in the imshow function
423
+ (default is "")
424
+ :res (list): vector of the gaze in the form [gx, gy]
425
+
426
+ Returns:
427
+ :img_arrow (numpy.ndarray): The image with the vector drawn
428
+ """
429
+
430
+ res[0] *= image.shape[0]
431
+ res[1] *= image.shape[1]
432
+
433
+ norm1 = res / np.linalg.norm(res)
434
+ norm_aux = [norm1[0], norm1[1]] # normalized vectors
435
+
436
+ norm1[0] *= image.shape[0]*0.15
437
+ norm1[1] *= image.shape[0]*0.15
438
+
439
+ point = center + norm1
440
+
441
+
442
+ img_arrow = cv2.arrowedLine(image.copy(), (int(center[1]), int(center[0])), (int(point[1]), int(point[0])), (0, 0, 255), thickness=2, tipLength=0.2)
443
+
444
+ return img_arrow, [norm_aux, center]
445
+
446
+
447
+ def confusion_matrix(conf_matrix, target_names=None, title="", cmap=None):
448
+ """
449
+ Create the image of the confusion matrix given a matrix as input
450
+
451
+ Args:
452
+ :conf_matrix (list): list of lists that represent an MxM matrix e.g. [[v11, v12, v13], [v21, v22, v23], [v31, v32, v33]]
453
+ :target_names (list): list of target name of dimension M e.g. [[label1, label2, label3]]
454
+ (default is None)
455
+ :title (string): title string to be printed in the confusion matrix
456
+ (default is "")
457
+ :cmap (string): colormap that will be used by the confusion matrix
458
+ (default is None)
459
+
460
+ Returns:
461
+ :gbr (numpy.ndarray): The image where the lines connecting the key points will be printed
462
+ """
463
+ from laeo_per_frame.interaction_per_frame_uncertainty import LAEO_computation
464
+ import matplotlib.pyplot as plt
465
+
466
+ if not conf_matrix:
467
+ return []
468
+
469
+ # if cmap is None:
470
+ # cmap = plt.get_cmap('Blues')
471
+
472
+ plt.rcParams['xtick.bottom'] = plt.rcParams['xtick.labelbottom'] = False
473
+ plt.rcParams['xtick.top'] = plt.rcParams['xtick.labeltop'] = True
474
+
475
+ fig, ax = plt.subplots(figsize=(6, 4)) # 2, 2, figsize=(6, 4))
476
+ cax = ax.imshow(conf_matrix)
477
+
478
+ for i in range(len(conf_matrix[0])):
479
+ for j in range(len(conf_matrix[1])):
480
+ ax.text(j, i, str(np.around(conf_matrix[i][j], 3)), va='center', ha='center', color="black")
481
+
482
+ if target_names is not None:
483
+ ax.set_xticks(np.arange(len(target_names)))
484
+ ax.set_yticks(np.arange(len(target_names)))
485
+ ax.set_xticklabels(target_names)
486
+ ax.set_yticklabels(target_names)
487
+
488
+ plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
489
+ fig.tight_layout()
490
+ fig.colorbar(cax)
491
+ # plt.show()
492
+
493
+ fig.canvas.draw()
494
+
495
+ width, height = fig.get_size_inches() * fig.get_dpi()
496
+ aux_img = np.fromstring(fig.canvas.tostring_rgb(), dtype='uint8').reshape(int(height), int(width), 3)
497
+ gbr = aux_img[..., [2, 0, 1]].copy()
498
+
499
+ # cv2.imshow("1312", gbr)
500
+ # cv2.waitKey(0)
501
+
502
+ return gbr
503
+
504
+
505
+ def join_images(image1, image2):
506
+ """
507
+ Join two images vertically into a new image with the height that is the maximum height of the two images passed as input and the width that is
508
+ the sum of the widths of the two images passed as input
509
+
510
+ Args:
511
+ :image1 (numpy.ndarray): The image that will be in the left part of the joined images
512
+ :image2 (numpy.ndarray): The image that will be in the right part of the joined images
513
+
514
+ Returns:
515
+ :joined_image (numpy.ndarray): The image that is the results of the merge of the two images passed as input
516
+ """
517
+
518
+ if type(image1) == list or type(image2) == list:
519
+ return None
520
+
521
+ image1_width, image1_height, image2_width, image2_height = image1.shape[1], image1.shape[0], image2.shape[1], image2.shape[0]
522
+
523
+ new_shape_height = max(image1_height, image2_height)
524
+ new_shape = (new_shape_height, image1_width + image2_width, 3)
525
+
526
+ joined_image = np.zeros(new_shape, dtype=np.uint8)
527
+ joined_image[:image1_height, :image1_width, :] = image1
528
+ joined_image[:image2_height, image1_width:, :] = image2
529
+
530
+ cv2.imshow("", cv2.resize(joined_image, (1200, 500)))
531
+ cv2.waitKey(0)
532
+ return joined_image
533
+
534
+
535
+ def draw_axis_from_json(img, json_file):
536
+ if os.path.isfile(json_file):
537
+ cv2.imshow("", img)
538
+ cv2.waitKey(0)
539
+
540
+ with open(json_file) as f:
541
+ data = json.load(f)
542
+ print(data)
543
+ aux = data['people']
544
+ for elem in aux:
545
+ draw_axis(elem['yaw'][0], elem['pitch'][0], elem['roll'][0], img, elem['center_xy'][0], elem['center_xy'][1])
546
+ cv2.imshow("", img)
547
+ cv2.waitKey(0)
548
+
549
+ return
550
+
551
+
552
+ def points_on_circumference(center=(0, 0), r=50, n=100):
553
+ return [(center[0] + (cos(2 * pi / n * x) * r), center[1] + (sin(2 * pi / n * x) * r)) for x in range(0, n + 1)]
554
+
555
+
556
+ def draw_cones(yaw, pitch, roll, unc_yaw, unc_pitch, unc_roll, image=None, tdx=None, tdy=None, size=300):
557
+ """
558
+ Draw yaw pitch and roll axis on the image if passed as input and returns the vector containing the projection of the vector on the image plane
559
+
560
+ Args:
561
+ :yaw (float): value that represents the yaw rotation of the face
562
+ :pitch (float): value that represents the pitch rotation of the face
563
+ :roll (float): value that represents the roll rotation of the face
564
+ :image (numpy.ndarray): The image where the three vector will be printed
565
+ (default is None)
566
+ :tdx (float64): x coordinate from where the vector drawing start expressed in pixel coordinates
567
+ (default is None)
568
+ :tdy (float64): y coordinate from where the vector drawing start expressed in pixel coordinates
569
+ (default is None)
570
+ :size (int): value that will be multiplied to each x, y and z value that enlarge the "vector drawing"
571
+ (default is 50)
572
+
573
+ Returns:
574
+ :list_projection_xy (list): list containing the unit vector [x, y, z]
575
+ """
576
+
577
+ pitch = pitch * np.pi / 180
578
+ yaw = -(yaw * np.pi / 180)
579
+ roll = roll * np.pi / 180
580
+
581
+ if tdx != None and tdy != None:
582
+ tdx = tdx
583
+ tdy = tdy
584
+
585
+ else:
586
+ height, width = image.shape[:2]
587
+ tdx = width / 2
588
+ tdy = height / 2
589
+
590
+ # PROJECT 3D TO 2D XY plane (Z = 0)
591
+
592
+ # X-Axis pointing to right. drawn in red
593
+ x1 = size * (cos(yaw) * cos(roll)) + tdx
594
+ y1 = size * (cos(pitch) * sin(roll) + cos(roll) * sin(pitch) * sin(yaw)) + tdy
595
+
596
+ # Y-Axis | drawn in green
597
+ x2 = size * (-cos(yaw) * sin(roll)) + tdx
598
+ y2 = size * (cos(pitch) * cos(roll) - sin(pitch) * sin(yaw) * sin(roll)) + tdy
599
+
600
+ # Z-Axis (out of the screen) drawn in blue
601
+ x3 = size * (sin(yaw)) + tdx
602
+ y3 = size * (-cos(yaw) * sin(pitch)) + tdy
603
+ z3 = size * (cos(pitch) * cos(yaw)) + tdy
604
+
605
+ unc_mean = (unc_yaw + unc_pitch + unc_roll) / 3
606
+
607
+ radius = 12 * unc_mean
608
+
609
+ overlay = image.copy()
610
+ if image is not None:
611
+ # cv2.line(image, (int(tdx), int(tdy)), (int(x1), int(y1)), (0, 0, 255), 2)
612
+ # cv2.line(image, (int(tdx), int(tdy)), (int(x2), int(y2)), (0, 255, 0), 2)
613
+ cv2.line(image, (int(tdx), int(tdy)), (int(x3), int(y3)), (255, 0, 0), 2)
614
+
615
+ points = points_on_circumference((int(x3), int(y3)), radius, 400)
616
+
617
+ for point in points:
618
+ cv2.line(image, (int(tdx), int(tdy)), (int(point[0]), int(point[1])), (255, 0, 0), 2)
619
+
620
+ # cv2.circle(image, (int(x3), int(y3)), int(radius), (255, 0, 0), 2)
621
+
622
+ alpha = 0.5
623
+ image = cv2.addWeighted(overlay, alpha, image, 1 - alpha, 0)
624
+
625
+ # cv2.imshow("cc", image)
626
+ # cv2.waitKey(0)
627
+ # exit()
628
+
629
+ list_projection_xy = [sin(yaw), -cos(yaw) * sin(pitch)]
630
+ return list_projection_xy, image
631
+
632
+ def draw_axis_3d(yaw, pitch, roll, image=None, tdx=None, tdy=None, size=50, yaw_uncertainty=-1, pitch_uncertainty=-1, roll_uncertainty=-1):
633
+ """
634
+ Draw yaw pitch and roll axis on the image if passed as input and returns the vector containing the projection of the vector on the image plane
635
+ Args:
636
+ :yaw (float): value that represents the yaw rotation of the face
637
+ :pitch (float): value that represents the pitch rotation of the face
638
+ :roll (float): value that represents the roll rotation of the face
639
+ :image (numpy.ndarray): The image where the three vector will be printed
640
+ (default is None)
641
+ :tdx (float64): x coordinate from where the vector drawing start expressed in pixel coordinates
642
+ (default is None)
643
+ :tdy (float64): y coordinate from where the vector drawing start expressed in pixel coordinates
644
+ (default is None)
645
+ :size (int): value that will be multiplied to each x, y and z value that enlarge the "vector drawing"
646
+ (default is 50)
647
+ Returns:
648
+ :list_projection_xy (list): list containing the unit vector [x, y, z]
649
+ """
650
+ pitch = pitch * np.pi / 180
651
+ yaw = -(yaw * np.pi / 180)
652
+ roll = roll * np.pi / 180
653
+ # print(yaw, pitch, roll)
654
+ if tdx is None or tdy is None:
+ # default to the image centre when no explicit anchor point is given
+ height, width = image.shape[:2]
+ tdx = width / 2
+ tdy = height / 2
661
+ # PROJECT 3D TO 2D XY plane (Z = 0)
662
+ # X-Axis pointing to right. drawn in red
663
+ x1 = size * (cos(yaw) * cos(roll)) + tdx
664
+ y1 = size * (cos(pitch) * sin(roll) + cos(roll) * sin(pitch) * sin(yaw)) + tdy
665
+ # Y-Axis | drawn in green
666
+ x2 = size * (-cos(yaw) * sin(roll)) + tdx
667
+ y2 = size * (cos(pitch) * cos(roll) - sin(pitch) * sin(yaw) * sin(roll)) + tdy
668
+ # Z-Axis (out of the screen) drawn in blue
669
+ x3 = size * (sin(yaw)) + tdx
670
+ y3 = size * (-cos(yaw) * sin(pitch)) + tdy
671
+ z3 = size * (cos(pitch) * cos(yaw)) + tdy
672
+ if image is not None:
673
+ cv2.line(image, (int(tdx), int(tdy)), (int(x1), int(y1)), (0, 0, 255), 2)
674
+ cv2.line(image, (int(tdx), int(tdy)), (int(x2), int(y2)), (0, 255, 0), 2)
675
+ cv2.line(image, (int(tdx), int(tdy)), (int(x3), int(y3)), (255, 0, 0), 2)
676
+ return image
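As a quick, self-contained sanity check of the Z-axis projection used by the two drawing functions above (a minimal sketch; the 640x480 anchor point and the example angles are illustrative assumptions, not values from the repository):

from math import sin, cos, radians

def project_gaze(yaw, pitch, size=50, tdx=320, tdy=240):
    # Same projection as draw_axis_3d: the head-frame Z axis mapped onto the image plane.
    # Angles are given in degrees; yaw is negated exactly as in the functions above.
    yaw_r = -radians(yaw)
    pitch_r = radians(pitch)
    x3 = size * sin(yaw_r) + tdx
    y3 = size * (-cos(yaw_r) * sin(pitch_r)) + tdy
    return x3, y3

print(project_gaze(0, 0))    # (320.0, 240.0): the face looks straight into the camera
print(project_gaze(90, 0))   # (270.0, 240.0): a 90 degree yaw moves the axis tip horizontally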
utils/labels.py ADDED
@@ -0,0 +1,333 @@
1
+ coco_category_index = {
2
+ 1: {'id': 1, 'name': 'person'},
3
+ 2: {'id': 2, 'name': 'bicycle'},
4
+ 3: {'id': 3, 'name': 'car'},
5
+ 4: {'id': 4, 'name': 'motorcycle'},
6
+ 5: {'id': 5, 'name': 'airplane'},
7
+ 6: {'id': 6, 'name': 'bus'},
8
+ 7: {'id': 7, 'name': 'train'},
9
+ 8: {'id': 8, 'name': 'truck'},
10
+ 9: {'id': 9, 'name': 'boat'},
11
+ 10: {'id': 10, 'name': 'traffic light'},
12
+ 11: {'id': 11, 'name': 'fire hydrant'},
13
+ 13: {'id': 13, 'name': 'stop sign'},
14
+ 14: {'id': 14, 'name': 'parking meter'},
15
+ 15: {'id': 15, 'name': 'bench'},
16
+ 16: {'id': 16, 'name': 'bird'},
17
+ 17: {'id': 17, 'name': 'cat'},
18
+ 18: {'id': 18, 'name': 'dog'},
19
+ 19: {'id': 19, 'name': 'horse'},
20
+ 20: {'id': 20, 'name': 'sheep'},
21
+ 21: {'id': 21, 'name': 'cow'},
22
+ 22: {'id': 22, 'name': 'elephant'},
23
+ 23: {'id': 23, 'name': 'bear'},
24
+ 24: {'id': 24, 'name': 'zebra'},
25
+ 25: {'id': 25, 'name': 'giraffe'},
26
+ 27: {'id': 27, 'name': 'backpack'},
27
+ 28: {'id': 28, 'name': 'umbrella'},
28
+ 31: {'id': 31, 'name': 'handbag'},
29
+ 32: {'id': 32, 'name': 'tie'},
30
+ 33: {'id': 33, 'name': 'suitcase'},
31
+ 34: {'id': 34, 'name': 'frisbee'},
32
+ 35: {'id': 35, 'name': 'skis'},
33
+ 36: {'id': 36, 'name': 'snowboard'},
34
+ 37: {'id': 37, 'name': 'sports ball'},
35
+ 38: {'id': 38, 'name': 'kite'},
36
+ 39: {'id': 39, 'name': 'baseball bat'},
37
+ 40: {'id': 40, 'name': 'baseball glove'},
38
+ 41: {'id': 41, 'name': 'skateboard'},
39
+ 42: {'id': 42, 'name': 'surfboard'},
40
+ 43: {'id': 43, 'name': 'tennis racket'},
41
+ 44: {'id': 44, 'name': 'bottle'},
42
+ 46: {'id': 46, 'name': 'wine glass'},
43
+ 47: {'id': 47, 'name': 'cup'},
44
+ 48: {'id': 48, 'name': 'fork'},
45
+ 49: {'id': 49, 'name': 'knife'},
46
+ 50: {'id': 50, 'name': 'spoon'},
47
+ 51: {'id': 51, 'name': 'bowl'},
48
+ 52: {'id': 52, 'name': 'banana'},
49
+ 53: {'id': 53, 'name': 'apple'},
50
+ 54: {'id': 54, 'name': 'sandwich'},
51
+ 55: {'id': 55, 'name': 'orange'},
52
+ 56: {'id': 56, 'name': 'broccoli'},
53
+ 57: {'id': 57, 'name': 'carrot'},
54
+ 58: {'id': 58, 'name': 'hot dog'},
55
+ 59: {'id': 59, 'name': 'pizza'},
56
+ 60: {'id': 60, 'name': 'donut'},
57
+ 61: {'id': 61, 'name': 'cake'},
58
+ 62: {'id': 62, 'name': 'chair'},
59
+ 63: {'id': 63, 'name': 'couch'},
60
+ 64: {'id': 64, 'name': 'potted plant'},
61
+ 65: {'id': 65, 'name': 'bed'},
62
+ 67: {'id': 67, 'name': 'dining table'},
63
+ 70: {'id': 70, 'name': 'toilet'},
64
+ 72: {'id': 72, 'name': 'tv'},
65
+ 73: {'id': 73, 'name': 'laptop'},
66
+ 74: {'id': 74, 'name': 'mouse'},
67
+ 75: {'id': 75, 'name': 'remote'},
68
+ 76: {'id': 76, 'name': 'keyboard'},
69
+ 77: {'id': 77, 'name': 'cell phone'},
70
+ 78: {'id': 78, 'name': 'microwave'},
71
+ 79: {'id': 79, 'name': 'oven'},
72
+ 80: {'id': 80, 'name': 'toaster'},
73
+ 81: {'id': 81, 'name': 'sink'},
74
+ 82: {'id': 82, 'name': 'refrigerator'},
75
+ 84: {'id': 84, 'name': 'book'},
76
+ 85: {'id': 85, 'name': 'clock'},
77
+ 86: {'id': 86, 'name': 'vase'},
78
+ 87: {'id': 87, 'name': 'scissors'},
79
+ 88: {'id': 88, 'name': 'teddy bear'},
80
+ 89: {'id': 89, 'name': 'hair drier'},
81
+ 90: {'id': 90, 'name': 'toothbrush'},
82
+ }
83
+
84
+ rgb_colors = {
85
+ 1: (240, 248, 255),
86
+ 2: (250, 235, 215),
87
+ 3: (0, 255, 255),
88
+ 4: (127, 255, 212),
89
+ 5: (240, 255, 255),
90
+ 6: (245, 245, 220),
91
+ 7: (255, 228, 196),
92
+ 8: (255, 255, 255),
93
+ 9: (255, 235, 205),
94
+ 10: (0, 0, 255),
95
+ 11: (138, 43, 226),
96
+ 12: (165, 42, 42),
97
+ 13: (222, 184, 135),
98
+ 14: (95, 158, 160),
99
+ 15: (127, 255, 0),
100
+ 16: (210, 105, 30),
101
+ 17: (255, 127, 80),
102
+ 18: (100, 149, 237),
103
+ 19: (255, 248, 220),
104
+ 20: (220, 20, 60),
105
+ 21: (0, 255, 255),
106
+ 22: (0, 0, 139),
107
+ 23: (0, 139, 139),
108
+ 24: (184, 134, 11),
109
+ 25: (169, 169, 169),
110
+ 26: (0, 100, 0),
111
+ 27: (169, 169, 169),
112
+ 28: (189, 183, 107),
113
+ 29: (139, 0, 139),
114
+ 30: (85, 107, 47),
115
+ 31: (255, 140, 0),
116
+ 32: (153, 50, 204),
117
+ 33: (139, 0, 0),
118
+ 34: (233, 150, 122),
119
+ 35: (143, 188, 143),
120
+ 36: (72, 61, 139),
121
+ 37: (47, 79, 79),
122
+ 38: (47, 79, 79),
123
+ 39: (0, 206, 209),
124
+ 40: (148, 0, 211),
125
+ 41: (255, 20, 147),
126
+ 42: (0, 191, 255),
127
+ 43: (105, 105, 105),
128
+ 44: (105, 105, 105),
129
+ 45: (30, 144, 255),
130
+ 46: (178, 34, 34),
131
+ 47: (255, 250, 240),
132
+ 48: (34, 139, 34),
133
+ 49: (255, 0, 255),
134
+ 50: (220, 220, 220),
135
+ 51: (248, 248, 255),
136
+ 52: (255, 215, 0),
137
+ 53: (218, 165, 32),
138
+ 54: (128, 128, 128),
139
+ 55: (0, 128, 0),
140
+ 56: (173, 255, 47),
141
+ 57: (128, 128, 128),
142
+ 58: (240, 255, 240),
143
+ 59: (255, 105, 180),
144
+ 60: (205, 92, 92),
145
+ 61: (75, 0, 130),
146
+ 62: (255, 0, 122),
147
+ 63: (240, 230, 140),
148
+ 64: (230, 230, 250),
149
+ 65: (255, 240, 245),
150
+ 66: (124, 252, 0),
151
+ 67: (255, 250, 205),
152
+ 68: (173, 216, 230),
153
+ 69: (240, 128, 128),
154
+ 70: (224, 255, 255),
155
+ 71: (250, 250, 210),
156
+ 72: (211, 211, 211),
157
+ 73: (144, 238, 144),
158
+ 74: (211, 211, 211),
159
+ 75: (255, 182, 193),
160
+ 76: (255, 160, 122),
161
+ 77: (32, 178, 170),
162
+ 78: (135, 206, 250),
163
+ 79: (119, 136, 153),
164
+ 80: (119, 136, 153),
165
+ 81: (176, 196, 222),
166
+ 82: (255, 255, 224),
167
+ 83: (0, 255, 0),
168
+ 84: (50, 205, 50),
169
+ 85: (250, 240, 230),
170
+ 86: (255, 0, 255),
171
+ 87: (128, 0, 0),
172
+ 88: (102, 205, 170),
173
+ 89: (0, 0, 205),
174
+ 90: (186, 85, 211),
175
+ }
176
+
177
+ color_pose = {
178
+ "purple": (255, 0, 100),
179
+ "light_pink": (80, 0, 255),
180
+ "dark_pink": (220, 0, 255),
181
+ "light_orange": (0, 80, 255),
182
+ "dark_orange": (255, 220, 0.),
183
+ "yellow": (0, 220, 255),
184
+ "blue": (255, 0, 0),
185
+ "green": (0,255,0),
186
+ }
187
+
188
+ color_pose_normalized = {
189
+ "purple": (100/255., 0/255., 255/255.),
190
+ "light_pink": (255/255., 0/255., 80/255.),
191
+ "dark_pink": (255/255., 0/255., 220/255.),
192
+ "light_orange": (255/255., 80/255., 0/255.),
193
+ "dark_orange": (255/255., 220/255., 0/255.),
194
+ "blue": (0/255., 0/255., 255/255.)
195
+ }
196
+
197
+ pose_id_part = {
198
+ 0: "Nose",# purple
199
+ 1: "LEye",#light_pink
200
+ 2: "REye",#dark_pink
201
+ 3: "LEar",#light_orange
202
+ 4: "REar",#yellow
203
+ 5: "LShoulder",
204
+ 6: "RShoulder",
205
+ 7: "LElbow",
206
+ 8: "RElbow",
207
+ 9: "LWrist",
208
+ 10: "RWrist",
209
+ 11: "LHip",
210
+ 12: "RHip",
211
+ 13: "LKnee",
212
+ 14: "RKnee",
213
+ 15: "LAnkle",
214
+ 16: "RAnkle"
215
+ }
216
+
217
+ rev_pose_id_part = {value: key for key, value in pose_id_part.items()}
218
+
219
+ pose_id_part_openpose = {
220
+ 0: "Nose",
221
+ 1: "Neck",
222
+ 2: "RShoulder",
223
+ 3: "RElbow",
224
+ 4: "RWrist",
225
+ 5: "LShoulder",
226
+ 6: "LElbow",
227
+ 7: "LWrist",
228
+ 8: "MidHip",
229
+ 9: "RHip",
230
+ 10: "RKnee",
231
+ 11: "RAnkle",
232
+ 12: "LHip",
233
+ 13: "LKnee",
234
+ 14: "LAnkle",
235
+ 15: "REye",
236
+ 16: "LEye",
237
+ 17: "REar",
238
+ 18: "LEar",
239
+ 19: "LBigToe",
240
+ 20: "LSmallToe",
241
+ 21: "LHeel",
242
+ 22: "RBigToe",
243
+ 23: "RSmallToe",
244
+ 24: "RHeel",
245
+ 25: "Background"
246
+ }
247
+
248
+ pose_id_part_zedcam = {
249
+ 0: "Nose",
250
+ 1: "Neck",
251
+ 2: "RShoulder",
252
+ 3: "RElbow",
253
+ 4: "RWrist",
254
+ 5: "LShoulder",
255
+ 6: "LElbow",
256
+ 7: "LWrist",
257
+ 8: "RHip",
258
+ 9: "RKnee",
259
+ 10: "RAnkle",
260
+ 11: "LHip",
261
+ 12: "LKnee",
262
+ 13: "LAnkle",
263
+ 14: "REye",
264
+ 15: "LEye",
265
+ 16: "REar",
266
+ 17: "LEar",
267
+ }
268
+ pose_id_part_centernet = {
269
+ 0: "Nose",
270
+ 1: "Neck",
271
+ 2: "RShoulder",
272
+ 3: "RElbow",
273
+ 4: "RWrist",
274
+ 5: "LShoulder",
275
+ 6: "LElbow",
276
+ 7: "LWrist",
277
+ 8: "MidHip",
278
+ 9: "RHip",
279
+ 10: "RKnee",
280
+ 11: "RAnkle",
281
+ 12: "LHip",
282
+ 13: "LKnee",
283
+ 14: "LAnkle",
284
+ 15: "REye",
285
+ 16: "LEye",
286
+ 17: "REar",
287
+ 18: "LEar",
288
+ 19: "LBigToe",
289
+ 20: "LSmallToe",
290
+ 21: "LHeel",
291
+ 22: "RBigToe",
292
+ 23: "RSmallToe",
293
+ 24: "RHeel",
294
+ 25: "Background"
295
+ }
296
+
297
+ rev_pose_id_part_openpose = {value: key for key, value in pose_id_part_openpose.items()}
298
+
299
+ face_category_index = {
300
+ 1: {'id': 1, 'name': 'Face'},
301
+ }
302
+
303
+ tracking_colors = {
304
+ 0: (255, 0, 0),
305
+ 1: (0, 255, 0),
306
+ 2: (0, 0, 255),
307
+ 3: (255, 0, 255),
308
+ 4: (255, 255, 0),
309
+ 5: (0, 255, 255),
310
+ 6: (255, 255, 255),
311
+ 7: (0, 0, 0),
312
+ 8: (128, 128, 128),
313
+ 9: (128, 0, 0),
314
+ 10: (0, 128, 0),
315
+ 11: (0, 0, 128),
316
+ 12: (128, 128, 0),
317
+ 13: (128, 0, 128),
318
+ 14: (0, 128, 128),
319
+ }
320
+
321
+ body_parts = [(5, 6), (5, 7), (6, 8), (7, 9), (8, 10), (11, 12), (5, 11), (6, 12), (11, 13), (12, 14), (13, 15), (14, 16)]
322
+
323
+ body_parts_openpose = [(5, 2), (5, 6), (2, 3), (6, 7), (3, 4), (12, 9), (5, 12), (2, 9), (12, 13), (9, 10), (13, 14),
324
+ (10, 11)]
325
+
326
+ body_parts_zedcam = [(5, 2), (5, 6), (2, 3), (6, 7), (3, 4), (11, 8), (5, 11), (2, 8), (11, 12), (8, 9), (12, 13),
327
+ (9, 10)]
328
+
329
+ face_points = [0, 1, 2, 3, 4]
330
+
331
+ face_points_openpose = [0, 16, 15, 18, 17]
332
+
333
+ face_points_zedcam = [0, 14, 15, 16, 17]
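The dictionaries above are consumed by the drawing and tracking code elsewhere in the repository; a minimal sketch of how they fit together (the random key points and the blank canvas are made up for illustration):

import numpy as np
import cv2
from utils.labels import coco_category_index, rgb_colors, body_parts, pose_id_part

print(coco_category_index[1]['name'])   # 'person'

# 17 key points in (x, y) pixel coordinates, following the pose_id_part indexing
key_points = np.random.randint(50, 400, size=(len(pose_id_part), 2))
canvas = np.zeros((480, 640, 3), dtype=np.uint8)

# body_parts lists the limb connections as pairs of key-point indices
for a, b in body_parts:
    pt_a = (int(key_points[a][0]), int(key_points[a][1]))
    pt_b = (int(key_points[b][0]), int(key_points[b][1]))
    cv2.line(canvas, pt_a, pt_b, rgb_colors[1], 2)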
utils/my_utils.py ADDED
@@ -0,0 +1,1375 @@
1
+ import numpy as np
2
+ from scipy.spatial import distance as dist
3
+ from utils.labels import pose_id_part, pose_id_part_openpose, rev_pose_id_part_openpose, rev_pose_id_part
4
+ import cv2
5
+ import os
6
+ import json
+ import logging
+
+ # module-level logger used by initialize_video_recorder()
+ logger = logging.getLogger(__name__)
7
+
8
+
9
+ def rescale_bb(boxes, pad, im_width, im_height):
10
+ """
11
+ Modify in place the bounding box coordinates (percentage) to the new image width and height
12
+
13
+ Args:
14
+ :boxes (numpy.ndarray): Array of bounding box coordinates expressed in percentage [y_min, x_min, y_max, x_max]
15
+ :pad (tuple): The first element represents the right padding (applied by resize_preserving_ar() function);
16
+ the second element represents the bottom padding (applied by resize_preserving_ar() function) and
17
+ the third element is a tuple that is the shape of the image after resizing without the padding (this is useful for
18
+ the coordinates changes)
19
+ :im_width (int): The new image width
20
+ :im_height (int): The new image height
21
+
22
+ Returns:
23
+ """
24
+
25
+ right_padding = pad[0]
26
+ bottom_padding = pad[1]
27
+
28
+ if bottom_padding != 0:
29
+ for box in boxes:
30
+ y_min, y_max = box[0] * im_height, box[2] * im_height # to pixels
31
+ box[0], box[2] = y_min / (im_height - pad[1]), y_max / (im_height - pad[1]) # back to percentage
32
+
33
+ if right_padding != 0:
34
+ for box in boxes:
35
+ x_min, x_max = box[1] * im_width, box[3] * im_width # to pixels
36
+ box[1], box[3] = x_min / (im_width - pad[0]), x_max / (im_width - pad[0]) # back to percentage
37
+
38
+
39
+ def rescale_key_points(key_points, pad, im_width, im_height):
40
+ """
41
+ Modify in place the bounding box coordinates (percentage) to the new image width and height
42
+
43
+ Args:
44
+ :key_points (numpy.ndarray): Array of bounding box coordinates expressed in percentage [y_min, x_min, y_max, x_max]
45
+ :pad (tuple): The first element represents the right padding (applied by resize_preserving_ar() function);
46
+ the second element represents the bottom padding (applied by resize_preserving_ar() function) and
47
+ the third element is a tuple that is the shape of the image after resizing without the padding (this is useful for
48
+ the coordinates changes)
49
+ :im_width (int): The new image width
50
+ :im_height (int): The new image height
51
+
52
+ Returns:
53
+ """
54
+
55
+ right_padding = pad[0]
56
+ bottom_padding = pad[1]
57
+
58
+ if bottom_padding != 0:
59
+ for aux in key_points:
60
+ for point in aux: # x 1 y 0
61
+ y = point[0] * im_height
62
+ point[0] = y / (im_height - pad[1])
63
+
64
+ if right_padding != 0:
65
+ for aux in key_points:
66
+ for point in aux:
67
+ x = point[1] * im_width
68
+ point[1] = x / (im_width - pad[0])
69
+
70
+
71
+ def change_coordinates_aspect_ratio(aux_key_points_array, img_person, img_person_resized):
72
+ """
73
+
74
+ Args:
75
+ :
76
+
77
+ Returns:
78
+ :
79
+ """
80
+
81
+ aux_key_points_array_ratio = []
82
+ ratio_h, ratio_w = img_person.shape[0] / (img_person_resized.shape[1]), img_person.shape[1] / (img_person_resized.shape[2]) # shape 0 batch 1
83
+
84
+ for elem in aux_key_points_array:
85
+ aux = np.zeros(3)
86
+ aux[0] = int((elem[0]) * ratio_h)
87
+ aux[1] = int(elem[1] * ratio_w)  # use the width ratio for the second coordinate
88
+ aux[2] = int(elem[2])
89
+ aux_key_points_array_ratio.append(aux)
90
+
91
+ aux_key_points_array_ratio = np.array(aux_key_points_array_ratio, dtype=int)
92
+
93
+ return aux_key_points_array_ratio
94
+
95
+
96
+ def parse_output_pose(heatmaps, offsets, threshold):
97
+ """
98
+ Parse the output pose (auxiliary function for tflite models)
99
+ Args:
100
+ :
101
+
102
+ Returns:
103
+ :
104
+ """
105
+ #
106
+ # heatmaps: 9x9x17 probability of appearance of each keypoint in the particular part of the image (9,9) -> used to locate position of the joints
107
+ # offsets: 9x9x34 used for calculation of the keypoint's position (first 17 x coords, the second 17 y coords)
108
+ #
109
+ joint_num = heatmaps.shape[-1]
110
+ pose_kps = np.zeros((joint_num, 3), np.uint32)
111
+
112
+ for i in range(heatmaps.shape[-1]):
113
+ joint_heatmap = heatmaps[..., i]
114
+ max_val_pos = np.squeeze(np.argwhere(joint_heatmap == np.max(joint_heatmap)))
115
+ remap_pos = np.array(max_val_pos / 8 * 257, dtype=np.int32)
116
+ pose_kps[i, 0] = int(remap_pos[0] + offsets[max_val_pos[0], max_val_pos[1], i])
117
+ pose_kps[i, 1] = int(remap_pos[1] + offsets[max_val_pos[0], max_val_pos[1], i + joint_num])
118
+ max_prob = np.max(joint_heatmap)
119
+
120
+ if max_prob > threshold:
121
+ if pose_kps[i, 0] < 257 and pose_kps[i, 1] < 257:
122
+ pose_kps[i, 2] = 1
123
+
124
+ return pose_kps
125
+
126
+
127
+ def retrieve_xyz_from_detection(points_list, point_cloud_img):
128
+ """
129
+ Retrieve the xyz of the list of points passed as input (if we have the point cloud of the image)
130
+ Args:
131
+ :points_list (list): list of points for which we want to retrieve xyz information
132
+ :point_cloud_img (numpy.ndarray): numpy array containing XYZRGBA information of the image
133
+
134
+ Returns:
135
+ :xyz (list): list of lists of 3D points with XYZ information (left camera origin (0,0,0))
136
+ """
137
+
138
+ xyz = [[point_cloud_img[:, :, 0][point[1], point[0]], point_cloud_img[:, :, 1][point[1], point[0]], point_cloud_img[:, :, 2][point[1], point[0]]]
139
+ for point in points_list]
140
+ return xyz
141
+
142
+
143
+ def retrieve_xyz_pose_points(point_cloud_image, key_points_score, key_points):
144
+ """Retrieve the key points from the point cloud to get the XYZ position in the 3D space
145
+
146
+ Args:
147
+ :point_cloud_image (numpy.ndarray):
148
+ :key_points_score (list):
149
+ :key_points (list):
150
+
151
+ Returns:
152
+ :xyz_pose: a list of lists representing the XYZ 3D coordinates of each key point (j is the index number of the id pose)
153
+ """
154
+ xyz_pose = []
155
+
156
+ for i in range(len(key_points_score)):
157
+ xyz_pose_aux = []
158
+ for j in range(len(key_points_score[i])):
159
+ # if key_points_score[i][j] > threshold:# and j < 5:
160
+ x, y = int(key_points[i][j][0] * point_cloud_image.shape[0]) - 1, int(key_points[i][j][1] * point_cloud_image.shape[1]) - 1
161
+ xyz_pose_aux.append([point_cloud_image[x, y, 0], point_cloud_image[x, y, 1], point_cloud_image[x, y, 2], key_points_score[i][j]])
162
+
163
+ xyz_pose.append(xyz_pose_aux)
164
+ return xyz_pose
165
+
166
+
167
+ def compute_distance(points_list, min_distance=1.5):
168
+ """
169
+ Compute the distance between each point and find if there are points that are closer to each other that do not respect a certain distance
170
+ expressed in meter.
171
+
172
+ Args:
173
+ :points_list (list): list of points expressed in xyz 3D coordinates (meters)
174
+ :min_distance (float): minimum threshold for distances (if the l2 distance between two objects is lower than this value it is considered a violation)
175
+ (default is 1.5)
176
+
177
+ Returns:
178
+ :distance_matrix: matrix containing the distances between each points (diagonal 0)
179
+ :violate: set of points that violate the minimum distance threshold
180
+ :couple_points: list of lists of couple points that violate the min_distance threshold (to keep track of each couple)
181
+ """
182
+
183
+ if points_list is None or len(points_list) == 1 or len(points_list) == 0:
184
+ return None, None, None
185
+ else: # if there are more than two points
186
+ violate = set()
187
+ couple_points = []
188
+ aux = np.array(points_list)
189
+ distance_matrix = dist.cdist(aux, aux, 'euclidean')
190
+ for i in range(0, distance_matrix.shape[0]): # loop over the upper triangular of the distance matrix
191
+ for j in range(i + 1, distance_matrix.shape[1]):
192
+ if distance_matrix[i, j] < min_distance:
193
+ # print("Distance between {} and {} is {:.2f} meters".format(i, j, distance_matrix[i, j]))
194
+ violate.add(i)
195
+ violate.add(j)
196
+ couple_points.append((i, j))
197
+
198
+ return distance_matrix, violate, couple_points
199
+
200
+
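A small usage sketch for compute_distance(); the three 3D positions below are toy values in metres, not repository data:

import numpy as np
from utils.my_utils import compute_distance

# three people, XYZ positions in metres
points = [[0.0, 0.0, 2.0], [0.5, 0.0, 2.0], [3.0, 0.0, 2.0]]

dist_matrix, violate, couples = compute_distance(points, min_distance=1.5)
print(np.round(dist_matrix, 2))   # pairwise Euclidean distances (zeros on the diagonal)
print(violate)                    # {0, 1}: the first two people are only 0.5 m apart
print(couples)                    # [(0, 1)]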
201
+ def initialize_video_recorder(output_path, output_depth_path, fps, shape):
202
+ """Initialize OpenCV video recorders that will be used to write each image/frame to a single video
203
+
204
+ Args:
205
+ :output (str): The file location where the recorded video will be saved
206
+ :output_depth (str): The file location where the recorded video with depth information will be saved
207
+ :fps (int): The frame per seconds of the output videos
208
+ :shape (tuple): The dimension of the output video (width, height)
209
+
210
+ Returns:
211
+ :writer (cv2.VideoWriter): The video writer used to save the video
212
+ :writer_depth (cv2.VideoWriter): The video writer used to save the video with depth information
213
+ """
214
+
215
+ if not os.path.isdir(os.path.split(output_path)[0]):
216
+ logger.error("Invalid path for the video writer; folder does not exist")
217
+ exit(1)
218
+
219
+ fourcc = cv2.VideoWriter_fourcc(*"MJPG")
220
+ writer = cv2.VideoWriter(output_path, fourcc, fps, shape, True)
221
+ writer_depth = None
222
+
223
+ if output_depth_path:
224
+ if not os.path.isdir(os.path.split(output_depth_path)[0]):
225
+ logger.error("Invalid path for the depth video writer; folder does not exist")
226
+ exit(1)
227
+ writer_depth = cv2.VideoWriter(output_depth_path, fourcc, fps, shape, True)
228
+
229
+ return writer, writer_depth
230
+
231
+
232
+ def delete_items_from_array_aux(arr, i):
233
+ """
234
+ Auxiliary function that delete the item at a certain index from a numpy array
235
+
236
+ Args:
237
+ :arr (numpy.ndarray): Array of array where each element correspond to the four coordinates of bounding box expressed in percentage
238
+ :i (int): Index of the element to be deleted
239
+
240
+ Returns:
241
+ :arr_ret: the array without the element at index i
242
+ """
243
+
244
+ aux = arr.tolist()
245
+ aux.pop(i)
246
+ arr_ret = np.array(aux)
247
+ return arr_ret
248
+
249
+
250
+ def fit_plane_least_square(xyz):
251
+ # find a plane that best fit xyz points using least squares
252
+ (rows, cols) = xyz.shape
253
+ g = np.ones((rows, 3))
254
+ g[:, 0] = xyz[:, 0] # X
255
+ g[:, 1] = xyz[:, 1] # Y
256
+ z = xyz[:, 2]
257
+ (a, b, c), _, rank, s = np.linalg.lstsq(g, z, rcond=None)
258
+
259
+ normal = (a, b, -1)
260
+ nn = np.linalg.norm(normal)
261
+ normal = normal / nn
262
+ point = np.array([0.0, 0.0, c])
263
+ d = -point.dot(normal)
264
+ return d, normal, point
265
+
266
+
267
+ #
268
+ # def plot_plane(data, normal, d):
269
+ # from mpl_toolkits.mplot3d import Axes3D
270
+ # import matplotlib.pyplot as plt
271
+ #
272
+ # fig = plt.figure()
273
+ # ax = fig.gca(projection='3d')
274
+ #
275
+ # # plot fitted plane
276
+ # maxx = np.max(data[:, 0])
277
+ # maxy = np.max(data[:, 1])
278
+ # minx = np.min(data[:, 0])
279
+ # miny = np.min(data[:, 1])
280
+ #
281
+ # # compute needed points for plane plotting
282
+ # xx, yy = np.meshgrid([minx - 10, maxx + 10], [miny - 10, maxy + 10])
283
+ # z = (-normal[0] * xx - normal[1] * yy - d) * 1. / normal[2]
284
+ #
285
+ # # plot plane
286
+ # ax.plot_surface(xx, yy, z, alpha=0.2)
287
+ #
288
+ # ax.set_xlabel('x')
289
+ # ax.set_ylabel('y')
290
+ # ax.set_zlabel('z')
291
+ # plt.show()
292
+ #
293
+ # return
294
+
295
+
296
+ def shape_to_np(shape, dtype="int"):
297
+ """
298
+ Function used for the dlib facial detector; it determines the facial landmarks for the face region, then converts the facial landmark
299
+ (x, y)-coordinates to a NumPy array
300
+
301
+ Args:
302
+ :shape ():
303
+ :dtype ():
304
+ (Default is "int")
305
+
306
+ Returns:
307
+ :coordinates (list): list of x, y coordinates
308
+ """
309
+ # initialize the list of (x, y)-coordinates
310
+ coordinates = np.zeros((68, 2), dtype=dtype)
311
+ # loop over the 68 facial landmarks and convert them to a 2-tuple of (x, y)-coordinates
312
+ for i in range(0, 68):
313
+ coordinates[i] = (shape.part(i).x, shape.part(i).y)
314
+ # return the list of (x, y)-coordinates
315
+ return coordinates
316
+
317
+
318
+ def rect_to_bb(rect):
319
+ """
320
+ Function used for the dlib facial detector; it converts dlib's rectangle to a tuple (x, y, w, h) where x and y represent xmin and ymin
321
+ coordinates while w and h represent the width and the height
322
+
323
+ Args:
324
+ :rect (dlib.rectangle): dlib rectangle object that represents the region of the image where a face is detected
325
+
326
+ Returns:
327
+ :res (tuple): tuple that represents the region of the image where a face is detected in the form x, y, w, h
328
+ """
329
+ # take a bounding predicted by dlib and convert it to the format (x, y, w, h) as we would normally do with OpenCV
330
+ x = rect.left()
331
+ y = rect.top()
332
+ w = rect.right() - x
333
+ h = rect.bottom() - y
334
+ # return a tuple of (x, y, w, h)
335
+ res = x, y, w, h
336
+ return res
337
+
338
+
339
+ def enlarge_bb(y_min, x_min, y_max, x_max, im_width, im_height):
340
+ """
341
+ Enlarge the bounding box to include more background margin (used for face detection)
342
+
343
+ Args:
344
+ :y_min (int): the top y coordinate of the bounding box
345
+ :x_min (int): the left x coordinate of the bounding box
346
+ :y_max (int): the bottom y coordinate of the bounding box
347
+ :x_max (int): the right x coordinate of the bounding box
348
+ :im_width (int): The width of the image
349
+ :im_height (int): The height of the image
350
+
351
+ Returns:
352
+ :y_min (int): the top y coordinate of the bounding box after enlarging
353
+ :x_min (int): the left x coordinate of the bounding box after enlarging
354
+ :y_max (int): the bottom y coordinate of the bounding box after enlarging
355
+ :x_max (int): the right x coordinate of the bounding box after enlarging
356
+ """
357
+
358
+ y_min = int(max(0, y_min - abs(y_min - y_max) / 10))
359
+ y_max = int(min(im_height, y_max + abs(y_min - y_max) / 10))
360
+ x_min = int(max(0, x_min - abs(x_min - x_max) / 5))
361
+ x_max = int(min(im_width, x_max + abs(x_min - x_max) / 4)) # 5
362
+ x_max = int(min(x_max, im_width))
363
+ return y_min, x_min, y_max, x_max
364
+
365
+
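For example, enlarging a 100x100 face box inside a 640x480 image adds roughly 10% of the box height on top and bottom and a larger margin on the sides (a worked example with hand-picked coordinates):

from utils.my_utils import enlarge_bb

y_min, x_min, y_max, x_max = enlarge_bb(100, 200, 200, 300, im_width=640, im_height=480)
print(y_min, x_min, y_max, x_max)   # 90 180 210 325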
366
+ def linear_assignment(cost_matrix):
367
+ try:
368
+ import lap
369
+ _, x, y = lap.lapjv(cost_matrix, extend_cost=True)
370
+ return np.array([[y[i], i] for i in x if i >= 0])
371
+ except ImportError:
372
+ from scipy.optimize import linear_sum_assignment
373
+ x, y = linear_sum_assignment(cost_matrix)
374
+ return np.array(list(zip(x, y)))
375
+
376
+
377
+ def iou_batch(bb_test, bb_gt):
378
+ """
379
+ From SORT: Computes IoU between two bboxes in the form [x1,y1,x2,y2]
380
+
381
+ Args:
382
+ :bb_test ():
383
+ :bb_gt ():
384
+
385
+ Returns:
386
+
387
+ """
388
+ # print(bb_test, bb_gt)
389
+ bb_gt = np.expand_dims(bb_gt, 0)
390
+ bb_test = np.expand_dims(bb_test, 1)
391
+
392
+ xx1 = np.maximum(bb_test[..., 0], bb_gt[..., 0])
393
+ yy1 = np.maximum(bb_test[..., 1], bb_gt[..., 1])
394
+ xx2 = np.minimum(bb_test[..., 2], bb_gt[..., 2])
395
+ yy2 = np.minimum(bb_test[..., 3], bb_gt[..., 3])
396
+ w = np.maximum(0., xx2 - xx1)
397
+ h = np.maximum(0., yy2 - yy1)
398
+ wh = w * h
399
+ o = wh / ((bb_test[..., 2] - bb_test[..., 0]) * (bb_test[..., 3] - bb_test[..., 1]) + (bb_gt[..., 2] - bb_gt[..., 0]) * (
400
+ bb_gt[..., 3] - bb_gt[..., 1]) - wh)
401
+ return o
402
+
403
+
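A quick numeric check of iou_batch() on hand-picked boxes (toy coordinates, not repository data):

import numpy as np
from utils.my_utils import iou_batch

detections = np.array([[0., 0., 10., 10.]])                      # [x1, y1, x2, y2]
trackers = np.array([[5., 5., 15., 15.], [20., 20., 30., 30.]])

print(iou_batch(detections, trackers))
# [[0.14285714 0.        ]]  -> a 25 px overlap over a 175 px union, and no overlap at all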
404
+ def convert_bbox_to_z(bbox):
405
+ """
406
+ Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form [x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is
407
+ the aspect ratio
408
+
409
+ Args:
410
+ :bbox ():
411
+
412
+ Returns:
413
+
414
+ """
415
+ w = bbox[2] - bbox[0]
416
+ h = bbox[3] - bbox[1]
417
+ x = bbox[0] + w / 2.
418
+ y = bbox[1] + h / 2.
419
+ s = w * h # scale is just area
420
+ r = w / float(h) if float(h) != 0 else w
421
+ return np.array([x, y, s, r]).reshape((4, 1))
422
+
423
+
424
+ def convert_x_to_bbox(x, score=None):
425
+ """
426
+ Takes a bounding box in the centre form [x,y,s,r] and returns it in the form
427
+ [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right
428
+
429
+ Args:
430
+ :x ():
431
+ :score ():
432
+ (Default is None)
433
+
434
+ Returns:
435
+
436
+ """
437
+ w = np.sqrt(x[2] * x[3])
438
+ h = x[2] / w
439
+ if score is None:
440
+ return np.array([x[0] - w / 2., x[1] - h / 2., x[0] + w / 2., x[1] + h / 2.]).reshape((1, 4))
441
+ else:
442
+ return np.array([x[0] - w / 2., x[1] - h / 2., x[0] + w / 2., x[1] + h / 2., score]).reshape((1, 5))
443
+
444
+
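The two conversions are inverses of each other, as this round trip on a toy box shows:

import numpy as np
from utils.my_utils import convert_bbox_to_z, convert_x_to_bbox

bbox = np.array([10., 20., 50., 100.])        # [x1, y1, x2, y2]
z = convert_bbox_to_z(bbox)                   # centre x, centre y, area, aspect ratio
print(z.ravel())                              # [  30.   60. 3200.    0.5]
print(convert_x_to_bbox(z.ravel()))           # [[ 10.  20.  50. 100.]]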
445
+ def associate_detections_to_trackers(detections, trackers, iou_threshold=0.3):
446
+ """
447
+ Assigns detections to tracked object (both represented as bounding boxes)
448
+ Returns 3 lists of matches, unmatched_detections and unmatched_trackers
449
+
450
+ Args:
451
+ :detections ():
452
+ :trackers ():
453
+ :iou_threshold ():
454
+ (Default is 0.3)
455
+
456
+ Returns:
457
+
458
+ """
459
+ if len(trackers) == 0:
460
+ return np.empty((0, 2), dtype=int), np.arange(len(detections)), np.empty((0, 5), dtype=int)
461
+
462
+ iou_matrix = iou_batch(detections, trackers)
463
+ # print("IOU MATRIX: ", iou_matrix)
464
+
465
+ if min(iou_matrix.shape) > 0:
466
+ a = (iou_matrix > iou_threshold).astype(np.int32)
467
+ if a.sum(1).max() == 1 and a.sum(0).max() == 1:
468
+ matched_indices = np.stack(np.where(a), axis=1)
469
+ else:
470
+ matched_indices = linear_assignment(-iou_matrix)
471
+ else:
472
+ matched_indices = np.empty(shape=(0, 2))
473
+
474
+ unmatched_detections = []
475
+ for d, det in enumerate(detections):
476
+ if d not in matched_indices[:, 0]:
477
+ unmatched_detections.append(d)
478
+ unmatched_trackers = []
479
+ for t, trk in enumerate(trackers):
480
+ if t not in matched_indices[:, 1]:
481
+ unmatched_trackers.append(t)
482
+
483
+ # filter out matched with low IOU
484
+ matches = []
485
+ for m in matched_indices:
486
+ if iou_matrix[m[0], m[1]] < iou_threshold:
487
+ unmatched_detections.append(m[0])
488
+ unmatched_trackers.append(m[1])
489
+ else:
490
+ matches.append(m.reshape(1, 2))
491
+ if len(matches) == 0:
492
+ matches = np.empty((0, 2), dtype=int)
493
+ else:
494
+ matches = np.concatenate(matches, axis=0)
495
+
496
+ return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
497
+
498
+
499
+ def find_face_from_key_points(key_points, bboxes, image, person=None, openpose=False, gazefollow=True):
500
+ """
501
+
502
+ Args:
503
+ key_points:
504
+ bboxes:
505
+ image:
506
+ person:
507
+ openpose:
508
+ gazefollow:
509
+
510
+ Returns:
511
+
512
+ """
513
+
514
+ im_width, im_height = image.shape[1], image.shape[0]
515
+
516
+ # key_points, bboxes = person.get_key_points()[-1], person.get_bboxes()[-1]
517
+ # print("PERSON ID:", person.get_id())
518
+
519
+ # 0 nose, 1/2 left/right eye, 3/4 left/right ear
520
+ # 5/6 leftShoulder/rightShoulder
521
+ # 7/8 leftElbow/rightElbow
522
+ # 9/10 leftWrist/rightWrist
523
+ # 11/12 leftHip/rightHip
524
+ # 13/14 leftKnee/rightKnee
525
+ # 15/16 leftAnkle/rightAnkle
526
+ # print(key_points)
527
+
528
+ face_points = key_points[:7]
529
+
530
+ if openpose:
531
+ face_points = []
532
+ for point in key_points[:7]:
533
+ # print(point[2], type(point[2]))
534
+ if point[2] > 0.0:
535
+ face_points.append(point)
536
+ # print("face1", face_points)
537
+
538
+ if len(face_points) == 0:
539
+ return None, []
540
+
541
+ # print("bboxe", bboxes, face_points)
542
+ if not gazefollow:
543
+ ct = compute_centroid(face_points)
544
+
545
+ x_min, y_min = ct[0] - 10, ct[1] - 15
546
+ x_max, y_max = ct[0] + 10, ct[1] + 10
547
+
548
+ y_min_bbox = y_min
549
+
550
+ elif gazefollow:
551
+ # [l_shoulder, r_shoulder] = key_points[5:]
552
+ # print(l_shoulder, r_shoulder)
553
+ print("FACE", face_points)
554
+ if len(face_points) == 1:
555
+ return None, []
556
+
557
+ x_min, y_min, _ = np.amin(face_points, axis=0)
558
+ x_max, y_max, _ = np.amax(face_points, axis=0)
559
+
560
+ # aux_diff =
561
+ # print("X: ", aux_diff)
562
+ # if aux_diff < 20:
563
+ # x_max += 20
564
+ # x_min -= 20
565
+
566
+ aux_diff = y_max - y_min
567
+ print("y: ", aux_diff)
568
+ if aux_diff < 50: # ratio of xmax - xmin, or similar
569
+ y_max += (x_max - x_min) / 1.4
570
+ y_min -= (x_max - x_min) / 1.2
571
+ # x_min -= 10
572
+ # x_max += 10
573
+
574
+ y_min_bbox = int(y_min) # int(bboxes[1]) if bboxes is not None else y_min - (x_max-x_min)
575
+ # if bboxes is None:
576
+ # y_max = y_max + (x_max-x_min)
577
+
578
+ y_min, x_min, y_max, x_max = enlarge_bb(y_min_bbox, x_min, y_max, x_max, im_width, im_height)
579
+ # print(y_min, x_min, y_max, x_max, y_max - y_min, x_max - x_min)
580
+ # if -1 < y_max - y_min < 5 and -1 < x_max - x_min < 5: # two identical points
581
+ # # print("AAAAA")
582
+ # return None, []
583
+
584
+ face_image = image[y_min:y_max, x_min:x_max]
585
+
586
+ if person is not None:
587
+ # person.print_()
588
+ person.update_faces(face_image)
589
+ person.update_faces_coordinates([y_min, x_min, y_max, x_max])
590
+ # person.update_faces_key_points(face_points)
591
+ # person.print_()
592
+ return None
593
+ else:
594
+ return face_image, [y_min, x_min, y_max, x_max]
595
+
596
+
597
+ def compute_interaction_cosine(head_position, target_position, gaze_direction):
598
+ """
599
+ Computes the interaction between two people using the angle of view.
600
+ The interaction is measured as the cosine of the angle formed by the line from person A to B and the gaze direction of person A.
601
+
602
+ Args:
603
+ :head_position (list): list of pixel coordinates [x, y] that represents the position of the head of person A
604
+ :target_position (list): list of pixel coordinates [x, y] that represents the position of head of person B
605
+ :gaze_direction (list): list that represents the gaze direction of the head of person A in the form [gx, gy]
606
+
607
+ Returns:
608
+ :val (float): value that describe the quantity of interaction
609
+ """
610
+
611
+ if head_position == target_position:
612
+ return 0 # or -1
613
+ else:
614
+ # direction from observer to target
615
+ direction = np.arctan2((target_position[1] - head_position[1]), (target_position[0] - head_position[0]))
616
+ direction_gaze = np.arctan2(gaze_direction[1], gaze_direction[0])
617
+ difference = direction - direction_gaze
618
+
619
+ # difference of the line joining observer -> target with the gazing direction,
620
+ val = np.cos(difference)
621
+ if val < 0:
622
+ return 0
623
+ else:
624
+ return val
625
+
626
+
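A worked example of compute_interaction_cosine() with hand-picked pixel positions (illustrative values only):

from utils.my_utils import compute_interaction_cosine

head_a = [100, 100]   # head of person A
head_b = [200, 100]   # person B sits exactly to the right of A
print(compute_interaction_cosine(head_a, head_b, [1, 0]))    # 1.0: A looks straight at B
print(compute_interaction_cosine(head_a, head_b, [-1, 0]))   # 0: A looks away, the negative cosine is clamped to 0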
627
+ def compute_attention_from_vectors(list_objects):
628
+ """
629
+
630
+ Args:
631
+ :list_objects ():
632
+
633
+ Returns:
634
+
635
+ """
636
+
637
+ dict_person = dict()
638
+ id_list = []
639
+ for obj in list_objects:
640
+ if len(obj.get_key_points()) > 0:
641
+ # print("Object ID: ", obj.get_id(), "x: ", obj.get_poses_vector_norm()[-1][0], "y: ", obj.get_poses_vector_norm()[-1][1])
642
+ id_list.append(obj.get_id())
643
+
644
+ # print("kpts: ", obj.get_key_points()[-1])
645
+ aux = [obj.get_key_points()[-1][j][:2] for j in [0, 2, 1, 4, 3]]
646
+ dict_person[obj.get_id()] = [obj.get_poses_vector_norm()[-1], np.mean(aux, axis=0).tolist()]
647
+
648
+ attention_matrix = np.zeros((len(dict_person), len(dict_person)), dtype=np.float32)
649
+
650
+ for i in range(attention_matrix.shape[0]):
651
+ for j in range(attention_matrix.shape[1]):
652
+ if i == j:
653
+ continue
654
+ attention_matrix[i][j] = compute_interaction_cosine(dict_person[i][1], dict_person[j][1], dict_person[i][0])
655
+
656
+ return attention_matrix.tolist(), id_list
657
+
658
+
659
+ def compute_attention_ypr(list_objects):
660
+ """
661
+
662
+ Args:
663
+ :list_objects ():
664
+
665
+ Returns:
666
+ :
667
+ """
668
+
669
+ for obj in list_objects:
670
+ if len(obj.get_key_points()) > 0:
671
+ print("Object ID: ", obj.get_id(), "yaw: ", obj.get_poses_ypr()[-1][0], "pitch: ", obj.get_poses_ypr()[-1][1], "roll: ",
672
+ obj.get_poses_ypr()[-1][2])
673
+
674
+
675
+ def save_key_points_to_json(ids, kpts, path_json, openpose=False):
676
+ """
677
+ Save key points to .json format according to Openpose output format
678
+
679
+ Args:
680
+ :kpts ():
681
+ :path_json ():
682
+
683
+ Returns:
684
+ """
685
+
686
+ # print(path_json)
687
+ dict_file = {"version": 1.3}
688
+ list_dict_person = []
689
+ for j in range(len(kpts)):
690
+ dict_person = {"person_id": [int(ids[j])],
691
+ "face_keypoints_2d": [],
692
+ "hand_left_keypoints_2d": [],
693
+ "hand_right_keypoints_2d": [],
694
+ "pose_keypoints_3d": [],
695
+ "face_keypoints_3d": [],
696
+ "hand_left_keypoints_3d": [],
697
+ "hand_right_keypoints_3d": []}
698
+
699
+ kpts_openpose = np.zeros((25, 3))
700
+ for i, point in enumerate(kpts[j]):
701
+ if openpose:
702
+ idx_op = rev_pose_id_part_openpose[pose_id_part_openpose[i]]
703
+ else:
704
+ idx_op = rev_pose_id_part_openpose[pose_id_part[i]]
705
+ # print(idx_op, point[1], point[0], point[2])
706
+ kpts_openpose[idx_op] = [point[1], point[0], point[2]] # x, y, conf
707
+
708
+ list_kpts_openpose = list(np.concatenate(kpts_openpose).ravel())
709
+ dict_person["pose_keypoints_2d"] = list_kpts_openpose
710
+ # print(dict_person)
711
+ list_dict_person.append(dict_person)
712
+
713
+ dict_file["people"] = list_dict_person
714
+
715
+ # Serializing json
716
+ json_object = json.dumps(dict_file, indent=4)
717
+
718
+ # Writing to sample.json
719
+ with open(path_json, "w") as outfile:
720
+ outfile.write(json_object)
721
+
722
+
723
+ def json_to_poses(json_data):
724
+ """
725
+
726
+ Args:
727
+ :json_data ():
728
+
729
+ Returns:
730
+ :res ():
731
+ """
732
+ poses = []
733
+ confidences = []
734
+ ids = []
735
+
736
+ for arr in json_data["people"]:
737
+ ids.append(arr["person_id"])
738
+ confidences.append(arr["pose_keypoints_2d"][2::3])
739
+ aux = arr["pose_keypoints_2d"][2::3]
740
+ arr = np.delete(arr["pose_keypoints_2d"], slice(2, None, 3))
741
+ # print("B", list(zip(arr[::2], arr[1::2])))
742
+ poses.append(list(zip(arr[::2], arr[1::2], aux)))
743
+
744
+ return poses, confidences, ids
745
+
746
+
747
+ def parse_json1(aux):
748
+ # print(aux['people'])
749
+ list_kpts = []
750
+ id_list = []
751
+ for person in aux['people']:
752
+ # print(len(person['pose_keypoints_2d']))
753
+ aux = person['pose_keypoints_2d']
754
+ aux_kpts = [[aux[i+1], aux[i], aux[i+2]] for i in range(0, 75, 3)]
755
+ # print(len(aux_kpts))
756
+ list_kpts.append(aux_kpts)
757
+ id_list.append(person['person_id'])
758
+
759
+ # print(list_kpts)
760
+ return list_kpts, id_list
761
+
762
+
763
+ def load_poses_from_json1(json_filename):
764
+ """
765
+
766
+ Args:
767
+ :json_filename ():
768
+
769
+ Returns:
770
+ :poses, conf:
771
+ """
772
+ with open(json_filename) as data_file:
773
+ loaded = json.load(data_file)
774
+ zz = parse_json1(loaded)
775
+ return zz
776
+
777
+
778
+ def load_poses_from_json(json_filename):
779
+ """
780
+
781
+ Args:
782
+ :json_filename ():
783
+
784
+ Returns:
785
+ :poses, conf:
786
+ """
787
+ with open(json_filename) as data_file:
788
+ loaded = json.load(data_file)
789
+ poses, conf, ids = json_to_poses(loaded)
790
+
791
+ if len(poses) < 1: # != 1:
792
+ return None, None, None
793
+ else:
794
+ return poses, conf, ids
795
+
796
+
797
+ def compute_head_features(img, pose, conf, open_pose=True):
798
+ """
799
+
800
+ Args:
801
+ img:
802
+ pose:
803
+ conf:
804
+ open_pose:
805
+
806
+ Returns:
807
+
808
+ """
809
+
810
+ joints = [0, 15, 16, 17, 18] if open_pose else [0, 2, 1, 4, 3]
811
+
812
+ n_joints_set = [pose[joint] for joint in joints if joint_set(pose[joint])] # if open_pose else pose
813
+
814
+ if len(n_joints_set) < 1:
815
+ return None, None
816
+
817
+ centroid = compute_centroid(n_joints_set)
818
+
819
+ # for j in n_joints_set:
820
+ # print(j, centroid)
821
+ max_dist = max([dist_2D([j[0], j[1]], centroid) for j in n_joints_set])
822
+
823
+ new_repr = [(np.array([pose[joint][0], pose[joint][1]]) - np.array(centroid)) for joint in joints] if open_pose else [
824
+ (np.array(pose[i]) - np.array(centroid)) for i in range(len(n_joints_set))]
825
+ result = []
826
+
827
+ for i in range(0, 5):
828
+
829
+ if joint_set(pose[joints[i]]):
830
+ if max_dist != 0.0:
831
+ result.append([new_repr[i][0] / max_dist, new_repr[i][1] / max_dist])
832
+ else:
833
+ result.append([new_repr[i][0], new_repr[i][1]])
834
+ else:
835
+ result.append([0, 0])
836
+
837
+ flat_list = [item for sublist in result for item in sublist]
838
+
839
+ conf_list = []
840
+
841
+ for j in joints:
842
+ conf_list.append(conf[j])
843
+
844
+ return flat_list, conf_list, centroid
845
+
846
+
847
+ def compute_body_features(pose, conf):
848
+ """
849
+
850
+ Args:
851
+ pose:
852
+ conf:
853
+
854
+ Returns:
855
+
856
+ """
857
+ joints = [0, 15, 16, 17, 18]
858
+ alljoints = range(0, 25)
859
+
860
+ n_joints_set = [pose[joint] for joint in joints if joint_set(pose[joint])]
861
+
862
+ if len(n_joints_set) < 1:
863
+ return None, None
864
+
865
+ centroid = compute_centroid(n_joints_set)
866
+
867
+ n_joints_set = [pose[joint] for joint in alljoints if joint_set(pose[joint])]
868
+
869
+ max_dist = max([dist_2D(j, centroid) for j in n_joints_set])
870
+
871
+ new_repr = [(np.array(pose[joint]) - np.array(centroid)) for joint in alljoints]
872
+
873
+ result = []
874
+
875
+ for i in range(0, 25):
876
+
877
+ if joint_set(pose[i]):
878
+ result.append([new_repr[i][0] / max_dist, new_repr[i][1] / max_dist])
879
+ else:
880
+ result.append([0, 0])
881
+
882
+ flat_list = [item for sublist in result for item in sublist]
883
+
884
+ for j in alljoints:
885
+ flat_list.append(conf[j])
886
+
887
+ return flat_list, centroid
888
+
889
+
890
+ def compute_centroid(points):
891
+ """
892
+
893
+ Args:
894
+ points:
895
+
896
+ Returns:
897
+
898
+ """
899
+ x, y = [], []
900
+ for point in points:
901
+ if len(point) == 3:
902
+ if point[2] > 0.0:
903
+ x.append(point[0])
904
+ y.append(point[1])
905
+ else:
906
+ x.append(point[0])
907
+ y.append(point[1])
908
+
909
+ # print(x, y)
910
+ if x == [] or y == []:
911
+ return [None, None]
912
+ mean_x = np.mean(x)
913
+ mean_y = np.mean(y)
914
+
915
+ return [mean_x, mean_y]
916
+
917
+
918
+ def joint_set(p):
919
+ """
920
+
921
+ Args:
922
+ p:
923
+
924
+ Returns:
925
+
926
+ """
927
+ return p[0] != 0.0 or p[1] != 0.0
928
+
929
+
930
+ def dist_2D(p1, p2):
931
+ """
932
+
933
+ Args:
934
+ p1:
935
+ p2:
936
+
937
+ Returns:
938
+
939
+ """
940
+ # print(p1)
941
+ # print(p2)
942
+
943
+ p1 = np.array(p1)
944
+ p2 = np.array(p2)
945
+
946
+ squared_dist = np.sum((p1 - p2) ** 2, axis=0)
947
+ return np.sqrt(squared_dist)
948
+
949
+
950
+ def compute_head_centroid(pose):
951
+ """
952
+
953
+ Args:
954
+ pose:
955
+
956
+ Returns:
957
+
958
+ """
959
+ joints = [0, 15, 16, 17, 18]
960
+
961
+ n_joints_set = [pose[joint] for joint in joints if joint_set(pose[joint])]
962
+
963
+ # if len(n_joints_set) < 2:
964
+ # return None
965
+
966
+ centroid = compute_centroid(n_joints_set)
967
+
968
+ return centroid
969
+
970
+
971
+ def head_direction_to_json(path_json, norm_list, unc_list, ids_list, file_name):
972
+
973
+ dict_file = {}
974
+ list_dict_person = []
975
+ for k, i in enumerate(norm_list):
976
+ dict_person = {"id_person": [ids_list[k]],
977
+ "norm_xy": [i[0][0].item(), i[0][1].item()], # from numpy to native python type for json serilization
978
+ "center_xy": [int(i[1][0]), int(i[1][1])],
979
+ "uncertainty": [unc_list[k].item()]}
980
+
981
+ list_dict_person.append(dict_person)
982
+ dict_file["people"] = list_dict_person
983
+
984
+ json_object = json.dumps(dict_file, indent=4)
985
+
986
+ with open(path_json, "w") as outfile:
987
+ outfile.write(json_object)
988
+
989
+
990
+ def ypr_to_json(path_json, yaw_list, pitch_list, roll_list, yaw_u_list, pitch_u_list, roll_u_list, ids_list, center_xy):
991
+
992
+ dict_file = {}
993
+ list_dict_person = []
994
+ for k in range(len(yaw_list)):
995
+ dict_person = {"id_person": [ids_list[k]],
996
+ "yaw": [yaw_list[k].item()],
997
+ "yaw_u": [yaw_u_list[k].item()],
998
+ "pitch": [pitch_list[k].item()],
999
+ "pitch_u": [pitch_u_list[k].item()],
1000
+ "roll": [roll_list[k].item()],
1001
+ "roll_u": [roll_u_list[k].item()],
1002
+ "center_xy": [int(center_xy[k][0]), int(center_xy[k][1])]}
1003
+
1004
+ list_dict_person.append(dict_person)
1005
+ dict_file["people"] = list_dict_person
1006
+
1007
+ json_object = json.dumps(dict_file, indent=4)
1008
+
1009
+ with open(path_json, "w") as outfile:
1010
+ outfile.write(json_object)
1011
+ # exit()
1012
+
1013
+
1014
+ def save_keypoints_image(img, poses, suffix_, path_save=''):
1015
+ """
1016
+ Save the image with the key points drawn on it
1017
+ Args:
1018
+ img:
1019
+ poses:
1020
+ suffix_:
1021
+
1022
+ Returns:
1023
+
1024
+ """
1025
+ aux = img.copy()
1026
+ for point in poses:
1027
+ for i, p in enumerate(point):
1028
+ if i in [0, 15, 16, 17, 18]:
1029
+ cv2.circle(aux, (int(p[0]), int(p[1])), 2, (0, 255, 0), 2)
1030
+
1031
+ cv2.imwrite(os.path.join(path_save, suffix_ + '.jpg'), aux)
1032
+
1033
+
1034
+ def unit_vector(vector):
1035
+ """
1036
+ Returns the unit vector of the vector.
1037
+
1038
+ Args:
1039
+ vector:
1040
+
1041
+ Returns:
1042
+
1043
+ """
1044
+ return vector / np.linalg.norm(vector)
1045
+
1046
+
1047
+ def angle_between(v1, v2):
1048
+ """
1049
+ Returns the angle in radians between vectors 'v1' and 'v2'::
1050
+
1051
+ angle_between((1, 0, 0), (0, 1, 0))
1052
+ 1.5707963267948966
1053
+ angle_between((1, 0, 0), (1, 0, 0))
1054
+ 0.0
1055
+ angle_between((1, 0, 0), (-1, 0, 0))
1056
+ 3.141592653589793
1057
+ """
1058
+ # if not unit vector
1059
+ v1_u = unit_vector(tuple(v1))
1060
+ v2_u = unit_vector(tuple(v2))
1061
+ angle = np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0))
1062
+ return angle if angle < 1.80 else angle - 1.80
1063
+
1064
+
1065
+ def centroid_constraint(centroid, centroid_det, gazefollow=False): # x y
1066
+ """
1067
+
1068
+ Args:
1069
+ centroid:
1070
+ centroid_det:
1071
+
1072
+ Returns:
1073
+
1074
+ """
1075
+ if centroid_det == [None, None]:
1076
+ return False
1077
+
1078
+ if gazefollow == False:
1079
+ if 0 < centroid_det[0] < 143 and 0 < centroid_det[1] < 24: # centroid in the overprinted text of hour in the video
1080
+ return False
1081
+ if 0 < centroid_det[1] < 4:
1082
+ return False
1083
+ if centroid[0] - 3 < centroid_det[0] < centroid[0] + 3 and centroid[1] - 3 < centroid_det[1] < centroid[
1084
+ 1] + 3: # detected centroid near the gt centroid
1085
+ return True
1086
+ else:
1087
+ return False
1088
+ else:
1089
+ if int(centroid[0] - 30) < int(centroid_det[0]) < int(centroid[0] + 30) and int(centroid[1] - 30) < int(centroid_det[1]) < int(
1090
+ centroid[1] + 30): # detected centroid near the gt centroid
1091
+ return True
1092
+ else:
1093
+ return False
1094
+
1095
+
1096
+ def initialize_video_reader(path_video):
1097
+ """
1098
+
1099
+ Args:
1100
+ path_video:
1101
+
1102
+ Returns:
1103
+
1104
+ """
1105
+ cap = cv2.VideoCapture(path_video)
1106
+ if cap is None or not cap.isOpened():
1107
+ print('Warning: unable to open video source: ', path_video)
1108
+ exit(-1)
1109
+ return cap
1110
+
1111
+
1112
+ def distance_skeletons(kpts1, kpts2, dst_type):
1113
+ """
1114
+ Function to compute the distance between skeletons
1115
+ #TO DO
1116
+ Args:
1117
+ kpts1:
1118
+ kpts2:
1119
+ dst_type:
1120
+
1121
+ Returns:
1122
+
1123
+ """
1124
+ if len(kpts1) != len(kpts2):
1125
+ print("Error: Different notation used for keypoints")
1126
+ exit(-1)
1127
+
1128
+ print(len(kpts1), len(kpts2))
1129
+ # to openpose notations
1130
+ if len(kpts1) == len(kpts2) == 17:
1131
+ kpts1, kpts2 = kpt_centernet_to_openpose(kpts1), kpt_centernet_to_openpose(kpts2)
1132
+ print(len(kpts1), len(kpts2))
1133
+
1134
+ if len(kpts1) != 25 or len(kpts2) != 25:
1135
+ print("Error")
1136
+ exit(-1)
1137
+
1138
+ res_dist = 0
1139
+
1140
+ if dst_type == 'all_points':
1141
+ for i, _ in enumerate(kpts1):
1142
+ res_dist += dist_2D(kpts1[i][:2], kpts2[i][:2])
1143
+ res_dist /= 25
1144
+ return res_dist
1145
+
1146
+ elif dst_type == 'head_centroid':
1147
+ top1_c, top2_c = compute_head_centroid(kpts1), compute_head_centroid(kpts2)
1148
+ if top1_c == [None, None] or top2_c == [None, None]:
1149
+ res_dist = 900
1150
+ else:
1151
+ res_dist = dist_2D(top1_c[:2], top2_c[:2])
1152
+ return res_dist
1153
+
1154
+ elif dst_type == 'three_centroids':
1155
+ #TO DO
1156
+ # top1_c, top2_c = compute_centroid(kpts1[0, 15, 16, 17, 18]), compute_centroid(kpts2[0, 15, 16, 17, 18])
1157
+ # mid1_c, mid2_c = compute_centroid(kpts1[2, 5, 9, 12]), compute_centroid(kpts2[2, 5, 9, 12])
1158
+ # btm1_c, btm2_c = compute_centroid(kpts1[9, 12, 10, 13]), compute_centroid(kpts2[9, 12, 10, 13])
1159
+ # res_dist = dist_2D(top1_c[:2], top2_c[:2]) + dist_2D(mid1_c[:2], mid2_c[:2]) + dist_2D(btm1_c[:2], btm2_c[:2])
1160
+ # res_dist /= 3
1161
+ # return res_dist
1162
+ return None
1163
+
1164
+ elif dst_type == '':
1165
+ print("dst_typ not valid")
1166
+ exit(-1)
1167
+
1168
+
1169
+ def kpt_openpose_to_centernet(kpts):
1170
+ """
1171
+
1172
+ Args:
1173
+ kpts:
1174
+
1175
+ Returns:
1176
+
1177
+ """
1178
+ #TO TEST
1179
+ kpts_openpose = np.zeros((16, 3))
1180
+ for i, point in enumerate(kpts):
1181
+ idx_op = rev_pose_id_part[pose_id_part_openpose[i]]
1182
+ kpts_openpose[idx_op] = [point[0], point[1], point[2]]
1183
+
1184
+ return kpts_openpose
1185
+
1186
+
1187
+ def kpt_centernet_to_openpose(kpts):
1188
+ """
1189
+
1190
+ Args:
1191
+ kpts:
1192
+
1193
+ Returns:
1194
+
1195
+ """
1196
+ #TO TEST
1197
+ kpts_openpose = np.zeros((25, 3))
1198
+ for i, point in enumerate(kpts):
1199
+ idx_op = rev_pose_id_part_openpose[pose_id_part[i]]
1200
+ kpts_openpose[idx_op] = [point[1], point[0], point[2]]
1201
+
1202
+ return kpts_openpose
1203
+
1204
+
1205
+ def non_maxima_aux(det, kpt, threshold=15): # threshold in pixels
1206
+ # print("A", kpt, "\n", len(kpt))
1207
+
1208
+ indexes_to_delete = []
1209
+
1210
+ if len(kpt) == 0 or len(det) == 0:
1211
+ return [], []
1212
+
1213
+ if len(kpt) == 1 or len(det) == 1:
1214
+ return det, kpt
1215
+
1216
+ kpt_res = kpt.copy()
1217
+ det_res_aux = det.copy()
1218
+
1219
+ for i in range(0, len(kpt)):
1220
+ for j in range(i, len(kpt)):
1221
+ if i == j:
1222
+ continue
1223
+ dist = distance_skeletons(kpt[i], kpt[j], 'head_centroid')
1224
+ # print("DIST", i, j, dist)
1225
+ if dist < threshold:
1226
+ if j not in indexes_to_delete:
1227
+ indexes_to_delete.append(j)
1228
+ # kpt_res.pop(j)
1229
+ det_res = []
1230
+
1231
+ # print(indexes_to_delete)
1232
+ indexes_to_delete = sorted(indexes_to_delete, reverse=True)
1233
+ # print(len(kpt_res))
1234
+ for index in indexes_to_delete:
1235
+ kpt_res.pop(index)
1236
+
1237
+ det_res_aux = list(np.delete(det_res_aux, indexes_to_delete, axis=0))
1238
+ det_res = np.array(det_res_aux)
1239
+
1240
+ return det_res, kpt_res
1241
+
1242
+
1243
+ def compute_centroid_list(points):
1244
+ """
1245
+
1246
+ Args:
1247
+ points:
1248
+
1249
+ Returns:
1250
+
1251
+ """
1252
+ x, y = [], []
1253
+ for i in range(0, len(points), 3):
1254
+ if points[i + 2] > 0.0: # confidence openpose
1255
+ x.append(points[i])
1256
+ y.append(points[i + 1])
1257
+
1258
+ if x == [] or y == []:
1259
+ return [None, None]
1260
+ mean_x = np.mean(x)
1261
+ mean_y = np.mean(y)
1262
+
1263
+ return [mean_x, mean_y]
1264
+
1265
+
1266
+ def normalize_wrt_maximum_distance_point(points, file_name=''):
1267
+ centroid = compute_centroid_list(points)
1268
+ # centroid = [points[0], points[1]]
1269
+ # print(centroid)
1270
+ # exit()
1271
+
1272
+ max_dist_x, max_dist_y = 0, 0
1273
+ for i in range(0, len(points), 3):
1274
+ if points[i + 2] > 0.0: # OpenPose confidence: keep only valid keypoints (undetected ones are (0, 0, 0))
1275
+ distance_x = abs(points[i] - centroid[0])
1276
+ distance_y = abs(points[i+1] - centroid[1])
1277
+ # dist_aux.append(distance)
1278
+ if distance_x > max_dist_x:
1279
+ max_dist_x = distance_x
1280
+ if distance_y > max_dist_y:
1281
+ max_dist_y = distance_y
1282
+ elif points[i + 2] == 0.0: # check for centernet people on borders with confidence 0
1283
+ points[i] = 0
1284
+ points[i+1] = 0
1285
+
1286
+ for i in range(0, len(points), 3):
1287
+ if points[i + 2] > 0.0:
1288
+ if max_dist_x != 0.0:
1289
+ points[i] = (points[i] - centroid[0]) / max_dist_x
1290
+ if max_dist_y != 0.0:
1291
+ points[i + 1] = (points[i + 1] - centroid[1]) / max_dist_y
1292
+ if max_dist_x == 0.0: # only one point valid with some confidence value so it become (0,0, confidence)
1293
+ points[i] = 0.0
1294
+ if max_dist_y == 0.0:
1295
+ points[i + 1] = 0.0
1296
+
1297
+ return points
1298
+
1299
+
1300
+ def retrieve_interest_points(kpts, detector):
1301
+ """
1302
+
1303
+ :param kpts:
1304
+ :return:
1305
+ """
1306
+ res_kpts = []
1307
+
1308
+ if detector == 'centernet':
1309
+ face_points = [0, 1, 2, 3, 4]
1310
+ for index in face_points:
1311
+ res_kpts.append(kpts[index][1])
1312
+ res_kpts.append(kpts[index][0])
1313
+ res_kpts.append(kpts[index][2])
1314
+ elif detector == 'zedcam':
1315
+ face_points = [0, 14, 15, 16, 17]
1316
+ for index in face_points:
1317
+ res_kpts.append(kpts[index][0])
1318
+ res_kpts.append(kpts[index][1])
1319
+ res_kpts.append(kpts[index][2])
1320
+ else:
1321
+ # take only interest points (5 points of face)
1322
+ face_points = [0, 16, 15, 18, 17]
1323
+ for index in face_points:
1324
+ res_kpts.append(kpts[index][0])
1325
+ res_kpts.append(kpts[index][1])
1326
+ res_kpts.append(kpts[index][2])
1327
+
1328
+
1329
+
1330
+ return res_kpts
1331
+
1332
+ def create_bbox_from_openpose_keypoints(data):
1333
+ # from labels import pose_id_part_openpose
1334
+ bbox = list()
1335
+ ids = list()
1336
+ kpt = list()
1337
+ kpt_scores = list()
1338
+ for person in data['people']:
1339
+ ids.append(person['person_id'][0])
1340
+ kpt_temp = list()
1341
+ kpt_score_temp = list()
1342
+ # create bbox with min max each dimension
1343
+ x, y = [], []
1344
+ for i in pose_id_part_openpose:
1345
+ if i < 25:
1346
+ # kpt and kpts scores
1347
+ kpt_temp.append([int(person['pose_keypoints_2d'][i * 3]), int(person['pose_keypoints_2d'][(i * 3) + 1]),
1348
+ person['pose_keypoints_2d'][(i * 3) + 2]])
1349
+ kpt_score_temp.append(person['pose_keypoints_2d'][(i * 3) + 2])
1350
+ # check confidence != 0
1351
+ if person['pose_keypoints_2d'][(3 * i) + 2]!=0:
1352
+ x.append(int(person['pose_keypoints_2d'][3 * i]))
1353
+ y.append(int(person['pose_keypoints_2d'][(3 * i) + 1]))
1354
+ kpt_scores.append(kpt_score_temp)
1355
+ kpt.append(kpt_temp)
1356
+ xmax = max(x)
1357
+ xmin = min(x)
1358
+ ymax = max(y)
1359
+ ymin = min(y)
1360
+ bbox.append([xmin, ymin, xmax, ymax, 1]) # last value is for compatibility of centernet
1361
+
1362
+ return bbox, kpt, kpt_scores # not to use scores
1363
+
1364
+ def atoi(text):
1365
+ return int(text) if text.isdigit() else text
1366
+
1367
+
1368
+ def natural_keys(text):
1369
+ """
1370
+ alist.sort(key=natural_keys) sorts in human order
1371
+ http://nedbatchelder.com/blog/200712/human_sorting.html
1372
+ (See Toothy's implementation in the comments)
1373
+ """
1374
+ import re
1375
+ return [atoi(c) for c in re.split(r'(\d+)', text)]