Return and show bounding box confidence
- rtmo_demo.py (+2 -2)
- rtmo_demo_batch.py (+6 -4)
- rtmo_gpu.py (+44 -17)
rtmo_demo.py
CHANGED
@@ -36,7 +36,7 @@ if __name__ == "__main__":
         if not success:
             break

-        frame_out, bboxes, keypoints, scores = body(frame)
+        frame_out, bboxes, bboxes_scores, keypoints, scores = body(frame)

         if keypoints is not None:
             if frame_idx % args.batch_size == 0 and frame_idx:
@@ -56,7 +56,7 @@ if __name__ == "__main__":
                                      scores,
                                      kpt_thr=0.3,
                                      line_width=2)
-            img_show = draw_bbox(img_show, bboxes)
+            img_show = draw_bbox(img_show, bboxes, bboxes_scores)
             img_show = resize_to_fit_screen(img_show, 720, 480)
             cv2.putText(img_show, f'{fps:.1f}', (10, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 1, cv2.LINE_AA)
             cv2.imshow(f'{model}', img_show)
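Note that this is a breaking change for callers: `body(frame)` now returns five values instead of four. A minimal caller-side sketch of the updated loop, assuming `body` is an already constructed batch estimator and that the drawing helpers are importable as in the demo (names and import path are illustrative, not confirmed by this commit):

    import cv2
    from rtmo_gpu import draw_skeleton, draw_bbox, resize_to_fit_screen

    cap = cv2.VideoCapture('video.mp4')  # hypothetical input
    while True:
        success, frame = cap.read()
        if not success:
            break
        # New 5-tuple: per-box confidences come back alongside the boxes
        frame_out, bboxes, bboxes_scores, keypoints, scores = body(frame)
        if keypoints is not None:  # None until the first batch has been processed
            img_show = draw_skeleton(frame_out.copy(), keypoints, scores,
                                     kpt_thr=0.3, line_width=2)
            img_show = draw_bbox(img_show, bboxes, bboxes_scores)
            cv2.imshow('pose', resize_to_fit_screen(img_show, 720, 480))
            cv2.waitKey(1)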
rtmo_demo_batch.py
CHANGED
@@ -24,7 +24,7 @@ def process_video(video_path, body_estimator, batch_size=4):
         # Process the batch when it's full
         if len(batch_frames) == batch_size:
             s = time.time()
-            batch_bboxes, batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
+            batch_bboxes, batch_bboxes_scores, batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
             det_time = time.time() - s
             fps = round(batch_size / det_time, 1)
             print(f'Batch det: {fps} FPS')
@@ -33,9 +33,10 @@ def process_video(video_path, body_estimator, batch_size=4):
                 scores = batch_scores[i]
                 frame = batch_frames[i]
                 bboxes = batch_bboxes[i]
+                bboxes_scores = batch_bboxes_scores[i]
                 img_show = frame.copy()
                 img_show = draw_skeleton(img_show, keypoints, scores, kpt_thr=0.3, line_width=2)
-                img_show = draw_bbox(img_show, bboxes)
+                img_show = draw_bbox(img_show, bboxes, bboxes_scores)
                 img_show = resize_to_fit_screen(img_show, 720, 480)
                 cv2.putText(img_show, f'{fps:.1f}', (10, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 1, cv2.LINE_AA)
                 cv2.imshow(f'{video_path}', img_show)
@@ -54,14 +55,15 @@ def process_video(video_path, body_estimator, batch_size=4):

         # Option 2: Duplicate the last frame
         batch_frames.append(batch_frames[-1])
-        batch_bboxes, batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
+        batch_bboxes, batch_bboxes_scores, batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
         for i, keypoints in enumerate(batch_keypoints):
             scores = batch_scores[i]
             frame = batch_frames[i]
             bboxes = batch_bboxes[i]
+            bboxes_scores = batch_bboxes_scores[i]
             img_show = frame.copy()
             img_show = draw_skeleton(img_show, keypoints, scores, kpt_thr=0.3, line_width=2)
-            img_show = draw_bbox(img_show, bboxes)
+            img_show = draw_bbox(img_show, bboxes, bboxes_scores)
             img_show = resize_to_fit_screen(img_show, 720, 480)
             cv2.imshow(f'{video_path}', img_show)
             #cv2.waitKey(10)
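The tail-of-video path pads the last partial batch by duplicating the final frame so `__batch_call__` always receives exactly `batch_size` frames; as written, results for the padded duplicates are displayed too. A hedged sketch of how a caller could drop them (`n_real` is a hypothetical name; the rest reuses the demo's variables):

    n_real = len(batch_frames)                      # real frames before padding
    while len(batch_frames) < batch_size:
        batch_frames.append(batch_frames[-1])       # Option 2: duplicate the last frame
    batch_bboxes, batch_bboxes_scores, batch_keypoints, batch_scores = \
        body_estimator.__batch_call__(batch_frames)
    # Keep only the results that correspond to real frames
    for i in range(n_real):
        bboxes, bboxes_scores = batch_bboxes[i], batch_bboxes_scores[i]
        keypoints, scores = batch_keypoints[i], batch_scores[i]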
rtmo_gpu.py
CHANGED
@@ -207,12 +207,32 @@ def draw_mmpose(img,

     return img

-def draw_bbox(img, bboxes, color=(0, 255, 0)):
-    for bbox in bboxes:
+def draw_bbox(img, bboxes, bboxes_scores=None, color=None):
+    for i, bbox in enumerate(bboxes):
+        # Determine the color based on the score if no color is given
+        if color is None and bboxes_scores is not None:
+            # Scale the score to a color range (green to red)
+            score = bboxes_scores[i]
+            green = int((1 - score) * 255)
+            red = int(score * 255)
+            box_color = (0, green, red)
+        else:
+            box_color = color if color is not None else (0, 255, 0)
+
+        # Draw the bounding box
         img = cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
-                            (int(bbox[2]), int(bbox[3])), color, 1)
+                            (int(bbox[2]), int(bbox[3])), box_color, 1)
+
+        # Display the score at the top-right corner of the bounding box
+        if bboxes_scores is not None:
+            score_text = f'{bboxes_scores[i]:.2f}'
+            text_size, _ = cv2.getTextSize(score_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
+            text_x = int(bbox[2]) - text_size[0]
+            text_y = int(bbox[1]) + text_size[1]
+            img = cv2.putText(img, score_text, (text_x, text_y),
+                              cv2.FONT_HERSHEY_SIMPLEX, 0.5, box_color, 1, cv2.LINE_AA)
     return img

 # with simplification to use onnxruntime only
 def draw_skeleton(img,
                   keypoints,
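The new `draw_bbox` colours each box by its confidence on a BGR green-to-red ramp, so low-score boxes render green and high-score boxes red, with the numeric score printed at the box's top-right corner. For example, a score of 0.85 gives green = int(0.15 * 255) = 38 and red = int(0.85 * 255) = 216, i.e. the BGR tuple (0, 38, 216). A standalone sketch of just the mapping (hypothetical scores, no OpenCV needed):

    def score_to_bgr(score):
        # Same ramp as draw_bbox: green fades out as red fades in
        return (0, int((1 - score) * 255), int(score * 255))

    for s in (0.3, 0.5, 0.85):
        print(s, score_to_bgr(s))
    # 0.3  -> (0, 178, 76)
    # 0.5  -> (0, 127, 127)
    # 0.85 -> (0, 38, 216)

Passing an explicit `color`, or no scores at all, bypasses the ramp and falls back to a fixed colour, so legacy `draw_bbox(img, bboxes)` call sites keep working with plain green boxes.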
@@ -333,6 +353,8 @@ class RTMO_GPU(object):
             tuple:
             - final_boxes (np.ndarray): Final bounding boxes.
             - final_scores (np.ndarray): Final scores.
+            - final keypoints
+            - final keypoints scores
         """

         if not self.is_yolo_nas_pose:
@@ -346,6 +368,7 @@ class RTMO_GPU(object):
             isscore = final_scores > 0.3
             isbbox = [i for i in isscore]
             final_boxes = final_boxes[isbbox]
+            final_boxes_scores = final_scores[isbbox]

             # decode pose outputs
             keypoints, scores = pose_outputs[0, :, :, :2], pose_outputs[0, :, :, 2]
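Since `isscore` is already a boolean array, `isbbox = [i for i in isscore]` only copies it into a list before it is used as a mask; the point of the added line is that indexing `final_scores` with the same mask keeps the surviving scores aligned one-to-one with the surviving boxes. A small self-contained illustration with made-up values:

    import numpy as np

    final_scores = np.array([0.9, 0.2, 0.6])
    final_boxes = np.array([[0, 0, 10, 10], [5, 5, 8, 8], [1, 1, 4, 4]])

    isscore = final_scores > 0.3                 # [ True, False,  True]
    final_boxes = final_boxes[isscore]           # boxes 0 and 2 survive
    final_boxes_scores = final_scores[isscore]   # [0.9, 0.6], aligned with the boxes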
@@ -359,14 +382,15 @@ class RTMO_GPU(object):
            if flat_predictions.shape[0] > 0: # at least one person found
                mask = flat_predictions[:, 0] == 0
                final_boxes = flat_predictions[mask, 1:5]
+                final_boxes_scores = flat_predictions[mask, 5]
                pred_joints = flat_predictions[mask, 6:].reshape((len(final_boxes), -1, 3))
                keypoints, scores = pred_joints[:,:,:2], pred_joints[:,:,-1]
                keypoints = keypoints / ratio
                final_boxes = final_boxes / ratio
            else: # no detection
-               final_boxes, keypoints, scores = np.zeros((0, 4)),np.zeros((0, 17, 2)), np.zeros((0, 17))
+               final_boxes, final_boxes_scores, keypoints, scores = np.zeros((0, 4)),np.zeros((0, 1)),np.zeros((0, 17, 2)), np.zeros((0, 17))

-        return final_boxes, keypoints, scores
+        return final_boxes, final_boxes_scores, keypoints, scores

     def inference(self, img: np.ndarray):
         """Inference model.
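For the YOLO-NAS-Pose branch, the layout of `flat_predictions` can be read off from this hunk: column 0 is the image index within the batch, columns 1-4 are the box corners (x1, y1, x2, y2), column 5 is the box confidence now being returned, and columns 6 onward are the flattened (x, y, confidence) triplets for the keypoints. A self-contained sketch decoding one hypothetical row:

    import numpy as np

    # One made-up detection: image 0, box (10, 20, 110, 220), confidence 0.87,
    # followed by 17 keypoint triplets (all zeros here for brevity)
    row = np.concatenate(([0, 10, 20, 110, 220, 0.87], np.zeros(17 * 3)))
    flat_predictions = row[None, :]

    mask = flat_predictions[:, 0] == 0
    final_boxes = flat_predictions[mask, 1:5]        # [[ 10.  20. 110. 220.]]
    final_boxes_scores = flat_predictions[mask, 5]   # [0.87]
    pred_joints = flat_predictions[mask, 6:].reshape((len(final_boxes), -1, 3))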
@@ -425,9 +449,9 @@ class RTMO_GPU(object):

         outputs = self.inference(image)

-        bboxes, keypoints, scores = self.postprocess(outputs, ratio)
+        bboxes, bboxes_scores, keypoints, scores = self.postprocess(outputs, ratio)

-        return bboxes, keypoints, scores
+        return bboxes, bboxes_scores, keypoints, scores

     def __init__(self,
                  model: str = None,
@@ -569,22 +593,24 @@ class RTMO_GPU_Batch(RTMO_GPU):
         batch_keypoints = []
         batch_scores = []
         batch_bboxes = []
+        batch_bboxes_scores = []

         b_dets, b_keypoints = outputs
         for i, ratio in enumerate(ratios):
             output = [np.expand_dims(b_dets[i], axis=0), np.expand_dims(b_keypoints[i],axis=0)]
-            bboxes, keypoints, scores = super().postprocess(output, ratio)
+            bboxes, bboxes_scores, keypoints, scores = super().postprocess(output, ratio)
             batch_keypoints.append(keypoints)
             batch_scores.append(scores)
             batch_bboxes.append(bboxes)
+            batch_bboxes_scores.append(bboxes_scores)

-        return batch_bboxes, batch_keypoints, batch_scores
+        return batch_bboxes, batch_bboxes_scores, batch_keypoints, batch_scores

     def __batch_call__(self, images: List[np.ndarray]):
         batch_img, ratios = self.preprocess_batch(images)
         outputs = self.inference(batch_img)
-        bboxes, keypoints, scores = self.postprocess_batch(outputs, ratios)
-        return bboxes, keypoints, scores
+        bboxes, bboxes_scores, keypoints, scores = self.postprocess_batch(outputs, ratios)
+        return bboxes, bboxes_scores, keypoints, scores

     def __call__(self, image: np.array, camera_id = 0):

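Because each image in a batch can contain a different number of people, `postprocess_batch` returns plain Python lists with one entry per image rather than stacked arrays, and `batch_bboxes_scores[i]` lines up index-for-index with `batch_bboxes[i]`. A hedged usage sketch, assuming `estimator` is an already constructed `RTMO_GPU_Batch` and `frames` holds `batch_size` BGR images:

    batch_bboxes, batch_bboxes_scores, batch_keypoints, batch_scores = \
        estimator.__batch_call__(frames)
    for bboxes, bboxes_scores in zip(batch_bboxes, batch_bboxes_scores):
        for bbox, score in zip(bboxes, bboxes_scores):
            print(bbox, f'{score:.2f}')   # one confidence per box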
@@ -600,18 +626,19 @@ class RTMO_GPU_Batch(RTMO_GPU):
         in_queue.put(image)

         if len(self.buffers[camera_id]) == self.batch_size:
-            b_bboxes, b_keypoints, b_scores = self.__batch_call__(self.buffers[camera_id])
+            b_bboxes, b_bboxes_scores, b_keypoints, b_scores = self.__batch_call__(self.buffers[camera_id])
             for i, (keypoints, scores) in enumerate(zip(b_keypoints, b_scores)):
                 bboxes = b_bboxes[i]
-                out_queue.put((bboxes, keypoints, scores))
+                bboxes_scores = b_bboxes_scores[i]
+                out_queue.put((bboxes, bboxes_scores, keypoints, scores))
             self.buffers[camera_id] = []

-        frame, bboxes, keypoints, scores = None, None, None, None
+        frame, bboxes, bboxes_scores, keypoints, scores = None, None, None, None, None
         if not out_queue.empty():
-            bboxes, keypoints, scores = out_queue.get()
+            bboxes, bboxes_scores, keypoints, scores = out_queue.get()
             frame = in_queue.get()

-        return frame, bboxes, keypoints, scores
+        return frame, bboxes, bboxes_scores, keypoints, scores


     def __init__(self,
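The buffered `__call__` remains pipelined: frames accumulate until `batch_size` of them are queued, so the first few calls return all-`None` results, and after that each call returns results for an earlier frame popped from `in_queue`, not for the frame just submitted. With `batch_size=4` (a hypothetical setting) the warm-up looks roughly like:

    # call 1..3 -> (None, None, None, None, None)   buffers still filling
    # call 4    -> results for frame 1              first batch runs here
    # call 5    -> results for frame 2              and so on, one batch behind

This is why the demos gate on `keypoints is not None` before drawing.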
|