Luigi committed
Commit: 42892de
Parent(s): 15801f5

Unify batched and non-batched versions

Files changed:
- demo_batch.sh +1 -1
- rtmo_demo.py +32 -23
- rtmo_demo_batch.py +2 -2
- rtmo_gpu.py +37 -2
demo_batch.sh CHANGED
@@ -1,2 +1,2 @@
 #!/bin/sh
-python3
+python3 rtmo_demo.py ./video rtmo-t.fp16.onnx --batch_size 4
rtmo_demo.py CHANGED
@@ -5,7 +5,8 @@ import cv2
 from pathlib import Path
 import argparse
 import os
-from rtmo_gpu import
+from rtmo_gpu import RTMO_GPU_Batch, draw_skeleton, resize_to_fit_screen
+from queue import Queue
 
 if __name__ == "__main__":
 
@@ -14,44 +15,52 @@ if __name__ == "__main__":
     parser.add_argument('path', type=str, help='Path to the folder containing video files (required)')
     parser.add_argument('model_path', type=str, help='Path to a RTMO ONNX (or engine) model file (required)')
     parser.add_argument('--yolo_nas_pose', action='store_true', help='Use YOLO NAS Pose (flat format only) instead of RTMO Model')
+    parser.add_argument('--batch_size', type=int, default=1, help='RTMO ONNX model input batch size')
 
     # Parse the command-line arguments
     args = parser.parse_args()
 
     model = args.model_path # 'rtmo-s_8xb32-600e_body7-640x640.onnx'
 
-    body =
+    body = RTMO_GPU_Batch(model=model, is_yolo_nas_pose=args.yolo_nas_pose, batch_size=args.batch_size)
 
     for mp4_path in Path(args.path).glob('*'):
 
         # Now, use the best.url, which is the direct video link for streaming
         cap = cv2.VideoCapture(filename=os.path.abspath(mp4_path))
-
+        in_queue = Queue(maxsize=args.batch_size)
         frame_idx = 0
-
+        s = time.time()
         while cap.isOpened():
             success, frame = cap.read()
             frame_idx += 1
+            in_queue.put(frame)
 
             if not success:
                 break
-
+
             keypoints, scores = body(frame)
-… (18 removed lines not rendered in the diff view)
+
+            if keypoints is not None:
+                if frame_idx % args.batch_size == 0 and frame_idx:
+                    current_time = time.time()
+                    det_time = current_time - s
+                    fps = round(args.batch_size / det_time, 1)
+                    print(f'det: {fps} FPS')
+                    s = current_time
+
+                frame = in_queue.get()
+                img_show = frame.copy()
+
+                # if you want to use black background instead of original image,
+                # img_show = np.zeros(img_show.shape, dtype=np.uint8)
+
+                img_show = draw_skeleton(img_show,
+                                         keypoints,
+                                         scores,
+                                         kpt_thr=0.3,
+                                         line_width=2)
+                img_show = resize_to_fit_screen(img_show, 720, 480)
+                cv2.putText(img_show, f'{fps:.1f}', (10, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 1, cv2.LINE_AA)
+                cv2.imshow(f'{model}', img_show)
+                cv2.waitKey(10)
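Note on the unified loop above: body(frame) returns (None, None) until the first full batch has been inferred, and afterwards each result corresponds to the oldest buffered frame, lagging the input by up to batch_size - 1 frames. That is why the demo queues raw frames in in_queue and draws on the dequeued frame rather than the current one. A minimal sketch of that alignment, assuming a hypothetical estimator with the same buffered-call contract as RTMO_GPU_Batch:

from queue import Queue

def paired_results(estimator, frames, batch_size):
    # Raw frames wait here until their inference results come back.
    in_queue = Queue(maxsize=batch_size)
    for frame in frames:
        in_queue.put(frame)
        keypoints, scores = estimator(frame)  # (None, None) until a batch completes
        if keypoints is not None:
            # The result belongs to the oldest queued frame, not to `frame`.
            yield in_queue.get(), keypoints, scores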
rtmo_demo_batch.py CHANGED
@@ -24,7 +24,7 @@ def process_video(video_path, body_estimator, batch_size=4):
     # Process the batch when it's full
     if len(batch_frames) == batch_size:
         s = time.time()
-        batch_keypoints, batch_scores = body_estimator(batch_frames)
+        batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
         det_time = time.time() - s
         fps = round(batch_size / det_time, 1)
         print(f'Batch det: {fps} FPS')
@@ -52,7 +52,7 @@ def process_video(video_path, body_estimator, batch_size=4):
 
     # Option 2: Duplicate the last frame
     batch_frames.append(batch_frames[-1])
-    batch_keypoints, batch_scores = body_estimator(batch_frames)
+    batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
     for i, keypoints in enumerate(batch_keypoints):
         scores = batch_scores[i]
         frame = batch_frames[i]
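The explicit __batch_call__ entry point still expects a full batch, so the script pads a trailing partial batch by duplicating the last frame ("Option 2" above). A minimal sketch of that flush step, assuming only that the estimator exposes __batch_call__ as in rtmo_gpu.py; flush_partial_batch is an illustrative helper, not a function from this repo:

def flush_partial_batch(estimator, batch_frames, batch_size):
    n_real = len(batch_frames)                 # frames that actually need results
    while len(batch_frames) < batch_size:
        batch_frames.append(batch_frames[-1])  # pad by duplicating the last frame
    batch_keypoints, batch_scores = estimator.__batch_call__(batch_frames)
    # Discard the results produced for the padding copies.
    return batch_keypoints[:n_real], batch_scores[:n_real]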
rtmo_gpu.py CHANGED
@@ -3,6 +3,7 @@ import numpy as np
 from typing import List, Tuple
 import onnxruntime as ort
 import cv2
+from queue import Queue
 os.environ['ORT_TENSORRT_EXTRA_PLUGIN_LIB_PATHS']='libmmdeploy_tensorrt_ops.so'
 
 # dictionary from https://github.com/Tau-J/rtmlib/blob/4b29101d54b611048ef165277cebfffff3030074/rtmlib/visualization/skeleton/coco17.py
@@ -458,6 +459,7 @@ class RTMO_GPU(object):
                     'cudnn_conv_algo_search': 'DEFAULT',
                     'cudnn_conv_use_max_workspace': True
                 }),
+                'OpenVINOExecutionProvider',
                 'CPUExecutionProvider']}
 
         self.session = ort.InferenceSession(path_or_bytes=model,
@@ -547,7 +549,7 @@ class RTMO_GPU_Batch(RTMO_GPU):
             self,
             outputs: List[np.ndarray],
             ratios: List[float]
-    ) -> List[
+    ) -> Tuple[List[np.ndarray], List[np.ndarray]]:
        """Process outputs for a batch of images.
 
        Args:
@@ -569,11 +571,44 @@ class RTMO_GPU_Batch(RTMO_GPU):
 
         return batch_keypoints, batch_scores
 
-    def
+    def __batch_call__(self, images: List[np.ndarray]):
         batch_img, ratios = self.preprocess_batch(images)
         outputs = self.inference(batch_img)
         keypoints, scores = self.postprocess_batch(outputs, ratios)
         return keypoints, scores
+
+    def __call__(self, image: np.array):
+        self.buffer.append(image)
+
+        if len(self.buffer) == self.batch_size:
+            b_keypoints, b_scores = self.__batch_call__(self.buffer)
+            for keypoints, scores in zip(b_keypoints, b_scores):
+                self.out_queue.put((keypoints, scores))
+            self.buffer = []
+
+        keypoints, scores = None, None
+        if not self.out_queue.empty():
+            keypoints, scores = self.out_queue.get()
+
+        return keypoints, scores
+
+    def __init__(self,
+                 model: str = None,
+                 mean: tuple = None,
+                 std: tuple = None,
+                 device: str = 'cuda',
+                 is_yolo_nas_pose = False,
+                 batch_size: int = 1):
+        super().__init__(model,
+                         mean,
+                         std,
+                         device,
+                         is_yolo_nas_pose)
+
+        self.batch_size = batch_size
+        self.out_queue = Queue(maxsize=self.batch_size)
+        self.buffer = []
 
 def resize_to_fit_screen(image, screen_width, screen_height):
     # Get the dimensions of the image