Add TensorRT engine support for RTMO

- rtmo_demo.py +5 -5
- rtmo_gpu.py +69 -36
rtmo_demo.py
CHANGED
@@ -12,18 +12,18 @@ if __name__ == "__main__":
     # Set up argument parsing
     parser = argparse.ArgumentParser(description='Process the path to a video file folder.')
     parser.add_argument('path', type=str, help='Path to the folder containing video files (required)')
-    parser.add_argument('model_path', type=str, help='Path to a RTMO ONNX model file (required)')
+    parser.add_argument('model_path', type=str, help='Path to a RTMO ONNX (or engine) model file (required)')
     parser.add_argument('--yolo_nas_pose', action='store_true', help='Use YOLO NAS Pose (flat format only) instead of RTMO Model')
 
     # Parse the command-line arguments
    args = parser.parse_args()
 
-    onnx_model = args.model_path # 'rtmo-s_8xb32-600e_body7-640x640.onnx'
+    model = args.model_path # 'rtmo-s_8xb32-600e_body7-640x640.onnx'
 
     # Only Tiny Model has (416,416) as input model
-    model_input_size = (416,416) if 'rtmo-t' in onnx_model.lower() and not args.yolo_nas_pose else (640,640)
+    model_input_size = (416,416) if 'rtmo-t' in model.lower() and not args.yolo_nas_pose else (640,640)
 
-    body = RTMO_GPU(onnx_model=onnx_model,
+    body = RTMO_GPU(model=model,
                     model_input_size=model_input_size, is_yolo_nas_pose=args.yolo_nas_pose)
 
     for mp4_path in Path(args.path).glob('*'):
@@ -55,5 +55,5 @@ if __name__ == "__main__":
                           kpt_thr=0.3,
                           line_width=2)
        img_show = cv2.resize(img_show, (788, 525))
-       cv2.imshow(f'{onnx_model}', img_show)
+       cv2.imshow(f'{model}', img_show)
        cv2.waitKey(10)
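With this change the demo selects the backend purely from the extension of the positional `model_path` argument, so the same script drives both the ONNX Runtime and TensorRT paths. A minimal sketch of the resulting selection logic (the file names here are illustrative, not files shipped with the repo):

```python
from rtmo_gpu import RTMO_GPU

# Illustrative model path; a .onnx file would be handled the same way.
model = 'rtmo-s_8xb32-600e_body7-640x640.engine'

# Mirrors the demo: only the Tiny variant ('rtmo-t') runs at 416x416.
model_input_size = (416, 416) if 'rtmo-t' in model.lower() else (640, 640)

body = RTMO_GPU(model=model, model_input_size=model_input_size)
# keypoints, scores = body(frame)  # per-frame inference, as in the demo loop
```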
rtmo_gpu.py
CHANGED
@@ -334,32 +334,48 @@ class RTMO_GPU(object):
         Returns:
             outputs (np.ndarray): Output of RTMPose model.
         """
+
         # build input to (1, 3, H, W)
         img = img.transpose(2, 0, 1)
         img = np.ascontiguousarray(img, dtype=np.float32 if not self.is_yolo_nas_pose else np.uint8)
         input = img[None, :, :, :]
 
-        # Create an IO Binding object
-        io_binding = self.session.io_binding()
+        if self.model_format == 'onnx':
 
-        if not self.is_yolo_nas_pose:
-            # RTMO
-            io_binding.bind_input(name='input', device_type='cpu', device_id=0, element_type=np.float32, shape=input.shape, buffer_ptr=input.ctypes.data)
-            io_binding.bind_output(name='dets')
-            io_binding.bind_output(name='keypoints')
-        else:
-            # NAS Pose, flat format
-            io_binding.bind_input(name='input', device_type='cpu', device_id=0, element_type=np.uint8, shape=input.shape, buffer_ptr=input.ctypes.data)
-            io_binding.bind_output(name='graph2_flat_predictions')
+            # Create an IO Binding object
+            io_binding = self.session.io_binding()
 
-        # Run inference with IO Binding
-        self.session.run_with_iobinding(io_binding)
+            if not self.is_yolo_nas_pose:
+                # RTMO
+                io_binding.bind_input(name='input', device_type='cpu', device_id=0, element_type=np.float32, shape=input.shape, buffer_ptr=input.ctypes.data)
+                io_binding.bind_output(name='dets')
+                io_binding.bind_output(name='keypoints')
+            else:
+                # NAS Pose, flat format
+                io_binding.bind_input(name='input', device_type='cpu', device_id=0, element_type=np.uint8, shape=input.shape, buffer_ptr=input.ctypes.data)
+                io_binding.bind_output(name='graph2_flat_predictions')
 
-        # Retrieve the outputs from the IO Binding object
-        outputs = [output.numpy() for output in io_binding.get_outputs()]
+            # Run inference with IO Binding
+            self.session.run_with_iobinding(io_binding)
+
+            # Retrieve the outputs from the IO Binding object
+            outputs = [output.numpy() for output in io_binding.get_outputs()]
+
+        else: # 'engine'
+
+            if not self.session.is_active:
+                self.session.activate()
+
+            outputs = self.session.infer(feed_dict={'input': input}, check_inputs=False)
+            outputs = [output for output in outputs.values()]
 
         return outputs
 
+    def __exit__(self):
+        if self.model_format == 'engine':
+            if self.session.is_active:
+                self.session.deactivate()
+
     def __call__(self, image: np.ndarray):
         image, ratio = self.preprocess(image)
 
@@ -371,33 +387,50 @@ class RTMO_GPU(object):
         return keypoints, scores
 
     def __init__(self,
-                 onnx_model: str = None,
+                 model: str = None,
                  model_input_size: tuple = (640, 640),
                  mean: tuple = None,
                  std: tuple = None,
                  device: str = 'cuda',
                  is_yolo_nas_pose = False):
 
-        if not os.path.exists(onnx_model):
+        if not os.path.exists(model):
             # If the file does not exist, raise FileNotFoundError
-            raise FileNotFoundError(f"The specified ONNX model file was not found: {onnx_model}")
+            raise FileNotFoundError(f"The specified ONNX model file was not found: {model}")
 
-        providers = {'cpu': 'CPUExecutionProvider',
-                     'cuda': [
-                        #('TensorrtExecutionProvider', {
-                        #    'trt_fp16_enable':True,
-                        #    'trt_engine_cache_enable':True,
-                        #    'trt_engine_cache_path':'cache'}),
-                        ('CUDAExecutionProvider', {
-                            'cudnn_conv_algo_search': 'DEFAULT',
-                            'cudnn_conv_use_max_workspace': True
-                        }),
-                        'CPUExecutionProvider']}
-
-        self.session = ort.InferenceSession(path_or_bytes=onnx_model,
-                                            providers=providers[device])
-
-        self.onnx_model = onnx_model
+        self.model = model
+        if model.endswith('.onnx'):
+            self.model_format = 'onnx'
+        elif model.endswith('.engine'):
+            self.model_format = 'engine'
+            from polygraphy.backend.common import BytesFromPath
+            from polygraphy.backend.trt import EngineFromBytes, TrtRunner, load_plugins
+            load_plugins(plugins=['libmmdeploy_tensorrt_ops.so'])
+        else:
+            raise TypeError("Your model is neither ONNX nor Engine !")
+
+        if self.model_format == 'onnx':
+
+            providers = {'cpu': 'CPUExecutionProvider',
+                         'cuda': [
+                            #('TensorrtExecutionProvider', {
+                            #    'trt_fp16_enable':True,
+                            #    'trt_engine_cache_enable':True,
+                            #    'trt_engine_cache_path':'cache'}),
+                            ('CUDAExecutionProvider', {
+                                'cudnn_conv_algo_search': 'DEFAULT',
+                                'cudnn_conv_use_max_workspace': True
+                            }),
+                            'CPUExecutionProvider']}
+
+            self.session = ort.InferenceSession(path_or_bytes=model,
+                                                providers=providers[device])
+
+        else: # 'engine'
+            engine = EngineFromBytes(BytesFromPath(model))
+            self.session = TrtRunner(engine)
 
         self.model_input_size = model_input_size
         self.mean = mean
         self.std = std
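For reference, the engine branch above follows Polygraphy's lazy-loader pattern: `EngineFromBytes(BytesFromPath(...))` defers deserialization until the runner is activated, and `TrtRunner` owns the device buffers between `activate()` and `deactivate()`. A self-contained sketch of that lifecycle, with placeholder paths and shapes (the plugin library is loaded first because MMDeploy-exported RTMO engines use custom TensorRT ops):

```python
import numpy as np
from polygraphy.backend.common import BytesFromPath
from polygraphy.backend.trt import EngineFromBytes, TrtRunner, load_plugins

# MMDeploy-exported RTMO engines require the custom-op plugin first.
load_plugins(plugins=['libmmdeploy_tensorrt_ops.so'])

# Lazy loader: the engine bytes are only deserialized on activation.
engine = EngineFromBytes(BytesFromPath('rtmo.engine'))  # placeholder path
runner = TrtRunner(engine)

runner.activate()  # deserialize the engine and allocate buffers once
try:
    dummy = np.zeros((1, 3, 640, 640), dtype=np.float32)
    # infer() returns an ordered dict of output name -> array;
    # check_inputs=False skips per-call shape/dtype validation.
    outputs = runner.infer(feed_dict={'input': dummy}, check_inputs=False)
    dets, keypoints = list(outputs.values())  # assumes RTMO's two outputs
finally:
    runner.deactivate()  # release device buffers
```

Activating once and deactivating only on teardown, as `output_binding()` and `__exit__()` do above, avoids paying the deserialization and allocation cost on every frame.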