pesi
/

Luigi committed on
Commit
bbf20b6
1 Parent(s): f9a6075

Add TensorRT engine support for RTMO

Browse files
Files changed (2) hide show
  1. rtmo_demo.py +5 -5
  2. rtmo_gpu.py +69 -36
rtmo_demo.py CHANGED
@@ -12,18 +12,18 @@ if __name__ == "__main__":
12
  # Set up argument parsing
13
  parser = argparse.ArgumentParser(description='Process the path to a video file folder.')
14
  parser.add_argument('path', type=str, help='Path to the folder containing video files (required)')
15
- parser.add_argument('model_path', type=str, help='Path to a RTMO ONNX model file (required)')
16
  parser.add_argument('--yolo_nas_pose', action='store_true', help='Use YOLO NAS Pose (flat format only) instead of RTMO Model')
17
 
18
  # Parse the command-line arguments
19
  args = parser.parse_args()
20
 
21
- onnx_model = args.model_path # 'rtmo-s_8xb32-600e_body7-640x640.onnx'
22
 
23
  # Only Tiny Model has (416,416) as input model
24
- model_input_size = (416,416) if 'rtmo-t' in onnx_model.lower() and not args.yolo_nas_pose else (640,640)
25
 
26
- body = RTMO_GPU(onnx_model=onnx_model,
27
  model_input_size=model_input_size, is_yolo_nas_pose=args.yolo_nas_pose)
28
 
29
  for mp4_path in Path(args.path).glob('*'):
@@ -55,5 +55,5 @@ if __name__ == "__main__":
55
  kpt_thr=0.3,
56
  line_width=2)
57
  img_show = cv2.resize(img_show, (788, 525))
58
- cv2.imshow(f'{onnx_model}', img_show)
59
  cv2.waitKey(10)
 
12
  # Set up argument parsing
13
  parser = argparse.ArgumentParser(description='Process the path to a video file folder.')
14
  parser.add_argument('path', type=str, help='Path to the folder containing video files (required)')
15
+ parser.add_argument('model_path', type=str, help='Path to a RTMO ONNX (or engine) model file (required)')
16
  parser.add_argument('--yolo_nas_pose', action='store_true', help='Use YOLO NAS Pose (flat format only) instead of RTMO Model')
17
 
18
  # Parse the command-line arguments
19
  args = parser.parse_args()
20
 
21
+ model = args.model_path # 'rtmo-s_8xb32-600e_body7-640x640.onnx'
22
 
23
  # Only Tiny Model has (416,416) as input model
24
+ model_input_size = (416,416) if 'rtmo-t' in model.lower() and not args.yolo_nas_pose else (640,640)
25
 
26
+ body = RTMO_GPU(model=model,
27
  model_input_size=model_input_size, is_yolo_nas_pose=args.yolo_nas_pose)
28
 
29
  for mp4_path in Path(args.path).glob('*'):
 
55
  kpt_thr=0.3,
56
  line_width=2)
57
  img_show = cv2.resize(img_show, (788, 525))
58
+ cv2.imshow(f'{model}', img_show)
59
  cv2.waitKey(10)
rtmo_gpu.py CHANGED
@@ -334,32 +334,48 @@ class RTMO_GPU(object):
334
  Returns:
335
  outputs (np.ndarray): Output of RTMPose model.
336
  """
 
337
  # build input to (1, 3, H, W)
338
  img = img.transpose(2, 0, 1)
339
  img = np.ascontiguousarray(img, dtype=np.float32 if not self.is_yolo_nas_pose else np.uint8)
340
  input = img[None, :, :, :]
341
 
342
- # Create an IO Binding object
343
- io_binding = self.session.io_binding()
344
 
345
- if not self.is_yolo_nas_pose:
346
- # RTMO
347
- io_binding.bind_input(name='input', device_type='cpu', device_id=0, element_type=np.float32, shape=input.shape, buffer_ptr=input.ctypes.data)
348
- io_binding.bind_output(name='dets')
349
- io_binding.bind_output(name='keypoints')
350
- else:
351
- # NAS Pose, flat format
352
- io_binding.bind_input(name='input', device_type='cpu', device_id=0, element_type=np.uint8, shape=input.shape, buffer_ptr=input.ctypes.data)
353
- io_binding.bind_output(name='graph2_flat_predictions')
354
 
355
- # Run inference with IO Binding
356
- self.session.run_with_iobinding(io_binding)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
357
 
358
- # Retrieve the outputs from the IO Binding object
359
- outputs = [output.numpy() for output in io_binding.get_outputs()]
360
-
361
  return outputs
362
 
 
 
 
 
 
363
  def __call__(self, image: np.ndarray):
364
  image, ratio = self.preprocess(image)
365
 
@@ -371,33 +387,50 @@ class RTMO_GPU(object):
371
  return keypoints, scores
372
 
373
  def __init__(self,
374
- onnx_model: str = None,
375
  model_input_size: tuple = (640, 640),
376
  mean: tuple = None,
377
  std: tuple = None,
378
  device: str = 'cuda',
379
  is_yolo_nas_pose = False):
380
-
381
- if not os.path.exists(onnx_model):
382
  # If the file does not exist, raise FileNotFoundError
383
- raise FileNotFoundError(f"The specified ONNX model file was not found: {onnx_model}")
384
-
385
- providers = {'cpu': 'CPUExecutionProvider',
386
- 'cuda': [
387
- ('TensorrtExecutionProvider', {
388
- 'trt_fp16_enable':True,
389
- 'trt_engine_cache_enable':True,
390
- 'trt_engine_cache_path':'cache'}),
391
- ('CUDAExecutionProvider', {
392
- 'cudnn_conv_algo_search': 'DEFAULT',
393
- 'cudnn_conv_use_max_workspace': True
394
- }),
395
- 'CPUExecutionProvider']}
396
-
397
- self.session = ort.InferenceSession(path_or_bytes=onnx_model,
398
- providers=providers[device])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
 
400
- self.onnx_model = onnx_model
401
  self.model_input_size = model_input_size
402
  self.mean = mean
403
  self.std = std
 
334
  Returns:
335
  outputs (np.ndarray): Output of RTMPose model.
336
  """
337
+
338
  # build input to (1, 3, H, W)
339
  img = img.transpose(2, 0, 1)
340
  img = np.ascontiguousarray(img, dtype=np.float32 if not self.is_yolo_nas_pose else np.uint8)
341
  input = img[None, :, :, :]
342
 
343
+ if self.model_format == 'onnx':
 
344
 
345
+ # Create an IO Binding object
346
+ io_binding = self.session.io_binding()
 
 
 
 
 
 
 
347
 
348
+ if not self.is_yolo_nas_pose:
349
+ # RTMO
350
+ io_binding.bind_input(name='input', device_type='cpu', device_id=0, element_type=np.float32, shape=input.shape, buffer_ptr=input.ctypes.data)
351
+ io_binding.bind_output(name='dets')
352
+ io_binding.bind_output(name='keypoints')
353
+ else:
354
+ # NAS Pose, flat format
355
+ io_binding.bind_input(name='input', device_type='cpu', device_id=0, element_type=np.uint8, shape=input.shape, buffer_ptr=input.ctypes.data)
356
+ io_binding.bind_output(name='graph2_flat_predictions')
357
+
358
+ # Run inference with IO Binding
359
+ self.session.run_with_iobinding(io_binding)
360
+
361
+ # Retrieve the outputs from the IO Binding object
362
+ outputs = [output.numpy() for output in io_binding.get_outputs()]
363
+
364
+ else: # 'engine'
365
+
366
+ if not self.session.is_active:
367
+ self.session.activate()
368
+
369
+ outputs = self.session.infer(feed_dict={'input': input}, check_inputs=False)
370
+ outputs = [output for output in outputs.values()]
371
 
 
 
 
372
  return outputs
373
 
374
+ def __exit__(self):
375
+ if self.model_format == 'engine':
376
+ if self.session.is_active:
377
+ self.session.deactivate()
378
+
379
  def __call__(self, image: np.ndarray):
380
  image, ratio = self.preprocess(image)
381
 
 
387
  return keypoints, scores
388
 
389
  def __init__(self,
390
+ model: str = None,
391
  model_input_size: tuple = (640, 640),
392
  mean: tuple = None,
393
  std: tuple = None,
394
  device: str = 'cuda',
395
  is_yolo_nas_pose = False):
396
+
397
+ if not os.path.exists(model):
398
  # If the file does not exist, raise FileNotFoundError
399
+ raise FileNotFoundError(f"The specified ONNX model file was not found: {model}")
400
+
401
+ self.model = model
402
+ if model.endswith('.onnx'):
403
+ self.model_format = 'onnx'
404
+ elif model.endswith('.engine'):
405
+ self.model_format = 'engine'
406
+ from polygraphy.backend.common import BytesFromPath
407
+ from polygraphy.backend.trt import EngineFromBytes, TrtRunner, load_plugins
408
+ load_plugins(plugins=['libmmdeploy_tensorrt_ops.so'])
409
+ else:
410
+ raise TypeError("Your model is neither ONNX nor Engine !")
411
+
412
+
413
+ if self.model_format == 'onnx':
414
+
415
+ providers = {'cpu': 'CPUExecutionProvider',
416
+ 'cuda': [
417
+ #('TensorrtExecutionProvider', {
418
+ # 'trt_fp16_enable':True,
419
+ # 'trt_engine_cache_enable':True,
420
+ # 'trt_engine_cache_path':'cache'}),
421
+ ('CUDAExecutionProvider', {
422
+ 'cudnn_conv_algo_search': 'DEFAULT',
423
+ 'cudnn_conv_use_max_workspace': True
424
+ }),
425
+ 'CPUExecutionProvider']}
426
+
427
+ self.session = ort.InferenceSession(path_or_bytes=model,
428
+ providers=providers[device])
429
+
430
+ else: # 'engine'
431
+ engine = EngineFromBytes(BytesFromPath(model))
432
+ self.session = TrtRunner(engine)
433
 
 
434
  self.model_input_size = model_input_size
435
  self.mean = mean
436
  self.std = std