pesi — Commit 42892de by Luigi (1 parent: 15801f5)

Unify batched and non-batched versions

Files changed (4):
  1. demo_batch.sh +1 -1
  2. rtmo_demo.py +32 -23
  3. rtmo_demo_batch.py +2 -2
  4. rtmo_gpu.py +37 -2
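
In short, RTMO_GPU_Batch now exposes the same one-frame-per-call interface as the non-batched RTMO_GPU: it buffers frames internally, runs inference once a full batch has accumulated, and returns (None, None) until the first results are ready. A minimal usage sketch of that unified interface (the video path and loop are illustrative, not part of the commit; the model file is the one demo_batch.sh uses):

    import cv2
    from rtmo_gpu import RTMO_GPU_Batch

    body = RTMO_GPU_Batch(model='rtmo-t.fp16.onnx', batch_size=4)
    cap = cv2.VideoCapture('video/example.mp4')  # hypothetical clip
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            break
        keypoints, scores = body(frame)   # same call shape as RTMO_GPU
        if keypoints is not None:         # None until a batch completes
            print(keypoints.shape, scores.shape)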
demo_batch.sh CHANGED
@@ -1,2 +1,2 @@
 #!/bin/sh
-python3 rtmo_demo_batch.py ./video rtmo-t.fp16.onnx 4
+python3 rtmo_demo.py ./video rtmo-t.fp16.onnx --batch_size 4
rtmo_demo.py CHANGED
@@ -5,7 +5,8 @@ import cv2
 from pathlib import Path
 import argparse
 import os
-from rtmo_gpu import RTMO_GPU, draw_skeleton, resize_to_fit_screen
+from rtmo_gpu import RTMO_GPU_Batch, draw_skeleton, resize_to_fit_screen
+from queue import Queue
 
 if __name__ == "__main__":
 
@@ -14,44 +15,52 @@ if __name__ == "__main__":
     parser.add_argument('path', type=str, help='Path to the folder containing video files (required)')
     parser.add_argument('model_path', type=str, help='Path to a RTMO ONNX (or engine) model file (required)')
     parser.add_argument('--yolo_nas_pose', action='store_true', help='Use YOLO NAS Pose (flat format only) instead of RTMO Model')
+    parser.add_argument('--batch_size', type=int, default=1, help='Input batch size for the RTMO ONNX model')
 
     # Parse the command-line arguments
     args = parser.parse_args()
 
     model = args.model_path # 'rtmo-s_8xb32-600e_body7-640x640.onnx'
 
-    body = RTMO_GPU(model=model, is_yolo_nas_pose=args.yolo_nas_pose)
+    body = RTMO_GPU_Batch(model=model, is_yolo_nas_pose=args.yolo_nas_pose, batch_size=args.batch_size)
 
     for mp4_path in Path(args.path).glob('*'):
 
         # Now, use the best.url, which is the direct video link for streaming
         cap = cv2.VideoCapture(filename=os.path.abspath(mp4_path))
-
+        in_queue = Queue(maxsize=args.batch_size)  # frames awaiting their delayed results
         frame_idx = 0
-
+        s = time.time()
         while cap.isOpened():
             success, frame = cap.read()
             frame_idx += 1
 
             if not success:
                 break
-            s = time.time()
+
+            in_queue.put(frame)  # queue only successfully read frames
             keypoints, scores = body(frame)
-            det_time = time.time() - s
-            fps = round(1.0 / det_time, 1)
-            print(f'det: {fps} FPS')
-
-            img_show = frame.copy()
-
-            # if you want to use black background instead of original image,
-            # img_show = np.zeros(img_show.shape, dtype=np.uint8)
-
-            img_show = draw_skeleton(img_show,
-                                     keypoints,
-                                     scores,
-                                     kpt_thr=0.3,
-                                     line_width=2)
-            img_show = resize_to_fit_screen(img_show, 720, 480)
-            cv2.putText(img_show, f'{fps:.1f}', (10, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 1, cv2.LINE_AA)
-            cv2.imshow(f'{model}', img_show)
-            cv2.waitKey(10)
+
+            if keypoints is not None:
+                if frame_idx % args.batch_size == 0 and frame_idx:
+                    current_time = time.time()
+                    det_time = current_time - s
+                    fps = round(args.batch_size / det_time, 1)
+                    print(f'det: {fps} FPS')
+                    s = current_time
+
+                frame = in_queue.get()  # pair the result with the frame it belongs to
+                img_show = frame.copy()
+
+                # if you want to use black background instead of original image,
+                # img_show = np.zeros(img_show.shape, dtype=np.uint8)
+
+                img_show = draw_skeleton(img_show,
+                                         keypoints,
+                                         scores,
+                                         kpt_thr=0.3,
+                                         line_width=2)
+                img_show = resize_to_fit_screen(img_show, 720, 480)
+                cv2.putText(img_show, f'{fps:.1f}', (10, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 1, cv2.LINE_AA)
+                cv2.imshow(f'{model}', img_show)
+                cv2.waitKey(10)
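
Because results now trail the input by up to batch_size - 1 frames, the demo parks each frame in in_queue and pairs it with its keypoints when they arrive. A mock sketch of that contract (MockBatchEstimator is hypothetical, standing in for RTMO_GPU_Batch):

    from queue import Queue

    class MockBatchEstimator:
        def __init__(self, batch_size):
            self.batch_size = batch_size
            self.buffer = []    # frames waiting for a full batch
            self.out = Queue()  # finished (keypoints, scores) pairs

        def __call__(self, frame):
            self.buffer.append(frame)
            if len(self.buffer) == self.batch_size:
                for f in self.buffer:  # stand-in for real batched inference
                    self.out.put((f'kpts({f})', f'scores({f})'))
                self.buffer = []
            return self.out.get() if not self.out.empty() else (None, None)

    est = MockBatchEstimator(batch_size=4)
    pending = Queue()
    for frame in range(10):
        pending.put(frame)
        keypoints, _ = est(frame)
        if keypoints is not None:
            print(pending.get(), '->', keypoints)  # frame paired with its own result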
rtmo_demo_batch.py CHANGED
@@ -24,7 +24,7 @@ def process_video(video_path, body_estimator, batch_size=4):
         # Process the batch when it's full
         if len(batch_frames) == batch_size:
             s = time.time()
-            batch_keypoints, batch_scores = body_estimator(batch_frames)
+            batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
             det_time = time.time() - s
             fps = round(batch_size / det_time, 1)
             print(f'Batch det: {fps} FPS')
@@ -52,7 +52,7 @@ def process_video(video_path, body_estimator, batch_size=4):
 
     # Option 2: Duplicate the last frame
     batch_frames.append(batch_frames[-1])
-    batch_keypoints, batch_scores = body_estimator(batch_frames)
+    batch_keypoints, batch_scores = body_estimator.__batch_call__(batch_frames)
     for i, keypoints in enumerate(batch_keypoints):
         scores = batch_scores[i]
         frame = batch_frames[i]
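
Despite the dunder-style name, __batch_call__ is an ordinary method; Python attaches no special behavior to it, and the name only distinguishes the explicit list-of-frames entry point from the buffered __call__. If the final batch can fall short by more than the single frame that "Option 2" above duplicates, one generalization is to pad until full and trim the padded results; a sketch under that assumption (flush_partial_batch is hypothetical, not part of this commit):

    def flush_partial_batch(body_estimator, batch_frames, batch_size):
        """Pad a short final batch to full size, run it, drop padded results."""
        n_real = len(batch_frames)
        padded = batch_frames + [batch_frames[-1]] * (batch_size - n_real)
        batch_keypoints, batch_scores = body_estimator.__batch_call__(padded)
        return batch_keypoints[:n_real], batch_scores[:n_real]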
rtmo_gpu.py CHANGED
@@ -3,6 +3,7 @@ import numpy as np
 from typing import List, Tuple
 import onnxruntime as ort
 import cv2
+from queue import Queue
 os.environ['ORT_TENSORRT_EXTRA_PLUGIN_LIB_PATHS']='libmmdeploy_tensorrt_ops.so'
 
 # dictionary from https://github.com/Tau-J/rtmlib/blob/4b29101d54b611048ef165277cebfffff3030074/rtmlib/visualization/skeleton/coco17.py
@@ -458,6 +459,7 @@ class RTMO_GPU(object):
                 'cudnn_conv_algo_search': 'DEFAULT',
                 'cudnn_conv_use_max_workspace': True
             }),
+            'OpenVINOExecutionProvider',
             'CPUExecutionProvider']}
 
         self.session = ort.InferenceSession(path_or_bytes=model,
@@ -547,7 +549,7 @@ class RTMO_GPU_Batch(RTMO_GPU):
             self,
             outputs: List[np.ndarray],
             ratios: List[float]
-    ) -> List[Tuple[np.ndarray, np.ndarray]]:
+    ) -> Tuple[List[np.ndarray], List[np.ndarray]]:
         """Process outputs for a batch of images.
 
         Args:
@@ -569,11 +571,44 @@ class RTMO_GPU_Batch(RTMO_GPU):
 
         return batch_keypoints, batch_scores
 
-    def __call__(self, images: List[np.ndarray]):
+    def __batch_call__(self, images: List[np.ndarray]):
         batch_img, ratios = self.preprocess_batch(images)
         outputs = self.inference(batch_img)
         keypoints, scores = self.postprocess_batch(outputs, ratios)
         return keypoints, scores
+
+    def __call__(self, image: np.ndarray):
+        # Accumulate frames until a full batch is available, then run
+        # batched inference and queue the per-frame results.
+        self.buffer.append(image)
+
+        if len(self.buffer) == self.batch_size:
+            b_keypoints, b_scores = self.__batch_call__(self.buffer)
+            for keypoints, scores in zip(b_keypoints, b_scores):
+                self.out_queue.put((keypoints, scores))
+            self.buffer = []
+
+        # Return the oldest queued result, or (None, None) while the
+        # first batch is still filling.
+        keypoints, scores = None, None
+        if not self.out_queue.empty():
+            keypoints, scores = self.out_queue.get()
+
+        return keypoints, scores
+
+    def __init__(self,
+                 model: str = None,
+                 mean: tuple = None,
+                 std: tuple = None,
+                 device: str = 'cuda',
+                 is_yolo_nas_pose = False,
+                 batch_size: int = 1):
+        super().__init__(model,
+                         mean,
+                         std,
+                         device,
+                         is_yolo_nas_pose)
+
+        self.batch_size = batch_size
+        self.out_queue = Queue(maxsize=self.batch_size)
+        self.buffer = []
 
 def resize_to_fit_screen(image, screen_width, screen_height):
     # Get the dimensions of the image
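
One consequence of the buffered design is that frames still sitting in self.buffer when a stream ends are never inferred, and up to batch_size - 1 results may remain in out_queue. A hedged sketch of an end-of-stream drain (flush is hypothetical, not part of this commit), reusing the same pad-and-trim idea as the partial-batch handling in rtmo_demo_batch.py:

    def flush(body):
        # Pad the partial buffer so __batch_call__ can run on a full batch,
        # keep only the results for real frames, then yield everything
        # still sitting in out_queue.
        n_real = len(body.buffer)
        if n_real:
            padded = body.buffer + [body.buffer[-1]] * (body.batch_size - n_real)
            b_keypoints, b_scores = body.__batch_call__(padded)
            body.buffer = []
            for kpts, scs in zip(b_keypoints[:n_real], b_scores[:n_real]):
                body.out_queue.put((kpts, scs))
        while not body.out_queue.empty():
            yield body.out_queue.get()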