#!/usr/bin/env python3

import os
import time

import cv2
import matplotlib.pyplot as plt
import numpy as np
import onnxruntime
from datasets import load_dataset
from termcolor import colored
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms

from super_gradients.common.object_names import Models
from super_gradients.conversion import DetectionOutputFormatMode, ExportQuantizationMode, ExportTargetBackend
from super_gradients.training import models
from super_gradients.training.utils.media.image import load_image
from super_gradients.training.utils.visualization.pose_estimation import PoseVisualization


os.environ['CRASH_HANDLER'] = '0'  # disable the SuperGradients crash handler

# Conversion settings

CONVERSION = True
input_image_shape = [640, 640]
quantization_modes = [ExportQuantizationMode.INT8, ExportQuantizationMode.FP16, None]  # None keeps FP32 precision
output_predictions_format = DetectionOutputFormatMode.FLAT_FORMAT

# NMS-related settings
confidence_threshold = 0.15
nms_threshold = 0.2
num_pre_nms_predictions = 1000
max_predictions_per_image = 10

# ONNX Runtime benchmark settings
BENCHMARK = True
n_run = 1000
n_warm_up = 200
image_name = "https://deci-pretrained-models.s3.amazonaws.com/sample_images/beatles-abbeyroad.jpg"

# Sanity-check switches
SHAPE_CHECK = True                  # print the unpacked predictions for each detection
VISUAL_CHECK = True                 # draw the predicted poses on the test image
CALIBRATION_DATASET_CHECK = False   # display each calibration image during preprocessing

# Function to convert tensor to image for visualization
def tensor_to_image(tensor):
    # Convert the tensor to a numpy array
    numpy_image = tensor.numpy()

    # The output of ToTensor() is in C x H x W format, convert to H x W x C
    numpy_image = numpy_image.transpose(1, 2, 0)

    # Undo the normalization (if any)
    # numpy_image = numpy_image * std + mean  # Adjust based on your normalization

    return numpy_image

class HFDatasetWrapper(Dataset):
    def __init__(self, hf_dataset, transform=None):
        self.hf_dataset = hf_dataset
        self.transform = transform

    def __len__(self):
        return len(self.hf_dataset)

    def __getitem__(self, idx):
        item = self.hf_dataset[idx]
        if self.transform:
            item = self.transform(item)
        return item['image']
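
# HFDatasetWrapper adapts the Hugging Face dataset to the standard torch Dataset
# interface, so a plain DataLoader can batch the preprocessed image tensors for
# INT8 calibration (see calibration_loader below).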

def preprocess(data):
    # Grab the PIL image from the dataset record
    image = data['image']

    # Convert to RGB if not already
    if image.mode != 'RGB':
        image = image.convert('RGB')

    # Define your transformations
    transform = transforms.Compose([
        transforms.Resize((640, 640)),  # Resize to the export input size (matches input_image_shape)
        transforms.ToTensor(),          # Convert to tensor
        # Add normalization or other transformations if needed
    ])

    # Apply the transformations to the image
    transformed = transform(image)

    if CALIBRATION_DATASET_CHECK:
        # Display the processed image for visual inspection
        plt_image = tensor_to_image(transformed)
        plt.imshow(plt_image)
        plt.axis('off')  # Turn off axis numbers
        plt.show()

    return {'image': transformed}

def iterate_over_flat_predictions(predictions, batch_size):
    [flat_predictions] = predictions

    for image_index in range(batch_size):
        mask = flat_predictions[:, 0] == image_index
        pred_bboxes = flat_predictions[mask, 1:5]
        pred_scores = flat_predictions[mask, 5]
        pred_joints = flat_predictions[mask, 6:].reshape((len(pred_bboxes), -1, 3))
        yield image_index, pred_bboxes, pred_scores, pred_joints
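
# Note on the flat output layout: with the coco_pose checkpoints (17 keypoints),
# each row of flat_predictions is expected to contain
#   [image_index, x1, y1, x2, y2, pose_confidence, (x, y, score) * 17]
# i.e. 57 values per detection; the generator above splits these columns back
# into per-image boxes, scores and joints.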
        
def show_predictions_from_flat_format(image, predictions):
    image_index, pred_boxes, pred_scores, pred_joints = next(iter(iterate_over_flat_predictions(predictions, 1)))

    image = PoseVisualization.draw_poses(
        image=image, poses=pred_joints, scores=pred_scores, boxes=pred_boxes,
        edge_links=None, edge_colors=None, keypoint_colors=None, is_crowd=None
    )

    plt.figure(figsize=(8, 8))
    plt.imshow(image)
    plt.tight_layout()
    plt.show()
    
# Load the test image, resize it to the model input resolution, and convert HWC -> BCHW
image = load_image(image_name)
image = cv2.resize(image, (input_image_shape[1], input_image_shape[0]))
image_bchw = np.transpose(np.expand_dims(image, 0), (0, 3, 1, 2))

# Prepare Calibration Dataset for INT8 Quantization
dataset = load_dataset("cppe-5", split="train")
hf_dataset_wrapper = HFDatasetWrapper(dataset, transform=preprocess)
calibration_loader = DataLoader(hf_dataset_wrapper, batch_size=8)
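
# Optional sanity check (a minimal sketch): peek at one calibration batch.
# This assumes each batch from calibration_loader is a float tensor of shape
# [batch_size, 3, 640, 640], as produced by preprocess() above.
if CALIBRATION_DATASET_CHECK:
    sample_batch = next(iter(calibration_loader))
    print(f"Calibration batch shape: {tuple(sample_batch.shape)}")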

for model_name in [Models.YOLO_NAS_POSE_L, Models.YOLO_NAS_POSE_M, Models.YOLO_NAS_POSE_N, Models.YOLO_NAS_POSE_S]:
    for q in quantization_modes:

        # Encode the quantization mode in the exported ONNX model name
        if q is None:
            q_label = 'fp32'
        elif q == ExportQuantizationMode.INT8:
            q_label = 'int8'
        elif q == ExportQuantizationMode.FP16:
            q_label = 'fp16'
        else:
            raise ValueError(f"Unsupported quantization mode: {q}")

        export_name = f"{model_name}_{q_label}.onnx"

        # Convert the model from PyTorch to ONNX with the official SuperGradients export API
        print(f"1. Convert {colored(model_name, 'blue')} from PyTorch to ONNX format using {colored(q_label, 'red')} precision, saved as {colored(export_name, 'green')}")

        if CONVERSION:

            model = models.get(model_name, pretrained_weights="coco_pose")

            export_result = model.export(
                output=export_name,
                confidence_threshold=confidence_threshold,
                nms_threshold=nms_threshold,
                engine=ExportTargetBackend.ONNXRUNTIME,
                quantization_mode=q,
                # selective_quantizer: Optional["SelectiveQuantizer"] = None,  # noqa
                calibration_loader=calibration_loader if q == ExportQuantizationMode.INT8 else None,
                # calibration_method: str = "percentile",
                # calibration_batches: int = 16,
                # calibration_percentile: float = 99.99,
                preprocessing=True,
                postprocessing=True,
                # postprocessing_kwargs: Optional[dict] = None,
                batch_size=1,
                input_image_shape=input_image_shape,
                # input_image_channels: Optional[int] = None,
                # input_image_dtype: Optional[torch.dtype] = None,
                max_predictions_per_image=max_predictions_per_image,
                onnx_export_kwargs={"opset_version": 14},
                onnx_simplify=True,
                # device: Optional[Union[torch.device, str]] = None,
                output_predictions_format=output_predictions_format,
                num_pre_nms_predictions=num_pre_nms_predictions,
            )

            # Also export the model usage instructions as a text file
            usage_name = export_name + '.usage.txt'
            with open(usage_name, 'w') as f:
                f.write(str(export_result))
            print(f"1.1 Usage notes for {colored(export_name, 'green')} have been stored in {colored(usage_name, 'yellow')}")
				
        if BENCHMARK:
            # Run inference with ONNX Runtime
            session = onnxruntime.InferenceSession(export_name, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
            inputs = [o.name for o in session.get_inputs()]
            outputs = [o.name for o in session.get_outputs()]

            # Warm-up runs (not timed)
            for i in range(n_warm_up):
                result = session.run(outputs, {inputs[0]: image_bchw})

            # Timed runs
            t = time.time()
            for i in range(n_run):
                result = session.run(outputs, {inputs[0]: image_bchw})
            latency = (time.time() - t) / n_run
            fps = round(1 / latency, 2)

            print(f'2. Average FPS: {colored(fps, "red")}')

        # The shape and visual checks reuse the last inference result from the benchmark run
        if BENCHMARK and SHAPE_CHECK:
            for image_index, pred_bboxes, pred_scores, pred_joints in iterate_over_flat_predictions(result, batch_size=1):

                N = pred_scores.shape[0]

                for i in range(N):
                    print(f'Detected Object {colored(i, "green")}')
                    print('Predicted Bounding Box (4 values: x1, y1, x2, y2)', pred_bboxes[i, :])
                    print('Pose Confidence (scalar)', pred_scores[i])
                    print('Predicted Joints (17 x 3: x, y, score per keypoint)', pred_joints[i, :, :])

        if BENCHMARK and VISUAL_CHECK:
            # Visual check of the detection result
            show_predictions_from_flat_format(image, result)