Luigi committed on
Commit
06f3435
1 Parent(s): a9e21bc

Make 2 distinct ONNX2TRT conversion scripts, one for JetPack 4.6, the other for JetPack 5.1

onnx_to_engine.py → onnx_to_engine-jetpack_4p6.py RENAMED
File without changes
onnx_to_engine-jetpack_5p1.py ADDED
@@ -0,0 +1,159 @@
+ #!/usr/bin/env python3
+ #
+ # SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ # SPDX-License-Identifier: Apache-2.0
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+
+ """
+ This script demonstrates how to use the Calibrator API provided by Polygraphy
+ to calibrate a TensorRT engine to run in INT8 precision.
+ """
+ import numpy as np
+ from polygraphy.backend.trt import Calibrator, CreateConfig, EngineFromNetwork, NetworkFromOnnxPath, TrtRunner, save_engine, load_plugins, Profile
+ from termcolor import cprint
+ load_plugins(plugins=['libmmdeploy_tensorrt_ops.so'])
+ import cv2
+ import argparse
+
+ PREVIEW_CALIBRATOR_OUTPUT = True
+
+ def calib_data_from_video(batch_size=1):
+
+     # image preprocessing taken from rtmlib
+     def preprocess(img: np.ndarray):
+         """Do preprocessing for RTMPose model inference.
+
+         Args:
+             img (np.ndarray): Input image, (H, W, 3) BGR as read by OpenCV.
+
+         Returns:
+             tuple:
+             - padded_img (np.ndarray): Resized and letterbox-padded image.
+             - ratio (float): Resize ratio applied to the original image.
+         """
+         if len(img.shape) == 3:
+             padded_img = np.ones(
+                 (MODEL_INPUT_SIZE[0], MODEL_INPUT_SIZE[1], 3),
+                 dtype=np.uint8) * 114
+         else:
+             padded_img = np.ones(MODEL_INPUT_SIZE, dtype=np.uint8) * 114
+
+         ratio = min(MODEL_INPUT_SIZE[0] / img.shape[0],
+                     MODEL_INPUT_SIZE[1] / img.shape[1])
+         resized_img = cv2.resize(
+             img,
+             (int(img.shape[1] * ratio), int(img.shape[0] * ratio)),
+             interpolation=cv2.INTER_LINEAR,
+         ).astype(np.uint8)
+         padded_shape = (int(img.shape[0] * ratio), int(img.shape[1] * ratio))
+         padded_img[:padded_shape[0], :padded_shape[1]] = resized_img
+
+         return padded_img, ratio
+
+     cap = cv2.VideoCapture(filename=VIDEO_PATH)
+     imgs = []
+     while cap.isOpened():
+
+         success, frame = cap.read()
+         if success:
+             img, ratio = preprocess(frame)                     # letterbox pad & resize
+             img = img.transpose(2, 0, 1)                       # HWC -> CHW
+             img = np.ascontiguousarray(img, dtype=np.float32)  # to float32
+             img = img[None, :, :, :]                           # add batch dim
+
+             imgs.append(img)
+             if len(imgs) == batch_size:
+                 batch_img = np.vstack(imgs)
+                 yield {"input": batch_img}
+                 imgs = []
+                 # cprint(f'batch_img.shape = {batch_img.shape}', 'yellow')
+         else:
+             break
+
+     cap.release()
+
+ def main(onnx_path, engine_path, batch_size):
+
+     # We can provide a path or file-like object if we want to cache calibration data.
+     # This lets us avoid running calibration the next time we build the engine.
+     #
+     # TIP: You can use this calibrator with TensorRT APIs directly (e.g. config.int8_calibrator).
+     # You don't have to use it with Polygraphy loaders if you don't want to.
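+     # For example, caching could look like this (the cache path is illustrative,
+     # not part of this script):
+     #     calibrator = Calibrator(data_loader=..., cache="calibration.cache")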
+     if batch_size < 1:  # dynamic batch size
+
+         profiles = [
+             # Dynamic batch: optimize for batch 4 while allowing any batch from 1 to 9.
+             Profile().add("input",
+                           min=(1, 3, MODEL_INPUT_SIZE[0], MODEL_INPUT_SIZE[1]),
+                           opt=(4, 3, MODEL_INPUT_SIZE[0], MODEL_INPUT_SIZE[1]),
+                           max=(9, 3, MODEL_INPUT_SIZE[0], MODEL_INPUT_SIZE[1])),
+         ]
+
+     else:  # fixed batch size
+         profiles = [
+             # The low-latency case. For best performance, min == opt == max.
+             Profile().add("input",
+                           min=(batch_size, 3, MODEL_INPUT_SIZE[0], MODEL_INPUT_SIZE[1]),
+                           opt=(batch_size, 3, MODEL_INPUT_SIZE[0], MODEL_INPUT_SIZE[1]),
+                           max=(batch_size, 3, MODEL_INPUT_SIZE[0], MODEL_INPUT_SIZE[1])),
+         ]
+
+     opt_batch_size = profiles[0]['input'].opt[0]
+     calibrator = Calibrator(data_loader=calib_data_from_video(opt_batch_size))
+
+     # We must enable int8 mode in addition to providing the calibrator.
+     build_engine = EngineFromNetwork(
+         NetworkFromOnnxPath(f"{onnx_path}"), config=CreateConfig(
+             use_dla=False,
+             tf32=True,
+             fp16=True,
+             int8=True,
+             precision_constraints="prefer",
+             sparse_weights=True,
+             calibrator=calibrator,
+             profiles=profiles,
+             max_workspace_size=2 * 1024 * 1024 * 1024,
+             allow_gpu_fallback=True,
+         )
+     )
+
+     # Calibration runs when the engine is built below; save_engine builds the
+     # engine and serializes it to disk. If we want to see the logging output
+     # from TensorRT, we can temporarily increase logging verbosity:
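+     #     (a sketch, assuming Polygraphy's G_LOGGER verbosity context manager)
+     #     from polygraphy.logger import G_LOGGER
+     #     with G_LOGGER.verbosity(G_LOGGER.VERBOSE):
+     #         save_engine(build_engine, engine_path)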
+     save_engine(build_engine, f'{engine_path}')
+
+ if __name__ == "__main__":
+
+     parser = argparse.ArgumentParser(description="Build an INT8 TensorRT engine from an ONNX model, calibrated on a video file.")
+     parser.add_argument("video_path", type=str, help="Path to the video file used to calibrate the INT8 engine")
+     parser.add_argument("onnx_path", type=str, help="Path to the input ONNX model file")
+     parser.add_argument("engine_path", type=str, help="Path for the exported TensorRT engine file")
+     parser.add_argument("--batch_size", type=int, default=-1, help="Input batch size (omit for a dynamic batch dimension)")
+     args = parser.parse_args()
+     VIDEO_PATH = args.video_path
+     MODEL_INPUT_SIZE = (416, 416) if 'rtmo-t' in args.onnx_path else (640, 640)
+
+     if PREVIEW_CALIBRATOR_OUTPUT:
+         cprint('Previewing the video used to calibrate the TensorRT INT8 engine ...', 'yellow')
+         for output_dict in calib_data_from_video():
+             if output_dict:
+                 image = output_dict['input']  # one preprocessed batch
+                 image_to_show = image.squeeze(0).transpose(1, 2, 0) / 255.0  # drop batch dim, CHW -> HWC, scale to [0, 1]
+                 cv2.imshow(VIDEO_PATH, image_to_show)
+                 if cv2.waitKey(1) & 0xFF == ord('q'):  # exit loop if 'q' is pressed
+                     break
+         cv2.destroyAllWindows()  # close all OpenCV windows
+
+     main(args.onnx_path, args.engine_path, args.batch_size)
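
Usage sketch (file names are illustrative, not from this commit): on a JetPack 5.1 device the new script would be invoked as `python3 onnx_to_engine-jetpack_5p1.py calib_video.mp4 rtmo-t.onnx rtmo-t.engine --batch_size 1`; omitting `--batch_size` builds an engine with a dynamic batch dimension (1 to 9, optimized for 4).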