File size: 7,048 Bytes
92676db 09ccc6e 92676db 09ccc6e 92676db 09ccc6e 92676db |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
#!/usr/bin/env python3
#
# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
This script demonstrates how to use the Calibrator API provided by Polygraphy
to calibrate a TensorRT engine to run in INT8 precision.
"""
import numpy as np
from polygraphy.backend.trt import Calibrator, CreateConfig, EngineFromNetwork, NetworkFromOnnxPath, TrtRunner, save_engine, load_plugins, Profile
from termcolor import cprint
# Load the mmdeploy TensorRT ops shared library as a TensorRT plugin at import time.
# NOTE(review): presumably required so custom ops in the ONNX model can be parsed
# when the engine is built -- confirm against the model being converted.
load_plugins(plugins=['libmmdeploy_tensorrt_ops.so'])
import cv2
import argparse
# When True, the __main__ section displays the calibration frames in an OpenCV
# window (press 'q' to stop the preview) before the engine is built.
PREVIEW_CALIBRATOR_OUTPUT = True
def calib_data_from_video():
    """Yield INT8 calibration inputs, one per frame of the video at ``VIDEO_PATH``.

    Every frame is letterboxed to ``MODEL_INPUT_SIZE`` (see ``preprocess``),
    reordered from HWC to CHW, converted to a contiguous float32 array and
    given a leading batch dimension of 1. Pixel values are not rescaled, so
    they stay in the 0..255 range.

    Reads the module-level globals ``VIDEO_PATH`` and ``MODEL_INPUT_SIZE``,
    which are assigned in the ``__main__`` section of this script.

    Yields:
        dict: ``{"input": batch}`` where ``batch`` is a float32 array with a
        batch dimension of 1 followed by the channel-first frame.
    """

    # Image preprocessing taken from rtmlib.
    def preprocess(img: np.ndarray):
        """Letterbox ``img`` into a ``MODEL_INPUT_SIZE`` canvas.

        The image is resized with its aspect ratio preserved so that it fits
        inside the canvas, then pasted into the top-left corner; the rest of
        the canvas keeps the fill value 114.

        Args:
            img (np.ndarray): Input image, either HxWxC or HxW.

        Returns:
            tuple:
            - padded_img (np.ndarray): uint8 canvas holding the resized image.
            - ratio (float): Scale factor applied to the input image.
        """
        if len(img.shape) == 3:
            padded_img = np.ones(
                (MODEL_INPUT_SIZE[0], MODEL_INPUT_SIZE[1], 3),
                dtype=np.uint8) * 114
        else:
            padded_img = np.ones(MODEL_INPUT_SIZE, dtype=np.uint8) * 114

        # Largest scale at which both sides still fit inside the canvas.
        ratio = min(MODEL_INPUT_SIZE[0] / img.shape[0],
                    MODEL_INPUT_SIZE[1] / img.shape[1])
        new_h = int(img.shape[0] * ratio)
        new_w = int(img.shape[1] * ratio)
        resized_img = cv2.resize(
            img,
            (new_w, new_h),  # cv2.resize takes the target size as (width, height)
            interpolation=cv2.INTER_LINEAR,
        ).astype(np.uint8)
        padded_img[:new_h, :new_w] = resized_img
        return padded_img, ratio

    cap = cv2.VideoCapture(filename=VIDEO_PATH)
    try:
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                # End of stream (or a read error): stop producing data.
                break
            img, _ = preprocess(frame)  # pad & resize
            img = img.transpose(2, 0, 1)  # HWC -> CHW
            img = np.ascontiguousarray(img, dtype=np.float32)  # to f32
            # Map the model's input name to the data, with a batch dim prepended.
            yield {"input": img[None, :, :, :]}
    finally:
        # Runs on exhaustion and also when the consumer stops early and the
        # generator is closed, so the capture handle is always released.
        cap.release()
def main(onnx_path, engine_path, batch_size):
    """Build an INT8-calibrated TensorRT engine from an ONNX model and save it.

    Reads the module-level global ``MODEL_INPUT_SIZE`` for the spatial input
    dimensions and uses ``calib_data_from_video()`` as calibration data source.

    Args:
        onnx_path: Path of the ONNX model. The calibration cache is stored
            next to it as ``<onnx_path>-calib.cache``.
        engine_path: Destination path of the serialized engine.
        batch_size: Fixed batch size of the engine. Any value below 1 selects
            a dynamic-batch profile instead (min 1, opt 4, max 9).
    """
    # Passing a cache path lets a later build reuse the calibration results
    # instead of running calibration again.
    #
    # TIP: The calibrator can also be used with TensorRT APIs directly
    # (e.g. config.int8_calibrator); Polygraphy loaders are not required.
    calibrator = Calibrator(data_loader=calib_data_from_video(), cache=f"{onnx_path}-calib.cache")

    # (min, opt, max) batch dimension of the single optimization profile.
    if batch_size < 1:
        # dynamic batch size
        batch_dims = (1, 4, 9)
    else:
        # fixed batch size: min == opt == max
        batch_dims = (batch_size, batch_size, batch_size)

    min_shape, opt_shape, max_shape = [
        (batch_dim, 3, MODEL_INPUT_SIZE[0], MODEL_INPUT_SIZE[1])
        for batch_dim in batch_dims
    ]
    profiles = [
        Profile().add("input", min=min_shape, opt=opt_shape, max=max_shape),
    ]

    network_loader = NetworkFromOnnxPath(f"{onnx_path}")
    # int8 mode must be enabled in addition to providing the calibrator.
    builder_config = CreateConfig(
        use_dla=False,
        tf32=True,
        fp16=True,
        int8=True,
        obey_precision_constraints=False,
        sparse_weights=True,
        calibrator=calibrator,
        profiles=profiles,
        max_workspace_size=2 * 1024 * 1024 * 1024,
        allow_gpu_fallback=True,
    )
    build_engine = EngineFromNetwork(network_loader, config=builder_config)

    # The loader above is lazy: handing it to save_engine is what triggers
    # calibration and the engine build, after which the result is written out.
    save_engine(build_engine, f'{engine_path}')
if __name__ == "__main__":
    # Command-line entry point: optionally preview the calibration frames,
    # then calibrate and build the INT8 engine.
    parser = argparse.ArgumentParser(description="Process a video file.")
    parser.add_argument("video_path", type=str, help="The path to the video file used to calibrate int8 engine")
    parser.add_argument("onnx_path", type=str, help="The path to the input ONNX model file")
    parser.add_argument("engine_path", type=str, help="The path to the exported TensorRT Engine model file")
    parser.add_argument("--batch_size", type=int, default=-1, help="Input batch size (not specified if dynamic)")
    args = parser.parse_args()

    # Module-level globals read by calib_data_from_video() and main().
    VIDEO_PATH = args.video_path
    # The input resolution is picked from the model file name.
    MODEL_INPUT_SIZE = (416, 416) if 'rtmo-t' in args.onnx_path else (640, 640)

    if PREVIEW_CALIBRATOR_OUTPUT:
        cprint('You are previewing video used to calibrate TensorRT int8 engine model ...', 'yellow')
        preview_frames = calib_data_from_video()
        try:
            for output_dict in preview_frames:
                image = output_dict['input']  # get frame
                # Drop the batch dim, go back to HWC and scale to [0, 1] floats
                # for display.
                image_to_show = image.squeeze(0).transpose(1, 2, 0) / 255.0
                cv2.imshow(VIDEO_PATH, image_to_show)
                if cv2.waitKey(1) & 0xFF == ord('q'):  # Exit loop if 'q' is pressed
                    break
        finally:
            # Finalize the generator now (it may have been left mid-video by
            # an early exit) and close the preview window even if the loop
            # was interrupted by an exception.
            preview_frames.close()
            cv2.destroyAllWindows()  # Close all OpenCV windows

    main(args.onnx_path, args.engine_path, args.batch_size)
|