diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..c7fcddab4f21c68d561884a13383bcf6f87d430d 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,35 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_l_fp16.onnx.best.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_l_fp16.onnx.fp16.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_l_fp32.onnx.best.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_l_fp32.onnx.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_l_fp32.onnx.fp16.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_l_fp32.onnx.int8.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_l_int8.onnx.best.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_l_int8.onnx.int8.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_m_fp16.onnx.best.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_m_fp16.onnx.fp16.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_m_fp32.onnx.best.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_m_fp32.onnx.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_m_fp32.onnx.fp16.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_m_fp32.onnx.int8.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_m_int8.onnx.best.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_m_int8.onnx.int8.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_n_fp16.onnx.best.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_n_fp16.onnx.fp16.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_n_fp32.onnx.best.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_n_fp32.onnx.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_n_fp32.onnx.fp16.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_n_fp32.onnx.int8.engine filter=lfs 
diff=lfs merge=lfs -text +yolo_nas_pose_n_int8.onnx.best.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_n_int8.onnx.int8.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_s_fp16.onnx.best.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_s_fp16.onnx.fp16.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_s_fp32.onnx.best.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_s_fp32.onnx.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_s_fp32.onnx.fp16.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_s_fp32.onnx.int8.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_s_int8.onnx.best.engine filter=lfs diff=lfs merge=lfs -text +yolo_nas_pose_s_int8.onnx.int8.engine filter=lfs diff=lfs merge=lfs -text diff --git a/benchmark_with_trtexec.sh b/benchmark_with_trtexec.sh new file mode 100644 index 0000000000000000000000000000000000000000..6b4ea04309b1b2ac97fe12d8515dfe32bfc50a7d --- /dev/null +++ b/benchmark_with_trtexec.sh @@ -0,0 +1,37 @@ +#!/bin/sh + +#/usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_fp32.onnx --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_fp32.onnx.fp32.engine > yolo_nas_pose_n_fp32.onnx.fp32.engine.log 2> yolo_nas_pose_n_fp32.onnx.fp32.engine.err +#/usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_fp32.onnx --fp16 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_fp32.onnx.fp16.engine > yolo_nas_pose_n_fp32.onnx.fp16.engine.log 2> yolo_nas_pose_n_fp32.onnx.fp16.engine.err +#/usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_fp32.onnx --best --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_fp32.onnx.best.engine > yolo_nas_pose_n_fp32.onnx.best.engine.log 2> yolo_nas_pose_n_fp32.onnx.best.engine.err +#/usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_fp32.onnx --int8 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_fp32.onnx.int8.engine > yolo_nas_pose_n_fp32.onnx.int8.engine.log 2> yolo_nas_pose_n_fp32.onnx.int8.engine.err 
+#/usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_int8.onnx --int8 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_int8.onnx.int8.engine > yolo_nas_pose_n_int8.onnx.int8.engine.log 2> yolo_nas_pose_n_int8.onnx.int8.engine.err +#/usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_fp32.onnx --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_fp32.onnx.fp32.engine > yolo_nas_pose_s_fp32.onnx.fp32.engine.log 2> yolo_nas_pose_s_fp32.onnx.fp32.engine.err +#/usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_fp32.onnx --fp16 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_fp32.onnx.fp16.engine > yolo_nas_pose_s_fp32.onnx.fp16.engine.log 2> yolo_nas_pose_s_fp32.onnx.fp16.engine.err +#/usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_fp32.onnx --best --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_fp32.onnx.best.engine > yolo_nas_pose_s_fp32.onnx.best.engine.log 2> yolo_nas_pose_s_fp32.onnx.best.engine.err +#/usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_fp32.onnx --int8 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_fp32.onnx.int8.engine > yolo_nas_pose_s_fp32.onnx.int8.engine.log 2> yolo_nas_pose_s_fp32.onnx.int8.engine.err +#/usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_int8.onnx --int8 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_int8.onnx.int8.engine > yolo_nas_pose_s_int8.onnx.int8.engine.log 2> yolo_nas_pose_s_int8.onnx.int8.engine.err + +onnx_models=( + "yolo_nas_pose_n_fp32" + "yolo_nas_pose_n_fp16" + "yolo_nas_pose_n_int8" + "yolo_nas_pose_s_fp32" + "yolo_nas_pose_s_fp16" + "yolo_nas_pose_s_int8" + "yolo_nas_pose_m_fp32" + "yolo_nas_pose_m_fp16" + "yolo_nas_pose_m_int8" + "yolo_nas_pose_l_fp32" + "yolo_nas_pose_l_fp16" + "yolo_nas_pose_l_int8" +) + +for onnx in "${onnx_models[@]}"; do + for opt in "" "--fp16" "--best" "--int8"; do + suffix="" + if [ ! 
-z "$opt" ]; then + suffix=".${opt:2}" + fi + /usr/src/tensorrt/bin/trtexec --onnx=${onnx}.onnx $opt --avgRuns=100 --duration=15 --saveEngine=${onnx}.onnx${suffix}.engine > ${onnx}.onnx${suffix}.engine.log 2> ${onnx}.onnx${suffix}.engine.err + done +done diff --git a/yolo_nas_pose_l_fp16.onnx b/yolo_nas_pose_l_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..41c9fcb2502dbf12717e6b6705367cdfeb1d45b2 --- /dev/null +++ b/yolo_nas_pose_l_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4da0316c679b93931da1a30d1224d280991d2350354cb8df7338b0affc154e3 +size 108987993 diff --git a/yolo_nas_pose_l_fp16.onnx.best.engine b/yolo_nas_pose_l_fp16.onnx.best.engine new file mode 100644 index 0000000000000000000000000000000000000000..7df7e8a61fb608c59974d88c9567aa00e1cc687c --- /dev/null +++ b/yolo_nas_pose_l_fp16.onnx.best.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a88cce7911337fa5283e63dca004d66fef580a25ee5c322aad72f974c1259e3 +size 57159395 diff --git a/yolo_nas_pose_l_fp16.onnx.best.engine.err b/yolo_nas_pose_l_fp16.onnx.best.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..65fabce400c958197332ab6c8e52dcefd16bfd16 --- /dev/null +++ b/yolo_nas_pose_l_fp16.onnx.best.engine.err @@ -0,0 +1,364 @@ +[12/28/2023-17:15:27] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/28/2023-17:15:27] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-17:15:27] [W] [TRT] Calibrator is not being used. Users must provide dynamic range for all tensors that are not Int32 or Bool. +[12/28/2023-17:48:46] [W] [TRT] Tactic Device request: 4720MB Available: 2777MB. Device memory is insufficient to use tactic. 
+[12/28/2023-17:48:46] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4720 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:48:47] [W] [TRT] Tactic Device request: 4720MB Available: 2777MB. Device memory is insufficient to use tactic. +[12/28/2023-17:48:47] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4720 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:48:47] [W] [TRT] Tactic Device request: 4720MB Available: 2777MB. Device memory is insufficient to use tactic. +[12/28/2023-17:48:47] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4720 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:48:52] [W] [TRT] Tactic Device request: 4706MB Available: 2784MB. Device memory is insufficient to use tactic. +[12/28/2023-17:48:53] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4706 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:48:53] [W] [TRT] Tactic Device request: 4706MB Available: 2784MB. Device memory is insufficient to use tactic. +[12/28/2023-17:48:53] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4706 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:48:57] [W] [TRT] Tactic Device request: 4711MB Available: 2755MB. Device memory is insufficient to use tactic. +[12/28/2023-17:48:57] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-17:48:58] [W] [TRT] Tactic Device request: 4711MB Available: 2755MB. Device memory is insufficient to use tactic. +[12/28/2023-17:48:58] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4711 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:48:59] [W] [TRT] Tactic Device request: 4711MB Available: 2755MB. Device memory is insufficient to use tactic. +[12/28/2023-17:48:59] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:49:04] [W] [TRT] Tactic Device request: 4702MB Available: 2754MB. Device memory is insufficient to use tactic. +[12/28/2023-17:49:04] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4702 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:49:04] [W] [TRT] Tactic Device request: 4702MB Available: 2755MB. Device memory is insufficient to use tactic. +[12/28/2023-17:49:04] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4702 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:49:08] [W] [TRT] Tactic Device request: 4711MB Available: 2711MB. Device memory is insufficient to use tactic. +[12/28/2023-17:49:08] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:49:09] [W] [TRT] Tactic Device request: 4711MB Available: 2711MB. Device memory is insufficient to use tactic. 
+[12/28/2023-17:49:09] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4711 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:49:09] [W] [TRT] Tactic Device request: 4711MB Available: 2711MB. Device memory is insufficient to use tactic. +[12/28/2023-17:49:10] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:49:15] [W] [TRT] Tactic Device request: 4702MB Available: 2710MB. Device memory is insufficient to use tactic. +[12/28/2023-17:49:15] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4702 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:49:15] [W] [TRT] Tactic Device request: 4702MB Available: 2710MB. Device memory is insufficient to use tactic. +[12/28/2023-17:49:15] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4702 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:56:52] [W] [TRT] Tactic Device request: 4711MB Available: 2405MB. Device memory is insufficient to use tactic. +[12/28/2023-17:56:52] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:56:52] [W] [TRT] Tactic Device request: 4711MB Available: 2407MB. Device memory is insufficient to use tactic. +[12/28/2023-17:56:52] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4711 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-17:56:52] [W] [TRT] Tactic Device request: 4711MB Available: 2407MB. Device memory is insufficient to use tactic. +[12/28/2023-17:56:52] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:56:54] [W] [TRT] Tactic Device request: 4701MB Available: 2406MB. Device memory is insufficient to use tactic. +[12/28/2023-17:56:54] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4701 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:56:54] [W] [TRT] Tactic Device request: 4701MB Available: 2407MB. Device memory is insufficient to use tactic. +[12/28/2023-17:56:54] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 4701 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:57:13] [W] [TRT] Tactic Device request: 6275MB Available: 2406MB. Device memory is insufficient to use tactic. +[12/28/2023-17:57:13] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6275 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:57:13] [W] [TRT] Tactic Device request: 6275MB Available: 2406MB. Device memory is insufficient to use tactic. +[12/28/2023-17:57:13] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6275 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:57:13] [W] [TRT] Tactic Device request: 6275MB Available: 2406MB. Device memory is insufficient to use tactic. 
+[12/28/2023-17:57:13] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6275 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:57:15] [W] [TRT] Tactic Device request: 6270MB Available: 2405MB. Device memory is insufficient to use tactic. +[12/28/2023-17:57:15] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6270 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:57:15] [W] [TRT] Tactic Device request: 6270MB Available: 2405MB. Device memory is insufficient to use tactic. +[12/28/2023-17:57:15] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 6270 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:08:00] [W] [TRT] Tactic Device request: 7056MB Available: 2739MB. Device memory is insufficient to use tactic. +[12/28/2023-18:08:00] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:08:01] [W] [TRT] Tactic Device request: 7056MB Available: 2738MB. Device memory is insufficient to use tactic. +[12/28/2023-18:08:01] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 7056 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:08:01] [W] [TRT] Tactic Device request: 7056MB Available: 2738MB. Device memory is insufficient to use tactic. +[12/28/2023-18:08:01] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-18:08:03] [W] [TRT] Tactic Device request: 7050MB Available: 2738MB. Device memory is insufficient to use tactic. +[12/28/2023-18:08:03] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7050 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:08:03] [W] [TRT] Tactic Device request: 7050MB Available: 2739MB. Device memory is insufficient to use tactic. +[12/28/2023-18:08:03] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 7050 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:08:23] [W] [TRT] Tactic Device request: 6354MB Available: 2732MB. Device memory is insufficient to use tactic. +[12/28/2023-18:08:24] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6354 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:08:24] [W] [TRT] Tactic Device request: 6354MB Available: 2731MB. Device memory is insufficient to use tactic. +[12/28/2023-18:08:24] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6354 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:08:24] [W] [TRT] Tactic Device request: 6354MB Available: 2732MB. Device memory is insufficient to use tactic. +[12/28/2023-18:08:24] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6354 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:08:26] [W] [TRT] Tactic Device request: 6351MB Available: 2732MB. Device memory is insufficient to use tactic. 
+[12/28/2023-18:08:26] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6351 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:08:26] [W] [TRT] Tactic Device request: 6351MB Available: 2732MB. Device memory is insufficient to use tactic. +[12/28/2023-18:08:26] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 6351 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:23:59] [W] [TRT] Tactic Device request: 6540MB Available: 2641MB. Device memory is insufficient to use tactic. +[12/28/2023-18:23:59] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6540 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:23:59] [W] [TRT] Tactic Device request: 6540MB Available: 2641MB. Device memory is insufficient to use tactic. +[12/28/2023-18:23:59] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6540 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:23:59] [W] [TRT] Tactic Device request: 6540MB Available: 2641MB. Device memory is insufficient to use tactic. +[12/28/2023-18:23:59] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6540 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:24:01] [W] [TRT] Tactic Device request: 6538MB Available: 2642MB. Device memory is insufficient to use tactic. +[12/28/2023-18:24:01] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6538 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-18:24:01] [W] [TRT] Tactic Device request: 6538MB Available: 2641MB. Device memory is insufficient to use tactic. +[12/28/2023-18:24:01] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 6538 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:32:34] [W] [TRT] Tactic Device request: 2457MB Available: 2443MB. Device memory is insufficient to use tactic. +[12/28/2023-18:32:34] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:32:34] [W] [TRT] Tactic Device request: 2457MB Available: 2444MB. Device memory is insufficient to use tactic. +[12/28/2023-18:32:34] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2457 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:32:34] [W] [TRT] Tactic Device request: 2457MB Available: 2444MB. Device memory is insufficient to use tactic. +[12/28/2023-18:32:34] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:32:35] [W] [TRT] Tactic Device request: 2456MB Available: 2444MB. Device memory is insufficient to use tactic. +[12/28/2023-18:32:35] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2456 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:32:35] [W] [TRT] Tactic Device request: 2456MB Available: 2444MB. Device memory is insufficient to use tactic. 
+[12/28/2023-18:32:35] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2456 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:33:03] [W] [TRT] Tactic Device request: 3587MB Available: 2405MB. Device memory is insufficient to use tactic. +[12/28/2023-18:33:03] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:33:03] [W] [TRT] Tactic Device request: 3587MB Available: 2405MB. Device memory is insufficient to use tactic. +[12/28/2023-18:33:03] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3587 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:33:03] [W] [TRT] Tactic Device request: 3587MB Available: 2405MB. Device memory is insufficient to use tactic. +[12/28/2023-18:33:03] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:33:04] [W] [TRT] Tactic Device request: 3585MB Available: 2405MB. Device memory is insufficient to use tactic. +[12/28/2023-18:33:04] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3585 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:33:04] [W] [TRT] Tactic Device request: 3585MB Available: 2405MB. Device memory is insufficient to use tactic. +[12/28/2023-18:33:04] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3585 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-18:45:49] [W] [TRT] Tactic Device request: 3556MB Available: 2253MB. Device memory is insufficient to use tactic. +[12/28/2023-18:45:49] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:45:49] [W] [TRT] Tactic Device request: 3556MB Available: 2254MB. Device memory is insufficient to use tactic. +[12/28/2023-18:45:49] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3556 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:45:49] [W] [TRT] Tactic Device request: 3556MB Available: 2254MB. Device memory is insufficient to use tactic. +[12/28/2023-18:45:49] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:45:50] [W] [TRT] Tactic Device request: 3551MB Available: 2254MB. Device memory is insufficient to use tactic. +[12/28/2023-18:45:50] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3551 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:45:50] [W] [TRT] Tactic Device request: 3551MB Available: 2254MB. Device memory is insufficient to use tactic. +[12/28/2023-18:45:50] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3551 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:45:52] [W] [TRT] Tactic Device request: 3140MB Available: 2253MB. Device memory is insufficient to use tactic. 
+[12/28/2023-18:45:52] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3140 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:45:52] [W] [TRT] Tactic Device request: 3140MB Available: 2253MB. Device memory is insufficient to use tactic. +[12/28/2023-18:45:52] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3140 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:45:52] [W] [TRT] Tactic Device request: 3140MB Available: 2253MB. Device memory is insufficient to use tactic. +[12/28/2023-18:45:52] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3140 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:45:53] [W] [TRT] Tactic Device request: 3136MB Available: 2253MB. Device memory is insufficient to use tactic. +[12/28/2023-18:45:53] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3136 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:45:53] [W] [TRT] Tactic Device request: 3136MB Available: 2253MB. Device memory is insufficient to use tactic. +[12/28/2023-18:45:53] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3136 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:57:51] [W] [TRT] Tactic Device request: 3161MB Available: 2021MB. Device memory is insufficient to use tactic. +[12/28/2023-18:57:51] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3161 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-18:57:51] [W] [TRT] Tactic Device request: 3161MB Available: 2021MB. Device memory is insufficient to use tactic. +[12/28/2023-18:57:51] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3161 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:57:51] [W] [TRT] Tactic Device request: 3161MB Available: 2021MB. Device memory is insufficient to use tactic. +[12/28/2023-18:57:51] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3161 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:57:52] [W] [TRT] Tactic Device request: 3156MB Available: 2021MB. Device memory is insufficient to use tactic. +[12/28/2023-18:57:52] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3156 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:57:52] [W] [TRT] Tactic Device request: 3156MB Available: 2021MB. Device memory is insufficient to use tactic. +[12/28/2023-18:57:52] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3156 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:57:59] [W] [TRT] Tactic Device request: 4189MB Available: 2022MB. Device memory is insufficient to use tactic. +[12/28/2023-18:57:59] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4189 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:57:59] [W] [TRT] Tactic Device request: 4189MB Available: 2015MB. Device memory is insufficient to use tactic. 
+[12/28/2023-18:57:59] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4189 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:58:00] [W] [TRT] Tactic Device request: 4189MB Available: 2016MB. Device memory is insufficient to use tactic. +[12/28/2023-18:58:00] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4189 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:58:04] [W] [TRT] Tactic Device request: 4183MB Available: 2018MB. Device memory is insufficient to use tactic. +[12/28/2023-18:58:04] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4183 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:58:04] [W] [TRT] Tactic Device request: 4183MB Available: 2017MB. Device memory is insufficient to use tactic. +[12/28/2023-18:58:04] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4183 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:58:08] [W] [TRT] Tactic Device request: 4186MB Available: 2017MB. Device memory is insufficient to use tactic. +[12/28/2023-18:58:08] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4186 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:58:08] [W] [TRT] Tactic Device request: 4186MB Available: 2015MB. Device memory is insufficient to use tactic. +[12/28/2023-18:58:08] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4186 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-18:58:09] [W] [TRT] Tactic Device request: 4186MB Available: 2016MB. Device memory is insufficient to use tactic. +[12/28/2023-18:58:09] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4186 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:58:13] [W] [TRT] Tactic Device request: 4182MB Available: 2016MB. Device memory is insufficient to use tactic. +[12/28/2023-18:58:13] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4182 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-18:58:13] [W] [TRT] Tactic Device request: 4182MB Available: 2015MB. Device memory is insufficient to use tactic. +[12/28/2023-18:58:13] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4182 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:10:51] [W] [TRT] Tactic Device request: 4764MB Available: 1955MB. Device memory is insufficient to use tactic. +[12/28/2023-19:10:51] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4764 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:10:52] [W] [TRT] Tactic Device request: 4764MB Available: 1953MB. Device memory is insufficient to use tactic. +[12/28/2023-19:10:52] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4764 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:10:52] [W] [TRT] Tactic Device request: 4764MB Available: 1953MB. Device memory is insufficient to use tactic. 
+[12/28/2023-19:10:52] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4764 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:10:53] [W] [TRT] Tactic Device request: 4761MB Available: 1954MB. Device memory is insufficient to use tactic. +[12/28/2023-19:10:53] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4761 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:10:53] [W] [TRT] Tactic Device request: 4761MB Available: 1954MB. Device memory is insufficient to use tactic. +[12/28/2023-19:10:53] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 4761 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:11:02] [W] [TRT] Tactic Device request: 4244MB Available: 1769MB. Device memory is insufficient to use tactic. +[12/28/2023-19:11:02] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4244 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:11:03] [W] [TRT] Tactic Device request: 4244MB Available: 1768MB. Device memory is insufficient to use tactic. +[12/28/2023-19:11:03] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4244 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:11:04] [W] [TRT] Tactic Device request: 4244MB Available: 1775MB. Device memory is insufficient to use tactic. +[12/28/2023-19:11:04] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4244 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-19:11:09] [W] [TRT] Tactic Device request: 4240MB Available: 1773MB. Device memory is insufficient to use tactic. +[12/28/2023-19:11:09] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4240 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:11:09] [W] [TRT] Tactic Device request: 4240MB Available: 1771MB. Device memory is insufficient to use tactic. +[12/28/2023-19:11:09] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4240 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:11:14] [W] [TRT] Tactic Device request: 4241MB Available: 1774MB. Device memory is insufficient to use tactic. +[12/28/2023-19:11:14] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4241 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:11:15] [W] [TRT] Tactic Device request: 4241MB Available: 1772MB. Device memory is insufficient to use tactic. +[12/28/2023-19:11:15] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4241 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:11:16] [W] [TRT] Tactic Device request: 4241MB Available: 1770MB. Device memory is insufficient to use tactic. +[12/28/2023-19:11:16] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4241 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:11:21] [W] [TRT] Tactic Device request: 4239MB Available: 1769MB. Device memory is insufficient to use tactic. 
+[12/28/2023-19:11:21] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4239 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:11:22] [W] [TRT] Tactic Device request: 4239MB Available: 1768MB. Device memory is insufficient to use tactic. +[12/28/2023-19:11:22] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4239 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:25:07] [W] [TRT] Tactic Device request: 1638MB Available: 1546MB. Device memory is insufficient to use tactic. +[12/28/2023-19:25:07] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 1638 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:25:07] [W] [TRT] Tactic Device request: 1638MB Available: 1546MB. Device memory is insufficient to use tactic. +[12/28/2023-19:25:07] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 1638 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:25:08] [W] [TRT] Tactic Device request: 1637MB Available: 1557MB. Device memory is insufficient to use tactic. +[12/28/2023-19:25:08] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 1637 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:25:08] [W] [TRT] Tactic Device request: 1637MB Available: 1557MB. Device memory is insufficient to use tactic. +[12/28/2023-19:25:08] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 1637 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-19:25:10] [W] [TRT] Tactic Device request: 3270MB Available: 1560MB. Device memory is insufficient to use tactic. +[12/28/2023-19:25:10] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3270 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:25:10] [W] [TRT] Tactic Device request: 3270MB Available: 1560MB. Device memory is insufficient to use tactic. +[12/28/2023-19:25:10] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3270 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:25:10] [W] [TRT] Tactic Device request: 3270MB Available: 1560MB. Device memory is insufficient to use tactic. +[12/28/2023-19:25:10] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3270 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:25:12] [W] [TRT] Tactic Device request: 3269MB Available: 1558MB. Device memory is insufficient to use tactic. +[12/28/2023-19:25:12] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3269 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:25:12] [W] [TRT] Tactic Device request: 3269MB Available: 1558MB. Device memory is insufficient to use tactic. +[12/28/2023-19:25:12] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3269 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:25:15] [W] [TRT] Tactic Device request: 4377MB Available: 1563MB. Device memory is insufficient to use tactic. 
+[12/28/2023-19:25:15] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4377 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:25:15] [W] [TRT] Tactic Device request: 2185MB Available: 1563MB. Device memory is insufficient to use tactic. +[12/28/2023-19:25:15] [W] [TRT] Skipping tactic 4 due to insufficient memory on requested size of 2185 detected for tactic 0x0000000000000005. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:25:16] [W] [TRT] Tactic Device request: 4377MB Available: 1563MB. Device memory is insufficient to use tactic. +[12/28/2023-19:25:16] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4377 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:25:16] [W] [TRT] Tactic Device request: 2185MB Available: 1563MB. Device memory is insufficient to use tactic. +[12/28/2023-19:25:16] [W] [TRT] Skipping tactic 10 due to insufficient memory on requested size of 2185 detected for tactic 0x000000000000003d. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:25:16] [W] [TRT] Tactic Device request: 4377MB Available: 1561MB. Device memory is insufficient to use tactic. +[12/28/2023-19:25:16] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4377 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:25:16] [W] [TRT] Tactic Device request: 2185MB Available: 1561MB. Device memory is insufficient to use tactic. +[12/28/2023-19:25:16] [W] [TRT] Skipping tactic 16 due to insufficient memory on requested size of 2185 detected for tactic 0x0000000000000075. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-19:25:22] [W] [TRT] Tactic Device request: 4375MB Available: 1748MB. Device memory is insufficient to use tactic. +[12/28/2023-19:25:22] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4375 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:25:22] [W] [TRT] Tactic Device request: 2183MB Available: 1748MB. Device memory is insufficient to use tactic. +[12/28/2023-19:25:22] [W] [TRT] Skipping tactic 4 due to insufficient memory on requested size of 2183 detected for tactic 0x0000000000000005. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:25:22] [W] [TRT] Tactic Device request: 4375MB Available: 1748MB. Device memory is insufficient to use tactic. +[12/28/2023-19:25:22] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4375 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:25:22] [W] [TRT] Tactic Device request: 2183MB Available: 1749MB. Device memory is insufficient to use tactic. +[12/28/2023-19:25:22] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2183 detected for tactic 0x000000000000003d. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:25:29] [W] [TRT] Tactic Device request: 4376MB Available: 1746MB. Device memory is insufficient to use tactic. +[12/28/2023-19:25:29] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4376 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:25:29] [W] [TRT] Tactic Device request: 2184MB Available: 1746MB. Device memory is insufficient to use tactic. 
+[12/28/2023-19:25:29] [W] [TRT] Skipping tactic 4 due to insufficient memory on requested size of 2184 detected for tactic 0x0000000000000005. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:25:29] [W] [TRT] Tactic Device request: 4376MB Available: 1746MB. Device memory is insufficient to use tactic. +[12/28/2023-19:25:29] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4376 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:25:29] [W] [TRT] Tactic Device request: 2184MB Available: 1746MB. Device memory is insufficient to use tactic. +[12/28/2023-19:25:29] [W] [TRT] Skipping tactic 10 due to insufficient memory on requested size of 2184 detected for tactic 0x000000000000003d. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:25:30] [W] [TRT] Tactic Device request: 4376MB Available: 1746MB. Device memory is insufficient to use tactic. +[12/28/2023-19:25:30] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4376 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:25:30] [W] [TRT] Tactic Device request: 2184MB Available: 1746MB. Device memory is insufficient to use tactic. +[12/28/2023-19:25:30] [W] [TRT] Skipping tactic 16 due to insufficient memory on requested size of 2184 detected for tactic 0x0000000000000075. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:25:36] [W] [TRT] Tactic Device request: 4375MB Available: 1750MB. Device memory is insufficient to use tactic. +[12/28/2023-19:25:36] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4375 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-19:25:36] [W] [TRT] Tactic Device request: 2183MB Available: 1750MB. Device memory is insufficient to use tactic. +[12/28/2023-19:25:36] [W] [TRT] Skipping tactic 4 due to insufficient memory on requested size of 2183 detected for tactic 0x0000000000000005. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:25:36] [W] [TRT] Tactic Device request: 4375MB Available: 1750MB. Device memory is insufficient to use tactic. +[12/28/2023-19:25:36] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4375 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:25:36] [W] [TRT] Tactic Device request: 2183MB Available: 1750MB. Device memory is insufficient to use tactic. +[12/28/2023-19:25:36] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2183 detected for tactic 0x000000000000003d. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-19:26:56] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. +[12/28/2023-19:26:56] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/28/2023-19:26:56] [W] * GPU compute time is unstable, with coefficient of variance = 6.77541%. +[12/28/2023-19:26:56] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. 
diff --git a/yolo_nas_pose_l_fp16.onnx.best.engine.log b/yolo_nas_pose_l_fp16.onnx.best.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..a8bbdf0a49c77689f97d77e121d023e690d260d9 --- /dev/null +++ b/yolo_nas_pose_l_fp16.onnx.best.engine.log @@ -0,0 +1,331 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_l_fp16.onnx --best --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_l_fp16.onnx.best.engine +[12/28/2023-17:15:16] [I] === Model Options === +[12/28/2023-17:15:16] [I] Format: ONNX +[12/28/2023-17:15:16] [I] Model: yolo_nas_pose_l_fp16.onnx +[12/28/2023-17:15:16] [I] Output: +[12/28/2023-17:15:16] [I] === Build Options === +[12/28/2023-17:15:16] [I] Max batch: explicit batch +[12/28/2023-17:15:16] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-17:15:16] [I] minTiming: 1 +[12/28/2023-17:15:16] [I] avgTiming: 8 +[12/28/2023-17:15:16] [I] Precision: FP32+FP16+INT8 +[12/28/2023-17:15:16] [I] LayerPrecisions: +[12/28/2023-17:15:16] [I] Calibration: Dynamic +[12/28/2023-17:15:16] [I] Refit: Disabled +[12/28/2023-17:15:16] [I] Sparsity: Disabled +[12/28/2023-17:15:16] [I] Safe mode: Disabled +[12/28/2023-17:15:16] [I] DirectIO mode: Disabled +[12/28/2023-17:15:16] [I] Restricted mode: Disabled +[12/28/2023-17:15:16] [I] Build only: Disabled +[12/28/2023-17:15:16] [I] Save engine: yolo_nas_pose_l_fp16.onnx.best.engine +[12/28/2023-17:15:16] [I] Load engine: +[12/28/2023-17:15:16] [I] Profiling verbosity: 0 +[12/28/2023-17:15:16] [I] Tactic sources: Using default tactic sources +[12/28/2023-17:15:16] [I] timingCacheMode: local +[12/28/2023-17:15:16] [I] timingCacheFile: +[12/28/2023-17:15:16] [I] Heuristic: Disabled +[12/28/2023-17:15:16] [I] Preview Features: Use default preview flags. 
+[12/28/2023-17:15:16] [I] Input(s)s format: fp32:CHW +[12/28/2023-17:15:16] [I] Output(s)s format: fp32:CHW +[12/28/2023-17:15:16] [I] Input build shapes: model +[12/28/2023-17:15:16] [I] Input calibration shapes: model +[12/28/2023-17:15:16] [I] === System Options === +[12/28/2023-17:15:16] [I] Device: 0 +[12/28/2023-17:15:16] [I] DLACore: +[12/28/2023-17:15:16] [I] Plugins: +[12/28/2023-17:15:16] [I] === Inference Options === +[12/28/2023-17:15:16] [I] Batch: Explicit +[12/28/2023-17:15:16] [I] Input inference shapes: model +[12/28/2023-17:15:16] [I] Iterations: 10 +[12/28/2023-17:15:16] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-17:15:16] [I] Sleep time: 0ms +[12/28/2023-17:15:16] [I] Idle time: 0ms +[12/28/2023-17:15:16] [I] Streams: 1 +[12/28/2023-17:15:16] [I] ExposeDMA: Disabled +[12/28/2023-17:15:16] [I] Data transfers: Enabled +[12/28/2023-17:15:16] [I] Spin-wait: Disabled +[12/28/2023-17:15:16] [I] Multithreading: Disabled +[12/28/2023-17:15:16] [I] CUDA Graph: Disabled +[12/28/2023-17:15:16] [I] Separate profiling: Disabled +[12/28/2023-17:15:16] [I] Time Deserialize: Disabled +[12/28/2023-17:15:16] [I] Time Refit: Disabled +[12/28/2023-17:15:16] [I] NVTX verbosity: 0 +[12/28/2023-17:15:16] [I] Persistent Cache Ratio: 0 +[12/28/2023-17:15:16] [I] Inputs: +[12/28/2023-17:15:16] [I] === Reporting Options === +[12/28/2023-17:15:16] [I] Verbose: Disabled +[12/28/2023-17:15:16] [I] Averages: 100 inferences +[12/28/2023-17:15:16] [I] Percentiles: 90,95,99 +[12/28/2023-17:15:16] [I] Dump refittable layers:Disabled +[12/28/2023-17:15:16] [I] Dump output: Disabled +[12/28/2023-17:15:16] [I] Profile: Disabled +[12/28/2023-17:15:16] [I] Export timing to JSON file: +[12/28/2023-17:15:16] [I] Export output to JSON file: +[12/28/2023-17:15:16] [I] Export profile to JSON file: +[12/28/2023-17:15:16] [I] +[12/28/2023-17:15:16] [I] === Device Information === +[12/28/2023-17:15:16] [I] Selected Device: Orin +[12/28/2023-17:15:16] [I] Compute Capability: 8.7 
+[12/28/2023-17:15:16] [I] SMs: 8 +[12/28/2023-17:15:16] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-17:15:16] [I] Device Global Memory: 7471 MiB +[12/28/2023-17:15:16] [I] Shared Memory per SM: 164 KiB +[12/28/2023-17:15:16] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-17:15:16] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-17:15:16] [I] +[12/28/2023-17:15:16] [I] TensorRT version: 8.5.2 +[12/28/2023-17:15:21] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 2974 (MiB) +[12/28/2023-17:15:25] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +286, now: CPU 574, GPU 3278 (MiB) +[12/28/2023-17:15:25] [I] Start parsing network model +[12/28/2023-17:15:27] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-17:15:27] [I] [TRT] Input filename: yolo_nas_pose_l_fp16.onnx +[12/28/2023-17:15:27] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-17:15:27] [I] [TRT] Opset version: 17 +[12/28/2023-17:15:27] [I] [TRT] Producer name: pytorch +[12/28/2023-17:15:27] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-17:15:27] [I] [TRT] Domain: +[12/28/2023-17:15:27] [I] [TRT] Model version: 0 +[12/28/2023-17:15:27] [I] [TRT] Doc string: +[12/28/2023-17:15:27] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-17:15:27] [I] Finish parsing network model +[12/28/2023-17:15:28] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/28/2023-17:15:28] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 458) [Constant] +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 459) [Constant] +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 460) 
[Constant] +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stem/conv/rbr_reparam/Conv + /model/backbone/stem/conv/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/downsample/rbr_reparam/Conv + /model/backbone/stage1/downsample/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv2/conv/Conv + /model/backbone/stage1/blocks/conv2/act/Relu || /model/backbone/stage1/blocks/conv1/conv/Conv + /model/backbone/stage1/blocks/conv1/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 15) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 23) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] COPY: 
/model/backbone/stage1/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv3/conv/Conv + /model/backbone/stage1/blocks/conv3/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_skip2/conv/Conv + /model/neck/neck2/reduce_skip2/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/downsample/rbr_reparam/Conv + /model/backbone/stage2/downsample/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/downsample/conv/Conv + /model/neck/neck2/downsample/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv2/conv/Conv + /model/backbone/stage2/blocks/conv2/act/Relu || /model/backbone/stage2/blocks/conv1/conv/Conv + /model/backbone/stage2/blocks/conv1/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 44) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + 
/model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 52) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 60) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv3/conv/Conv + /model/backbone/stage2/blocks/conv3/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip2/conv/Conv + 
/model/neck/neck1/reduce_skip2/act/Relu || /model/neck/neck2/reduce_skip1/conv/Conv + /model/neck/neck2/reduce_skip1/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/downsample/rbr_reparam/Conv + /model/backbone/stage3/downsample/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/downsample/conv/Conv + /model/neck/neck1/downsample/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv2/conv/Conv + /model/backbone/stage3/blocks/conv2/act/Relu || /model/backbone/stage3/blocks/conv1/conv/Conv + /model/backbone/stage3/blocks/conv1/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 83) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 91) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 99) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 107) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.4.alpha + (Unnamed Layer* 
115) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Add) +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add_output_0 copy +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add_output_0 copy +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv3/conv/Conv + /model/backbone/stage3/blocks/conv3/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip1/conv/Conv + /model/neck/neck1/reduce_skip1/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/downsample/rbr_reparam/Conv + /model/backbone/stage4/downsample/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv2/conv/Conv + /model/backbone/stage4/blocks/conv2/act/Relu || /model/backbone/stage4/blocks/conv1/conv/Conv + /model/backbone/stage4/blocks/conv1/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu 
+[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 134) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 142) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv3/conv/Conv + /model/backbone/stage4/blocks/conv3/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv1/conv/Conv + /model/backbone/context_module/cv1/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/cv1/act/Relu_output_0 copy 
+[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv2/conv/Conv + /model/backbone/context_module/cv2/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/conv/conv/Conv + /model/neck/neck1/conv/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck1/upsample/ConvTranspose +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_after_concat/conv/Conv + /model/neck/neck1/reduce_after_concat/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv2/conv/Conv + /model/neck/neck1/blocks/conv2/act/Relu || /model/neck/neck1/blocks/conv1/conv/Conv + /model/neck/neck1/blocks/conv1/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 171) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 179) [Shuffle] + 
/model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.2.alpha + (Unnamed Layer* 187) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.3.alpha + (Unnamed Layer* 195) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv3/conv/Conv + /model/neck/neck1/blocks/conv3/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/conv/conv/Conv + /model/neck/neck2/conv/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck2/upsample/ConvTranspose +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/reduce_skip1/act/Relu_output_0 copy 
+[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_after_concat/conv/Conv + /model/neck/neck2/reduce_after_concat/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv2/conv/Conv + /model/neck/neck2/blocks/conv2/act/Relu || /model/neck/neck2/blocks/conv1/conv/Conv + /model/neck/neck2/blocks/conv1/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 216) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 224) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 232) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.3.alpha + (Unnamed Layer* 240) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv3/conv/Conv + /model/neck/neck2/blocks/conv3/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/bbox_stem/seq/conv/Conv + /model/heads/head1/bbox_stem/seq/act/Relu || /model/heads/head1/pose_stem/seq/conv/Conv + /model/heads/head1/pose_stem/seq/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/conv/conv/Conv + /model/neck/neck3/conv/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head1/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head1/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv2/conv/Conv + /model/neck/neck3/blocks/conv2/act/Relu || /model/neck/neck3/blocks/conv1/conv/Conv + /model/neck/neck3/blocks/conv1/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/cls_pred/Conv +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_pred/Conv +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_pred/Conv +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 271) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + 
/model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 294) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv1/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv2/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 302) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv1/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv2/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 310) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv3/conv/Conv + /model/neck/neck3/blocks/conv3/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_stem/seq/conv/Conv + /model/heads/head2/pose_stem/seq/act/Relu || /model/heads/head2/bbox_stem/seq/conv/Conv 
+ /model/heads/head2/bbox_stem/seq/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/conv/conv/Conv + /model/neck/neck4/conv/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head2/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head2/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv2/conv/Conv + /model/neck/neck4/blocks/conv2/act/Relu || /model/neck/neck4/blocks/conv1/conv/Conv + /model/neck/neck4/blocks/conv1/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/cls_pred/Conv +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_pred/Conv +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_pred/Conv +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 341) [Shuffle] + 
/model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 364) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.2.alpha + (Unnamed Layer* 372) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv1/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv2/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.3.alpha + (Unnamed Layer* 380) [Shuffle] + 
/model/neck/neck4/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv3/conv/Conv + /model/neck/neck4/blocks/conv3/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/bbox_stem/seq/conv/Conv + /model/heads/head3/bbox_stem/seq/act/Relu || /model/heads/head3/pose_stem/seq/conv/Conv + /model/heads/head3/pose_stem/seq/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head3/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head3/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/cls_pred/Conv +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_pred/Conv +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.2/seq/act/Relu +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_pred/Conv +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] MYELIN: 
{ForeignNode[/model/heads/head1/Slice_1...cast_boxes_to_fp32]} +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] NMS: batched_nms_26 +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 462) [NMS]_1_output[DevicetoShapeHostCopy] +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/28/2023-17:15:28] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] +[12/28/2023-17:15:40] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +308, now: CPU 1231, GPU 3787 (MiB) +[12/28/2023-17:15:42] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +83, GPU +72, now: CPU 1314, GPU 3859 (MiB) +[12/28/2023-17:15:42] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored. +[12/28/2023-19:26:12] [I] [TRT] Total Activation Memory: 7966500352 +[12/28/2023-19:26:12] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/28/2023-19:26:29] [I] [TRT] Total Host Persistent Memory: 330912 +[12/28/2023-19:26:29] [I] [TRT] Total Device Persistent Memory: 656384 +[12/28/2023-19:26:29] [I] [TRT] Total Scratch Memory: 134217728 +[12/28/2023-19:26:29] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 79 MiB, GPU 2398 MiB +[12/28/2023-19:26:29] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 176 steps to complete. +[12/28/2023-19:26:29] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 96.1396ms to assign 14 blocks to 176 nodes requiring 149010944 bytes. +[12/28/2023-19:26:29] [I] [TRT] Total Activation Memory: 149010944 +[12/28/2023-19:26:38] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU -17, now: CPU 1718, GPU 5578 (MiB) +[12/28/2023-19:26:38] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +52, GPU +64, now: CPU 52, GPU 64 (MiB) +[12/28/2023-19:26:39] [I] Engine built in 7882.87 sec. 
+[12/28/2023-19:26:40] [I] [TRT] Loaded engine size: 54 MiB +[12/28/2023-19:26:41] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1299, GPU 5193 (MiB) +[12/28/2023-19:26:41] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +52, now: CPU 0, GPU 52 (MiB) +[12/28/2023-19:26:41] [I] Engine deserialized in 0.300217 sec. +[12/28/2023-19:26:41] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1300, GPU 5193 (MiB) +[12/28/2023-19:26:41] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +143, now: CPU 0, GPU 195 (MiB) +[12/28/2023-19:26:41] [I] Setting persistentCacheLimit to 0 bytes. +[12/28/2023-19:26:41] [I] Using random values for input onnx::Cast_0 +[12/28/2023-19:26:41] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/28/2023-19:26:41] [I] Using random values for output graph2_flat_predictions +[12/28/2023-19:26:41] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/28/2023-19:26:41] [I] Starting inference +[12/28/2023-19:26:56] [I] Warmup completed 6 queries over 200 ms +[12/28/2023-19:26:56] [I] Timing trace has 703 queries over 15.0461 s +[12/28/2023-19:26:56] [I] +[12/28/2023-19:26:56] [I] === Trace details === +[12/28/2023-19:26:56] [I] Trace averages of 100 runs: +[12/28/2023-19:26:56] [I] Average on 100 runs - GPU latency: 20.9395 ms - Host latency: 21.0535 ms (enqueue 21.0062 ms) +[12/28/2023-19:26:56] [I] Average on 100 runs - GPU latency: 21.1664 ms - Host latency: 21.2792 ms (enqueue 21.2275 ms) +[12/28/2023-19:26:56] [I] Average on 100 runs - GPU latency: 21.3262 ms - Host latency: 21.4444 ms (enqueue 21.3939 ms) +[12/28/2023-19:26:56] [I] Average on 100 runs - GPU latency: 21.3667 ms - Host latency: 21.4831 ms (enqueue 21.4306 ms) +[12/28/2023-19:26:56] [I] Average on 100 runs - GPU latency: 21.2193 ms - Host latency: 21.3339 ms (enqueue 21.2934 ms) +[12/28/2023-19:26:56] [I] 
Average on 100 runs - GPU latency: 21.8831 ms - Host latency: 22.0036 ms (enqueue 21.9394 ms) +[12/28/2023-19:26:56] [I] Average on 100 runs - GPU latency: 20.8839 ms - Host latency: 20.9973 ms (enqueue 20.9568 ms) +[12/28/2023-19:26:56] [I] +[12/28/2023-19:26:56] [I] === Performance summary === +[12/28/2023-19:26:56] [I] Throughput: 46.7231 qps +[12/28/2023-19:26:56] [I] Latency: min = 19.8525 ms, max = 30.6865 ms, mean = 21.368 ms, median = 21.1562 ms, percentile(90%) = 22.2715 ms, percentile(95%) = 23.1201 ms, percentile(99%) = 28.8721 ms +[12/28/2023-19:26:56] [I] Enqueue Time: min = 19.8208 ms, max = 30.626 ms, mean = 21.3184 ms, median = 21.1201 ms, percentile(90%) = 22.2363 ms, percentile(95%) = 23.2373 ms, percentile(99%) = 28.8086 ms +[12/28/2023-19:26:56] [I] H2D Latency: min = 0.0810547 ms, max = 0.134766 ms, mean = 0.0972954 ms, median = 0.0981445 ms, percentile(90%) = 0.100098 ms, percentile(95%) = 0.100586 ms, percentile(99%) = 0.111328 ms +[12/28/2023-19:26:56] [I] GPU Compute Time: min = 19.7407 ms, max = 30.5723 ms, mean = 21.2522 ms, median = 21.042 ms, percentile(90%) = 22.1533 ms, percentile(95%) = 23.0203 ms, percentile(99%) = 28.7578 ms +[12/28/2023-19:26:56] [I] D2H Latency: min = 0.00390625 ms, max = 0.0776367 ms, mean = 0.0184049 ms, median = 0.0155029 ms, percentile(90%) = 0.0283203 ms, percentile(95%) = 0.0302734 ms, percentile(99%) = 0.046875 ms +[12/28/2023-19:26:56] [I] Total Host Walltime: 15.0461 s +[12/28/2023-19:26:56] [I] Total GPU Compute Time: 14.9403 s +[12/28/2023-19:26:56] [I] Explanations of the performance metrics are printed in the verbose logs. 
+[12/28/2023-19:26:56] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_l_fp16.onnx --best --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_l_fp16.onnx.best.engine diff --git a/yolo_nas_pose_l_fp16.onnx.engine.err b/yolo_nas_pose_l_fp16.onnx.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..19a8024bba06bc35c5f517ede9634c66d0ceecc3 --- /dev/null +++ b/yolo_nas_pose_l_fp16.onnx.engine.err @@ -0,0 +1,8 @@ +[12/28/2023-16:09:44] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/28/2023-16:09:44] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-16:09:45] [E] Error[4]: [network.cpp::validate::2925] Error Code 4: Internal Error (fp16 precision has been set for a layer or layer output, but fp16 is not configured in the builder) +[12/28/2023-16:09:45] [E] Error[2]: [builder.cpp::buildSerializedNetwork::751] Error Code 2: Internal Error (Assertion engine != nullptr failed. ) +[12/28/2023-16:09:45] [E] Engine could not be created from network +[12/28/2023-16:09:45] [E] Building engine failed +[12/28/2023-16:09:45] [E] Failed to create engine from model or file. 
+[12/28/2023-16:09:45] [E] Engine set up failed diff --git a/yolo_nas_pose_l_fp16.onnx.engine.log b/yolo_nas_pose_l_fp16.onnx.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..556e22ba2899951d50996f7e85b8d21df1488c24 --- /dev/null +++ b/yolo_nas_pose_l_fp16.onnx.engine.log @@ -0,0 +1,91 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_l_fp16.onnx --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_l_fp16.onnx.engine +[12/28/2023-16:09:34] [I] === Model Options === +[12/28/2023-16:09:34] [I] Format: ONNX +[12/28/2023-16:09:34] [I] Model: yolo_nas_pose_l_fp16.onnx +[12/28/2023-16:09:34] [I] Output: +[12/28/2023-16:09:34] [I] === Build Options === +[12/28/2023-16:09:34] [I] Max batch: explicit batch +[12/28/2023-16:09:34] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-16:09:34] [I] minTiming: 1 +[12/28/2023-16:09:34] [I] avgTiming: 8 +[12/28/2023-16:09:34] [I] Precision: FP32 +[12/28/2023-16:09:34] [I] LayerPrecisions: +[12/28/2023-16:09:34] [I] Calibration: +[12/28/2023-16:09:34] [I] Refit: Disabled +[12/28/2023-16:09:34] [I] Sparsity: Disabled +[12/28/2023-16:09:34] [I] Safe mode: Disabled +[12/28/2023-16:09:34] [I] DirectIO mode: Disabled +[12/28/2023-16:09:34] [I] Restricted mode: Disabled +[12/28/2023-16:09:34] [I] Build only: Disabled +[12/28/2023-16:09:34] [I] Save engine: yolo_nas_pose_l_fp16.onnx.engine +[12/28/2023-16:09:34] [I] Load engine: +[12/28/2023-16:09:34] [I] Profiling verbosity: 0 +[12/28/2023-16:09:34] [I] Tactic sources: Using default tactic sources +[12/28/2023-16:09:34] [I] timingCacheMode: local +[12/28/2023-16:09:34] [I] timingCacheFile: +[12/28/2023-16:09:34] [I] Heuristic: Disabled +[12/28/2023-16:09:34] [I] Preview Features: Use default preview flags. 
+[12/28/2023-16:09:34] [I] Input(s)s format: fp32:CHW +[12/28/2023-16:09:34] [I] Output(s)s format: fp32:CHW +[12/28/2023-16:09:34] [I] Input build shapes: model +[12/28/2023-16:09:34] [I] Input calibration shapes: model +[12/28/2023-16:09:34] [I] === System Options === +[12/28/2023-16:09:34] [I] Device: 0 +[12/28/2023-16:09:34] [I] DLACore: +[12/28/2023-16:09:34] [I] Plugins: +[12/28/2023-16:09:34] [I] === Inference Options === +[12/28/2023-16:09:34] [I] Batch: Explicit +[12/28/2023-16:09:34] [I] Input inference shapes: model +[12/28/2023-16:09:34] [I] Iterations: 10 +[12/28/2023-16:09:34] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-16:09:34] [I] Sleep time: 0ms +[12/28/2023-16:09:34] [I] Idle time: 0ms +[12/28/2023-16:09:34] [I] Streams: 1 +[12/28/2023-16:09:34] [I] ExposeDMA: Disabled +[12/28/2023-16:09:34] [I] Data transfers: Enabled +[12/28/2023-16:09:34] [I] Spin-wait: Disabled +[12/28/2023-16:09:34] [I] Multithreading: Disabled +[12/28/2023-16:09:34] [I] CUDA Graph: Disabled +[12/28/2023-16:09:34] [I] Separate profiling: Disabled +[12/28/2023-16:09:34] [I] Time Deserialize: Disabled +[12/28/2023-16:09:34] [I] Time Refit: Disabled +[12/28/2023-16:09:34] [I] NVTX verbosity: 0 +[12/28/2023-16:09:34] [I] Persistent Cache Ratio: 0 +[12/28/2023-16:09:34] [I] Inputs: +[12/28/2023-16:09:34] [I] === Reporting Options === +[12/28/2023-16:09:34] [I] Verbose: Disabled +[12/28/2023-16:09:34] [I] Averages: 100 inferences +[12/28/2023-16:09:34] [I] Percentiles: 90,95,99 +[12/28/2023-16:09:34] [I] Dump refittable layers:Disabled +[12/28/2023-16:09:34] [I] Dump output: Disabled +[12/28/2023-16:09:34] [I] Profile: Disabled +[12/28/2023-16:09:34] [I] Export timing to JSON file: +[12/28/2023-16:09:34] [I] Export output to JSON file: +[12/28/2023-16:09:34] [I] Export profile to JSON file: +[12/28/2023-16:09:34] [I] +[12/28/2023-16:09:34] [I] === Device Information === +[12/28/2023-16:09:34] [I] Selected Device: Orin +[12/28/2023-16:09:34] [I] Compute Capability: 8.7 
+[12/28/2023-16:09:34] [I] SMs: 8 +[12/28/2023-16:09:34] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-16:09:34] [I] Device Global Memory: 7471 MiB +[12/28/2023-16:09:34] [I] Shared Memory per SM: 164 KiB +[12/28/2023-16:09:34] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-16:09:34] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-16:09:34] [I] +[12/28/2023-16:09:34] [I] TensorRT version: 8.5.2 +[12/28/2023-16:09:39] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 2974 (MiB) +[12/28/2023-16:09:43] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +283, now: CPU 574, GPU 3281 (MiB) +[12/28/2023-16:09:43] [I] Start parsing network model +[12/28/2023-16:09:44] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-16:09:44] [I] [TRT] Input filename: yolo_nas_pose_l_fp16.onnx +[12/28/2023-16:09:44] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-16:09:44] [I] [TRT] Opset version: 17 +[12/28/2023-16:09:44] [I] [TRT] Producer name: pytorch +[12/28/2023-16:09:44] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-16:09:44] [I] [TRT] Domain: +[12/28/2023-16:09:44] [I] [TRT] Model version: 0 +[12/28/2023-16:09:44] [I] [TRT] Doc string: +[12/28/2023-16:09:44] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-16:09:45] [I] Finish parsing network model +&&&& FAILED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_l_fp16.onnx --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_l_fp16.onnx.engine diff --git a/yolo_nas_pose_l_fp16.onnx.fp16.engine b/yolo_nas_pose_l_fp16.onnx.fp16.engine new file mode 100644 index 0000000000000000000000000000000000000000..129d6839ee6b8f573fff8a5f120196b5a26962ab --- /dev/null +++ b/yolo_nas_pose_l_fp16.onnx.fp16.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:467ff31eefb68768b955df2011c89f375ab57c09a32840d671def8b3ad75de34 +size 110752021 diff --git 
a/yolo_nas_pose_l_fp16.onnx.fp16.engine.err b/yolo_nas_pose_l_fp16.onnx.fp16.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..130cad38db010e64ad51116bbcad6ccd21cc100a --- /dev/null +++ b/yolo_nas_pose_l_fp16.onnx.fp16.engine.err @@ -0,0 +1,423 @@ +[12/28/2023-16:09:49] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/28/2023-16:09:49] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-16:15:22] [W] [TRT] Tactic Device request: 4720MB Available: 3156MB. Device memory is insufficient to use tactic. +[12/28/2023-16:15:22] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4720 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:15:23] [W] [TRT] Tactic Device request: 4720MB Available: 3155MB. Device memory is insufficient to use tactic. +[12/28/2023-16:15:23] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4720 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:15:23] [W] [TRT] Tactic Device request: 4720MB Available: 3155MB. Device memory is insufficient to use tactic. +[12/28/2023-16:15:23] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4720 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:15:29] [W] [TRT] Tactic Device request: 4706MB Available: 3154MB. Device memory is insufficient to use tactic. +[12/28/2023-16:15:29] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4706 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-16:15:29] [W] [TRT] Tactic Device request: 4706MB Available: 3155MB. Device memory is insufficient to use tactic. +[12/28/2023-16:15:29] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4706 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:15:32] [W] [TRT] Tactic Device request: 4711MB Available: 3148MB. Device memory is insufficient to use tactic. +[12/28/2023-16:15:32] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:15:32] [W] [TRT] Tactic Device request: 4711MB Available: 3147MB. Device memory is insufficient to use tactic. +[12/28/2023-16:15:32] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4711 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:15:33] [W] [TRT] Tactic Device request: 4711MB Available: 3146MB. Device memory is insufficient to use tactic. +[12/28/2023-16:15:33] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:15:38] [W] [TRT] Tactic Device request: 4702MB Available: 3147MB. Device memory is insufficient to use tactic. +[12/28/2023-16:15:38] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4702 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:15:38] [W] [TRT] Tactic Device request: 4702MB Available: 3147MB. Device memory is insufficient to use tactic. 
+[12/28/2023-16:15:39] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4702 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:20:16] [W] [TRT] Tactic Device request: 4711MB Available: 3039MB. Device memory is insufficient to use tactic. +[12/28/2023-16:20:16] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:20:16] [W] [TRT] Tactic Device request: 4711MB Available: 3039MB. Device memory is insufficient to use tactic. +[12/28/2023-16:20:16] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4711 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:20:16] [W] [TRT] Tactic Device request: 4711MB Available: 3038MB. Device memory is insufficient to use tactic. +[12/28/2023-16:20:16] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:20:18] [W] [TRT] Tactic Device request: 4701MB Available: 3039MB. Device memory is insufficient to use tactic. +[12/28/2023-16:20:18] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4701 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:20:18] [W] [TRT] Tactic Device request: 4701MB Available: 3039MB. Device memory is insufficient to use tactic. +[12/28/2023-16:20:18] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 4701 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-16:20:33] [W] [TRT] Tactic Device request: 6275MB Available: 3038MB. Device memory is insufficient to use tactic. +[12/28/2023-16:20:33] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6275 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:20:33] [W] [TRT] Tactic Device request: 6275MB Available: 3039MB. Device memory is insufficient to use tactic. +[12/28/2023-16:20:33] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6275 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:20:33] [W] [TRT] Tactic Device request: 6275MB Available: 3038MB. Device memory is insufficient to use tactic. +[12/28/2023-16:20:33] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6275 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:20:35] [W] [TRT] Tactic Device request: 6270MB Available: 3039MB. Device memory is insufficient to use tactic. +[12/28/2023-16:20:35] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6270 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:20:35] [W] [TRT] Tactic Device request: 6270MB Available: 3039MB. Device memory is insufficient to use tactic. +[12/28/2023-16:20:35] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 6270 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:27:09] [W] [TRT] Tactic Device request: 7056MB Available: 2339MB. Device memory is insufficient to use tactic. 
+[12/28/2023-16:27:09] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:27:09] [W] [TRT] Tactic Device request: 7056MB Available: 2340MB. Device memory is insufficient to use tactic. +[12/28/2023-16:27:09] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 7056 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:27:09] [W] [TRT] Tactic Device request: 7056MB Available: 2340MB. Device memory is insufficient to use tactic. +[12/28/2023-16:27:09] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:27:11] [W] [TRT] Tactic Device request: 7050MB Available: 2338MB. Device memory is insufficient to use tactic. +[12/28/2023-16:27:11] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7050 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:27:11] [W] [TRT] Tactic Device request: 7050MB Available: 2338MB. Device memory is insufficient to use tactic. +[12/28/2023-16:27:11] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 7050 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:27:27] [W] [TRT] Tactic Device request: 6354MB Available: 2340MB. Device memory is insufficient to use tactic. +[12/28/2023-16:27:27] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6354 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-16:27:27] [W] [TRT] Tactic Device request: 6354MB Available: 2338MB. Device memory is insufficient to use tactic. +[12/28/2023-16:27:28] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6354 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:27:28] [W] [TRT] Tactic Device request: 6354MB Available: 2339MB. Device memory is insufficient to use tactic. +[12/28/2023-16:27:28] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6354 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:27:29] [W] [TRT] Tactic Device request: 6351MB Available: 2341MB. Device memory is insufficient to use tactic. +[12/28/2023-16:27:29] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6351 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:27:29] [W] [TRT] Tactic Device request: 6351MB Available: 2341MB. Device memory is insufficient to use tactic. +[12/28/2023-16:27:29] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 6351 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:36:16] [W] [TRT] Tactic Device request: 2394MB Available: 2304MB. Device memory is insufficient to use tactic. +[12/28/2023-16:36:16] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:36:16] [W] [TRT] Tactic Device request: 2394MB Available: 2304MB. Device memory is insufficient to use tactic. 
+[12/28/2023-16:36:16] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2394 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:36:16] [W] [TRT] Tactic Device request: 2394MB Available: 2304MB. Device memory is insufficient to use tactic. +[12/28/2023-16:36:16] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:36:17] [W] [TRT] Tactic Device request: 2392MB Available: 2304MB. Device memory is insufficient to use tactic. +[12/28/2023-16:36:17] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:36:17] [W] [TRT] Tactic Device request: 2392MB Available: 2304MB. Device memory is insufficient to use tactic. +[12/28/2023-16:36:17] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2392 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:36:17] [W] [TRT] Tactic Device request: 2392MB Available: 2304MB. Device memory is insufficient to use tactic. +[12/28/2023-16:36:17] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:36:18] [W] [TRT] Tactic Device request: 2391MB Available: 2305MB. Device memory is insufficient to use tactic. +[12/28/2023-16:36:18] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2391 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-16:36:18] [W] [TRT] Tactic Device request: 2391MB Available: 2305MB. Device memory is insufficient to use tactic. +[12/28/2023-16:36:18] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2391 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:36:18] [W] [TRT] Tactic Device request: 2390MB Available: 2305MB. Device memory is insufficient to use tactic. +[12/28/2023-16:36:18] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2390 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:36:18] [W] [TRT] Tactic Device request: 2390MB Available: 2305MB. Device memory is insufficient to use tactic. +[12/28/2023-16:36:18] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2390 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:36:32] [W] [TRT] Tactic Device request: 6540MB Available: 2131MB. Device memory is insufficient to use tactic. +[12/28/2023-16:36:32] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6540 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:36:32] [W] [TRT] Tactic Device request: 6540MB Available: 2131MB. Device memory is insufficient to use tactic. +[12/28/2023-16:36:32] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6540 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:36:33] [W] [TRT] Tactic Device request: 6540MB Available: 2131MB. Device memory is insufficient to use tactic. 
+[12/28/2023-16:36:33] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6540 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:36:34] [W] [TRT] Tactic Device request: 6538MB Available: 2131MB. Device memory is insufficient to use tactic. +[12/28/2023-16:36:34] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6538 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:36:35] [W] [TRT] Tactic Device request: 6538MB Available: 2131MB. Device memory is insufficient to use tactic. +[12/28/2023-16:36:35] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 6538 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:36:36] [W] [TRT] Tactic Device request: 2191MB Available: 2131MB. Device memory is insufficient to use tactic. +[12/28/2023-16:36:36] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2191 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:36:37] [W] [TRT] Tactic Device request: 2191MB Available: 2131MB. Device memory is insufficient to use tactic. +[12/28/2023-16:36:37] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2191 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:36:38] [W] [TRT] Tactic Device request: 2191MB Available: 2131MB. Device memory is insufficient to use tactic. +[12/28/2023-16:36:38] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2191 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-16:36:42] [W] [TRT] Tactic Device request: 2190MB Available: 2131MB. Device memory is insufficient to use tactic. +[12/28/2023-16:36:42] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2190 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:36:43] [W] [TRT] Tactic Device request: 2190MB Available: 2131MB. Device memory is insufficient to use tactic. +[12/28/2023-16:36:43] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2190 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:36:45] [W] [TRT] Tactic Device request: 2190MB Available: 2130MB. Device memory is insufficient to use tactic. +[12/28/2023-16:36:45] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2190 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:36:46] [W] [TRT] Tactic Device request: 2190MB Available: 2129MB. Device memory is insufficient to use tactic. +[12/28/2023-16:36:46] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2190 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:36:47] [W] [TRT] Tactic Device request: 2190MB Available: 2127MB. Device memory is insufficient to use tactic. +[12/28/2023-16:36:47] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2190 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:36:51] [W] [TRT] Tactic Device request: 2190MB Available: 2127MB. Device memory is insufficient to use tactic. 
+[12/28/2023-16:36:51] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2190 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:36:52] [W] [TRT] Tactic Device request: 2190MB Available: 2126MB. Device memory is insufficient to use tactic. +[12/28/2023-16:36:52] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2190 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:41:41] [W] [TRT] Tactic Device request: 2457MB Available: 2417MB. Device memory is insufficient to use tactic. +[12/28/2023-16:41:41] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:41:42] [W] [TRT] Tactic Device request: 2457MB Available: 2417MB. Device memory is insufficient to use tactic. +[12/28/2023-16:41:42] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2457 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:41:42] [W] [TRT] Tactic Device request: 2457MB Available: 2417MB. Device memory is insufficient to use tactic. +[12/28/2023-16:41:42] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:41:43] [W] [TRT] Tactic Device request: 2456MB Available: 2418MB. Device memory is insufficient to use tactic. +[12/28/2023-16:41:43] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2456 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-16:41:43] [W] [TRT] Tactic Device request: 2456MB Available: 2418MB. Device memory is insufficient to use tactic. +[12/28/2023-16:41:43] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2456 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:42:04] [W] [TRT] Tactic Device request: 3587MB Available: 2216MB. Device memory is insufficient to use tactic. +[12/28/2023-16:42:04] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:42:04] [W] [TRT] Tactic Device request: 3587MB Available: 2216MB. Device memory is insufficient to use tactic. +[12/28/2023-16:42:04] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3587 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:42:04] [W] [TRT] Tactic Device request: 3587MB Available: 2217MB. Device memory is insufficient to use tactic. +[12/28/2023-16:42:04] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:42:06] [W] [TRT] Tactic Device request: 3585MB Available: 2233MB. Device memory is insufficient to use tactic. +[12/28/2023-16:42:06] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3585 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:42:06] [W] [TRT] Tactic Device request: 3585MB Available: 2233MB. Device memory is insufficient to use tactic. 
+[12/28/2023-16:42:06] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3585 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:49:44] [W] [TRT] Tactic Device request: 3556MB Available: 1928MB. Device memory is insufficient to use tactic. +[12/28/2023-16:49:44] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:49:44] [W] [TRT] Tactic Device request: 3556MB Available: 1928MB. Device memory is insufficient to use tactic. +[12/28/2023-16:49:44] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3556 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:49:44] [W] [TRT] Tactic Device request: 3556MB Available: 1928MB. Device memory is insufficient to use tactic. +[12/28/2023-16:49:44] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:49:45] [W] [TRT] Tactic Device request: 3551MB Available: 1929MB. Device memory is insufficient to use tactic. +[12/28/2023-16:49:45] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3551 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:49:45] [W] [TRT] Tactic Device request: 3551MB Available: 1929MB. Device memory is insufficient to use tactic. +[12/28/2023-16:49:45] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3551 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-16:49:46] [W] [TRT] Tactic Device request: 3140MB Available: 1930MB. Device memory is insufficient to use tactic. +[12/28/2023-16:49:46] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3140 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:49:46] [W] [TRT] Tactic Device request: 3140MB Available: 1929MB. Device memory is insufficient to use tactic. +[12/28/2023-16:49:46] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3140 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:49:46] [W] [TRT] Tactic Device request: 3140MB Available: 1929MB. Device memory is insufficient to use tactic. +[12/28/2023-16:49:46] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3140 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:49:47] [W] [TRT] Tactic Device request: 3136MB Available: 1929MB. Device memory is insufficient to use tactic. +[12/28/2023-16:49:48] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3136 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:49:48] [W] [TRT] Tactic Device request: 3136MB Available: 1929MB. Device memory is insufficient to use tactic. +[12/28/2023-16:49:48] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3136 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:56:57] [W] [TRT] Tactic Device request: 3161MB Available: 2194MB. Device memory is insufficient to use tactic. 
+[12/28/2023-16:56:57] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3161 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:56:57] [W] [TRT] Tactic Device request: 3161MB Available: 2194MB. Device memory is insufficient to use tactic. +[12/28/2023-16:56:57] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3161 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:56:57] [W] [TRT] Tactic Device request: 3161MB Available: 2194MB. Device memory is insufficient to use tactic. +[12/28/2023-16:56:57] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3161 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:56:58] [W] [TRT] Tactic Device request: 3156MB Available: 2195MB. Device memory is insufficient to use tactic. +[12/28/2023-16:56:58] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3156 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:56:58] [W] [TRT] Tactic Device request: 3156MB Available: 2195MB. Device memory is insufficient to use tactic. +[12/28/2023-16:56:58] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3156 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:57:03] [W] [TRT] Tactic Device request: 4189MB Available: 2194MB. Device memory is insufficient to use tactic. +[12/28/2023-16:57:03] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4189 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-16:57:03] [W] [TRT] Tactic Device request: 4189MB Available: 2194MB. Device memory is insufficient to use tactic. +[12/28/2023-16:57:03] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4189 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:57:04] [W] [TRT] Tactic Device request: 4189MB Available: 2193MB. Device memory is insufficient to use tactic. +[12/28/2023-16:57:04] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4189 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:57:08] [W] [TRT] Tactic Device request: 4183MB Available: 2193MB. Device memory is insufficient to use tactic. +[12/28/2023-16:57:08] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4183 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:57:08] [W] [TRT] Tactic Device request: 4183MB Available: 2193MB. Device memory is insufficient to use tactic. +[12/28/2023-16:57:08] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4183 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:57:10] [W] [TRT] Tactic Device request: 4186MB Available: 2194MB. Device memory is insufficient to use tactic. +[12/28/2023-16:57:10] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4186 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:57:11] [W] [TRT] Tactic Device request: 4186MB Available: 2194MB. Device memory is insufficient to use tactic. 
+[12/28/2023-16:57:11] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4186 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:57:11] [W] [TRT] Tactic Device request: 4186MB Available: 2194MB. Device memory is insufficient to use tactic. +[12/28/2023-16:57:11] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4186 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:57:15] [W] [TRT] Tactic Device request: 4182MB Available: 2193MB. Device memory is insufficient to use tactic. +[12/28/2023-16:57:15] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4182 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:57:16] [W] [TRT] Tactic Device request: 4182MB Available: 2194MB. Device memory is insufficient to use tactic. +[12/28/2023-16:57:16] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4182 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:05:02] [W] [TRT] Tactic Device request: 4764MB Available: 2303MB. Device memory is insufficient to use tactic. +[12/28/2023-17:05:02] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4764 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:05:02] [W] [TRT] Tactic Device request: 4764MB Available: 2303MB. Device memory is insufficient to use tactic. +[12/28/2023-17:05:02] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4764 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-17:05:02] [W] [TRT] Tactic Device request: 4764MB Available: 2303MB. Device memory is insufficient to use tactic. +[12/28/2023-17:05:02] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4764 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:05:04] [W] [TRT] Tactic Device request: 4761MB Available: 2316MB. Device memory is insufficient to use tactic. +[12/28/2023-17:05:04] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4761 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:05:04] [W] [TRT] Tactic Device request: 4761MB Available: 2317MB. Device memory is insufficient to use tactic. +[12/28/2023-17:05:04] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 4761 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:05:10] [W] [TRT] Tactic Device request: 4244MB Available: 2261MB. Device memory is insufficient to use tactic. +[12/28/2023-17:05:10] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4244 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:05:11] [W] [TRT] Tactic Device request: 4244MB Available: 1947MB. Device memory is insufficient to use tactic. +[12/28/2023-17:05:11] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4244 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:05:12] [W] [TRT] Tactic Device request: 4244MB Available: 1942MB. Device memory is insufficient to use tactic. 
+[12/28/2023-17:05:12] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4244 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:05:17] [W] [TRT] Tactic Device request: 4240MB Available: 1944MB. Device memory is insufficient to use tactic. +[12/28/2023-17:05:17] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4240 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:05:17] [W] [TRT] Tactic Device request: 4240MB Available: 1931MB. Device memory is insufficient to use tactic. +[12/28/2023-17:05:17] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4240 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:05:20] [W] [TRT] Tactic Device request: 4241MB Available: 1926MB. Device memory is insufficient to use tactic. +[12/28/2023-17:05:21] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4241 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:05:21] [W] [TRT] Tactic Device request: 4241MB Available: 1894MB. Device memory is insufficient to use tactic. +[12/28/2023-17:05:21] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4241 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:05:22] [W] [TRT] Tactic Device request: 4241MB Available: 1891MB. Device memory is insufficient to use tactic. +[12/28/2023-17:05:22] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4241 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-17:05:27] [W] [TRT] Tactic Device request: 4239MB Available: 1889MB. Device memory is insufficient to use tactic. +[12/28/2023-17:05:27] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4239 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:05:27] [W] [TRT] Tactic Device request: 4239MB Available: 1877MB. Device memory is insufficient to use tactic. +[12/28/2023-17:05:27] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4239 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:06:08] [W] [TRT] Tactic Device request: 1637MB Available: 1622MB. Device memory is insufficient to use tactic. +[12/28/2023-17:06:08] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 1637 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:06:08] [W] [TRT] Tactic Device request: 1637MB Available: 1622MB. Device memory is insufficient to use tactic. +[12/28/2023-17:06:08] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 1637 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:06:08] [W] [TRT] Tactic Device request: 1637MB Available: 1622MB. Device memory is insufficient to use tactic. +[12/28/2023-17:06:08] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 1637 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:06:09] [W] [TRT] Tactic Device request: 1636MB Available: 1622MB. Device memory is insufficient to use tactic. 
+[12/28/2023-17:06:09] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 1636 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:06:09] [W] [TRT] Tactic Device request: 1636MB Available: 1622MB. Device memory is insufficient to use tactic. +[12/28/2023-17:06:09] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 1636 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:13:44] [W] [TRT] Tactic Device request: 1638MB Available: 1488MB. Device memory is insufficient to use tactic. +[12/28/2023-17:13:44] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 1638 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:13:44] [W] [TRT] Tactic Device request: 1638MB Available: 1488MB. Device memory is insufficient to use tactic. +[12/28/2023-17:13:44] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 1638 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:13:44] [W] [TRT] Tactic Device request: 1637MB Available: 1489MB. Device memory is insufficient to use tactic. +[12/28/2023-17:13:44] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 1637 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:13:45] [W] [TRT] Tactic Device request: 1637MB Available: 1489MB. Device memory is insufficient to use tactic. +[12/28/2023-17:13:45] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 1637 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-17:13:46] [W] [TRT] Tactic Device request: 3270MB Available: 1488MB. Device memory is insufficient to use tactic. +[12/28/2023-17:13:46] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3270 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:13:46] [W] [TRT] Tactic Device request: 3270MB Available: 1488MB. Device memory is insufficient to use tactic. +[12/28/2023-17:13:46] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3270 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:13:46] [W] [TRT] Tactic Device request: 3270MB Available: 1488MB. Device memory is insufficient to use tactic. +[12/28/2023-17:13:46] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3270 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:13:47] [W] [TRT] Tactic Device request: 3269MB Available: 1519MB. Device memory is insufficient to use tactic. +[12/28/2023-17:13:47] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3269 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:13:47] [W] [TRT] Tactic Device request: 3269MB Available: 1518MB. Device memory is insufficient to use tactic. +[12/28/2023-17:13:47] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3269 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:13:50] [W] [TRT] Tactic Device request: 4377MB Available: 1700MB. Device memory is insufficient to use tactic. 
+[12/28/2023-17:13:50] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4377 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:13:50] [W] [TRT] Tactic Device request: 2185MB Available: 1700MB. Device memory is insufficient to use tactic. +[12/28/2023-17:13:50] [W] [TRT] Skipping tactic 4 due to insufficient memory on requested size of 2185 detected for tactic 0x0000000000000005. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:13:50] [W] [TRT] Tactic Device request: 4377MB Available: 1700MB. Device memory is insufficient to use tactic. +[12/28/2023-17:13:50] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4377 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:13:50] [W] [TRT] Tactic Device request: 2185MB Available: 1699MB. Device memory is insufficient to use tactic. +[12/28/2023-17:13:50] [W] [TRT] Skipping tactic 10 due to insufficient memory on requested size of 2185 detected for tactic 0x000000000000003d. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:13:51] [W] [TRT] Tactic Device request: 4377MB Available: 1699MB. Device memory is insufficient to use tactic. +[12/28/2023-17:13:51] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4377 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:13:51] [W] [TRT] Tactic Device request: 2185MB Available: 1699MB. Device memory is insufficient to use tactic. +[12/28/2023-17:13:51] [W] [TRT] Skipping tactic 16 due to insufficient memory on requested size of 2185 detected for tactic 0x0000000000000075. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-17:13:56] [W] [TRT] Tactic Device request: 4375MB Available: 1824MB. Device memory is insufficient to use tactic. +[12/28/2023-17:13:56] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4375 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:13:56] [W] [TRT] Tactic Device request: 2183MB Available: 1824MB. Device memory is insufficient to use tactic. +[12/28/2023-17:13:57] [W] [TRT] Skipping tactic 4 due to insufficient memory on requested size of 2183 detected for tactic 0x0000000000000005. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:13:57] [W] [TRT] Tactic Device request: 4375MB Available: 1824MB. Device memory is insufficient to use tactic. +[12/28/2023-17:13:57] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4375 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:13:57] [W] [TRT] Tactic Device request: 2183MB Available: 1824MB. Device memory is insufficient to use tactic. +[12/28/2023-17:13:57] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2183 detected for tactic 0x000000000000003d. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:14:01] [W] [TRT] Tactic Device request: 4376MB Available: 1823MB. Device memory is insufficient to use tactic. +[12/28/2023-17:14:01] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4376 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:14:01] [W] [TRT] Tactic Device request: 2184MB Available: 1823MB. Device memory is insufficient to use tactic. 
+[12/28/2023-17:14:01] [W] [TRT] Skipping tactic 4 due to insufficient memory on requested size of 2184 detected for tactic 0x0000000000000005. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:14:01] [W] [TRT] Tactic Device request: 4376MB Available: 1823MB. Device memory is insufficient to use tactic. +[12/28/2023-17:14:01] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4376 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:14:01] [W] [TRT] Tactic Device request: 2184MB Available: 1823MB. Device memory is insufficient to use tactic. +[12/28/2023-17:14:01] [W] [TRT] Skipping tactic 10 due to insufficient memory on requested size of 2184 detected for tactic 0x000000000000003d. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:14:01] [W] [TRT] Tactic Device request: 4376MB Available: 1822MB. Device memory is insufficient to use tactic. +[12/28/2023-17:14:01] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4376 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:14:01] [W] [TRT] Tactic Device request: 2184MB Available: 1823MB. Device memory is insufficient to use tactic. +[12/28/2023-17:14:01] [W] [TRT] Skipping tactic 16 due to insufficient memory on requested size of 2184 detected for tactic 0x0000000000000075. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:14:07] [W] [TRT] Tactic Device request: 4375MB Available: 1822MB. Device memory is insufficient to use tactic. +[12/28/2023-17:14:07] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4375 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-17:14:07] [W] [TRT] Tactic Device request: 2183MB Available: 1822MB. Device memory is insufficient to use tactic. +[12/28/2023-17:14:07] [W] [TRT] Skipping tactic 4 due to insufficient memory on requested size of 2183 detected for tactic 0x0000000000000005. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:14:07] [W] [TRT] Tactic Device request: 4375MB Available: 1822MB. Device memory is insufficient to use tactic. +[12/28/2023-17:14:07] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4375 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:14:07] [W] [TRT] Tactic Device request: 2183MB Available: 1821MB. Device memory is insufficient to use tactic. +[12/28/2023-17:14:07] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2183 detected for tactic 0x000000000000003d. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-17:15:12] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. +[12/28/2023-17:15:12] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/28/2023-17:15:12] [W] * GPU compute time is unstable, with coefficient of variance = 5.39183%. +[12/28/2023-17:15:12] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. 
diff --git a/yolo_nas_pose_l_fp16.onnx.fp16.engine.log b/yolo_nas_pose_l_fp16.onnx.fp16.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..f84612c2f617ac6b49efd3fb5e0ce2efa70d1eec --- /dev/null +++ b/yolo_nas_pose_l_fp16.onnx.fp16.engine.log @@ -0,0 +1,328 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_l_fp16.onnx --fp16 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_l_fp16.onnx.fp16.engine +[12/28/2023-16:09:46] [I] === Model Options === +[12/28/2023-16:09:46] [I] Format: ONNX +[12/28/2023-16:09:46] [I] Model: yolo_nas_pose_l_fp16.onnx +[12/28/2023-16:09:46] [I] Output: +[12/28/2023-16:09:46] [I] === Build Options === +[12/28/2023-16:09:46] [I] Max batch: explicit batch +[12/28/2023-16:09:46] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-16:09:46] [I] minTiming: 1 +[12/28/2023-16:09:46] [I] avgTiming: 8 +[12/28/2023-16:09:46] [I] Precision: FP32+FP16 +[12/28/2023-16:09:46] [I] LayerPrecisions: +[12/28/2023-16:09:46] [I] Calibration: +[12/28/2023-16:09:46] [I] Refit: Disabled +[12/28/2023-16:09:46] [I] Sparsity: Disabled +[12/28/2023-16:09:46] [I] Safe mode: Disabled +[12/28/2023-16:09:46] [I] DirectIO mode: Disabled +[12/28/2023-16:09:46] [I] Restricted mode: Disabled +[12/28/2023-16:09:46] [I] Build only: Disabled +[12/28/2023-16:09:46] [I] Save engine: yolo_nas_pose_l_fp16.onnx.fp16.engine +[12/28/2023-16:09:46] [I] Load engine: +[12/28/2023-16:09:46] [I] Profiling verbosity: 0 +[12/28/2023-16:09:46] [I] Tactic sources: Using default tactic sources +[12/28/2023-16:09:46] [I] timingCacheMode: local +[12/28/2023-16:09:46] [I] timingCacheFile: +[12/28/2023-16:09:46] [I] Heuristic: Disabled +[12/28/2023-16:09:46] [I] Preview Features: Use default preview flags. 
+[12/28/2023-16:09:46] [I] Input(s)s format: fp32:CHW +[12/28/2023-16:09:46] [I] Output(s)s format: fp32:CHW +[12/28/2023-16:09:46] [I] Input build shapes: model +[12/28/2023-16:09:46] [I] Input calibration shapes: model +[12/28/2023-16:09:46] [I] === System Options === +[12/28/2023-16:09:46] [I] Device: 0 +[12/28/2023-16:09:46] [I] DLACore: +[12/28/2023-16:09:46] [I] Plugins: +[12/28/2023-16:09:46] [I] === Inference Options === +[12/28/2023-16:09:46] [I] Batch: Explicit +[12/28/2023-16:09:46] [I] Input inference shapes: model +[12/28/2023-16:09:46] [I] Iterations: 10 +[12/28/2023-16:09:46] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-16:09:46] [I] Sleep time: 0ms +[12/28/2023-16:09:46] [I] Idle time: 0ms +[12/28/2023-16:09:46] [I] Streams: 1 +[12/28/2023-16:09:46] [I] ExposeDMA: Disabled +[12/28/2023-16:09:46] [I] Data transfers: Enabled +[12/28/2023-16:09:46] [I] Spin-wait: Disabled +[12/28/2023-16:09:46] [I] Multithreading: Disabled +[12/28/2023-16:09:46] [I] CUDA Graph: Disabled +[12/28/2023-16:09:46] [I] Separate profiling: Disabled +[12/28/2023-16:09:46] [I] Time Deserialize: Disabled +[12/28/2023-16:09:46] [I] Time Refit: Disabled +[12/28/2023-16:09:46] [I] NVTX verbosity: 0 +[12/28/2023-16:09:46] [I] Persistent Cache Ratio: 0 +[12/28/2023-16:09:46] [I] Inputs: +[12/28/2023-16:09:46] [I] === Reporting Options === +[12/28/2023-16:09:46] [I] Verbose: Disabled +[12/28/2023-16:09:46] [I] Averages: 100 inferences +[12/28/2023-16:09:46] [I] Percentiles: 90,95,99 +[12/28/2023-16:09:46] [I] Dump refittable layers:Disabled +[12/28/2023-16:09:46] [I] Dump output: Disabled +[12/28/2023-16:09:46] [I] Profile: Disabled +[12/28/2023-16:09:46] [I] Export timing to JSON file: +[12/28/2023-16:09:46] [I] Export output to JSON file: +[12/28/2023-16:09:46] [I] Export profile to JSON file: +[12/28/2023-16:09:46] [I] +[12/28/2023-16:09:46] [I] === Device Information === +[12/28/2023-16:09:46] [I] Selected Device: Orin +[12/28/2023-16:09:46] [I] Compute Capability: 8.7 
+[12/28/2023-16:09:46] [I] SMs: 8 +[12/28/2023-16:09:46] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-16:09:46] [I] Device Global Memory: 7471 MiB +[12/28/2023-16:09:46] [I] Shared Memory per SM: 164 KiB +[12/28/2023-16:09:46] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-16:09:46] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-16:09:46] [I] +[12/28/2023-16:09:46] [I] TensorRT version: 8.5.2 +[12/28/2023-16:09:46] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 2974 (MiB) +[12/28/2023-16:09:49] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +283, now: CPU 574, GPU 3280 (MiB) +[12/28/2023-16:09:49] [I] Start parsing network model +[12/28/2023-16:09:49] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-16:09:49] [I] [TRT] Input filename: yolo_nas_pose_l_fp16.onnx +[12/28/2023-16:09:49] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-16:09:49] [I] [TRT] Opset version: 17 +[12/28/2023-16:09:49] [I] [TRT] Producer name: pytorch +[12/28/2023-16:09:49] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-16:09:49] [I] [TRT] Domain: +[12/28/2023-16:09:49] [I] [TRT] Model version: 0 +[12/28/2023-16:09:49] [I] [TRT] Doc string: +[12/28/2023-16:09:49] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-16:09:49] [I] Finish parsing network model +[12/28/2023-16:09:50] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/28/2023-16:09:50] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 458) [Constant] +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 459) [Constant] +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 460) 
[Constant] +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stem/conv/rbr_reparam/Conv + /model/backbone/stem/conv/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/downsample/rbr_reparam/Conv + /model/backbone/stage1/downsample/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv2/conv/Conv + /model/backbone/stage1/blocks/conv2/act/Relu || /model/backbone/stage1/blocks/conv1/conv/Conv + /model/backbone/stage1/blocks/conv1/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 15) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 23) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] COPY: 
/model/backbone/stage1/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv3/conv/Conv + /model/backbone/stage1/blocks/conv3/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_skip2/conv/Conv + /model/neck/neck2/reduce_skip2/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/downsample/rbr_reparam/Conv + /model/backbone/stage2/downsample/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/downsample/conv/Conv + /model/neck/neck2/downsample/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv2/conv/Conv + /model/backbone/stage2/blocks/conv2/act/Relu || /model/backbone/stage2/blocks/conv1/conv/Conv + /model/backbone/stage2/blocks/conv1/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 44) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + 
/model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 52) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 60) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv3/conv/Conv + /model/backbone/stage2/blocks/conv3/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip2/conv/Conv + 
/model/neck/neck1/reduce_skip2/act/Relu || /model/neck/neck2/reduce_skip1/conv/Conv + /model/neck/neck2/reduce_skip1/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/downsample/rbr_reparam/Conv + /model/backbone/stage3/downsample/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/downsample/conv/Conv + /model/neck/neck1/downsample/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv2/conv/Conv + /model/backbone/stage3/blocks/conv2/act/Relu || /model/backbone/stage3/blocks/conv1/conv/Conv + /model/backbone/stage3/blocks/conv1/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 83) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 91) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 99) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 107) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.4.alpha + (Unnamed Layer* 
115) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Add) +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add_output_0 copy +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add_output_0 copy +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv3/conv/Conv + /model/backbone/stage3/blocks/conv3/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip1/conv/Conv + /model/neck/neck1/reduce_skip1/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/downsample/rbr_reparam/Conv + /model/backbone/stage4/downsample/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv2/conv/Conv + /model/backbone/stage4/blocks/conv2/act/Relu || /model/backbone/stage4/blocks/conv1/conv/Conv + /model/backbone/stage4/blocks/conv1/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu 
+[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 134) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 142) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv3/conv/Conv + /model/backbone/stage4/blocks/conv3/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv1/conv/Conv + /model/backbone/context_module/cv1/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/cv1/act/Relu_output_0 copy 
+[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv2/conv/Conv + /model/backbone/context_module/cv2/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/conv/conv/Conv + /model/neck/neck1/conv/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck1/upsample/ConvTranspose +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_after_concat/conv/Conv + /model/neck/neck1/reduce_after_concat/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv2/conv/Conv + /model/neck/neck1/blocks/conv2/act/Relu || /model/neck/neck1/blocks/conv1/conv/Conv + /model/neck/neck1/blocks/conv1/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 171) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 179) [Shuffle] + 
/model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.2.alpha + (Unnamed Layer* 187) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.3.alpha + (Unnamed Layer* 195) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv3/conv/Conv + /model/neck/neck1/blocks/conv3/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/conv/conv/Conv + /model/neck/neck2/conv/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck2/upsample/ConvTranspose +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/reduce_skip1/act/Relu_output_0 copy 
+[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_after_concat/conv/Conv + /model/neck/neck2/reduce_after_concat/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv2/conv/Conv + /model/neck/neck2/blocks/conv2/act/Relu || /model/neck/neck2/blocks/conv1/conv/Conv + /model/neck/neck2/blocks/conv1/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 216) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 224) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 232) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.3.alpha + (Unnamed Layer* 240) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv3/conv/Conv + /model/neck/neck2/blocks/conv3/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/bbox_stem/seq/conv/Conv + /model/heads/head1/bbox_stem/seq/act/Relu || /model/heads/head1/pose_stem/seq/conv/Conv + /model/heads/head1/pose_stem/seq/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/conv/conv/Conv + /model/neck/neck3/conv/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head1/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head1/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv2/conv/Conv + /model/neck/neck3/blocks/conv2/act/Relu || /model/neck/neck3/blocks/conv1/conv/Conv + /model/neck/neck3/blocks/conv1/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/cls_pred/Conv +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_pred/Conv +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_pred/Conv +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 271) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + 
/model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 294) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv1/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv2/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 302) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv1/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv2/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 310) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv3/conv/Conv + /model/neck/neck3/blocks/conv3/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_stem/seq/conv/Conv + /model/heads/head2/pose_stem/seq/act/Relu || /model/heads/head2/bbox_stem/seq/conv/Conv 
+ /model/heads/head2/bbox_stem/seq/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/conv/conv/Conv + /model/neck/neck4/conv/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head2/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head2/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv2/conv/Conv + /model/neck/neck4/blocks/conv2/act/Relu || /model/neck/neck4/blocks/conv1/conv/Conv + /model/neck/neck4/blocks/conv1/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/cls_pred/Conv +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_pred/Conv +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_pred/Conv +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 341) [Shuffle] + 
/model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 364) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.2.alpha + (Unnamed Layer* 372) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv1/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv2/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.3.alpha + (Unnamed Layer* 380) [Shuffle] + 
/model/neck/neck4/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv3/conv/Conv + /model/neck/neck4/blocks/conv3/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/bbox_stem/seq/conv/Conv + /model/heads/head3/bbox_stem/seq/act/Relu || /model/heads/head3/pose_stem/seq/conv/Conv + /model/heads/head3/pose_stem/seq/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head3/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head3/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/cls_pred/Conv +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_pred/Conv +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.2/seq/act/Relu +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_pred/Conv +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] MYELIN: 
{ForeignNode[/model/heads/head1/Slice_1...cast_boxes_to_fp32]} +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] NMS: batched_nms_26 +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 462) [NMS]_1_output[DevicetoShapeHostCopy] +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/28/2023-16:09:50] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] +[12/28/2023-16:10:01] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +353, now: CPU 1231, GPU 3827 (MiB) +[12/28/2023-16:10:03] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +82, GPU +65, now: CPU 1313, GPU 3892 (MiB) +[12/28/2023-16:10:03] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored. +[12/28/2023-17:14:35] [I] [TRT] Total Activation Memory: 8058785792 +[12/28/2023-17:14:35] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/28/2023-17:14:49] [I] [TRT] Total Host Persistent Memory: 387328 +[12/28/2023-17:14:49] [I] [TRT] Total Device Persistent Memory: 51712 +[12/28/2023-17:14:49] [I] [TRT] Total Scratch Memory: 134217728 +[12/28/2023-17:14:49] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 69 MiB, GPU 2131 MiB +[12/28/2023-17:14:49] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 196 steps to complete. +[12/28/2023-17:14:49] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 90.8216ms to assign 13 blocks to 196 nodes requiring 160489472 bytes. +[12/28/2023-17:14:49] [I] [TRT] Total Activation Memory: 160489472 +[12/28/2023-17:14:55] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU -16, now: CPU 1661, GPU 5679 (MiB) +[12/28/2023-17:14:55] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +16, GPU +128, now: CPU 16, GPU 128 (MiB) +[12/28/2023-17:14:55] [I] Engine built in 3909.9 sec. 
+[12/28/2023-17:14:56] [I] [TRT] Loaded engine size: 105 MiB +[12/28/2023-17:14:56] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1347, GPU 5281 (MiB) +[12/28/2023-17:14:56] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +103, now: CPU 0, GPU 103 (MiB) +[12/28/2023-17:14:56] [I] Engine deserialized in 0.281193 sec. +[12/28/2023-17:14:56] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU -1, now: CPU 1348, GPU 5280 (MiB) +[12/28/2023-17:14:56] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +154, now: CPU 0, GPU 257 (MiB) +[12/28/2023-17:14:56] [I] Setting persistentCacheLimit to 0 bytes. +[12/28/2023-17:14:56] [I] Using random values for input onnx::Cast_0 +[12/28/2023-17:14:56] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/28/2023-17:14:56] [I] Using random values for output graph2_flat_predictions +[12/28/2023-17:14:56] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/28/2023-17:14:56] [I] Starting inference +[12/28/2023-17:15:12] [I] Warmup completed 1 queries over 200 ms +[12/28/2023-17:15:12] [I] Timing trace has 446 queries over 15.0628 s +[12/28/2023-17:15:12] [I] +[12/28/2023-17:15:12] [I] === Trace details === +[12/28/2023-17:15:12] [I] Trace averages of 100 runs: +[12/28/2023-17:15:12] [I] Average on 100 runs - GPU latency: 33.8846 ms - Host latency: 34.0014 ms (enqueue 33.9409 ms) +[12/28/2023-17:15:12] [I] Average on 100 runs - GPU latency: 33.6823 ms - Host latency: 33.7942 ms (enqueue 33.7436 ms) +[12/28/2023-17:15:12] [I] Average on 100 runs - GPU latency: 33.5307 ms - Host latency: 33.6433 ms (enqueue 33.5802 ms) +[12/28/2023-17:15:12] [I] Average on 100 runs - GPU latency: 33.377 ms - Host latency: 33.4884 ms (enqueue 33.4514 ms) +[12/28/2023-17:15:12] [I] +[12/28/2023-17:15:12] [I] === Performance summary === +[12/28/2023-17:15:12] [I] Throughput: 29.6093 qps 
+[12/28/2023-17:15:12] [I] Latency: min = 31.7529 ms, max = 44.1514 ms, mean = 33.7469 ms, median = 33.4268 ms, percentile(90%) = 34.5583 ms, percentile(95%) = 37.8339 ms, percentile(99%) = 42.0027 ms +[12/28/2023-17:15:12] [I] Enqueue Time: min = 31.7227 ms, max = 44.1133 ms, mean = 33.6924 ms, median = 33.4424 ms, percentile(90%) = 34.3545 ms, percentile(95%) = 37.7542 ms, percentile(99%) = 41.9526 ms +[12/28/2023-17:15:12] [I] H2D Latency: min = 0.0800781 ms, max = 0.114746 ms, mean = 0.0954138 ms, median = 0.0969238 ms, percentile(90%) = 0.100098 ms, percentile(95%) = 0.100586 ms, percentile(99%) = 0.103027 ms +[12/28/2023-17:15:12] [I] GPU Compute Time: min = 31.6406 ms, max = 44.0381 ms, mean = 33.6343 ms, median = 33.311 ms, percentile(90%) = 34.4478 ms, percentile(95%) = 37.7091 ms, percentile(99%) = 41.8918 ms +[12/28/2023-17:15:12] [I] D2H Latency: min = 0.00292969 ms, max = 0.0541992 ms, mean = 0.017179 ms, median = 0.0146484 ms, percentile(90%) = 0.0290527 ms, percentile(95%) = 0.0314941 ms, percentile(99%) = 0.0361328 ms +[12/28/2023-17:15:12] [I] Total Host Walltime: 15.0628 s +[12/28/2023-17:15:12] [I] Total GPU Compute Time: 15.0009 s +[12/28/2023-17:15:12] [I] Explanations of the performance metrics are printed in the verbose logs. +[12/28/2023-17:15:12] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_l_fp16.onnx --fp16 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_l_fp16.onnx.fp16.engine diff --git a/yolo_nas_pose_l_fp16.onnx.int8.engine.err b/yolo_nas_pose_l_fp16.onnx.int8.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..463359b8a75d28e799620bd76b680a934cca2593 --- /dev/null +++ b/yolo_nas_pose_l_fp16.onnx.int8.engine.err @@ -0,0 +1,8 @@ +[12/28/2023-19:27:13] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. 
+[12/28/2023-19:27:13] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-19:27:13] [E] Error[4]: [network.cpp::validate::2925] Error Code 4: Internal Error (fp16 precision has been set for a layer or layer output, but fp16 is not configured in the builder) +[12/28/2023-19:27:13] [E] Error[2]: [builder.cpp::buildSerializedNetwork::751] Error Code 2: Internal Error (Assertion engine != nullptr failed. ) +[12/28/2023-19:27:13] [E] Engine could not be created from network +[12/28/2023-19:27:13] [E] Building engine failed +[12/28/2023-19:27:13] [E] Failed to create engine from model or file. +[12/28/2023-19:27:13] [E] Engine set up failed diff --git a/yolo_nas_pose_l_fp16.onnx.int8.engine.log b/yolo_nas_pose_l_fp16.onnx.int8.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..1ffdcbc51418fcc99607da618986cc8722e4237b --- /dev/null +++ b/yolo_nas_pose_l_fp16.onnx.int8.engine.log @@ -0,0 +1,92 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_l_fp16.onnx --int8 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_l_fp16.onnx.int8.engine +[12/28/2023-19:27:02] [I] === Model Options === +[12/28/2023-19:27:02] [I] Format: ONNX +[12/28/2023-19:27:02] [I] Model: yolo_nas_pose_l_fp16.onnx +[12/28/2023-19:27:02] [I] Output: +[12/28/2023-19:27:02] [I] === Build Options === +[12/28/2023-19:27:02] [I] Max batch: explicit batch +[12/28/2023-19:27:02] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-19:27:02] [I] minTiming: 1 +[12/28/2023-19:27:02] [I] avgTiming: 8 +[12/28/2023-19:27:02] [I] Precision: FP32+INT8 +[12/28/2023-19:27:02] [I] LayerPrecisions: +[12/28/2023-19:27:02] [I] Calibration: Dynamic +[12/28/2023-19:27:02] [I] Refit: Disabled +[12/28/2023-19:27:02] [I] Sparsity: Disabled +[12/28/2023-19:27:02] [I] Safe mode: Disabled +[12/28/2023-19:27:02] [I] DirectIO mode: 
Disabled +[12/28/2023-19:27:02] [I] Restricted mode: Disabled +[12/28/2023-19:27:02] [I] Build only: Disabled +[12/28/2023-19:27:02] [I] Save engine: yolo_nas_pose_l_fp16.onnx.int8.engine +[12/28/2023-19:27:02] [I] Load engine: +[12/28/2023-19:27:02] [I] Profiling verbosity: 0 +[12/28/2023-19:27:02] [I] Tactic sources: Using default tactic sources +[12/28/2023-19:27:02] [I] timingCacheMode: local +[12/28/2023-19:27:02] [I] timingCacheFile: +[12/28/2023-19:27:02] [I] Heuristic: Disabled +[12/28/2023-19:27:02] [I] Preview Features: Use default preview flags. +[12/28/2023-19:27:02] [I] Input(s)s format: fp32:CHW +[12/28/2023-19:27:02] [I] Output(s)s format: fp32:CHW +[12/28/2023-19:27:02] [I] Input build shapes: model +[12/28/2023-19:27:02] [I] Input calibration shapes: model +[12/28/2023-19:27:02] [I] === System Options === +[12/28/2023-19:27:02] [I] Device: 0 +[12/28/2023-19:27:02] [I] DLACore: +[12/28/2023-19:27:02] [I] Plugins: +[12/28/2023-19:27:02] [I] === Inference Options === +[12/28/2023-19:27:02] [I] Batch: Explicit +[12/28/2023-19:27:02] [I] Input inference shapes: model +[12/28/2023-19:27:02] [I] Iterations: 10 +[12/28/2023-19:27:02] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-19:27:02] [I] Sleep time: 0ms +[12/28/2023-19:27:02] [I] Idle time: 0ms +[12/28/2023-19:27:02] [I] Streams: 1 +[12/28/2023-19:27:02] [I] ExposeDMA: Disabled +[12/28/2023-19:27:02] [I] Data transfers: Enabled +[12/28/2023-19:27:02] [I] Spin-wait: Disabled +[12/28/2023-19:27:02] [I] Multithreading: Disabled +[12/28/2023-19:27:02] [I] CUDA Graph: Disabled +[12/28/2023-19:27:02] [I] Separate profiling: Disabled +[12/28/2023-19:27:02] [I] Time Deserialize: Disabled +[12/28/2023-19:27:02] [I] Time Refit: Disabled +[12/28/2023-19:27:02] [I] NVTX verbosity: 0 +[12/28/2023-19:27:02] [I] Persistent Cache Ratio: 0 +[12/28/2023-19:27:02] [I] Inputs: +[12/28/2023-19:27:02] [I] === Reporting Options === +[12/28/2023-19:27:02] [I] Verbose: Disabled +[12/28/2023-19:27:02] [I] Averages: 100 
inferences +[12/28/2023-19:27:02] [I] Percentiles: 90,95,99 +[12/28/2023-19:27:02] [I] Dump refittable layers:Disabled +[12/28/2023-19:27:02] [I] Dump output: Disabled +[12/28/2023-19:27:02] [I] Profile: Disabled +[12/28/2023-19:27:02] [I] Export timing to JSON file: +[12/28/2023-19:27:02] [I] Export output to JSON file: +[12/28/2023-19:27:02] [I] Export profile to JSON file: +[12/28/2023-19:27:02] [I] +[12/28/2023-19:27:02] [I] === Device Information === +[12/28/2023-19:27:02] [I] Selected Device: Orin +[12/28/2023-19:27:02] [I] Compute Capability: 8.7 +[12/28/2023-19:27:02] [I] SMs: 8 +[12/28/2023-19:27:02] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-19:27:02] [I] Device Global Memory: 7471 MiB +[12/28/2023-19:27:02] [I] Shared Memory per SM: 164 KiB +[12/28/2023-19:27:02] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-19:27:02] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-19:27:02] [I] +[12/28/2023-19:27:02] [I] TensorRT version: 8.5.2 +[12/28/2023-19:27:07] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 2837 (MiB) +[12/28/2023-19:27:11] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +285, now: CPU 574, GPU 3142 (MiB) +[12/28/2023-19:27:11] [I] Start parsing network model +[12/28/2023-19:27:12] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-19:27:12] [I] [TRT] Input filename: yolo_nas_pose_l_fp16.onnx +[12/28/2023-19:27:12] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-19:27:12] [I] [TRT] Opset version: 17 +[12/28/2023-19:27:12] [I] [TRT] Producer name: pytorch +[12/28/2023-19:27:12] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-19:27:12] [I] [TRT] Domain: +[12/28/2023-19:27:12] [I] [TRT] Model version: 0 +[12/28/2023-19:27:12] [I] [TRT] Doc string: +[12/28/2023-19:27:12] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-19:27:13] [I] Finish parsing network model +[12/28/2023-19:27:13] [I] FP32 and INT8 precisions have 
been specified - more performance might be enabled by additionally specifying --fp16 or --best +&&&& FAILED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_l_fp16.onnx --int8 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_l_fp16.onnx.int8.engine diff --git a/yolo_nas_pose_l_fp16.onnx.usage.txt b/yolo_nas_pose_l_fp16.onnx.usage.txt new file mode 100644 index 0000000000000000000000000000000000000000..8f8c82519265f94f4c923354bb24787a90c25c43 --- /dev/null +++ b/yolo_nas_pose_l_fp16.onnx.usage.txt @@ -0,0 +1,58 @@ + +Model exported successfully to yolo_nas_pose_l_fp16.onnx +Model expects input image of shape [1, 3, 640, 640] +Input image dtype is torch.uint8 + +Exported model already contains preprocessing (normalization) step, so you don't need to do it manually. +Preprocessing steps to be applied to input image are: +Sequential( + (0): CastTensorTo(dtype=torch.float16) + (1): ChannelSelect(channels_indexes=tensor([2, 1, 0], device='cuda:0')) + (2): ApplyMeanStd(mean=[0.], scale=[255.]) +) + + +Exported model contains postprocessing (NMS) step with the following parameters: + num_pre_nms_predictions=1000 + max_predictions_per_image=10 + nms_threshold=0.5 + confidence_threshold=0.15 + output_predictions_format=flat + + +Exported model is in ONNX format and can be used with ONNXRuntime +To run inference with ONNXRuntime, please use the following code snippet: + + import onnxruntime + import numpy as np + session = onnxruntime.InferenceSession("yolo_nas_pose_l_fp16.onnx", providers=["CUDAExecutionProvider", "CPUExecutionProvider"]) + inputs = [o.name for o in session.get_inputs()] + outputs = [o.name for o in session.get_outputs()] + + example_input_image = np.zeros((1, 3, 640, 640)).astype(np.uint8) + predictions = session.run(outputs, {inputs[0]: example_input_image}) + +Exported model can also be used with TensorRT +To run inference with TensorRT, please see TensorRT deployment documentation +You can benchmark the model 
using the following code snippet: + + trtexec --onnx=yolo_nas_pose_l_fp16.onnx --fp16 --avgRuns=100 --duration=15 + + +Exported model has predictions in flat format: + +# flat_predictions is a 2D array of [N,K] shape +# Each row represents (image_index, x_min, y_min, x_max, y_max, confidence, joints...) +# Please note all values are floats, so you have to convert them to integers if needed + +[flat_predictions] = predictions +pred_bboxes = flat_predictions[:, 1:5] +pred_scores = flat_predictions[:, 5] +pred_joints = flat_predictions[:, 6:].reshape((len(pred_bboxes), -1, 3)) +for i in range(len(pred_bboxes)): + confidence = pred_scores[i] + x_min, y_min, x_max, y_max = pred_bboxes[i] + print(f"Detected pose with confidence={{confidence}}, x_min={{x_min}}, y_min={{y_min}}, x_max={{x_max}}, y_max={{y_max}}") + for joint_index, (x, y, confidence) in enumerate(pred_joints[i]):") + print(f"Joint {{joint_index}} has coordinates x={{x}}, y={{y}}, confidence={{confidence}}") + diff --git a/yolo_nas_pose_l_fp32.onnx b/yolo_nas_pose_l_fp32.onnx new file mode 100644 index 0000000000000000000000000000000000000000..5e1363c8a27fda011e106b62f4e7f45a745c4f0d --- /dev/null +++ b/yolo_nas_pose_l_fp32.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfd7c12de1275152ce64af33b734c74c085ed45403fb2722385718850a71f363 +size 217830699 diff --git a/yolo_nas_pose_l_fp32.onnx.best.engine b/yolo_nas_pose_l_fp32.onnx.best.engine new file mode 100644 index 0000000000000000000000000000000000000000..66c83c96a93264af76ac99c40f5785506fd62ceb --- /dev/null +++ b/yolo_nas_pose_l_fp32.onnx.best.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dea7e820c1d67f86f3b4c5749752351cb53d4f31886b72214b78d0621b27957f +size 57142878 diff --git a/yolo_nas_pose_l_fp32.onnx.best.engine.err b/yolo_nas_pose_l_fp32.onnx.best.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..7c902293f764bee0347d21824f91a53d9a6449ec --- /dev/null +++ 
b/yolo_nas_pose_l_fp32.onnx.best.engine.err @@ -0,0 +1,504 @@ +[12/28/2023-12:58:51] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/28/2023-12:58:51] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-12:58:51] [W] [TRT] Calibrator is not being used. Users must provide dynamic range for all tensors that are not Int32 or Bool. +[12/28/2023-13:32:07] [W] [TRT] Tactic Device request: 4720MB Available: 2945MB. Device memory is insufficient to use tactic. +[12/28/2023-13:32:07] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4720 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:32:07] [W] [TRT] Tactic Device request: 4720MB Available: 2943MB. Device memory is insufficient to use tactic. +[12/28/2023-13:32:07] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4720 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:32:08] [W] [TRT] Tactic Device request: 4720MB Available: 2943MB. Device memory is insufficient to use tactic. +[12/28/2023-13:32:08] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4720 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:32:13] [W] [TRT] Tactic Device request: 4706MB Available: 2945MB. Device memory is insufficient to use tactic. +[12/28/2023-13:32:13] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4706 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:32:14] [W] [TRT] Tactic Device request: 4706MB Available: 2944MB. 
Device memory is insufficient to use tactic. +[12/28/2023-13:32:14] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4706 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:32:18] [W] [TRT] Tactic Device request: 4711MB Available: 2921MB. Device memory is insufficient to use tactic. +[12/28/2023-13:32:18] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:32:19] [W] [TRT] Tactic Device request: 4711MB Available: 2920MB. Device memory is insufficient to use tactic. +[12/28/2023-13:32:19] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4711 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:32:19] [W] [TRT] Tactic Device request: 4711MB Available: 2919MB. Device memory is insufficient to use tactic. +[12/28/2023-13:32:19] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:32:24] [W] [TRT] Tactic Device request: 4702MB Available: 2920MB. Device memory is insufficient to use tactic. +[12/28/2023-13:32:24] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4702 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:32:25] [W] [TRT] Tactic Device request: 4702MB Available: 2921MB. Device memory is insufficient to use tactic. +[12/28/2023-13:32:25] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4702 detected for tactic 0x000000000000003c. 
+Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:32:29] [W] [TRT] Tactic Device request: 4711MB Available: 2875MB. Device memory is insufficient to use tactic. +[12/28/2023-13:32:29] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:32:30] [W] [TRT] Tactic Device request: 4711MB Available: 2875MB. Device memory is insufficient to use tactic. +[12/28/2023-13:32:30] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4711 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:32:30] [W] [TRT] Tactic Device request: 4711MB Available: 2874MB. Device memory is insufficient to use tactic. +[12/28/2023-13:32:30] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:32:35] [W] [TRT] Tactic Device request: 4702MB Available: 2874MB. Device memory is insufficient to use tactic. +[12/28/2023-13:32:35] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4702 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:32:36] [W] [TRT] Tactic Device request: 4702MB Available: 2874MB. Device memory is insufficient to use tactic. +[12/28/2023-13:32:36] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4702 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:40:10] [W] [TRT] Tactic Device request: 4711MB Available: 2317MB. Device memory is insufficient to use tactic. 
+[12/28/2023-13:40:10] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:40:10] [W] [TRT] Tactic Device request: 4711MB Available: 2317MB. Device memory is insufficient to use tactic. +[12/28/2023-13:40:11] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4711 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:40:11] [W] [TRT] Tactic Device request: 4711MB Available: 2317MB. Device memory is insufficient to use tactic. +[12/28/2023-13:40:11] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:40:13] [W] [TRT] Tactic Device request: 4701MB Available: 2317MB. Device memory is insufficient to use tactic. +[12/28/2023-13:40:13] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4701 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:40:13] [W] [TRT] Tactic Device request: 4701MB Available: 2316MB. Device memory is insufficient to use tactic. +[12/28/2023-13:40:13] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 4701 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:40:32] [W] [TRT] Tactic Device request: 6275MB Available: 2317MB. Device memory is insufficient to use tactic. +[12/28/2023-13:40:32] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6275 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-13:40:32] [W] [TRT] Tactic Device request: 6275MB Available: 2317MB. Device memory is insufficient to use tactic. +[12/28/2023-13:40:32] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6275 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:40:32] [W] [TRT] Tactic Device request: 6275MB Available: 2317MB. Device memory is insufficient to use tactic. +[12/28/2023-13:40:32] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6275 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:40:34] [W] [TRT] Tactic Device request: 6270MB Available: 2318MB. Device memory is insufficient to use tactic. +[12/28/2023-13:40:34] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6270 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:40:34] [W] [TRT] Tactic Device request: 6270MB Available: 2318MB. Device memory is insufficient to use tactic. +[12/28/2023-13:40:34] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 6270 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:51:21] [W] [TRT] Tactic Device request: 7056MB Available: 2081MB. Device memory is insufficient to use tactic. +[12/28/2023-13:51:21] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:51:21] [W] [TRT] Tactic Device request: 7056MB Available: 2081MB. Device memory is insufficient to use tactic. 
+[12/28/2023-13:51:21] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 7056 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:51:21] [W] [TRT] Tactic Device request: 7056MB Available: 2081MB. Device memory is insufficient to use tactic. +[12/28/2023-13:51:21] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:51:23] [W] [TRT] Tactic Device request: 7050MB Available: 2080MB. Device memory is insufficient to use tactic. +[12/28/2023-13:51:23] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7050 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:51:23] [W] [TRT] Tactic Device request: 7050MB Available: 2079MB. Device memory is insufficient to use tactic. +[12/28/2023-13:51:23] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 7050 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:51:44] [W] [TRT] Tactic Device request: 6354MB Available: 2081MB. Device memory is insufficient to use tactic. +[12/28/2023-13:51:44] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6354 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:51:45] [W] [TRT] Tactic Device request: 6354MB Available: 2081MB. Device memory is insufficient to use tactic. +[12/28/2023-13:51:45] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6354 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-13:51:45] [W] [TRT] Tactic Device request: 6354MB Available: 2081MB. Device memory is insufficient to use tactic. +[12/28/2023-13:51:45] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6354 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:51:47] [W] [TRT] Tactic Device request: 6351MB Available: 2080MB. Device memory is insufficient to use tactic. +[12/28/2023-13:51:47] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6351 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:51:47] [W] [TRT] Tactic Device request: 6351MB Available: 2080MB. Device memory is insufficient to use tactic. +[12/28/2023-13:51:47] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 6351 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:51:49] [W] [TRT] Tactic Device request: 2127MB Available: 2079MB. Device memory is insufficient to use tactic. +[12/28/2023-13:51:49] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2127 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:51:50] [W] [TRT] Tactic Device request: 2127MB Available: 2078MB. Device memory is insufficient to use tactic. +[12/28/2023-13:51:50] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2127 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:51:50] [W] [TRT] Tactic Device request: 2127MB Available: 2078MB. Device memory is insufficient to use tactic. 
+[12/28/2023-13:51:50] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2127 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:51:53] [W] [TRT] Tactic Device request: 2124MB Available: 2081MB. Device memory is insufficient to use tactic. +[12/28/2023-13:51:53] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2124 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:51:53] [W] [TRT] Tactic Device request: 2124MB Available: 2072MB. Device memory is insufficient to use tactic. +[12/28/2023-13:51:53] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2124 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:51:56] [W] [TRT] Tactic Device request: 2125MB Available: 2070MB. Device memory is insufficient to use tactic. +[12/28/2023-13:51:56] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2125 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:51:56] [W] [TRT] Tactic Device request: 2125MB Available: 2052MB. Device memory is insufficient to use tactic. +[12/28/2023-13:51:56] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2125 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:51:57] [W] [TRT] Tactic Device request: 2125MB Available: 2052MB. Device memory is insufficient to use tactic. +[12/28/2023-13:51:57] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2125 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-13:51:59] [W] [TRT] Tactic Device request: 2124MB Available: 2053MB. Device memory is insufficient to use tactic. +[12/28/2023-13:51:59] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2124 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:52:00] [W] [TRT] Tactic Device request: 2124MB Available: 2051MB. Device memory is insufficient to use tactic. +[12/28/2023-13:52:00] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2124 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:52:02] [W] [TRT] Tactic Device request: 2125MB Available: 2051MB. Device memory is insufficient to use tactic. +[12/28/2023-13:52:02] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2125 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:52:03] [W] [TRT] Tactic Device request: 2125MB Available: 2051MB. Device memory is insufficient to use tactic. +[12/28/2023-13:52:03] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2125 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:52:03] [W] [TRT] Tactic Device request: 2125MB Available: 2051MB. Device memory is insufficient to use tactic. +[12/28/2023-13:52:03] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2125 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:52:06] [W] [TRT] Tactic Device request: 2124MB Available: 2039MB. Device memory is insufficient to use tactic. 
+[12/28/2023-13:52:06] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2124 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-13:52:06] [W] [TRT] Tactic Device request: 2124MB Available: 2038MB. Device memory is insufficient to use tactic. +[12/28/2023-13:52:06] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2124 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:06:20] [W] [TRT] Tactic Device request: 2394MB Available: 1765MB. Device memory is insufficient to use tactic. +[12/28/2023-14:06:20] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:06:20] [W] [TRT] Tactic Device request: 2394MB Available: 1764MB. Device memory is insufficient to use tactic. +[12/28/2023-14:06:20] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2394 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:06:20] [W] [TRT] Tactic Device request: 2394MB Available: 1764MB. Device memory is insufficient to use tactic. +[12/28/2023-14:06:20] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:06:21] [W] [TRT] Tactic Device request: 2392MB Available: 1765MB. Device memory is insufficient to use tactic. +[12/28/2023-14:06:21] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-14:06:21] [W] [TRT] Tactic Device request: 2392MB Available: 1764MB. Device memory is insufficient to use tactic. +[12/28/2023-14:06:21] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2392 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:06:21] [W] [TRT] Tactic Device request: 2392MB Available: 1764MB. Device memory is insufficient to use tactic. +[12/28/2023-14:06:21] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:06:22] [W] [TRT] Tactic Device request: 2391MB Available: 1764MB. Device memory is insufficient to use tactic. +[12/28/2023-14:06:22] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2391 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:06:23] [W] [TRT] Tactic Device request: 2391MB Available: 1763MB. Device memory is insufficient to use tactic. +[12/28/2023-14:06:23] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2391 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:06:23] [W] [TRT] Tactic Device request: 2390MB Available: 1763MB. Device memory is insufficient to use tactic. +[12/28/2023-14:06:23] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2390 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:06:23] [W] [TRT] Tactic Device request: 2390MB Available: 1763MB. Device memory is insufficient to use tactic. 
+[12/28/2023-14:06:23] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2390 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:06:41] [W] [TRT] Tactic Device request: 6540MB Available: 1782MB. Device memory is insufficient to use tactic. +[12/28/2023-14:06:41] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6540 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:06:41] [W] [TRT] Tactic Device request: 6540MB Available: 1781MB. Device memory is insufficient to use tactic. +[12/28/2023-14:06:41] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6540 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:06:42] [W] [TRT] Tactic Device request: 6540MB Available: 1781MB. Device memory is insufficient to use tactic. +[12/28/2023-14:06:42] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6540 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:06:44] [W] [TRT] Tactic Device request: 6538MB Available: 1781MB. Device memory is insufficient to use tactic. +[12/28/2023-14:06:44] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6538 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:06:44] [W] [TRT] Tactic Device request: 6538MB Available: 1781MB. Device memory is insufficient to use tactic. +[12/28/2023-14:06:44] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 6538 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-14:06:47] [W] [TRT] Tactic Device request: 2191MB Available: 1783MB. Device memory is insufficient to use tactic. +[12/28/2023-14:06:47] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2191 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:06:48] [W] [TRT] Tactic Device request: 2191MB Available: 1783MB. Device memory is insufficient to use tactic. +[12/28/2023-14:06:48] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2191 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:06:48] [W] [TRT] Tactic Device request: 2191MB Available: 1782MB. Device memory is insufficient to use tactic. +[12/28/2023-14:06:49] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2191 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:06:52] [W] [TRT] Tactic Device request: 2190MB Available: 1779MB. Device memory is insufficient to use tactic. +[12/28/2023-14:06:52] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2190 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:06:53] [W] [TRT] Tactic Device request: 2190MB Available: 1779MB. Device memory is insufficient to use tactic. +[12/28/2023-14:06:53] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2190 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:06:57] [W] [TRT] Tactic Device request: 2190MB Available: 1780MB. Device memory is insufficient to use tactic. 
+[12/28/2023-14:06:57] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2190 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:06:58] [W] [TRT] Tactic Device request: 2190MB Available: 1778MB. Device memory is insufficient to use tactic. +[12/28/2023-14:06:58] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2190 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:06:59] [W] [TRT] Tactic Device request: 2190MB Available: 1775MB. Device memory is insufficient to use tactic. +[12/28/2023-14:06:59] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2190 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:07:03] [W] [TRT] Tactic Device request: 2190MB Available: 1778MB. Device memory is insufficient to use tactic. +[12/28/2023-14:07:03] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2190 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:07:03] [W] [TRT] Tactic Device request: 2190MB Available: 1777MB. Device memory is insufficient to use tactic. +[12/28/2023-14:07:03] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2190 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:07:08] [W] [TRT] Tactic Device request: 2190MB Available: 1778MB. Device memory is insufficient to use tactic. +[12/28/2023-14:07:08] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2190 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-14:07:09] [W] [TRT] Tactic Device request: 2190MB Available: 1779MB. Device memory is insufficient to use tactic. +[12/28/2023-14:07:09] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2190 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:07:09] [W] [TRT] Tactic Device request: 2190MB Available: 1778MB. Device memory is insufficient to use tactic. +[12/28/2023-14:07:09] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2190 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:07:13] [W] [TRT] Tactic Device request: 2190MB Available: 1778MB. Device memory is insufficient to use tactic. +[12/28/2023-14:07:13] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2190 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:07:14] [W] [TRT] Tactic Device request: 2190MB Available: 1776MB. Device memory is insufficient to use tactic. +[12/28/2023-14:07:14] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2190 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:15:03] [W] [TRT] Tactic Device request: 2457MB Available: 2222MB. Device memory is insufficient to use tactic. +[12/28/2023-14:15:03] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:15:03] [W] [TRT] Tactic Device request: 2457MB Available: 2222MB. Device memory is insufficient to use tactic. 
+[12/28/2023-14:15:03] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2457 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:15:03] [W] [TRT] Tactic Device request: 2457MB Available: 2222MB. Device memory is insufficient to use tactic. +[12/28/2023-14:15:03] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:15:05] [W] [TRT] Tactic Device request: 2456MB Available: 2224MB. Device memory is insufficient to use tactic. +[12/28/2023-14:15:05] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2456 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:15:05] [W] [TRT] Tactic Device request: 2456MB Available: 2224MB. Device memory is insufficient to use tactic. +[12/28/2023-14:15:05] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2456 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:15:32] [W] [TRT] Tactic Device request: 3587MB Available: 1846MB. Device memory is insufficient to use tactic. +[12/28/2023-14:15:32] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:15:32] [W] [TRT] Tactic Device request: 3587MB Available: 1846MB. Device memory is insufficient to use tactic. +[12/28/2023-14:15:32] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3587 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-14:15:32] [W] [TRT] Tactic Device request: 3587MB Available: 1846MB. Device memory is insufficient to use tactic. +[12/28/2023-14:15:32] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:15:33] [W] [TRT] Tactic Device request: 3585MB Available: 1846MB. Device memory is insufficient to use tactic. +[12/28/2023-14:15:33] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3585 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:15:34] [W] [TRT] Tactic Device request: 3585MB Available: 1846MB. Device memory is insufficient to use tactic. +[12/28/2023-14:15:34] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3585 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:28:21] [W] [TRT] Tactic Device request: 3556MB Available: 1715MB. Device memory is insufficient to use tactic. +[12/28/2023-14:28:21] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:28:21] [W] [TRT] Tactic Device request: 3556MB Available: 1715MB. Device memory is insufficient to use tactic. +[12/28/2023-14:28:21] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3556 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:28:22] [W] [TRT] Tactic Device request: 3556MB Available: 1714MB. Device memory is insufficient to use tactic. 
+[12/28/2023-14:28:22] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:28:23] [W] [TRT] Tactic Device request: 3551MB Available: 1715MB. Device memory is insufficient to use tactic. +[12/28/2023-14:28:23] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3551 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:28:23] [W] [TRT] Tactic Device request: 3551MB Available: 1715MB. Device memory is insufficient to use tactic. +[12/28/2023-14:28:23] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3551 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:28:25] [W] [TRT] Tactic Device request: 3140MB Available: 1715MB. Device memory is insufficient to use tactic. +[12/28/2023-14:28:25] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3140 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:28:25] [W] [TRT] Tactic Device request: 3140MB Available: 1715MB. Device memory is insufficient to use tactic. +[12/28/2023-14:28:25] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3140 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:28:25] [W] [TRT] Tactic Device request: 3140MB Available: 1715MB. Device memory is insufficient to use tactic. +[12/28/2023-14:28:25] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3140 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-14:28:26] [W] [TRT] Tactic Device request: 3136MB Available: 1715MB. Device memory is insufficient to use tactic. +[12/28/2023-14:28:26] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3136 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:28:26] [W] [TRT] Tactic Device request: 3136MB Available: 1715MB. Device memory is insufficient to use tactic. +[12/28/2023-14:28:26] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3136 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:40:35] [W] [TRT] Tactic Device request: 3161MB Available: 2110MB. Device memory is insufficient to use tactic. +[12/28/2023-14:40:35] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3161 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:40:36] [W] [TRT] Tactic Device request: 3161MB Available: 2110MB. Device memory is insufficient to use tactic. +[12/28/2023-14:40:36] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3161 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:40:36] [W] [TRT] Tactic Device request: 3161MB Available: 2110MB. Device memory is insufficient to use tactic. +[12/28/2023-14:40:36] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3161 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:40:37] [W] [TRT] Tactic Device request: 3156MB Available: 2109MB. Device memory is insufficient to use tactic. 
+[12/28/2023-14:40:37] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3156 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:40:37] [W] [TRT] Tactic Device request: 3156MB Available: 2109MB. Device memory is insufficient to use tactic. +[12/28/2023-14:40:37] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3156 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:40:43] [W] [TRT] Tactic Device request: 4189MB Available: 2109MB. Device memory is insufficient to use tactic. +[12/28/2023-14:40:44] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4189 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:40:44] [W] [TRT] Tactic Device request: 4189MB Available: 2088MB. Device memory is insufficient to use tactic. +[12/28/2023-14:40:44] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4189 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:40:44] [W] [TRT] Tactic Device request: 4189MB Available: 2087MB. Device memory is insufficient to use tactic. +[12/28/2023-14:40:44] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4189 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:40:49] [W] [TRT] Tactic Device request: 4183MB Available: 2085MB. Device memory is insufficient to use tactic. +[12/28/2023-14:40:49] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4183 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-14:40:49] [W] [TRT] Tactic Device request: 4183MB Available: 2082MB. Device memory is insufficient to use tactic. +[12/28/2023-14:40:49] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4183 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:40:52] [W] [TRT] Tactic Device request: 4186MB Available: 2082MB. Device memory is insufficient to use tactic. +[12/28/2023-14:40:52] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4186 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:40:53] [W] [TRT] Tactic Device request: 4186MB Available: 2061MB. Device memory is insufficient to use tactic. +[12/28/2023-14:40:53] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4186 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:40:53] [W] [TRT] Tactic Device request: 4186MB Available: 2060MB. Device memory is insufficient to use tactic. +[12/28/2023-14:40:53] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4186 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:40:57] [W] [TRT] Tactic Device request: 4182MB Available: 2059MB. Device memory is insufficient to use tactic. +[12/28/2023-14:40:57] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4182 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:40:58] [W] [TRT] Tactic Device request: 4182MB Available: 2054MB. Device memory is insufficient to use tactic. 
+[12/28/2023-14:40:58] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4182 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:53:46] [W] [TRT] Tactic Device request: 4764MB Available: 2038MB. Device memory is insufficient to use tactic. +[12/28/2023-14:53:46] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4764 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:53:46] [W] [TRT] Tactic Device request: 4764MB Available: 2038MB. Device memory is insufficient to use tactic. +[12/28/2023-14:53:46] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4764 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:53:46] [W] [TRT] Tactic Device request: 4764MB Available: 2039MB. Device memory is insufficient to use tactic. +[12/28/2023-14:53:46] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4764 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:53:48] [W] [TRT] Tactic Device request: 4761MB Available: 2038MB. Device memory is insufficient to use tactic. +[12/28/2023-14:53:48] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4761 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:53:48] [W] [TRT] Tactic Device request: 4761MB Available: 2038MB. Device memory is insufficient to use tactic. +[12/28/2023-14:53:48] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 4761 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-14:53:56] [W] [TRT] Tactic Device request: 4244MB Available: 1833MB. Device memory is insufficient to use tactic. +[12/28/2023-14:53:56] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4244 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:53:57] [W] [TRT] Tactic Device request: 4244MB Available: 1504MB. Device memory is insufficient to use tactic. +[12/28/2023-14:53:57] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4244 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:53:58] [W] [TRT] Tactic Device request: 4244MB Available: 1500MB. Device memory is insufficient to use tactic. +[12/28/2023-14:53:58] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4244 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:54:03] [W] [TRT] Tactic Device request: 4240MB Available: 1498MB. Device memory is insufficient to use tactic. +[12/28/2023-14:54:03] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4240 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:54:03] [W] [TRT] Tactic Device request: 4240MB Available: 1494MB. Device memory is insufficient to use tactic. +[12/28/2023-14:54:03] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4240 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:54:08] [W] [TRT] Tactic Device request: 4241MB Available: 1553MB. Device memory is insufficient to use tactic. 
+[12/28/2023-14:54:08] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4241 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:54:09] [W] [TRT] Tactic Device request: 4241MB Available: 1514MB. Device memory is insufficient to use tactic. +[12/28/2023-14:54:09] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4241 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:54:10] [W] [TRT] Tactic Device request: 4241MB Available: 1508MB. Device memory is insufficient to use tactic. +[12/28/2023-14:54:10] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4241 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:54:14] [W] [TRT] Tactic Device request: 4239MB Available: 1510MB. Device memory is insufficient to use tactic. +[12/28/2023-14:54:14] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4239 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:54:15] [W] [TRT] Tactic Device request: 4239MB Available: 1496MB. Device memory is insufficient to use tactic. +[12/28/2023-14:54:15] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4239 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:55:04] [W] [TRT] Tactic Device request: 1637MB Available: 1234MB. Device memory is insufficient to use tactic. +[12/28/2023-14:55:04] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 1637 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-14:55:04] [W] [TRT] Tactic Device request: 1637MB Available: 1234MB. Device memory is insufficient to use tactic. +[12/28/2023-14:55:04] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 1637 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:55:04] [W] [TRT] Tactic Device request: 1637MB Available: 1234MB. Device memory is insufficient to use tactic. +[12/28/2023-14:55:04] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 1637 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:55:05] [W] [TRT] Tactic Device request: 1636MB Available: 1268MB. Device memory is insufficient to use tactic. +[12/28/2023-14:55:05] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 1636 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-14:55:05] [W] [TRT] Tactic Device request: 1636MB Available: 1268MB. Device memory is insufficient to use tactic. +[12/28/2023-14:55:05] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 1636 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:08:11] [W] [TRT] Tactic Device request: 1638MB Available: 1062MB. Device memory is insufficient to use tactic. +[12/28/2023-15:08:12] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 1638 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:08:12] [W] [TRT] Tactic Device request: 1638MB Available: 1063MB. Device memory is insufficient to use tactic. 
+[12/28/2023-15:08:12] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 1638 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:08:14] [W] [TRT] Tactic Device request: 1637MB Available: 1065MB. Device memory is insufficient to use tactic. +[12/28/2023-15:08:14] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 1637 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:08:14] [W] [TRT] Tactic Device request: 1637MB Available: 1065MB. Device memory is insufficient to use tactic. +[12/28/2023-15:08:14] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 1637 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:08:16] [W] [TRT] Tactic Device request: 3270MB Available: 1064MB. Device memory is insufficient to use tactic. +[12/28/2023-15:08:16] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3270 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:08:17] [W] [TRT] Tactic Device request: 3270MB Available: 1064MB. Device memory is insufficient to use tactic. +[12/28/2023-15:08:17] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3270 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:08:17] [W] [TRT] Tactic Device request: 3270MB Available: 1064MB. Device memory is insufficient to use tactic. +[12/28/2023-15:08:17] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3270 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-15:08:18] [W] [TRT] Tactic Device request: 3269MB Available: 1063MB. Device memory is insufficient to use tactic. +[12/28/2023-15:08:18] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3269 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:08:19] [W] [TRT] Tactic Device request: 3269MB Available: 1063MB. Device memory is insufficient to use tactic. +[12/28/2023-15:08:19] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3269 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:08:22] [W] [TRT] Tactic Device request: 4377MB Available: 1060MB. Device memory is insufficient to use tactic. +[12/28/2023-15:08:22] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4377 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:08:22] [W] [TRT] Tactic Device request: 2185MB Available: 1060MB. Device memory is insufficient to use tactic. +[12/28/2023-15:08:22] [W] [TRT] Skipping tactic 4 due to insufficient memory on requested size of 2185 detected for tactic 0x0000000000000005. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:08:23] [W] [TRT] Tactic Device request: 4377MB Available: 1061MB. Device memory is insufficient to use tactic. +[12/28/2023-15:08:23] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4377 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:08:23] [W] [TRT] Tactic Device request: 2185MB Available: 1061MB. Device memory is insufficient to use tactic. 
+[12/28/2023-15:08:23] [W] [TRT] Skipping tactic 10 due to insufficient memory on requested size of 2185 detected for tactic 0x000000000000003d. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:08:23] [W] [TRT] Tactic Device request: 4377MB Available: 1060MB. Device memory is insufficient to use tactic. +[12/28/2023-15:08:23] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4377 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:08:23] [W] [TRT] Tactic Device request: 2185MB Available: 1060MB. Device memory is insufficient to use tactic. +[12/28/2023-15:08:23] [W] [TRT] Skipping tactic 16 due to insufficient memory on requested size of 2185 detected for tactic 0x0000000000000075. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:08:29] [W] [TRT] Tactic Device request: 4375MB Available: 1059MB. Device memory is insufficient to use tactic. +[12/28/2023-15:08:29] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4375 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:08:29] [W] [TRT] Tactic Device request: 2183MB Available: 1059MB. Device memory is insufficient to use tactic. +[12/28/2023-15:08:29] [W] [TRT] Skipping tactic 4 due to insufficient memory on requested size of 2183 detected for tactic 0x0000000000000005. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:08:29] [W] [TRT] Tactic Device request: 4375MB Available: 1058MB. Device memory is insufficient to use tactic. +[12/28/2023-15:08:29] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4375 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-15:08:29] [W] [TRT] Tactic Device request: 2183MB Available: 1058MB. Device memory is insufficient to use tactic. +[12/28/2023-15:08:29] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2183 detected for tactic 0x000000000000003d. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:08:36] [W] [TRT] Tactic Device request: 4376MB Available: 1050MB. Device memory is insufficient to use tactic. +[12/28/2023-15:08:36] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4376 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:08:36] [W] [TRT] Tactic Device request: 2184MB Available: 1049MB. Device memory is insufficient to use tactic. +[12/28/2023-15:08:36] [W] [TRT] Skipping tactic 4 due to insufficient memory on requested size of 2184 detected for tactic 0x0000000000000005. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:08:36] [W] [TRT] Tactic Device request: 4376MB Available: 1037MB. Device memory is insufficient to use tactic. +[12/28/2023-15:08:36] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4376 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:08:36] [W] [TRT] Tactic Device request: 2184MB Available: 1037MB. Device memory is insufficient to use tactic. +[12/28/2023-15:08:36] [W] [TRT] Skipping tactic 10 due to insufficient memory on requested size of 2184 detected for tactic 0x000000000000003d. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:08:37] [W] [TRT] Tactic Device request: 4376MB Available: 1036MB. Device memory is insufficient to use tactic. 
+[12/28/2023-15:08:37] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4376 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:08:37] [W] [TRT] Tactic Device request: 2184MB Available: 1036MB. Device memory is insufficient to use tactic. +[12/28/2023-15:08:37] [W] [TRT] Skipping tactic 16 due to insufficient memory on requested size of 2184 detected for tactic 0x0000000000000075. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:08:42] [W] [TRT] Tactic Device request: 4375MB Available: 1058MB. Device memory is insufficient to use tactic. +[12/28/2023-15:08:42] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4375 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:08:42] [W] [TRT] Tactic Device request: 2183MB Available: 1058MB. Device memory is insufficient to use tactic. +[12/28/2023-15:08:42] [W] [TRT] Skipping tactic 4 due to insufficient memory on requested size of 2183 detected for tactic 0x0000000000000005. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:08:43] [W] [TRT] Tactic Device request: 4375MB Available: 1058MB. Device memory is insufficient to use tactic. +[12/28/2023-15:08:43] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4375 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:08:43] [W] [TRT] Tactic Device request: 2183MB Available: 1058MB. Device memory is insufficient to use tactic. +[12/28/2023-15:08:43] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2183 detected for tactic 0x000000000000003d. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-15:09:47] [W] [TRT] TensorRT encountered issues when converting weights between types and that could affect accuracy. +[12/28/2023-15:09:47] [W] [TRT] If this is not the desired behavior, please modify the weights or retrain with regularization to adjust the magnitude of the weights. +[12/28/2023-15:09:47] [W] [TRT] Check verbose logs for the list of affected weights. +[12/28/2023-15:09:47] [W] [TRT] - 114 weights are affected by this issue: Detected subnormal FP16 values. +[12/28/2023-15:09:47] [W] [TRT] - 40 weights are affected by this issue: Detected values less than smallest positive FP16 subnormal value and converted them to the FP16 minimum subnormalized value. +[12/28/2023-15:10:04] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. +[12/28/2023-15:10:04] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/28/2023-15:10:04] [W] * GPU compute time is unstable, with coefficient of variance = 7.00146%. +[12/28/2023-15:10:04] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. 
diff --git a/yolo_nas_pose_l_fp32.onnx.best.engine.log b/yolo_nas_pose_l_fp32.onnx.best.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..906c75132d9ee3a343ff56b55c99d27734c7da17 --- /dev/null +++ b/yolo_nas_pose_l_fp32.onnx.best.engine.log @@ -0,0 +1,331 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_l_fp32.onnx --best --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_l_fp32.onnx.best.engine +[12/28/2023-12:58:38] [I] === Model Options === +[12/28/2023-12:58:38] [I] Format: ONNX +[12/28/2023-12:58:38] [I] Model: yolo_nas_pose_l_fp32.onnx +[12/28/2023-12:58:38] [I] Output: +[12/28/2023-12:58:38] [I] === Build Options === +[12/28/2023-12:58:38] [I] Max batch: explicit batch +[12/28/2023-12:58:38] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-12:58:38] [I] minTiming: 1 +[12/28/2023-12:58:38] [I] avgTiming: 8 +[12/28/2023-12:58:38] [I] Precision: FP32+FP16+INT8 +[12/28/2023-12:58:38] [I] LayerPrecisions: +[12/28/2023-12:58:38] [I] Calibration: Dynamic +[12/28/2023-12:58:38] [I] Refit: Disabled +[12/28/2023-12:58:38] [I] Sparsity: Disabled +[12/28/2023-12:58:38] [I] Safe mode: Disabled +[12/28/2023-12:58:38] [I] DirectIO mode: Disabled +[12/28/2023-12:58:38] [I] Restricted mode: Disabled +[12/28/2023-12:58:38] [I] Build only: Disabled +[12/28/2023-12:58:38] [I] Save engine: yolo_nas_pose_l_fp32.onnx.best.engine +[12/28/2023-12:58:38] [I] Load engine: +[12/28/2023-12:58:38] [I] Profiling verbosity: 0 +[12/28/2023-12:58:38] [I] Tactic sources: Using default tactic sources +[12/28/2023-12:58:38] [I] timingCacheMode: local +[12/28/2023-12:58:38] [I] timingCacheFile: +[12/28/2023-12:58:38] [I] Heuristic: Disabled +[12/28/2023-12:58:38] [I] Preview Features: Use default preview flags. 
+[12/28/2023-12:58:38] [I] Input(s)s format: fp32:CHW +[12/28/2023-12:58:38] [I] Output(s)s format: fp32:CHW +[12/28/2023-12:58:38] [I] Input build shapes: model +[12/28/2023-12:58:38] [I] Input calibration shapes: model +[12/28/2023-12:58:38] [I] === System Options === +[12/28/2023-12:58:38] [I] Device: 0 +[12/28/2023-12:58:38] [I] DLACore: +[12/28/2023-12:58:38] [I] Plugins: +[12/28/2023-12:58:38] [I] === Inference Options === +[12/28/2023-12:58:38] [I] Batch: Explicit +[12/28/2023-12:58:38] [I] Input inference shapes: model +[12/28/2023-12:58:38] [I] Iterations: 10 +[12/28/2023-12:58:38] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-12:58:38] [I] Sleep time: 0ms +[12/28/2023-12:58:38] [I] Idle time: 0ms +[12/28/2023-12:58:38] [I] Streams: 1 +[12/28/2023-12:58:38] [I] ExposeDMA: Disabled +[12/28/2023-12:58:38] [I] Data transfers: Enabled +[12/28/2023-12:58:38] [I] Spin-wait: Disabled +[12/28/2023-12:58:38] [I] Multithreading: Disabled +[12/28/2023-12:58:38] [I] CUDA Graph: Disabled +[12/28/2023-12:58:38] [I] Separate profiling: Disabled +[12/28/2023-12:58:38] [I] Time Deserialize: Disabled +[12/28/2023-12:58:38] [I] Time Refit: Disabled +[12/28/2023-12:58:38] [I] NVTX verbosity: 0 +[12/28/2023-12:58:38] [I] Persistent Cache Ratio: 0 +[12/28/2023-12:58:38] [I] Inputs: +[12/28/2023-12:58:38] [I] === Reporting Options === +[12/28/2023-12:58:38] [I] Verbose: Disabled +[12/28/2023-12:58:38] [I] Averages: 100 inferences +[12/28/2023-12:58:38] [I] Percentiles: 90,95,99 +[12/28/2023-12:58:38] [I] Dump refittable layers:Disabled +[12/28/2023-12:58:38] [I] Dump output: Disabled +[12/28/2023-12:58:38] [I] Profile: Disabled +[12/28/2023-12:58:38] [I] Export timing to JSON file: +[12/28/2023-12:58:38] [I] Export output to JSON file: +[12/28/2023-12:58:38] [I] Export profile to JSON file: +[12/28/2023-12:58:38] [I] +[12/28/2023-12:58:38] [I] === Device Information === +[12/28/2023-12:58:38] [I] Selected Device: Orin +[12/28/2023-12:58:38] [I] Compute Capability: 8.7 
+[12/28/2023-12:58:38] [I] SMs: 8 +[12/28/2023-12:58:38] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-12:58:38] [I] Device Global Memory: 7471 MiB +[12/28/2023-12:58:38] [I] Shared Memory per SM: 164 KiB +[12/28/2023-12:58:38] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-12:58:38] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-12:58:38] [I] +[12/28/2023-12:58:38] [I] TensorRT version: 8.5.2 +[12/28/2023-12:58:43] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 3010 (MiB) +[12/28/2023-12:58:48] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +283, now: CPU 574, GPU 3313 (MiB) +[12/28/2023-12:58:48] [I] Start parsing network model +[12/28/2023-12:58:51] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-12:58:51] [I] [TRT] Input filename: yolo_nas_pose_l_fp32.onnx +[12/28/2023-12:58:51] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-12:58:51] [I] [TRT] Opset version: 17 +[12/28/2023-12:58:51] [I] [TRT] Producer name: pytorch +[12/28/2023-12:58:51] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-12:58:51] [I] [TRT] Domain: +[12/28/2023-12:58:51] [I] [TRT] Model version: 0 +[12/28/2023-12:58:51] [I] [TRT] Doc string: +[12/28/2023-12:58:51] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-12:58:51] [I] Finish parsing network model +[12/28/2023-12:58:52] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/28/2023-12:58:52] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 455) [Constant] +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 456) [Constant] +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 457) 
[Constant] +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stem/conv/rbr_reparam/Conv + /model/backbone/stem/conv/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/downsample/rbr_reparam/Conv + /model/backbone/stage1/downsample/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv2/conv/Conv + /model/backbone/stage1/blocks/conv2/act/Relu || /model/backbone/stage1/blocks/conv1/conv/Conv + /model/backbone/stage1/blocks/conv1/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 15) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 23) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] COPY: 
/model/backbone/stage1/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv3/conv/Conv + /model/backbone/stage1/blocks/conv3/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_skip2/conv/Conv + /model/neck/neck2/reduce_skip2/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/downsample/rbr_reparam/Conv + /model/backbone/stage2/downsample/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/downsample/conv/Conv + /model/neck/neck2/downsample/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv2/conv/Conv + /model/backbone/stage2/blocks/conv2/act/Relu || /model/backbone/stage2/blocks/conv1/conv/Conv + /model/backbone/stage2/blocks/conv1/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 44) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + 
/model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 52) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 60) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv3/conv/Conv + /model/backbone/stage2/blocks/conv3/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip2/conv/Conv + 
/model/neck/neck1/reduce_skip2/act/Relu || /model/neck/neck2/reduce_skip1/conv/Conv + /model/neck/neck2/reduce_skip1/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/downsample/rbr_reparam/Conv + /model/backbone/stage3/downsample/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/downsample/conv/Conv + /model/neck/neck1/downsample/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv2/conv/Conv + /model/backbone/stage3/blocks/conv2/act/Relu || /model/backbone/stage3/blocks/conv1/conv/Conv + /model/backbone/stage3/blocks/conv1/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 83) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 91) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 99) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 107) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.4.alpha + (Unnamed Layer* 
115) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Add) +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add_output_0 copy +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add_output_0 copy +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv3/conv/Conv + /model/backbone/stage3/blocks/conv3/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip1/conv/Conv + /model/neck/neck1/reduce_skip1/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/downsample/rbr_reparam/Conv + /model/backbone/stage4/downsample/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv2/conv/Conv + /model/backbone/stage4/blocks/conv2/act/Relu || /model/backbone/stage4/blocks/conv1/conv/Conv + /model/backbone/stage4/blocks/conv1/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu 
+[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 134) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 142) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv3/conv/Conv + /model/backbone/stage4/blocks/conv3/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv1/conv/Conv + /model/backbone/context_module/cv1/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/cv1/act/Relu_output_0 copy 
+[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv2/conv/Conv + /model/backbone/context_module/cv2/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/conv/conv/Conv + /model/neck/neck1/conv/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck1/upsample/ConvTranspose +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_after_concat/conv/Conv + /model/neck/neck1/reduce_after_concat/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv2/conv/Conv + /model/neck/neck1/blocks/conv2/act/Relu || /model/neck/neck1/blocks/conv1/conv/Conv + /model/neck/neck1/blocks/conv1/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 171) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 179) [Shuffle] + 
/model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.2.alpha + (Unnamed Layer* 187) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.3.alpha + (Unnamed Layer* 195) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv3/conv/Conv + /model/neck/neck1/blocks/conv3/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/conv/conv/Conv + /model/neck/neck2/conv/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck2/upsample/ConvTranspose +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/reduce_skip1/act/Relu_output_0 copy 
+[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_after_concat/conv/Conv + /model/neck/neck2/reduce_after_concat/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv2/conv/Conv + /model/neck/neck2/blocks/conv2/act/Relu || /model/neck/neck2/blocks/conv1/conv/Conv + /model/neck/neck2/blocks/conv1/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 216) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 224) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 232) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.3.alpha + (Unnamed Layer* 240) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv3/conv/Conv + /model/neck/neck2/blocks/conv3/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/bbox_stem/seq/conv/Conv + /model/heads/head1/bbox_stem/seq/act/Relu || /model/heads/head1/pose_stem/seq/conv/Conv + /model/heads/head1/pose_stem/seq/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/conv/conv/Conv + /model/neck/neck3/conv/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head1/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head1/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv2/conv/Conv + /model/neck/neck3/blocks/conv2/act/Relu || /model/neck/neck3/blocks/conv1/conv/Conv + /model/neck/neck3/blocks/conv1/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/cls_pred/Conv +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_pred/Conv +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_pred/Conv +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 271) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + 
/model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 294) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv1/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv2/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 302) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv1/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv2/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 310) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv3/conv/Conv + /model/neck/neck3/blocks/conv3/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_stem/seq/conv/Conv + /model/heads/head2/pose_stem/seq/act/Relu || /model/heads/head2/bbox_stem/seq/conv/Conv 
+ /model/heads/head2/bbox_stem/seq/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/conv/conv/Conv + /model/neck/neck4/conv/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head2/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head2/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv2/conv/Conv + /model/neck/neck4/blocks/conv2/act/Relu || /model/neck/neck4/blocks/conv1/conv/Conv + /model/neck/neck4/blocks/conv1/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/cls_pred/Conv +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_pred/Conv +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_pred/Conv +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 341) [Shuffle] + 
/model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 364) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.2.alpha + (Unnamed Layer* 372) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv1/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv2/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.3.alpha + (Unnamed Layer* 380) [Shuffle] + 
/model/neck/neck4/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv3/conv/Conv + /model/neck/neck4/blocks/conv3/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/bbox_stem/seq/conv/Conv + /model/heads/head3/bbox_stem/seq/act/Relu || /model/heads/head3/pose_stem/seq/conv/Conv + /model/heads/head3/pose_stem/seq/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head3/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head3/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/cls_pred/Conv +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_pred/Conv +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.2/seq/act/Relu +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_pred/Conv +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] MYELIN: 
{ForeignNode[/model/heads/head1/Slice_1.../post_process/Reshape_2]} +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] NMS: batched_nms_26 +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 459) [NMS]_1_output[DevicetoShapeHostCopy] +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/28/2023-12:58:52] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] +[12/28/2023-12:59:03] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +411, now: CPU 1351, GPU 3918 (MiB) +[12/28/2023-12:59:05] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +82, GPU +80, now: CPU 1433, GPU 3998 (MiB) +[12/28/2023-12:59:05] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored. +[12/28/2023-15:09:20] [I] [TRT] Total Activation Memory: 7964877312 +[12/28/2023-15:09:20] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/28/2023-15:09:38] [I] [TRT] Total Host Persistent Memory: 331680 +[12/28/2023-15:09:38] [I] [TRT] Total Device Persistent Memory: 38912 +[12/28/2023-15:09:38] [I] [TRT] Total Scratch Memory: 134217728 +[12/28/2023-15:09:38] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 88 MiB, GPU 2110 MiB +[12/28/2023-15:09:38] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 176 steps to complete. +[12/28/2023-15:09:38] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 114.49ms to assign 14 blocks to 176 nodes requiring 147384320 bytes. +[12/28/2023-15:09:38] [I] [TRT] Total Activation Memory: 147384320 +[12/28/2023-15:09:47] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU -15, now: CPU 1838, GPU 5747 (MiB) +[12/28/2023-15:09:47] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +53, GPU +64, now: CPU 53, GPU 64 (MiB) +[12/28/2023-15:09:48] [I] Engine built in 7870.12 sec. 
+[12/28/2023-15:09:48] [I] [TRT] Loaded engine size: 54 MiB +[12/28/2023-15:09:48] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1299, GPU 5509 (MiB) +[12/28/2023-15:09:48] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +52, now: CPU 0, GPU 52 (MiB) +[12/28/2023-15:09:48] [I] Engine deserialized in 0.136755 sec. +[12/28/2023-15:09:48] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1300, GPU 5509 (MiB) +[12/28/2023-15:09:48] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +140, now: CPU 0, GPU 192 (MiB) +[12/28/2023-15:09:48] [I] Setting persistentCacheLimit to 0 bytes. +[12/28/2023-15:09:48] [I] Using random values for input onnx::Cast_0 +[12/28/2023-15:09:48] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/28/2023-15:09:48] [I] Using random values for output graph2_flat_predictions +[12/28/2023-15:09:48] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/28/2023-15:09:48] [I] Starting inference +[12/28/2023-15:10:04] [I] Warmup completed 2 queries over 200 ms +[12/28/2023-15:10:04] [I] Timing trace has 712 queries over 15.0201 s +[12/28/2023-15:10:04] [I] +[12/28/2023-15:10:04] [I] === Trace details === +[12/28/2023-15:10:04] [I] Trace averages of 100 runs: +[12/28/2023-15:10:04] [I] Average on 100 runs - GPU latency: 21.1141 ms - Host latency: 21.2281 ms (enqueue 21.1848 ms) +[12/28/2023-15:10:04] [I] Average on 100 runs - GPU latency: 21.2938 ms - Host latency: 21.4086 ms (enqueue 21.3535 ms) +[12/28/2023-15:10:04] [I] Average on 100 runs - GPU latency: 20.5876 ms - Host latency: 20.6987 ms (enqueue 20.679 ms) +[12/28/2023-15:10:04] [I] Average on 100 runs - GPU latency: 20.9284 ms - Host latency: 21.0399 ms (enqueue 20.9968 ms) +[12/28/2023-15:10:04] [I] Average on 100 runs - GPU latency: 21.3846 ms - Host latency: 21.5023 ms (enqueue 21.4432 ms) +[12/28/2023-15:10:04] [I] 
Average on 100 runs - GPU latency: 20.5315 ms - Host latency: 20.6422 ms (enqueue 20.6192 ms) +[12/28/2023-15:10:04] [I] Average on 100 runs - GPU latency: 20.7566 ms - Host latency: 20.8657 ms (enqueue 20.8177 ms) +[12/28/2023-15:10:04] [I] +[12/28/2023-15:10:04] [I] === Performance summary === +[12/28/2023-15:10:04] [I] Throughput: 47.4032 qps +[12/28/2023-15:10:04] [I] Latency: min = 19.6377 ms, max = 32.405 ms, mean = 21.0632 ms, median = 20.583 ms, percentile(90%) = 21.897 ms, percentile(95%) = 23.0127 ms, percentile(99%) = 29.6182 ms +[12/28/2023-15:10:04] [I] Enqueue Time: min = 19.6035 ms, max = 33.8328 ms, mean = 21.0211 ms, median = 20.5366 ms, percentile(90%) = 21.8384 ms, percentile(95%) = 22.998 ms, percentile(99%) = 29.0708 ms +[12/28/2023-15:10:04] [I] H2D Latency: min = 0.0800781 ms, max = 0.128906 ms, mean = 0.0964459 ms, median = 0.097168 ms, percentile(90%) = 0.0991211 ms, percentile(95%) = 0.0996094 ms, percentile(99%) = 0.110474 ms +[12/28/2023-15:10:04] [I] GPU Compute Time: min = 19.5264 ms, max = 32.2937 ms, mean = 20.9506 ms, median = 20.4727 ms, percentile(90%) = 21.7739 ms, percentile(95%) = 22.8984 ms, percentile(99%) = 29.5049 ms +[12/28/2023-15:10:04] [I] D2H Latency: min = 0.00341797 ms, max = 0.0615234 ms, mean = 0.0161761 ms, median = 0.0136719 ms, percentile(90%) = 0.0258789 ms, percentile(95%) = 0.0273438 ms, percentile(99%) = 0.03125 ms +[12/28/2023-15:10:04] [I] Total Host Walltime: 15.0201 s +[12/28/2023-15:10:04] [I] Total GPU Compute Time: 14.9168 s +[12/28/2023-15:10:04] [I] Explanations of the performance metrics are printed in the verbose logs. 
+[12/28/2023-15:10:04] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_l_fp32.onnx --best --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_l_fp32.onnx.best.engine diff --git a/yolo_nas_pose_l_fp32.onnx.engine b/yolo_nas_pose_l_fp32.onnx.engine new file mode 100644 index 0000000000000000000000000000000000000000..13988e078b3572bdd6764f383ce02bbef198d96a --- /dev/null +++ b/yolo_nas_pose_l_fp32.onnx.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41ae49ee96e19ad3d8bfede2728e2c7fb331aa5a1d99fc7b5e139b10ec687299 +size 219687473 diff --git a/yolo_nas_pose_l_fp32.onnx.engine.err b/yolo_nas_pose_l_fp32.onnx.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..ff46f7f63ba6b6203ca47e151ff8efed9e56fc7e --- /dev/null +++ b/yolo_nas_pose_l_fp32.onnx.engine.err @@ -0,0 +1,237 @@ +[12/28/2023-11:29:23] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/28/2023-11:29:23] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-11:30:52] [W] [TRT] Tactic Device request: 4720MB Available: 2387MB. Device memory is insufficient to use tactic. +[12/28/2023-11:30:52] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4720 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:30:53] [W] [TRT] Tactic Device request: 4720MB Available: 2388MB. Device memory is insufficient to use tactic. +[12/28/2023-11:30:53] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4720 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:30:54] [W] [TRT] Tactic Device request: 4720MB Available: 2389MB. 
Device memory is insufficient to use tactic. +[12/28/2023-11:30:54] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4720 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:30:59] [W] [TRT] Tactic Device request: 4711MB Available: 2388MB. Device memory is insufficient to use tactic. +[12/28/2023-11:30:59] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:31:00] [W] [TRT] Tactic Device request: 4711MB Available: 2388MB. Device memory is insufficient to use tactic. +[12/28/2023-11:31:00] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4711 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:31:00] [W] [TRT] Tactic Device request: 4711MB Available: 2389MB. Device memory is insufficient to use tactic. +[12/28/2023-11:31:00] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:32:39] [W] [TRT] Tactic Device request: 4711MB Available: 2349MB. Device memory is insufficient to use tactic. +[12/28/2023-11:32:39] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:32:39] [W] [TRT] Tactic Device request: 4711MB Available: 2348MB. Device memory is insufficient to use tactic. +[12/28/2023-11:32:39] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4711 detected for tactic 0x000000000000003c. 
+Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:32:39] [W] [TRT] Tactic Device request: 4711MB Available: 2349MB. Device memory is insufficient to use tactic. +[12/28/2023-11:32:39] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:32:51] [W] [TRT] Tactic Device request: 6275MB Available: 2347MB. Device memory is insufficient to use tactic. +[12/28/2023-11:32:51] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6275 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:32:51] [W] [TRT] Tactic Device request: 6275MB Available: 2348MB. Device memory is insufficient to use tactic. +[12/28/2023-11:32:51] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6275 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:32:51] [W] [TRT] Tactic Device request: 6275MB Available: 2350MB. Device memory is insufficient to use tactic. +[12/28/2023-11:32:51] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6275 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:35:14] [W] [TRT] Tactic Device request: 7056MB Available: 2331MB. Device memory is insufficient to use tactic. +[12/28/2023-11:35:14] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:35:14] [W] [TRT] Tactic Device request: 7056MB Available: 2330MB. Device memory is insufficient to use tactic. 
+[12/28/2023-11:35:14] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 7056 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:35:15] [W] [TRT] Tactic Device request: 7056MB Available: 2330MB. Device memory is insufficient to use tactic. +[12/28/2023-11:35:15] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:35:27] [W] [TRT] Tactic Device request: 6354MB Available: 2331MB. Device memory is insufficient to use tactic. +[12/28/2023-11:35:27] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6354 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:35:27] [W] [TRT] Tactic Device request: 6354MB Available: 2331MB. Device memory is insufficient to use tactic. +[12/28/2023-11:35:27] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6354 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:35:27] [W] [TRT] Tactic Device request: 6354MB Available: 2331MB. Device memory is insufficient to use tactic. +[12/28/2023-11:35:27] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6354 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:38:31] [W] [TRT] Tactic Device request: 2394MB Available: 2273MB. Device memory is insufficient to use tactic. +[12/28/2023-11:38:31] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-11:38:31] [W] [TRT] Tactic Device request: 2394MB Available: 2273MB. Device memory is insufficient to use tactic. +[12/28/2023-11:38:31] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2394 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:38:31] [W] [TRT] Tactic Device request: 2394MB Available: 2273MB. Device memory is insufficient to use tactic. +[12/28/2023-11:38:31] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:38:32] [W] [TRT] Tactic Device request: 2392MB Available: 2272MB. Device memory is insufficient to use tactic. +[12/28/2023-11:38:32] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:38:32] [W] [TRT] Tactic Device request: 2392MB Available: 2272MB. Device memory is insufficient to use tactic. +[12/28/2023-11:38:32] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2392 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:38:32] [W] [TRT] Tactic Device request: 2392MB Available: 2272MB. Device memory is insufficient to use tactic. +[12/28/2023-11:38:32] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:38:41] [W] [TRT] Tactic Device request: 6540MB Available: 2263MB. Device memory is insufficient to use tactic. 
+[12/28/2023-11:38:41] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6540 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:38:42] [W] [TRT] Tactic Device request: 6540MB Available: 2262MB. Device memory is insufficient to use tactic. +[12/28/2023-11:38:42] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6540 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:38:42] [W] [TRT] Tactic Device request: 6540MB Available: 2262MB. Device memory is insufficient to use tactic. +[12/28/2023-11:38:42] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6540 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:38:46] [W] [TRT] Tactic Device request: 2191MB Available: 2163MB. Device memory is insufficient to use tactic. +[12/28/2023-11:38:46] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2191 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:38:47] [W] [TRT] Tactic Device request: 2191MB Available: 2160MB. Device memory is insufficient to use tactic. +[12/28/2023-11:38:47] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2191 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:38:51] [W] [TRT] Tactic Device request: 2190MB Available: 2161MB. Device memory is insufficient to use tactic. +[12/28/2023-11:38:51] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2190 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-11:38:52] [W] [TRT] Tactic Device request: 2190MB Available: 2161MB. Device memory is insufficient to use tactic. +[12/28/2023-11:38:52] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2190 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:38:53] [W] [TRT] Tactic Device request: 2190MB Available: 2161MB. Device memory is insufficient to use tactic. +[12/28/2023-11:38:53] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2190 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:40:32] [W] [TRT] Tactic Device request: 2457MB Available: 2131MB. Device memory is insufficient to use tactic. +[12/28/2023-11:40:32] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:40:32] [W] [TRT] Tactic Device request: 2457MB Available: 2131MB. Device memory is insufficient to use tactic. +[12/28/2023-11:40:32] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2457 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:40:32] [W] [TRT] Tactic Device request: 2457MB Available: 2131MB. Device memory is insufficient to use tactic. +[12/28/2023-11:40:32] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:40:46] [W] [TRT] Tactic Device request: 3587MB Available: 2158MB. Device memory is insufficient to use tactic. 
+[12/28/2023-11:40:46] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:40:46] [W] [TRT] Tactic Device request: 3587MB Available: 2158MB. Device memory is insufficient to use tactic. +[12/28/2023-11:40:46] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3587 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:40:46] [W] [TRT] Tactic Device request: 3587MB Available: 2158MB. Device memory is insufficient to use tactic. +[12/28/2023-11:40:46] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:43:23] [W] [TRT] Tactic Device request: 3556MB Available: 2179MB. Device memory is insufficient to use tactic. +[12/28/2023-11:43:23] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:43:23] [W] [TRT] Tactic Device request: 3556MB Available: 2179MB. Device memory is insufficient to use tactic. +[12/28/2023-11:43:23] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3556 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:43:23] [W] [TRT] Tactic Device request: 3556MB Available: 2179MB. Device memory is insufficient to use tactic. +[12/28/2023-11:43:23] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-11:43:24] [W] [TRT] Tactic Device request: 3140MB Available: 2178MB. Device memory is insufficient to use tactic. +[12/28/2023-11:43:24] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3140 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:43:24] [W] [TRT] Tactic Device request: 3140MB Available: 2178MB. Device memory is insufficient to use tactic. +[12/28/2023-11:43:24] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3140 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:43:24] [W] [TRT] Tactic Device request: 3140MB Available: 2178MB. Device memory is insufficient to use tactic. +[12/28/2023-11:43:24] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3140 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:45:43] [W] [TRT] Tactic Device request: 3161MB Available: 2140MB. Device memory is insufficient to use tactic. +[12/28/2023-11:45:43] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3161 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:45:43] [W] [TRT] Tactic Device request: 3161MB Available: 2140MB. Device memory is insufficient to use tactic. +[12/28/2023-11:45:43] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3161 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:45:43] [W] [TRT] Tactic Device request: 3161MB Available: 2140MB. Device memory is insufficient to use tactic. 
+[12/28/2023-11:45:43] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3161 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:45:46] [W] [TRT] Tactic Device request: 4189MB Available: 2138MB. Device memory is insufficient to use tactic. +[12/28/2023-11:45:46] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4189 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:45:47] [W] [TRT] Tactic Device request: 4189MB Available: 2138MB. Device memory is insufficient to use tactic. +[12/28/2023-11:45:47] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4189 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:45:47] [W] [TRT] Tactic Device request: 4189MB Available: 2140MB. Device memory is insufficient to use tactic. +[12/28/2023-11:45:47] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4189 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:45:51] [W] [TRT] Tactic Device request: 4186MB Available: 2139MB. Device memory is insufficient to use tactic. +[12/28/2023-11:45:51] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4186 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:45:52] [W] [TRT] Tactic Device request: 4186MB Available: 2139MB. Device memory is insufficient to use tactic. +[12/28/2023-11:45:52] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4186 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-11:45:52] [W] [TRT] Tactic Device request: 4186MB Available: 2138MB. Device memory is insufficient to use tactic. +[12/28/2023-11:45:52] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4186 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:48:34] [W] [TRT] Tactic Device request: 4764MB Available: 2088MB. Device memory is insufficient to use tactic. +[12/28/2023-11:48:34] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4764 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:48:34] [W] [TRT] Tactic Device request: 4764MB Available: 2088MB. Device memory is insufficient to use tactic. +[12/28/2023-11:48:34] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4764 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:48:34] [W] [TRT] Tactic Device request: 4764MB Available: 2088MB. Device memory is insufficient to use tactic. +[12/28/2023-11:48:34] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4764 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:48:39] [W] [TRT] Tactic Device request: 4244MB Available: 2088MB. Device memory is insufficient to use tactic. +[12/28/2023-11:48:39] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4244 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:48:40] [W] [TRT] Tactic Device request: 4244MB Available: 2081MB. Device memory is insufficient to use tactic. 
+[12/28/2023-11:48:40] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4244 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:48:41] [W] [TRT] Tactic Device request: 4244MB Available: 2080MB. Device memory is insufficient to use tactic. +[12/28/2023-11:48:41] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4244 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:48:46] [W] [TRT] Tactic Device request: 4241MB Available: 2082MB. Device memory is insufficient to use tactic. +[12/28/2023-11:48:46] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4241 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:48:47] [W] [TRT] Tactic Device request: 4241MB Available: 2080MB. Device memory is insufficient to use tactic. +[12/28/2023-11:48:47] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4241 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:48:48] [W] [TRT] Tactic Device request: 4241MB Available: 2080MB. Device memory is insufficient to use tactic. +[12/28/2023-11:48:48] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4241 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:51:58] [W] [TRT] Tactic Device request: 3270MB Available: 2043MB. Device memory is insufficient to use tactic. +[12/28/2023-11:51:58] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3270 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-11:51:58] [W] [TRT] Tactic Device request: 3270MB Available: 2043MB. Device memory is insufficient to use tactic. +[12/28/2023-11:51:58] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3270 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:51:58] [W] [TRT] Tactic Device request: 3270MB Available: 2043MB. Device memory is insufficient to use tactic. +[12/28/2023-11:51:58] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3270 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:52:01] [W] [TRT] Tactic Device request: 4377MB Available: 2039MB. Device memory is insufficient to use tactic. +[12/28/2023-11:52:01] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4377 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:52:01] [W] [TRT] Tactic Device request: 2185MB Available: 2039MB. Device memory is insufficient to use tactic. +[12/28/2023-11:52:01] [W] [TRT] Skipping tactic 4 due to insufficient memory on requested size of 2185 detected for tactic 0x0000000000000005. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:52:01] [W] [TRT] Tactic Device request: 4377MB Available: 2039MB. Device memory is insufficient to use tactic. +[12/28/2023-11:52:01] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4377 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:52:01] [W] [TRT] Tactic Device request: 2185MB Available: 2039MB. Device memory is insufficient to use tactic. 
+[12/28/2023-11:52:01] [W] [TRT] Skipping tactic 10 due to insufficient memory on requested size of 2185 detected for tactic 0x000000000000003d. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:52:02] [W] [TRT] Tactic Device request: 4377MB Available: 2039MB. Device memory is insufficient to use tactic. +[12/28/2023-11:52:02] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4377 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:52:02] [W] [TRT] Tactic Device request: 2185MB Available: 2038MB. Device memory is insufficient to use tactic. +[12/28/2023-11:52:02] [W] [TRT] Skipping tactic 16 due to insufficient memory on requested size of 2185 detected for tactic 0x0000000000000075. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:52:09] [W] [TRT] Tactic Device request: 4376MB Available: 2038MB. Device memory is insufficient to use tactic. +[12/28/2023-11:52:09] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4376 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:52:09] [W] [TRT] Tactic Device request: 2184MB Available: 2038MB. Device memory is insufficient to use tactic. +[12/28/2023-11:52:09] [W] [TRT] Skipping tactic 4 due to insufficient memory on requested size of 2184 detected for tactic 0x0000000000000005. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:52:09] [W] [TRT] Tactic Device request: 4376MB Available: 2038MB. Device memory is insufficient to use tactic. +[12/28/2023-11:52:09] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4376 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-11:52:09] [W] [TRT] Tactic Device request: 2184MB Available: 2038MB. Device memory is insufficient to use tactic. +[12/28/2023-11:52:09] [W] [TRT] Skipping tactic 10 due to insufficient memory on requested size of 2184 detected for tactic 0x000000000000003d. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:52:09] [W] [TRT] Tactic Device request: 4376MB Available: 2037MB. Device memory is insufficient to use tactic. +[12/28/2023-11:52:09] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4376 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:52:09] [W] [TRT] Tactic Device request: 2184MB Available: 2038MB. Device memory is insufficient to use tactic. +[12/28/2023-11:52:09] [W] [TRT] Skipping tactic 16 due to insufficient memory on requested size of 2184 detected for tactic 0x0000000000000075. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:52:51] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. +[12/28/2023-11:52:51] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/28/2023-11:52:51] [W] * GPU compute time is unstable, with coefficient of variance = 4.81745%. +[12/28/2023-11:52:51] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. 
diff --git a/yolo_nas_pose_l_fp32.onnx.engine.log b/yolo_nas_pose_l_fp32.onnx.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..45cf8677429da667eef642eff7a1c892810edd90 --- /dev/null +++ b/yolo_nas_pose_l_fp32.onnx.engine.log @@ -0,0 +1,326 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_l_fp32.onnx --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_l_fp32.onnx.engine +[12/28/2023-11:29:16] [I] === Model Options === +[12/28/2023-11:29:16] [I] Format: ONNX +[12/28/2023-11:29:16] [I] Model: yolo_nas_pose_l_fp32.onnx +[12/28/2023-11:29:16] [I] Output: +[12/28/2023-11:29:16] [I] === Build Options === +[12/28/2023-11:29:16] [I] Max batch: explicit batch +[12/28/2023-11:29:16] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-11:29:16] [I] minTiming: 1 +[12/28/2023-11:29:16] [I] avgTiming: 8 +[12/28/2023-11:29:16] [I] Precision: FP32 +[12/28/2023-11:29:16] [I] LayerPrecisions: +[12/28/2023-11:29:16] [I] Calibration: +[12/28/2023-11:29:16] [I] Refit: Disabled +[12/28/2023-11:29:16] [I] Sparsity: Disabled +[12/28/2023-11:29:16] [I] Safe mode: Disabled +[12/28/2023-11:29:16] [I] DirectIO mode: Disabled +[12/28/2023-11:29:16] [I] Restricted mode: Disabled +[12/28/2023-11:29:16] [I] Build only: Disabled +[12/28/2023-11:29:16] [I] Save engine: yolo_nas_pose_l_fp32.onnx.engine +[12/28/2023-11:29:16] [I] Load engine: +[12/28/2023-11:29:16] [I] Profiling verbosity: 0 +[12/28/2023-11:29:16] [I] Tactic sources: Using default tactic sources +[12/28/2023-11:29:16] [I] timingCacheMode: local +[12/28/2023-11:29:16] [I] timingCacheFile: +[12/28/2023-11:29:16] [I] Heuristic: Disabled +[12/28/2023-11:29:16] [I] Preview Features: Use default preview flags. 
+[12/28/2023-11:29:16] [I] Input(s)s format: fp32:CHW +[12/28/2023-11:29:16] [I] Output(s)s format: fp32:CHW +[12/28/2023-11:29:16] [I] Input build shapes: model +[12/28/2023-11:29:16] [I] Input calibration shapes: model +[12/28/2023-11:29:16] [I] === System Options === +[12/28/2023-11:29:16] [I] Device: 0 +[12/28/2023-11:29:16] [I] DLACore: +[12/28/2023-11:29:16] [I] Plugins: +[12/28/2023-11:29:16] [I] === Inference Options === +[12/28/2023-11:29:16] [I] Batch: Explicit +[12/28/2023-11:29:16] [I] Input inference shapes: model +[12/28/2023-11:29:16] [I] Iterations: 10 +[12/28/2023-11:29:16] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-11:29:16] [I] Sleep time: 0ms +[12/28/2023-11:29:16] [I] Idle time: 0ms +[12/28/2023-11:29:16] [I] Streams: 1 +[12/28/2023-11:29:16] [I] ExposeDMA: Disabled +[12/28/2023-11:29:16] [I] Data transfers: Enabled +[12/28/2023-11:29:16] [I] Spin-wait: Disabled +[12/28/2023-11:29:16] [I] Multithreading: Disabled +[12/28/2023-11:29:16] [I] CUDA Graph: Disabled +[12/28/2023-11:29:16] [I] Separate profiling: Disabled +[12/28/2023-11:29:16] [I] Time Deserialize: Disabled +[12/28/2023-11:29:16] [I] Time Refit: Disabled +[12/28/2023-11:29:16] [I] NVTX verbosity: 0 +[12/28/2023-11:29:16] [I] Persistent Cache Ratio: 0 +[12/28/2023-11:29:16] [I] Inputs: +[12/28/2023-11:29:16] [I] === Reporting Options === +[12/28/2023-11:29:16] [I] Verbose: Disabled +[12/28/2023-11:29:16] [I] Averages: 100 inferences +[12/28/2023-11:29:16] [I] Percentiles: 90,95,99 +[12/28/2023-11:29:16] [I] Dump refittable layers:Disabled +[12/28/2023-11:29:16] [I] Dump output: Disabled +[12/28/2023-11:29:16] [I] Profile: Disabled +[12/28/2023-11:29:16] [I] Export timing to JSON file: +[12/28/2023-11:29:16] [I] Export output to JSON file: +[12/28/2023-11:29:16] [I] Export profile to JSON file: +[12/28/2023-11:29:16] [I] +[12/28/2023-11:29:16] [I] === Device Information === +[12/28/2023-11:29:16] [I] Selected Device: Orin +[12/28/2023-11:29:16] [I] Compute Capability: 8.7 
+[12/28/2023-11:29:16] [I] SMs: 8 +[12/28/2023-11:29:16] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-11:29:16] [I] Device Global Memory: 7471 MiB +[12/28/2023-11:29:16] [I] Shared Memory per SM: 164 KiB +[12/28/2023-11:29:16] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-11:29:16] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-11:29:16] [I] +[12/28/2023-11:29:16] [I] TensorRT version: 8.5.2 +[12/28/2023-11:29:17] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 3132 (MiB) +[12/28/2023-11:29:20] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +424, now: CPU 574, GPU 3573 (MiB) +[12/28/2023-11:29:20] [I] Start parsing network model +[12/28/2023-11:29:23] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-11:29:23] [I] [TRT] Input filename: yolo_nas_pose_l_fp32.onnx +[12/28/2023-11:29:23] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-11:29:23] [I] [TRT] Opset version: 17 +[12/28/2023-11:29:23] [I] [TRT] Producer name: pytorch +[12/28/2023-11:29:23] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-11:29:23] [I] [TRT] Domain: +[12/28/2023-11:29:23] [I] [TRT] Model version: 0 +[12/28/2023-11:29:23] [I] [TRT] Doc string: +[12/28/2023-11:29:23] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-11:29:23] [I] Finish parsing network model +[12/28/2023-11:29:23] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/28/2023-11:29:23] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 455) [Constant] +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 456) [Constant] +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 457) 
[Constant] +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stem/conv/rbr_reparam/Conv + /model/backbone/stem/conv/nonlinearity/Relu +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/downsample/rbr_reparam/Conv + /model/backbone/stage1/downsample/nonlinearity/Relu +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv2/conv/Conv + /model/backbone/stage1/blocks/conv2/act/Relu || /model/backbone/stage1/blocks/conv1/conv/Conv + /model/backbone/stage1/blocks/conv1/act/Relu +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 15) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 23) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] COPY: 
/model/backbone/stage1/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv3/conv/Conv + /model/backbone/stage1/blocks/conv3/act/Relu +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_skip2/conv/Conv + /model/neck/neck2/reduce_skip2/act/Relu +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/downsample/rbr_reparam/Conv + /model/backbone/stage2/downsample/nonlinearity/Relu +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/downsample/conv/Conv + /model/neck/neck2/downsample/act/Relu +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv2/conv/Conv + /model/backbone/stage2/blocks/conv2/act/Relu || /model/backbone/stage2/blocks/conv1/conv/Conv + /model/backbone/stage2/blocks/conv1/act/Relu +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 44) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + 
/model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 52) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 60) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/28/2023-11:29:23] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv3/conv/Conv + /model/backbone/stage2/blocks/conv3/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip2/conv/Conv + 
/model/neck/neck1/reduce_skip2/act/Relu || /model/neck/neck2/reduce_skip1/conv/Conv + /model/neck/neck2/reduce_skip1/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/downsample/rbr_reparam/Conv + /model/backbone/stage3/downsample/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/downsample/conv/Conv + /model/neck/neck1/downsample/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv2/conv/Conv + /model/backbone/stage3/blocks/conv2/act/Relu || /model/backbone/stage3/blocks/conv1/conv/Conv + /model/backbone/stage3/blocks/conv1/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 83) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 91) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 99) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 107) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.4.alpha + (Unnamed Layer* 
115) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Add) +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add_output_0 copy +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add_output_0 copy +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv3/conv/Conv + /model/backbone/stage3/blocks/conv3/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip1/conv/Conv + /model/neck/neck1/reduce_skip1/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/downsample/rbr_reparam/Conv + /model/backbone/stage4/downsample/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv2/conv/Conv + /model/backbone/stage4/blocks/conv2/act/Relu || /model/backbone/stage4/blocks/conv1/conv/Conv + /model/backbone/stage4/blocks/conv1/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu 
+[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 134) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 142) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv3/conv/Conv + /model/backbone/stage4/blocks/conv3/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv1/conv/Conv + /model/backbone/context_module/cv1/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/cv1/act/Relu_output_0 copy 
+[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv2/conv/Conv + /model/backbone/context_module/cv2/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/conv/conv/Conv + /model/neck/neck1/conv/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck1/upsample/ConvTranspose +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_after_concat/conv/Conv + /model/neck/neck1/reduce_after_concat/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv2/conv/Conv + /model/neck/neck1/blocks/conv2/act/Relu || /model/neck/neck1/blocks/conv1/conv/Conv + /model/neck/neck1/blocks/conv1/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 171) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 179) [Shuffle] + 
/model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.2.alpha + (Unnamed Layer* 187) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.3.alpha + (Unnamed Layer* 195) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv3/conv/Conv + /model/neck/neck1/blocks/conv3/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/conv/conv/Conv + /model/neck/neck2/conv/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck2/upsample/ConvTranspose +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/reduce_skip1/act/Relu_output_0 copy 
+[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_after_concat/conv/Conv + /model/neck/neck2/reduce_after_concat/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv2/conv/Conv + /model/neck/neck2/blocks/conv2/act/Relu || /model/neck/neck2/blocks/conv1/conv/Conv + /model/neck/neck2/blocks/conv1/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 216) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 224) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 232) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.3.alpha + (Unnamed Layer* 240) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv3/conv/Conv + /model/neck/neck2/blocks/conv3/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/bbox_stem/seq/conv/Conv + /model/heads/head1/bbox_stem/seq/act/Relu || /model/heads/head1/pose_stem/seq/conv/Conv + /model/heads/head1/pose_stem/seq/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/conv/conv/Conv + /model/neck/neck3/conv/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head1/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head1/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv2/conv/Conv + /model/neck/neck3/blocks/conv2/act/Relu || /model/neck/neck3/blocks/conv1/conv/Conv + /model/neck/neck3/blocks/conv1/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/cls_pred/Conv +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_pred/Conv +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_pred/Conv +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 271) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + 
/model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 294) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv1/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv2/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 302) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv1/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv2/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 310) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv3/conv/Conv + /model/neck/neck3/blocks/conv3/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_stem/seq/conv/Conv + /model/heads/head2/pose_stem/seq/act/Relu || /model/heads/head2/bbox_stem/seq/conv/Conv 
+ /model/heads/head2/bbox_stem/seq/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/conv/conv/Conv + /model/neck/neck4/conv/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head2/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head2/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv2/conv/Conv + /model/neck/neck4/blocks/conv2/act/Relu || /model/neck/neck4/blocks/conv1/conv/Conv + /model/neck/neck4/blocks/conv1/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/cls_pred/Conv +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_pred/Conv +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_pred/Conv +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 341) [Shuffle] + 
/model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 364) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.2.alpha + (Unnamed Layer* 372) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv1/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv2/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.3.alpha + (Unnamed Layer* 380) [Shuffle] + 
/model/neck/neck4/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv3/conv/Conv + /model/neck/neck4/blocks/conv3/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/bbox_stem/seq/conv/Conv + /model/heads/head3/bbox_stem/seq/act/Relu || /model/heads/head3/pose_stem/seq/conv/Conv + /model/heads/head3/pose_stem/seq/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head3/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head3/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/cls_pred/Conv +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_pred/Conv +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.2/seq/act/Relu +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_pred/Conv +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] MYELIN: 
{ForeignNode[/model/heads/head1/Slice_1.../post_process/Reshape_2]} +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] NMS: batched_nms_26 +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 459) [NMS]_1_output[DevicetoShapeHostCopy] +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/28/2023-11:29:24] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] +[12/28/2023-11:29:25] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +763, now: CPU 1350, GPU 4631 (MiB) +[12/28/2023-11:29:25] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +83, GPU +131, now: CPU 1433, GPU 4762 (MiB) +[12/28/2023-11:29:25] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored. +[12/28/2023-11:52:24] [I] [TRT] Total Activation Memory: 8308511744 +[12/28/2023-11:52:24] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/28/2023-11:52:32] [I] [TRT] Total Host Persistent Memory: 380944 +[12/28/2023-11:52:32] [I] [TRT] Total Device Persistent Memory: 656384 +[12/28/2023-11:52:32] [I] [TRT] Total Scratch Memory: 134217728 +[12/28/2023-11:52:32] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 49 MiB, GPU 2207 MiB +[12/28/2023-11:52:32] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 216 steps to complete. +[12/28/2023-11:52:32] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 108.365ms to assign 15 blocks to 216 nodes requiring 188319232 bytes. +[12/28/2023-11:52:32] [I] [TRT] Total Activation Memory: 188319232 +[12/28/2023-11:52:34] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1778, GPU 5584 (MiB) +[12/28/2023-11:52:34] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +31, GPU +256, now: CPU 31, GPU 256 (MiB) +[12/28/2023-11:52:35] [I] Engine built in 1398.12 sec. 
+[12/28/2023-11:52:35] [I] [TRT] Loaded engine size: 209 MiB +[12/28/2023-11:52:36] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1451, GPU 4904 (MiB) +[12/28/2023-11:52:36] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +208, now: CPU 0, GPU 208 (MiB) +[12/28/2023-11:52:36] [I] Engine deserialized in 0.328264 sec. +[12/28/2023-11:52:36] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1451, GPU 4904 (MiB) +[12/28/2023-11:52:36] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +180, now: CPU 0, GPU 388 (MiB) +[12/28/2023-11:52:36] [I] Setting persistentCacheLimit to 0 bytes. +[12/28/2023-11:52:36] [I] Using random values for input onnx::Cast_0 +[12/28/2023-11:52:36] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/28/2023-11:52:36] [I] Using random values for output graph2_flat_predictions +[12/28/2023-11:52:36] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/28/2023-11:52:36] [I] Starting inference +[12/28/2023-11:52:51] [I] Warmup completed 3 queries over 200 ms +[12/28/2023-11:52:51] [I] Timing trace has 228 queries over 15.134 s +[12/28/2023-11:52:51] [I] +[12/28/2023-11:52:51] [I] === Trace details === +[12/28/2023-11:52:51] [I] Trace averages of 100 runs: +[12/28/2023-11:52:51] [I] Average on 100 runs - GPU latency: 66.5633 ms - Host latency: 66.6799 ms (enqueue 66.6144 ms) +[12/28/2023-11:52:51] [I] Average on 100 runs - GPU latency: 65.9454 ms - Host latency: 66.065 ms (enqueue 66.0061 ms) +[12/28/2023-11:52:51] [I] +[12/28/2023-11:52:51] [I] === Performance summary === +[12/28/2023-11:52:51] [I] Throughput: 15.0654 qps +[12/28/2023-11:52:51] [I] Latency: min = 63.8037 ms, max = 84.0011 ms, mean = 66.3393 ms, median = 65.521 ms, percentile(90%) = 67.4297 ms, percentile(95%) = 76.3467 ms, percentile(99%) = 78.6501 ms +[12/28/2023-11:52:51] [I] Enqueue Time: min = 63.7715 
ms, max = 83.9532 ms, mean = 66.2756 ms, median = 65.4399 ms, percentile(90%) = 68.3887 ms, percentile(95%) = 76.9697 ms, percentile(99%) = 78.6002 ms +[12/28/2023-11:52:51] [I] H2D Latency: min = 0.081543 ms, max = 0.12793 ms, mean = 0.0942483 ms, median = 0.0947266 ms, percentile(90%) = 0.0976562 ms, percentile(95%) = 0.0986328 ms, percentile(99%) = 0.112305 ms +[12/28/2023-11:52:51] [I] GPU Compute Time: min = 63.6914 ms, max = 83.8763 ms, mean = 66.222 ms, median = 65.4058 ms, percentile(90%) = 67.313 ms, percentile(95%) = 76.2305 ms, percentile(99%) = 78.5333 ms +[12/28/2023-11:52:51] [I] D2H Latency: min = 0.00390625 ms, max = 0.0869141 ms, mean = 0.023145 ms, median = 0.0244141 ms, percentile(90%) = 0.0336914 ms, percentile(95%) = 0.0390625 ms, percentile(99%) = 0.0546875 ms +[12/28/2023-11:52:51] [I] Total Host Walltime: 15.134 s +[12/28/2023-11:52:51] [I] Total GPU Compute Time: 15.0986 s +[12/28/2023-11:52:51] [I] Explanations of the performance metrics are printed in the verbose logs. 
+[12/28/2023-11:52:51] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_l_fp32.onnx --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_l_fp32.onnx.engine diff --git a/yolo_nas_pose_l_fp32.onnx.fp16.engine b/yolo_nas_pose_l_fp32.onnx.fp16.engine new file mode 100644 index 0000000000000000000000000000000000000000..df2587f39921f82f8add8315b543fc5e5c7e7e90 --- /dev/null +++ b/yolo_nas_pose_l_fp32.onnx.fp16.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3274fec20ea219c10bcdf044022eedcd2ced07cc929ad0b5f08c625d5fdf12b4 +size 110783531 diff --git a/yolo_nas_pose_l_fp32.onnx.fp16.engine.err b/yolo_nas_pose_l_fp32.onnx.fp16.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..95699e2a475e9659cdc007e9b22e4dc63e4aa19e --- /dev/null +++ b/yolo_nas_pose_l_fp32.onnx.fp16.engine.err @@ -0,0 +1,401 @@ +[12/28/2023-11:53:03] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/28/2023-11:53:03] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-11:58:20] [W] [TRT] Tactic Device request: 4720MB Available: 2406MB. Device memory is insufficient to use tactic. +[12/28/2023-11:58:20] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4720 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:58:20] [W] [TRT] Tactic Device request: 4720MB Available: 2407MB. Device memory is insufficient to use tactic. +[12/28/2023-11:58:20] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4720 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-11:58:21] [W] [TRT] Tactic Device request: 4720MB Available: 2407MB. Device memory is insufficient to use tactic. +[12/28/2023-11:58:21] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4720 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:58:27] [W] [TRT] Tactic Device request: 4706MB Available: 2408MB. Device memory is insufficient to use tactic. +[12/28/2023-11:58:27] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4706 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:58:27] [W] [TRT] Tactic Device request: 4706MB Available: 2408MB. Device memory is insufficient to use tactic. +[12/28/2023-11:58:28] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4706 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:58:30] [W] [TRT] Tactic Device request: 4711MB Available: 2408MB. Device memory is insufficient to use tactic. +[12/28/2023-11:58:30] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:58:31] [W] [TRT] Tactic Device request: 4711MB Available: 2407MB. Device memory is insufficient to use tactic. +[12/28/2023-11:58:31] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4711 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:58:32] [W] [TRT] Tactic Device request: 4711MB Available: 2407MB. Device memory is insufficient to use tactic. 
+[12/28/2023-11:58:32] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:58:37] [W] [TRT] Tactic Device request: 4702MB Available: 2407MB. Device memory is insufficient to use tactic. +[12/28/2023-11:58:37] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4702 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-11:58:38] [W] [TRT] Tactic Device request: 4702MB Available: 2407MB. Device memory is insufficient to use tactic. +[12/28/2023-11:58:38] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4702 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:03:21] [W] [TRT] Tactic Device request: 4711MB Available: 2283MB. Device memory is insufficient to use tactic. +[12/28/2023-12:03:21] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:03:21] [W] [TRT] Tactic Device request: 4711MB Available: 2282MB. Device memory is insufficient to use tactic. +[12/28/2023-12:03:21] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4711 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:03:21] [W] [TRT] Tactic Device request: 4711MB Available: 2282MB. Device memory is insufficient to use tactic. +[12/28/2023-12:03:21] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-12:03:23] [W] [TRT] Tactic Device request: 4701MB Available: 2281MB. Device memory is insufficient to use tactic. +[12/28/2023-12:03:23] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4701 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:03:23] [W] [TRT] Tactic Device request: 4701MB Available: 2281MB. Device memory is insufficient to use tactic. +[12/28/2023-12:03:23] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 4701 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:03:40] [W] [TRT] Tactic Device request: 6275MB Available: 2278MB. Device memory is insufficient to use tactic. +[12/28/2023-12:03:41] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6275 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:03:41] [W] [TRT] Tactic Device request: 6275MB Available: 2277MB. Device memory is insufficient to use tactic. +[12/28/2023-12:03:41] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6275 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:03:41] [W] [TRT] Tactic Device request: 6275MB Available: 2278MB. Device memory is insufficient to use tactic. +[12/28/2023-12:03:41] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6275 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:03:43] [W] [TRT] Tactic Device request: 6270MB Available: 2276MB. Device memory is insufficient to use tactic. 
+[12/28/2023-12:03:43] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6270 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:03:43] [W] [TRT] Tactic Device request: 6270MB Available: 2276MB. Device memory is insufficient to use tactic. +[12/28/2023-12:03:43] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 6270 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:10:19] [W] [TRT] Tactic Device request: 7056MB Available: 2319MB. Device memory is insufficient to use tactic. +[12/28/2023-12:10:19] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:10:19] [W] [TRT] Tactic Device request: 7056MB Available: 2319MB. Device memory is insufficient to use tactic. +[12/28/2023-12:10:19] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 7056 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:10:19] [W] [TRT] Tactic Device request: 7056MB Available: 2319MB. Device memory is insufficient to use tactic. +[12/28/2023-12:10:19] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:10:21] [W] [TRT] Tactic Device request: 7050MB Available: 2320MB. Device memory is insufficient to use tactic. +[12/28/2023-12:10:21] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7050 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-12:10:21] [W] [TRT] Tactic Device request: 7050MB Available: 2320MB. Device memory is insufficient to use tactic. +[12/28/2023-12:10:21] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 7050 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:10:37] [W] [TRT] Tactic Device request: 6354MB Available: 2321MB. Device memory is insufficient to use tactic. +[12/28/2023-12:10:37] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6354 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:10:38] [W] [TRT] Tactic Device request: 6354MB Available: 2320MB. Device memory is insufficient to use tactic. +[12/28/2023-12:10:38] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6354 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:10:38] [W] [TRT] Tactic Device request: 6354MB Available: 2320MB. Device memory is insufficient to use tactic. +[12/28/2023-12:10:38] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6354 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:10:39] [W] [TRT] Tactic Device request: 6351MB Available: 2320MB. Device memory is insufficient to use tactic. +[12/28/2023-12:10:39] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6351 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:10:39] [W] [TRT] Tactic Device request: 6351MB Available: 2320MB. Device memory is insufficient to use tactic. 
+[12/28/2023-12:10:39] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 6351 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:19:26] [W] [TRT] Tactic Device request: 2394MB Available: 2151MB. Device memory is insufficient to use tactic. +[12/28/2023-12:19:26] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:19:26] [W] [TRT] Tactic Device request: 2394MB Available: 2151MB. Device memory is insufficient to use tactic. +[12/28/2023-12:19:26] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2394 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:19:26] [W] [TRT] Tactic Device request: 2394MB Available: 2151MB. Device memory is insufficient to use tactic. +[12/28/2023-12:19:26] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:19:27] [W] [TRT] Tactic Device request: 2392MB Available: 2151MB. Device memory is insufficient to use tactic. +[12/28/2023-12:19:27] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:19:27] [W] [TRT] Tactic Device request: 2392MB Available: 2150MB. Device memory is insufficient to use tactic. +[12/28/2023-12:19:27] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2392 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-12:19:27] [W] [TRT] Tactic Device request: 2392MB Available: 2150MB. Device memory is insufficient to use tactic. +[12/28/2023-12:19:27] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:19:28] [W] [TRT] Tactic Device request: 2391MB Available: 2150MB. Device memory is insufficient to use tactic. +[12/28/2023-12:19:28] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2391 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:19:28] [W] [TRT] Tactic Device request: 2391MB Available: 2150MB. Device memory is insufficient to use tactic. +[12/28/2023-12:19:28] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2391 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:19:28] [W] [TRT] Tactic Device request: 2390MB Available: 2150MB. Device memory is insufficient to use tactic. +[12/28/2023-12:19:28] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2390 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:19:29] [W] [TRT] Tactic Device request: 2390MB Available: 2150MB. Device memory is insufficient to use tactic. +[12/28/2023-12:19:29] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2390 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:19:43] [W] [TRT] Tactic Device request: 6540MB Available: 2123MB. Device memory is insufficient to use tactic. 
+[12/28/2023-12:19:43] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6540 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:19:43] [W] [TRT] Tactic Device request: 6540MB Available: 2122MB. Device memory is insufficient to use tactic. +[12/28/2023-12:19:43] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6540 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:19:43] [W] [TRT] Tactic Device request: 6540MB Available: 2122MB. Device memory is insufficient to use tactic. +[12/28/2023-12:19:43] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6540 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:19:45] [W] [TRT] Tactic Device request: 6538MB Available: 2121MB. Device memory is insufficient to use tactic. +[12/28/2023-12:19:45] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6538 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:19:45] [W] [TRT] Tactic Device request: 6538MB Available: 2122MB. Device memory is insufficient to use tactic. +[12/28/2023-12:19:45] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 6538 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:19:47] [W] [TRT] Tactic Device request: 2191MB Available: 2122MB. Device memory is insufficient to use tactic. +[12/28/2023-12:19:47] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2191 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-12:19:48] [W] [TRT] Tactic Device request: 2191MB Available: 2123MB. Device memory is insufficient to use tactic. +[12/28/2023-12:19:48] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2191 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:19:49] [W] [TRT] Tactic Device request: 2191MB Available: 2122MB. Device memory is insufficient to use tactic. +[12/28/2023-12:19:49] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2191 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:19:52] [W] [TRT] Tactic Device request: 2190MB Available: 2123MB. Device memory is insufficient to use tactic. +[12/28/2023-12:19:52] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2190 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:19:53] [W] [TRT] Tactic Device request: 2190MB Available: 2123MB. Device memory is insufficient to use tactic. +[12/28/2023-12:19:53] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2190 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:19:56] [W] [TRT] Tactic Device request: 2190MB Available: 2128MB. Device memory is insufficient to use tactic. +[12/28/2023-12:19:56] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2190 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:19:57] [W] [TRT] Tactic Device request: 2190MB Available: 2123MB. Device memory is insufficient to use tactic. 
+[12/28/2023-12:19:57] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2190 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:19:58] [W] [TRT] Tactic Device request: 2190MB Available: 2122MB. Device memory is insufficient to use tactic. +[12/28/2023-12:19:58] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2190 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:20:02] [W] [TRT] Tactic Device request: 2190MB Available: 2125MB. Device memory is insufficient to use tactic. +[12/28/2023-12:20:02] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2190 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:20:02] [W] [TRT] Tactic Device request: 2190MB Available: 2122MB. Device memory is insufficient to use tactic. +[12/28/2023-12:20:02] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2190 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:24:51] [W] [TRT] Tactic Device request: 2457MB Available: 2244MB. Device memory is insufficient to use tactic. +[12/28/2023-12:24:52] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:24:52] [W] [TRT] Tactic Device request: 2457MB Available: 2244MB. Device memory is insufficient to use tactic. +[12/28/2023-12:24:52] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2457 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-12:24:52] [W] [TRT] Tactic Device request: 2457MB Available: 2244MB. Device memory is insufficient to use tactic. +[12/28/2023-12:24:52] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:24:53] [W] [TRT] Tactic Device request: 2456MB Available: 2245MB. Device memory is insufficient to use tactic. +[12/28/2023-12:24:53] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2456 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:24:53] [W] [TRT] Tactic Device request: 2456MB Available: 2245MB. Device memory is insufficient to use tactic. +[12/28/2023-12:24:53] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2456 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:25:14] [W] [TRT] Tactic Device request: 3587MB Available: 2175MB. Device memory is insufficient to use tactic. +[12/28/2023-12:25:14] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:25:14] [W] [TRT] Tactic Device request: 3587MB Available: 2176MB. Device memory is insufficient to use tactic. +[12/28/2023-12:25:14] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3587 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:25:14] [W] [TRT] Tactic Device request: 3587MB Available: 2176MB. Device memory is insufficient to use tactic. 
+[12/28/2023-12:25:15] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:25:16] [W] [TRT] Tactic Device request: 3585MB Available: 2176MB. Device memory is insufficient to use tactic. +[12/28/2023-12:25:16] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3585 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:25:16] [W] [TRT] Tactic Device request: 3585MB Available: 2176MB. Device memory is insufficient to use tactic. +[12/28/2023-12:25:16] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3585 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:32:54] [W] [TRT] Tactic Device request: 3556MB Available: 2050MB. Device memory is insufficient to use tactic. +[12/28/2023-12:32:54] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:32:54] [W] [TRT] Tactic Device request: 3556MB Available: 2051MB. Device memory is insufficient to use tactic. +[12/28/2023-12:32:54] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3556 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:32:54] [W] [TRT] Tactic Device request: 3556MB Available: 2049MB. Device memory is insufficient to use tactic. +[12/28/2023-12:32:54] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-12:32:55] [W] [TRT] Tactic Device request: 3551MB Available: 2050MB. Device memory is insufficient to use tactic. +[12/28/2023-12:32:55] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3551 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:32:55] [W] [TRT] Tactic Device request: 3551MB Available: 2050MB. Device memory is insufficient to use tactic. +[12/28/2023-12:32:55] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3551 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:32:56] [W] [TRT] Tactic Device request: 3140MB Available: 2051MB. Device memory is insufficient to use tactic. +[12/28/2023-12:32:56] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3140 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:32:56] [W] [TRT] Tactic Device request: 3140MB Available: 2050MB. Device memory is insufficient to use tactic. +[12/28/2023-12:32:56] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3140 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:32:56] [W] [TRT] Tactic Device request: 3140MB Available: 2050MB. Device memory is insufficient to use tactic. +[12/28/2023-12:32:56] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3140 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:32:57] [W] [TRT] Tactic Device request: 3136MB Available: 2051MB. Device memory is insufficient to use tactic. 
+[12/28/2023-12:32:57] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3136 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:32:57] [W] [TRT] Tactic Device request: 3136MB Available: 2051MB. Device memory is insufficient to use tactic. +[12/28/2023-12:32:57] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3136 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:40:09] [W] [TRT] Tactic Device request: 3161MB Available: 2255MB. Device memory is insufficient to use tactic. +[12/28/2023-12:40:09] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3161 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:40:09] [W] [TRT] Tactic Device request: 3161MB Available: 2255MB. Device memory is insufficient to use tactic. +[12/28/2023-12:40:09] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3161 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:40:09] [W] [TRT] Tactic Device request: 3161MB Available: 2255MB. Device memory is insufficient to use tactic. +[12/28/2023-12:40:09] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3161 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:40:10] [W] [TRT] Tactic Device request: 3156MB Available: 2253MB. Device memory is insufficient to use tactic. +[12/28/2023-12:40:10] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3156 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-12:40:10] [W] [TRT] Tactic Device request: 3156MB Available: 2254MB. Device memory is insufficient to use tactic. +[12/28/2023-12:40:10] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3156 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:40:14] [W] [TRT] Tactic Device request: 4189MB Available: 2253MB. Device memory is insufficient to use tactic. +[12/28/2023-12:40:14] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4189 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:40:15] [W] [TRT] Tactic Device request: 4189MB Available: 2253MB. Device memory is insufficient to use tactic. +[12/28/2023-12:40:15] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4189 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:40:15] [W] [TRT] Tactic Device request: 4189MB Available: 2254MB. Device memory is insufficient to use tactic. +[12/28/2023-12:40:15] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4189 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:40:19] [W] [TRT] Tactic Device request: 4183MB Available: 2255MB. Device memory is insufficient to use tactic. +[12/28/2023-12:40:20] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4183 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:40:20] [W] [TRT] Tactic Device request: 4183MB Available: 2255MB. Device memory is insufficient to use tactic. 
+[12/28/2023-12:40:20] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4183 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:40:22] [W] [TRT] Tactic Device request: 4186MB Available: 2255MB. Device memory is insufficient to use tactic. +[12/28/2023-12:40:22] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4186 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:40:22] [W] [TRT] Tactic Device request: 4186MB Available: 2254MB. Device memory is insufficient to use tactic. +[12/28/2023-12:40:22] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4186 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:40:23] [W] [TRT] Tactic Device request: 4186MB Available: 2254MB. Device memory is insufficient to use tactic. +[12/28/2023-12:40:23] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4186 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:40:27] [W] [TRT] Tactic Device request: 4182MB Available: 2254MB. Device memory is insufficient to use tactic. +[12/28/2023-12:40:27] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4182 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:40:27] [W] [TRT] Tactic Device request: 4182MB Available: 2253MB. Device memory is insufficient to use tactic. +[12/28/2023-12:40:27] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4182 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-12:48:15] [W] [TRT] Tactic Device request: 4764MB Available: 2271MB. Device memory is insufficient to use tactic. +[12/28/2023-12:48:15] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4764 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:48:15] [W] [TRT] Tactic Device request: 4764MB Available: 2271MB. Device memory is insufficient to use tactic. +[12/28/2023-12:48:15] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4764 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:48:15] [W] [TRT] Tactic Device request: 4764MB Available: 2269MB. Device memory is insufficient to use tactic. +[12/28/2023-12:48:15] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4764 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:48:17] [W] [TRT] Tactic Device request: 4761MB Available: 2268MB. Device memory is insufficient to use tactic. +[12/28/2023-12:48:17] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4761 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:48:17] [W] [TRT] Tactic Device request: 4761MB Available: 2265MB. Device memory is insufficient to use tactic. +[12/28/2023-12:48:17] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 4761 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:48:23] [W] [TRT] Tactic Device request: 4244MB Available: 2235MB. Device memory is insufficient to use tactic. 
+[12/28/2023-12:48:23] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4244 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:48:24] [W] [TRT] Tactic Device request: 4244MB Available: 1911MB. Device memory is insufficient to use tactic. +[12/28/2023-12:48:24] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4244 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:48:24] [W] [TRT] Tactic Device request: 4244MB Available: 1909MB. Device memory is insufficient to use tactic. +[12/28/2023-12:48:25] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4244 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:48:29] [W] [TRT] Tactic Device request: 4240MB Available: 1905MB. Device memory is insufficient to use tactic. +[12/28/2023-12:48:29] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4240 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:48:30] [W] [TRT] Tactic Device request: 4240MB Available: 1896MB. Device memory is insufficient to use tactic. +[12/28/2023-12:48:30] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4240 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:48:33] [W] [TRT] Tactic Device request: 4241MB Available: 1892MB. Device memory is insufficient to use tactic. +[12/28/2023-12:48:33] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4241 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-12:48:34] [W] [TRT] Tactic Device request: 4241MB Available: 1864MB. Device memory is insufficient to use tactic. +[12/28/2023-12:48:34] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4241 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:48:35] [W] [TRT] Tactic Device request: 4241MB Available: 1860MB. Device memory is insufficient to use tactic. +[12/28/2023-12:48:35] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4241 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:48:39] [W] [TRT] Tactic Device request: 4239MB Available: 1857MB. Device memory is insufficient to use tactic. +[12/28/2023-12:48:39] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4239 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:48:40] [W] [TRT] Tactic Device request: 4239MB Available: 1845MB. Device memory is insufficient to use tactic. +[12/28/2023-12:48:40] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4239 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:57:07] [W] [TRT] Tactic Device request: 3270MB Available: 1684MB. Device memory is insufficient to use tactic. +[12/28/2023-12:57:07] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3270 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:57:07] [W] [TRT] Tactic Device request: 3270MB Available: 1684MB. Device memory is insufficient to use tactic. 
+[12/28/2023-12:57:07] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3270 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:57:07] [W] [TRT] Tactic Device request: 3270MB Available: 1684MB. Device memory is insufficient to use tactic. +[12/28/2023-12:57:07] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3270 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:57:08] [W] [TRT] Tactic Device request: 3269MB Available: 1684MB. Device memory is insufficient to use tactic. +[12/28/2023-12:57:08] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3269 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:57:09] [W] [TRT] Tactic Device request: 3269MB Available: 1684MB. Device memory is insufficient to use tactic. +[12/28/2023-12:57:09] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3269 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:57:11] [W] [TRT] Tactic Device request: 4377MB Available: 1680MB. Device memory is insufficient to use tactic. +[12/28/2023-12:57:11] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4377 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:57:11] [W] [TRT] Tactic Device request: 2185MB Available: 1680MB. Device memory is insufficient to use tactic. +[12/28/2023-12:57:11] [W] [TRT] Skipping tactic 4 due to insufficient memory on requested size of 2185 detected for tactic 0x0000000000000005. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-12:57:11] [W] [TRT] Tactic Device request: 4377MB Available: 1680MB. Device memory is insufficient to use tactic. +[12/28/2023-12:57:11] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4377 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:57:11] [W] [TRT] Tactic Device request: 2185MB Available: 1680MB. Device memory is insufficient to use tactic. +[12/28/2023-12:57:11] [W] [TRT] Skipping tactic 10 due to insufficient memory on requested size of 2185 detected for tactic 0x000000000000003d. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:57:12] [W] [TRT] Tactic Device request: 4377MB Available: 1680MB. Device memory is insufficient to use tactic. +[12/28/2023-12:57:12] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4377 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:57:12] [W] [TRT] Tactic Device request: 2185MB Available: 1680MB. Device memory is insufficient to use tactic. +[12/28/2023-12:57:12] [W] [TRT] Skipping tactic 16 due to insufficient memory on requested size of 2185 detected for tactic 0x0000000000000075. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:57:17] [W] [TRT] Tactic Device request: 4375MB Available: 1680MB. Device memory is insufficient to use tactic. +[12/28/2023-12:57:17] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4375 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:57:18] [W] [TRT] Tactic Device request: 2183MB Available: 1680MB. Device memory is insufficient to use tactic. 
+[12/28/2023-12:57:18] [W] [TRT] Skipping tactic 4 due to insufficient memory on requested size of 2183 detected for tactic 0x0000000000000005. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:57:18] [W] [TRT] Tactic Device request: 4375MB Available: 1680MB. Device memory is insufficient to use tactic. +[12/28/2023-12:57:18] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4375 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:57:18] [W] [TRT] Tactic Device request: 2183MB Available: 1680MB. Device memory is insufficient to use tactic. +[12/28/2023-12:57:18] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2183 detected for tactic 0x000000000000003d. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:57:22] [W] [TRT] Tactic Device request: 4376MB Available: 1680MB. Device memory is insufficient to use tactic. +[12/28/2023-12:57:22] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4376 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:57:22] [W] [TRT] Tactic Device request: 2184MB Available: 1680MB. Device memory is insufficient to use tactic. +[12/28/2023-12:57:22] [W] [TRT] Skipping tactic 4 due to insufficient memory on requested size of 2184 detected for tactic 0x0000000000000005. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:57:22] [W] [TRT] Tactic Device request: 4376MB Available: 1679MB. Device memory is insufficient to use tactic. +[12/28/2023-12:57:22] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4376 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-12:57:22] [W] [TRT] Tactic Device request: 2184MB Available: 1679MB. Device memory is insufficient to use tactic. +[12/28/2023-12:57:22] [W] [TRT] Skipping tactic 10 due to insufficient memory on requested size of 2184 detected for tactic 0x000000000000003d. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:57:23] [W] [TRT] Tactic Device request: 4376MB Available: 1679MB. Device memory is insufficient to use tactic. +[12/28/2023-12:57:23] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4376 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:57:23] [W] [TRT] Tactic Device request: 2184MB Available: 1679MB. Device memory is insufficient to use tactic. +[12/28/2023-12:57:23] [W] [TRT] Skipping tactic 16 due to insufficient memory on requested size of 2184 detected for tactic 0x0000000000000075. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:57:28] [W] [TRT] Tactic Device request: 4375MB Available: 1679MB. Device memory is insufficient to use tactic. +[12/28/2023-12:57:28] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4375 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:57:28] [W] [TRT] Tactic Device request: 2183MB Available: 1679MB. Device memory is insufficient to use tactic. +[12/28/2023-12:57:28] [W] [TRT] Skipping tactic 4 due to insufficient memory on requested size of 2183 detected for tactic 0x0000000000000005. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:57:29] [W] [TRT] Tactic Device request: 4375MB Available: 1679MB. Device memory is insufficient to use tactic. 
+[12/28/2023-12:57:29] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4375 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:57:29] [W] [TRT] Tactic Device request: 2183MB Available: 1679MB. Device memory is insufficient to use tactic. +[12/28/2023-12:57:29] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2183 detected for tactic 0x000000000000003d. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-12:58:16] [W] [TRT] TensorRT encountered issues when converting weights between types and that could affect accuracy. +[12/28/2023-12:58:16] [W] [TRT] If this is not the desired behavior, please modify the weights or retrain with regularization to adjust the magnitude of the weights. +[12/28/2023-12:58:16] [W] [TRT] Check verbose logs for the list of affected weights. +[12/28/2023-12:58:16] [W] [TRT] - 114 weights are affected by this issue: Detected subnormal FP16 values. +[12/28/2023-12:58:16] [W] [TRT] - 40 weights are affected by this issue: Detected values less than smallest positive FP16 subnormal value and converted them to the FP16 minimum subnormalized value. +[12/28/2023-12:58:33] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. +[12/28/2023-12:58:33] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/28/2023-12:58:33] [W] * GPU compute time is unstable, with coefficient of variance = 5.80641%. +[12/28/2023-12:58:33] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. 
diff --git a/yolo_nas_pose_l_fp32.onnx.fp16.engine.log b/yolo_nas_pose_l_fp32.onnx.fp16.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..f33a9bb9bf5943317f59c5c4f924d0fe13882aac --- /dev/null +++ b/yolo_nas_pose_l_fp32.onnx.fp16.engine.log @@ -0,0 +1,328 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_l_fp32.onnx --fp16 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_l_fp32.onnx.fp16.engine +[12/28/2023-11:52:55] [I] === Model Options === +[12/28/2023-11:52:55] [I] Format: ONNX +[12/28/2023-11:52:55] [I] Model: yolo_nas_pose_l_fp32.onnx +[12/28/2023-11:52:55] [I] Output: +[12/28/2023-11:52:55] [I] === Build Options === +[12/28/2023-11:52:55] [I] Max batch: explicit batch +[12/28/2023-11:52:55] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-11:52:55] [I] minTiming: 1 +[12/28/2023-11:52:55] [I] avgTiming: 8 +[12/28/2023-11:52:55] [I] Precision: FP32+FP16 +[12/28/2023-11:52:55] [I] LayerPrecisions: +[12/28/2023-11:52:55] [I] Calibration: +[12/28/2023-11:52:55] [I] Refit: Disabled +[12/28/2023-11:52:55] [I] Sparsity: Disabled +[12/28/2023-11:52:55] [I] Safe mode: Disabled +[12/28/2023-11:52:55] [I] DirectIO mode: Disabled +[12/28/2023-11:52:55] [I] Restricted mode: Disabled +[12/28/2023-11:52:55] [I] Build only: Disabled +[12/28/2023-11:52:55] [I] Save engine: yolo_nas_pose_l_fp32.onnx.fp16.engine +[12/28/2023-11:52:55] [I] Load engine: +[12/28/2023-11:52:55] [I] Profiling verbosity: 0 +[12/28/2023-11:52:55] [I] Tactic sources: Using default tactic sources +[12/28/2023-11:52:55] [I] timingCacheMode: local +[12/28/2023-11:52:55] [I] timingCacheFile: +[12/28/2023-11:52:55] [I] Heuristic: Disabled +[12/28/2023-11:52:55] [I] Preview Features: Use default preview flags. 
+[12/28/2023-11:52:55] [I] Input(s)s format: fp32:CHW +[12/28/2023-11:52:55] [I] Output(s)s format: fp32:CHW +[12/28/2023-11:52:55] [I] Input build shapes: model +[12/28/2023-11:52:55] [I] Input calibration shapes: model +[12/28/2023-11:52:55] [I] === System Options === +[12/28/2023-11:52:55] [I] Device: 0 +[12/28/2023-11:52:55] [I] DLACore: +[12/28/2023-11:52:55] [I] Plugins: +[12/28/2023-11:52:55] [I] === Inference Options === +[12/28/2023-11:52:55] [I] Batch: Explicit +[12/28/2023-11:52:55] [I] Input inference shapes: model +[12/28/2023-11:52:55] [I] Iterations: 10 +[12/28/2023-11:52:55] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-11:52:55] [I] Sleep time: 0ms +[12/28/2023-11:52:55] [I] Idle time: 0ms +[12/28/2023-11:52:55] [I] Streams: 1 +[12/28/2023-11:52:55] [I] ExposeDMA: Disabled +[12/28/2023-11:52:55] [I] Data transfers: Enabled +[12/28/2023-11:52:55] [I] Spin-wait: Disabled +[12/28/2023-11:52:55] [I] Multithreading: Disabled +[12/28/2023-11:52:55] [I] CUDA Graph: Disabled +[12/28/2023-11:52:55] [I] Separate profiling: Disabled +[12/28/2023-11:52:55] [I] Time Deserialize: Disabled +[12/28/2023-11:52:55] [I] Time Refit: Disabled +[12/28/2023-11:52:55] [I] NVTX verbosity: 0 +[12/28/2023-11:52:55] [I] Persistent Cache Ratio: 0 +[12/28/2023-11:52:55] [I] Inputs: +[12/28/2023-11:52:55] [I] === Reporting Options === +[12/28/2023-11:52:55] [I] Verbose: Disabled +[12/28/2023-11:52:55] [I] Averages: 100 inferences +[12/28/2023-11:52:55] [I] Percentiles: 90,95,99 +[12/28/2023-11:52:55] [I] Dump refittable layers:Disabled +[12/28/2023-11:52:55] [I] Dump output: Disabled +[12/28/2023-11:52:55] [I] Profile: Disabled +[12/28/2023-11:52:55] [I] Export timing to JSON file: +[12/28/2023-11:52:55] [I] Export output to JSON file: +[12/28/2023-11:52:55] [I] Export profile to JSON file: +[12/28/2023-11:52:55] [I] +[12/28/2023-11:52:55] [I] === Device Information === +[12/28/2023-11:52:55] [I] Selected Device: Orin +[12/28/2023-11:52:55] [I] Compute Capability: 8.7 
+[12/28/2023-11:52:55] [I] SMs: 8 +[12/28/2023-11:52:55] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-11:52:55] [I] Device Global Memory: 7471 MiB +[12/28/2023-11:52:55] [I] Shared Memory per SM: 164 KiB +[12/28/2023-11:52:55] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-11:52:55] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-11:52:55] [I] +[12/28/2023-11:52:55] [I] TensorRT version: 8.5.2 +[12/28/2023-11:52:56] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 3001 (MiB) +[12/28/2023-11:52:59] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +283, now: CPU 574, GPU 3306 (MiB) +[12/28/2023-11:52:59] [I] Start parsing network model +[12/28/2023-11:53:02] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-11:53:02] [I] [TRT] Input filename: yolo_nas_pose_l_fp32.onnx +[12/28/2023-11:53:02] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-11:53:02] [I] [TRT] Opset version: 17 +[12/28/2023-11:53:02] [I] [TRT] Producer name: pytorch +[12/28/2023-11:53:02] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-11:53:02] [I] [TRT] Domain: +[12/28/2023-11:53:02] [I] [TRT] Model version: 0 +[12/28/2023-11:53:02] [I] [TRT] Doc string: +[12/28/2023-11:53:02] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-11:53:03] [I] Finish parsing network model +[12/28/2023-11:53:04] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/28/2023-11:53:04] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 455) [Constant] +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 456) [Constant] +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 457) 
[Constant] +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stem/conv/rbr_reparam/Conv + /model/backbone/stem/conv/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/downsample/rbr_reparam/Conv + /model/backbone/stage1/downsample/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv2/conv/Conv + /model/backbone/stage1/blocks/conv2/act/Relu || /model/backbone/stage1/blocks/conv1/conv/Conv + /model/backbone/stage1/blocks/conv1/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 15) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 23) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] COPY: 
/model/backbone/stage1/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv3/conv/Conv + /model/backbone/stage1/blocks/conv3/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_skip2/conv/Conv + /model/neck/neck2/reduce_skip2/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/downsample/rbr_reparam/Conv + /model/backbone/stage2/downsample/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/downsample/conv/Conv + /model/neck/neck2/downsample/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv2/conv/Conv + /model/backbone/stage2/blocks/conv2/act/Relu || /model/backbone/stage2/blocks/conv1/conv/Conv + /model/backbone/stage2/blocks/conv1/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 44) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + 
/model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 52) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 60) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv3/conv/Conv + /model/backbone/stage2/blocks/conv3/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip2/conv/Conv + 
/model/neck/neck1/reduce_skip2/act/Relu || /model/neck/neck2/reduce_skip1/conv/Conv + /model/neck/neck2/reduce_skip1/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/downsample/rbr_reparam/Conv + /model/backbone/stage3/downsample/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/downsample/conv/Conv + /model/neck/neck1/downsample/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv2/conv/Conv + /model/backbone/stage3/blocks/conv2/act/Relu || /model/backbone/stage3/blocks/conv1/conv/Conv + /model/backbone/stage3/blocks/conv1/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 83) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 91) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 99) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 107) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.4.alpha + (Unnamed Layer* 
115) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Add) +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add_output_0 copy +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add_output_0 copy +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv3/conv/Conv + /model/backbone/stage3/blocks/conv3/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip1/conv/Conv + /model/neck/neck1/reduce_skip1/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/downsample/rbr_reparam/Conv + /model/backbone/stage4/downsample/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv2/conv/Conv + /model/backbone/stage4/blocks/conv2/act/Relu || /model/backbone/stage4/blocks/conv1/conv/Conv + /model/backbone/stage4/blocks/conv1/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu 
+[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 134) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 142) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv3/conv/Conv + /model/backbone/stage4/blocks/conv3/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv1/conv/Conv + /model/backbone/context_module/cv1/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/cv1/act/Relu_output_0 copy 
+[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv2/conv/Conv + /model/backbone/context_module/cv2/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/conv/conv/Conv + /model/neck/neck1/conv/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck1/upsample/ConvTranspose +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_after_concat/conv/Conv + /model/neck/neck1/reduce_after_concat/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv2/conv/Conv + /model/neck/neck1/blocks/conv2/act/Relu || /model/neck/neck1/blocks/conv1/conv/Conv + /model/neck/neck1/blocks/conv1/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 171) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 179) [Shuffle] + 
/model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.2.alpha + (Unnamed Layer* 187) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.3.alpha + (Unnamed Layer* 195) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv3/conv/Conv + /model/neck/neck1/blocks/conv3/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/conv/conv/Conv + /model/neck/neck2/conv/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck2/upsample/ConvTranspose +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/reduce_skip1/act/Relu_output_0 copy 
+[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_after_concat/conv/Conv + /model/neck/neck2/reduce_after_concat/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv2/conv/Conv + /model/neck/neck2/blocks/conv2/act/Relu || /model/neck/neck2/blocks/conv1/conv/Conv + /model/neck/neck2/blocks/conv1/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 216) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 224) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 232) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.3.alpha + (Unnamed Layer* 240) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv3/conv/Conv + /model/neck/neck2/blocks/conv3/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/bbox_stem/seq/conv/Conv + /model/heads/head1/bbox_stem/seq/act/Relu || /model/heads/head1/pose_stem/seq/conv/Conv + /model/heads/head1/pose_stem/seq/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/conv/conv/Conv + /model/neck/neck3/conv/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head1/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head1/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv2/conv/Conv + /model/neck/neck3/blocks/conv2/act/Relu || /model/neck/neck3/blocks/conv1/conv/Conv + /model/neck/neck3/blocks/conv1/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/cls_pred/Conv +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_pred/Conv +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_pred/Conv +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 271) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + 
/model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 294) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv1/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv2/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 302) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv1/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv2/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 310) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv3/conv/Conv + /model/neck/neck3/blocks/conv3/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_stem/seq/conv/Conv + /model/heads/head2/pose_stem/seq/act/Relu || /model/heads/head2/bbox_stem/seq/conv/Conv 
+ /model/heads/head2/bbox_stem/seq/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/conv/conv/Conv + /model/neck/neck4/conv/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head2/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head2/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv2/conv/Conv + /model/neck/neck4/blocks/conv2/act/Relu || /model/neck/neck4/blocks/conv1/conv/Conv + /model/neck/neck4/blocks/conv1/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/cls_pred/Conv +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_pred/Conv +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_pred/Conv +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 341) [Shuffle] + 
/model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 364) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.2.alpha + (Unnamed Layer* 372) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv1/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv2/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.3.alpha + (Unnamed Layer* 380) [Shuffle] + 
/model/neck/neck4/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv3/conv/Conv + /model/neck/neck4/blocks/conv3/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/bbox_stem/seq/conv/Conv + /model/heads/head3/bbox_stem/seq/act/Relu || /model/heads/head3/pose_stem/seq/conv/Conv + /model/heads/head3/pose_stem/seq/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head3/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head3/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/cls_pred/Conv +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_pred/Conv +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.2/seq/act/Relu +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_pred/Conv +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] MYELIN: 
{ForeignNode[/model/heads/head1/Slice_1.../post_process/Reshape_2]} +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] NMS: batched_nms_26 +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 459) [NMS]_1_output[DevicetoShapeHostCopy] +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/28/2023-11:53:04] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] +[12/28/2023-11:53:10] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +444, now: CPU 1350, GPU 4042 (MiB) +[12/28/2023-11:53:11] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +83, GPU +74, now: CPU 1433, GPU 4116 (MiB) +[12/28/2023-11:53:11] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored. +[12/28/2023-12:57:56] [I] [TRT] Total Activation Memory: 8060146176 +[12/28/2023-12:57:56] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/28/2023-12:58:10] [I] [TRT] Total Host Persistent Memory: 376864 +[12/28/2023-12:58:10] [I] [TRT] Total Device Persistent Memory: 61440 +[12/28/2023-12:58:10] [I] [TRT] Total Scratch Memory: 134217728 +[12/28/2023-12:58:10] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 139 MiB, GPU 2131 MiB +[12/28/2023-12:58:10] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 209 steps to complete. +[12/28/2023-12:58:10] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 267.621ms to assign 16 blocks to 209 nodes requiring 160521216 bytes. 
+[12/28/2023-12:58:10] [I] [TRT] Total Activation Memory: 160521216 +[12/28/2023-12:58:16] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +1, GPU +0, now: CPU 1885, GPU 5668 (MiB) +[12/28/2023-12:58:16] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +104, GPU +128, now: CPU 104, GPU 128 (MiB) +[12/28/2023-12:58:17] [I] Engine built in 3922.11 sec. +[12/28/2023-12:58:17] [I] [TRT] Loaded engine size: 105 MiB +[12/28/2023-12:58:18] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1347, GPU 5215 (MiB) +[12/28/2023-12:58:18] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +103, now: CPU 0, GPU 103 (MiB) +[12/28/2023-12:58:18] [I] Engine deserialized in 0.277587 sec. +[12/28/2023-12:58:18] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1348, GPU 5215 (MiB) +[12/28/2023-12:58:18] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +154, now: CPU 0, GPU 257 (MiB) +[12/28/2023-12:58:18] [I] Setting persistentCacheLimit to 0 bytes. 
+[12/28/2023-12:58:18] [I] Using random values for input onnx::Cast_0 +[12/28/2023-12:58:18] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/28/2023-12:58:18] [I] Using random values for output graph2_flat_predictions +[12/28/2023-12:58:18] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/28/2023-12:58:18] [I] Starting inference +[12/28/2023-12:58:33] [I] Warmup completed 2 queries over 200 ms +[12/28/2023-12:58:33] [I] Timing trace has 437 queries over 15.0687 s +[12/28/2023-12:58:33] [I] +[12/28/2023-12:58:33] [I] === Trace details === +[12/28/2023-12:58:33] [I] Trace averages of 100 runs: +[12/28/2023-12:58:33] [I] Average on 100 runs - GPU latency: 34.2122 ms - Host latency: 34.3209 ms (enqueue 34.261 ms) +[12/28/2023-12:58:33] [I] Average on 100 runs - GPU latency: 34.6286 ms - Host latency: 34.7419 ms (enqueue 34.6733 ms) +[12/28/2023-12:58:33] [I] Average on 100 runs - GPU latency: 34.32 ms - Host latency: 34.4264 ms (enqueue 34.3737 ms) +[12/28/2023-12:58:33] [I] Average on 100 runs - GPU latency: 34.1213 ms - Host latency: 34.2258 ms (enqueue 34.1666 ms) +[12/28/2023-12:58:33] [I] +[12/28/2023-12:58:33] [I] === Performance summary === +[12/28/2023-12:58:33] [I] Throughput: 29.0005 qps +[12/28/2023-12:58:33] [I] Latency: min = 32.0405 ms, max = 46.5195 ms, mean = 34.4576 ms, median = 34.021 ms, percentile(90%) = 35.2378 ms, percentile(95%) = 37.8905 ms, percentile(99%) = 44.1553 ms +[12/28/2023-12:58:33] [I] Enqueue Time: min = 32.0081 ms, max = 46.4473 ms, mean = 34.3937 ms, median = 33.9883 ms, percentile(90%) = 35.166 ms, percentile(95%) = 37.6458 ms, percentile(99%) = 44.0703 ms +[12/28/2023-12:58:33] [I] H2D Latency: min = 0.0800781 ms, max = 0.119629 ms, mean = 0.0890405 ms, median = 0.0893555 ms, percentile(90%) = 0.0913086 ms, percentile(95%) = 0.0917969 ms, percentile(99%) = 0.103516 ms +[12/28/2023-12:58:33] [I] GPU Compute Time: min = 31.9365 ms, max = 46.3989 ms, mean = 34.3493 ms, 
median = 33.9121 ms, percentile(90%) = 35.1299 ms, percentile(95%) = 37.7999 ms, percentile(99%) = 44.0645 ms +[12/28/2023-12:58:33] [I] D2H Latency: min = 0.00292969 ms, max = 0.0566406 ms, mean = 0.0192997 ms, median = 0.0175781 ms, percentile(90%) = 0.0292969 ms, percentile(95%) = 0.03125 ms, percentile(99%) = 0.0371094 ms +[12/28/2023-12:58:33] [I] Total Host Walltime: 15.0687 s +[12/28/2023-12:58:33] [I] Total GPU Compute Time: 15.0106 s +[12/28/2023-12:58:33] [I] Explanations of the performance metrics are printed in the verbose logs. +[12/28/2023-12:58:33] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_l_fp32.onnx --fp16 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_l_fp32.onnx.fp16.engine diff --git a/yolo_nas_pose_l_fp32.onnx.int8.engine b/yolo_nas_pose_l_fp32.onnx.int8.engine new file mode 100644 index 0000000000000000000000000000000000000000..fd35b97c4aae74be5d3b9c8b80e221d89a0212a9 --- /dev/null +++ b/yolo_nas_pose_l_fp32.onnx.int8.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48058cfe18fb0bfcfbf2822e4ac65f9801a49f738a3d6718eff46f2237aeee33 +size 57073047 diff --git a/yolo_nas_pose_l_fp32.onnx.int8.engine.err b/yolo_nas_pose_l_fp32.onnx.int8.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..c03874673f2028dc1bc43bc3bf90a1dae4a6ee18 --- /dev/null +++ b/yolo_nas_pose_l_fp32.onnx.int8.engine.err @@ -0,0 +1,256 @@ +[12/28/2023-15:10:20] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/28/2023-15:10:20] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-15:10:21] [W] [TRT] Calibrator is not being used. Users must provide dynamic range for all tensors that are not Int32 or Bool. 
+[12/28/2023-15:17:13] [W] [TRT] Tactic Device request: 4720MB Available: 2708MB. Device memory is insufficient to use tactic. +[12/28/2023-15:17:13] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4720 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:17:14] [W] [TRT] Tactic Device request: 4720MB Available: 2708MB. Device memory is insufficient to use tactic. +[12/28/2023-15:17:14] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4720 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:17:14] [W] [TRT] Tactic Device request: 4720MB Available: 2707MB. Device memory is insufficient to use tactic. +[12/28/2023-15:17:14] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4720 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:17:21] [W] [TRT] Tactic Device request: 4711MB Available: 2698MB. Device memory is insufficient to use tactic. +[12/28/2023-15:17:21] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:17:22] [W] [TRT] Tactic Device request: 4711MB Available: 2697MB. Device memory is insufficient to use tactic. +[12/28/2023-15:17:22] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4711 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:17:22] [W] [TRT] Tactic Device request: 4711MB Available: 2702MB. Device memory is insufficient to use tactic. 
+[12/28/2023-15:17:22] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:17:29] [W] [TRT] Tactic Device request: 4711MB Available: 2688MB. Device memory is insufficient to use tactic. +[12/28/2023-15:17:29] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:17:30] [W] [TRT] Tactic Device request: 4711MB Available: 2689MB. Device memory is insufficient to use tactic. +[12/28/2023-15:17:30] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4711 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:17:30] [W] [TRT] Tactic Device request: 4711MB Available: 2689MB. Device memory is insufficient to use tactic. +[12/28/2023-15:17:30] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:21:24] [W] [TRT] Tactic Device request: 4711MB Available: 2366MB. Device memory is insufficient to use tactic. +[12/28/2023-15:21:24] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:21:24] [W] [TRT] Tactic Device request: 4711MB Available: 2366MB. Device memory is insufficient to use tactic. +[12/28/2023-15:21:24] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4711 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-15:21:24] [W] [TRT] Tactic Device request: 4711MB Available: 2366MB. Device memory is insufficient to use tactic. +[12/28/2023-15:21:24] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:21:39] [W] [TRT] Tactic Device request: 6275MB Available: 2366MB. Device memory is insufficient to use tactic. +[12/28/2023-15:21:39] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6275 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:21:39] [W] [TRT] Tactic Device request: 6275MB Available: 2366MB. Device memory is insufficient to use tactic. +[12/28/2023-15:21:39] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6275 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:21:39] [W] [TRT] Tactic Device request: 6275MB Available: 2366MB. Device memory is insufficient to use tactic. +[12/28/2023-15:21:39] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6275 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:27:20] [W] [TRT] Tactic Device request: 7056MB Available: 2259MB. Device memory is insufficient to use tactic. +[12/28/2023-15:27:20] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:27:20] [W] [TRT] Tactic Device request: 7056MB Available: 2257MB. Device memory is insufficient to use tactic. 
+[12/28/2023-15:27:20] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 7056 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:27:20] [W] [TRT] Tactic Device request: 7056MB Available: 2257MB. Device memory is insufficient to use tactic. +[12/28/2023-15:27:20] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:27:36] [W] [TRT] Tactic Device request: 6354MB Available: 2258MB. Device memory is insufficient to use tactic. +[12/28/2023-15:27:36] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6354 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:27:36] [W] [TRT] Tactic Device request: 6354MB Available: 2258MB. Device memory is insufficient to use tactic. +[12/28/2023-15:27:36] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6354 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:27:36] [W] [TRT] Tactic Device request: 6354MB Available: 2258MB. Device memory is insufficient to use tactic. +[12/28/2023-15:27:36] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6354 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:35:28] [W] [TRT] Tactic Device request: 2394MB Available: 2120MB. Device memory is insufficient to use tactic. +[12/28/2023-15:35:28] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-15:35:28] [W] [TRT] Tactic Device request: 2394MB Available: 2120MB. Device memory is insufficient to use tactic. +[12/28/2023-15:35:28] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2394 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:35:28] [W] [TRT] Tactic Device request: 2394MB Available: 2120MB. Device memory is insufficient to use tactic. +[12/28/2023-15:35:28] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:35:29] [W] [TRT] Tactic Device request: 2392MB Available: 2120MB. Device memory is insufficient to use tactic. +[12/28/2023-15:35:29] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:35:29] [W] [TRT] Tactic Device request: 2392MB Available: 2120MB. Device memory is insufficient to use tactic. +[12/28/2023-15:35:29] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2392 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:35:29] [W] [TRT] Tactic Device request: 2392MB Available: 2120MB. Device memory is insufficient to use tactic. +[12/28/2023-15:35:29] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:35:42] [W] [TRT] Tactic Device request: 6540MB Available: 2103MB. Device memory is insufficient to use tactic. 
+[12/28/2023-15:35:42] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6540 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:35:42] [W] [TRT] Tactic Device request: 6540MB Available: 2103MB. Device memory is insufficient to use tactic. +[12/28/2023-15:35:42] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6540 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:35:42] [W] [TRT] Tactic Device request: 6540MB Available: 2103MB. Device memory is insufficient to use tactic. +[12/28/2023-15:35:42] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6540 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:35:46] [W] [TRT] Tactic Device request: 2191MB Available: 2103MB. Device memory is insufficient to use tactic. +[12/28/2023-15:35:46] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2191 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:35:47] [W] [TRT] Tactic Device request: 2191MB Available: 2102MB. Device memory is insufficient to use tactic. +[12/28/2023-15:35:47] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2191 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:35:48] [W] [TRT] Tactic Device request: 2191MB Available: 2100MB. Device memory is insufficient to use tactic. +[12/28/2023-15:35:48] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2191 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-15:35:53] [W] [TRT] Tactic Device request: 2190MB Available: 2102MB. Device memory is insufficient to use tactic. +[12/28/2023-15:35:53] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2190 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:35:54] [W] [TRT] Tactic Device request: 2190MB Available: 2099MB. Device memory is insufficient to use tactic. +[12/28/2023-15:35:54] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2190 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:35:55] [W] [TRT] Tactic Device request: 2190MB Available: 2100MB. Device memory is insufficient to use tactic. +[12/28/2023-15:35:55] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2190 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:36:01] [W] [TRT] Tactic Device request: 2190MB Available: 2100MB. Device memory is insufficient to use tactic. +[12/28/2023-15:36:01] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2190 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:36:02] [W] [TRT] Tactic Device request: 2190MB Available: 2100MB. Device memory is insufficient to use tactic. +[12/28/2023-15:36:02] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2190 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:36:03] [W] [TRT] Tactic Device request: 2190MB Available: 2099MB. Device memory is insufficient to use tactic. 
+[12/28/2023-15:36:03] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2190 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:40:03] [W] [TRT] Tactic Device request: 2457MB Available: 2339MB. Device memory is insufficient to use tactic. +[12/28/2023-15:40:03] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2457 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:40:03] [W] [TRT] Tactic Device request: 2457MB Available: 2339MB. Device memory is insufficient to use tactic. +[12/28/2023-15:40:03] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:40:23] [W] [TRT] Tactic Device request: 3587MB Available: 2337MB. Device memory is insufficient to use tactic. +[12/28/2023-15:40:23] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:40:23] [W] [TRT] Tactic Device request: 3587MB Available: 2337MB. Device memory is insufficient to use tactic. +[12/28/2023-15:40:23] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3587 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:40:23] [W] [TRT] Tactic Device request: 3587MB Available: 2337MB. Device memory is insufficient to use tactic. +[12/28/2023-15:40:23] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-15:47:03] [W] [TRT] Tactic Device request: 3556MB Available: 2223MB. Device memory is insufficient to use tactic. +[12/28/2023-15:47:03] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:47:03] [W] [TRT] Tactic Device request: 3556MB Available: 2223MB. Device memory is insufficient to use tactic. +[12/28/2023-15:47:03] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3556 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:47:03] [W] [TRT] Tactic Device request: 3556MB Available: 2223MB. Device memory is insufficient to use tactic. +[12/28/2023-15:47:03] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:47:05] [W] [TRT] Tactic Device request: 3140MB Available: 2192MB. Device memory is insufficient to use tactic. +[12/28/2023-15:47:05] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3140 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:47:05] [W] [TRT] Tactic Device request: 3140MB Available: 2191MB. Device memory is insufficient to use tactic. +[12/28/2023-15:47:05] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3140 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:47:06] [W] [TRT] Tactic Device request: 3140MB Available: 2191MB. Device memory is insufficient to use tactic. 
+[12/28/2023-15:47:06] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3140 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:53:21] [W] [TRT] Tactic Device request: 3161MB Available: 2119MB. Device memory is insufficient to use tactic. +[12/28/2023-15:53:21] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3161 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:53:21] [W] [TRT] Tactic Device request: 3161MB Available: 2119MB. Device memory is insufficient to use tactic. +[12/28/2023-15:53:21] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3161 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:53:21] [W] [TRT] Tactic Device request: 3161MB Available: 2119MB. Device memory is insufficient to use tactic. +[12/28/2023-15:53:21] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3161 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:53:26] [W] [TRT] Tactic Device request: 4189MB Available: 2117MB. Device memory is insufficient to use tactic. +[12/28/2023-15:53:26] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4189 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:53:27] [W] [TRT] Tactic Device request: 4189MB Available: 2101MB. Device memory is insufficient to use tactic. +[12/28/2023-15:53:27] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4189 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-15:53:27] [W] [TRT] Tactic Device request: 4189MB Available: 2101MB. Device memory is insufficient to use tactic. +[12/28/2023-15:53:27] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4189 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:53:33] [W] [TRT] Tactic Device request: 4186MB Available: 2099MB. Device memory is insufficient to use tactic. +[12/28/2023-15:53:33] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4186 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:53:33] [W] [TRT] Tactic Device request: 4186MB Available: 2099MB. Device memory is insufficient to use tactic. +[12/28/2023-15:53:33] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4186 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-15:53:33] [W] [TRT] Tactic Device request: 4186MB Available: 2099MB. Device memory is insufficient to use tactic. +[12/28/2023-15:53:34] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4186 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:00:19] [W] [TRT] Tactic Device request: 4764MB Available: 2172MB. Device memory is insufficient to use tactic. +[12/28/2023-16:00:19] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4764 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:00:19] [W] [TRT] Tactic Device request: 4764MB Available: 2170MB. Device memory is insufficient to use tactic. 
+[12/28/2023-16:00:19] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4764 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:00:19] [W] [TRT] Tactic Device request: 4764MB Available: 2170MB. Device memory is insufficient to use tactic. +[12/28/2023-16:00:19] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4764 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:00:26] [W] [TRT] Tactic Device request: 4244MB Available: 2030MB. Device memory is insufficient to use tactic. +[12/28/2023-16:00:26] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4244 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:00:27] [W] [TRT] Tactic Device request: 4244MB Available: 1979MB. Device memory is insufficient to use tactic. +[12/28/2023-16:00:27] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4244 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:00:28] [W] [TRT] Tactic Device request: 4244MB Available: 1979MB. Device memory is insufficient to use tactic. +[12/28/2023-16:00:28] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4244 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:00:35] [W] [TRT] Tactic Device request: 4241MB Available: 1980MB. Device memory is insufficient to use tactic. +[12/28/2023-16:00:35] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4241 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-16:00:36] [W] [TRT] Tactic Device request: 4241MB Available: 1980MB. Device memory is insufficient to use tactic. +[12/28/2023-16:00:36] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4241 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:00:37] [W] [TRT] Tactic Device request: 4241MB Available: 1979MB. Device memory is insufficient to use tactic. +[12/28/2023-16:00:37] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4241 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:08:17] [W] [TRT] Tactic Device request: 3270MB Available: 1808MB. Device memory is insufficient to use tactic. +[12/28/2023-16:08:17] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3270 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:08:17] [W] [TRT] Tactic Device request: 3270MB Available: 1808MB. Device memory is insufficient to use tactic. +[12/28/2023-16:08:17] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3270 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:08:17] [W] [TRT] Tactic Device request: 3270MB Available: 1808MB. Device memory is insufficient to use tactic. +[12/28/2023-16:08:17] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3270 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:08:21] [W] [TRT] Tactic Device request: 4377MB Available: 1804MB. Device memory is insufficient to use tactic. 
+[12/28/2023-16:08:21] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4377 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:08:21] [W] [TRT] Tactic Device request: 2185MB Available: 1804MB. Device memory is insufficient to use tactic. +[12/28/2023-16:08:21] [W] [TRT] Skipping tactic 4 due to insufficient memory on requested size of 2185 detected for tactic 0x0000000000000005. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:08:21] [W] [TRT] Tactic Device request: 4377MB Available: 1804MB. Device memory is insufficient to use tactic. +[12/28/2023-16:08:21] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4377 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:08:21] [W] [TRT] Tactic Device request: 2185MB Available: 1804MB. Device memory is insufficient to use tactic. +[12/28/2023-16:08:21] [W] [TRT] Skipping tactic 10 due to insufficient memory on requested size of 2185 detected for tactic 0x000000000000003d. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:08:22] [W] [TRT] Tactic Device request: 4377MB Available: 1803MB. Device memory is insufficient to use tactic. +[12/28/2023-16:08:22] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4377 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:08:22] [W] [TRT] Tactic Device request: 2185MB Available: 1803MB. Device memory is insufficient to use tactic. +[12/28/2023-16:08:22] [W] [TRT] Skipping tactic 16 due to insufficient memory on requested size of 2185 detected for tactic 0x0000000000000075. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-16:08:31] [W] [TRT] Tactic Device request: 4376MB Available: 1803MB. Device memory is insufficient to use tactic. +[12/28/2023-16:08:31] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4376 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:08:31] [W] [TRT] Tactic Device request: 2184MB Available: 1803MB. Device memory is insufficient to use tactic. +[12/28/2023-16:08:31] [W] [TRT] Skipping tactic 4 due to insufficient memory on requested size of 2184 detected for tactic 0x0000000000000005. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:08:31] [W] [TRT] Tactic Device request: 4376MB Available: 1803MB. Device memory is insufficient to use tactic. +[12/28/2023-16:08:31] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4376 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:08:31] [W] [TRT] Tactic Device request: 2184MB Available: 1803MB. Device memory is insufficient to use tactic. +[12/28/2023-16:08:31] [W] [TRT] Skipping tactic 10 due to insufficient memory on requested size of 2184 detected for tactic 0x000000000000003d. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:08:32] [W] [TRT] Tactic Device request: 4376MB Available: 1803MB. Device memory is insufficient to use tactic. +[12/28/2023-16:08:32] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4376 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:08:32] [W] [TRT] Tactic Device request: 2184MB Available: 1803MB. Device memory is insufficient to use tactic. 
+[12/28/2023-16:08:32] [W] [TRT] Skipping tactic 16 due to insufficient memory on requested size of 2184 detected for tactic 0x0000000000000075. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-16:09:30] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. +[12/28/2023-16:09:30] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/28/2023-16:09:30] [W] * GPU compute time is unstable, with coefficient of variance = 6.6527%. +[12/28/2023-16:09:30] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. diff --git a/yolo_nas_pose_l_fp32.onnx.int8.engine.log b/yolo_nas_pose_l_fp32.onnx.int8.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..bca2700513e0bc3c5edb8ac72d00c779203492bc --- /dev/null +++ b/yolo_nas_pose_l_fp32.onnx.int8.engine.log @@ -0,0 +1,332 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_l_fp32.onnx --int8 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_l_fp32.onnx.int8.engine +[12/28/2023-15:10:07] [I] === Model Options === +[12/28/2023-15:10:07] [I] Format: ONNX +[12/28/2023-15:10:07] [I] Model: yolo_nas_pose_l_fp32.onnx +[12/28/2023-15:10:07] [I] Output: +[12/28/2023-15:10:07] [I] === Build Options === +[12/28/2023-15:10:07] [I] Max batch: explicit batch +[12/28/2023-15:10:07] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-15:10:07] [I] minTiming: 1 +[12/28/2023-15:10:07] [I] avgTiming: 8 +[12/28/2023-15:10:07] [I] Precision: FP32+INT8 +[12/28/2023-15:10:07] [I] LayerPrecisions: +[12/28/2023-15:10:07] [I] Calibration: Dynamic +[12/28/2023-15:10:07] [I] Refit: Disabled +[12/28/2023-15:10:07] [I] Sparsity: Disabled +[12/28/2023-15:10:07] [I] Safe mode: Disabled +[12/28/2023-15:10:07] [I] 
DirectIO mode: Disabled +[12/28/2023-15:10:07] [I] Restricted mode: Disabled +[12/28/2023-15:10:07] [I] Build only: Disabled +[12/28/2023-15:10:07] [I] Save engine: yolo_nas_pose_l_fp32.onnx.int8.engine +[12/28/2023-15:10:07] [I] Load engine: +[12/28/2023-15:10:07] [I] Profiling verbosity: 0 +[12/28/2023-15:10:07] [I] Tactic sources: Using default tactic sources +[12/28/2023-15:10:07] [I] timingCacheMode: local +[12/28/2023-15:10:07] [I] timingCacheFile: +[12/28/2023-15:10:07] [I] Heuristic: Disabled +[12/28/2023-15:10:07] [I] Preview Features: Use default preview flags. +[12/28/2023-15:10:07] [I] Input(s)s format: fp32:CHW +[12/28/2023-15:10:07] [I] Output(s)s format: fp32:CHW +[12/28/2023-15:10:07] [I] Input build shapes: model +[12/28/2023-15:10:07] [I] Input calibration shapes: model +[12/28/2023-15:10:07] [I] === System Options === +[12/28/2023-15:10:07] [I] Device: 0 +[12/28/2023-15:10:07] [I] DLACore: +[12/28/2023-15:10:07] [I] Plugins: +[12/28/2023-15:10:07] [I] === Inference Options === +[12/28/2023-15:10:07] [I] Batch: Explicit +[12/28/2023-15:10:07] [I] Input inference shapes: model +[12/28/2023-15:10:07] [I] Iterations: 10 +[12/28/2023-15:10:07] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-15:10:07] [I] Sleep time: 0ms +[12/28/2023-15:10:07] [I] Idle time: 0ms +[12/28/2023-15:10:07] [I] Streams: 1 +[12/28/2023-15:10:07] [I] ExposeDMA: Disabled +[12/28/2023-15:10:07] [I] Data transfers: Enabled +[12/28/2023-15:10:07] [I] Spin-wait: Disabled +[12/28/2023-15:10:07] [I] Multithreading: Disabled +[12/28/2023-15:10:07] [I] CUDA Graph: Disabled +[12/28/2023-15:10:07] [I] Separate profiling: Disabled +[12/28/2023-15:10:07] [I] Time Deserialize: Disabled +[12/28/2023-15:10:07] [I] Time Refit: Disabled +[12/28/2023-15:10:07] [I] NVTX verbosity: 0 +[12/28/2023-15:10:07] [I] Persistent Cache Ratio: 0 +[12/28/2023-15:10:07] [I] Inputs: +[12/28/2023-15:10:07] [I] === Reporting Options === +[12/28/2023-15:10:07] [I] Verbose: Disabled +[12/28/2023-15:10:07] [I] 
Averages: 100 inferences +[12/28/2023-15:10:07] [I] Percentiles: 90,95,99 +[12/28/2023-15:10:07] [I] Dump refittable layers:Disabled +[12/28/2023-15:10:07] [I] Dump output: Disabled +[12/28/2023-15:10:07] [I] Profile: Disabled +[12/28/2023-15:10:07] [I] Export timing to JSON file: +[12/28/2023-15:10:07] [I] Export output to JSON file: +[12/28/2023-15:10:07] [I] Export profile to JSON file: +[12/28/2023-15:10:07] [I] +[12/28/2023-15:10:07] [I] === Device Information === +[12/28/2023-15:10:07] [I] Selected Device: Orin +[12/28/2023-15:10:07] [I] Compute Capability: 8.7 +[12/28/2023-15:10:07] [I] SMs: 8 +[12/28/2023-15:10:07] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-15:10:07] [I] Device Global Memory: 7471 MiB +[12/28/2023-15:10:07] [I] Shared Memory per SM: 164 KiB +[12/28/2023-15:10:07] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-15:10:07] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-15:10:07] [I] +[12/28/2023-15:10:07] [I] TensorRT version: 8.5.2 +[12/28/2023-15:10:12] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 3019 (MiB) +[12/28/2023-15:10:17] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +286, now: CPU 574, GPU 3326 (MiB) +[12/28/2023-15:10:17] [I] Start parsing network model +[12/28/2023-15:10:20] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-15:10:20] [I] [TRT] Input filename: yolo_nas_pose_l_fp32.onnx +[12/28/2023-15:10:20] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-15:10:20] [I] [TRT] Opset version: 17 +[12/28/2023-15:10:20] [I] [TRT] Producer name: pytorch +[12/28/2023-15:10:20] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-15:10:20] [I] [TRT] Domain: +[12/28/2023-15:10:20] [I] [TRT] Model version: 0 +[12/28/2023-15:10:20] [I] [TRT] Doc string: +[12/28/2023-15:10:20] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-15:10:21] [I] Finish parsing network model +[12/28/2023-15:10:21] [I] FP32 and INT8 
precisions have been specified - more performance might be enabled by additionally specifying --fp16 or --best +[12/28/2023-15:10:21] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/28/2023-15:10:21] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 455) [Constant] +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 456) [Constant] +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 457) [Constant] +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stem/conv/rbr_reparam/Conv + /model/backbone/stem/conv/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/downsample/rbr_reparam/Conv + /model/backbone/stage1/downsample/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv2/conv/Conv + /model/backbone/stage1/blocks/conv2/act/Relu || /model/backbone/stage1/blocks/conv1/conv/Conv + /model/backbone/stage1/blocks/conv1/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 15) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-15:10:21] 
[I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 23) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv3/conv/Conv + /model/backbone/stage1/blocks/conv3/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_skip2/conv/Conv + /model/neck/neck2/reduce_skip2/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/downsample/rbr_reparam/Conv + /model/backbone/stage2/downsample/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/downsample/conv/Conv + /model/neck/neck2/downsample/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv2/conv/Conv + /model/backbone/stage2/blocks/conv2/act/Relu || /model/backbone/stage2/blocks/conv1/conv/Conv + /model/backbone/stage2/blocks/conv1/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + 
/model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 44) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 52) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 60) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] COPY: 
/model/backbone/stage2/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv3/conv/Conv + /model/backbone/stage2/blocks/conv3/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip2/conv/Conv + /model/neck/neck1/reduce_skip2/act/Relu || /model/neck/neck2/reduce_skip1/conv/Conv + /model/neck/neck2/reduce_skip1/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/downsample/rbr_reparam/Conv + /model/backbone/stage3/downsample/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/downsample/conv/Conv + /model/neck/neck1/downsample/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv2/conv/Conv + /model/backbone/stage3/blocks/conv2/act/Relu || /model/backbone/stage3/blocks/conv1/conv/Conv + /model/backbone/stage3/blocks/conv1/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 83) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 91) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 99) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 
107) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.4.alpha + (Unnamed Layer* 115) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Add) +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add_output_0 copy +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add_output_0 copy +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv3/conv/Conv + /model/backbone/stage3/blocks/conv3/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip1/conv/Conv + /model/neck/neck1/reduce_skip1/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/downsample/rbr_reparam/Conv + /model/backbone/stage4/downsample/nonlinearity/Relu 
+[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv2/conv/Conv + /model/backbone/stage4/blocks/conv2/act/Relu || /model/backbone/stage4/blocks/conv1/conv/Conv + /model/backbone/stage4/blocks/conv1/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 134) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 142) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-15:10:21] 
[I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv3/conv/Conv + /model/backbone/stage4/blocks/conv3/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv1/conv/Conv + /model/backbone/context_module/cv1/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/cv1/act/Relu_output_0 copy +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv2/conv/Conv + /model/backbone/context_module/cv2/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/conv/conv/Conv + /model/neck/neck1/conv/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck1/upsample/ConvTranspose +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_after_concat/conv/Conv + /model/neck/neck1/reduce_after_concat/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv2/conv/Conv + /model/neck/neck1/blocks/conv2/act/Relu || /model/neck/neck1/blocks/conv1/conv/Conv + /model/neck/neck1/blocks/conv1/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 171) [Shuffle] + 
/model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 179) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.2.alpha + (Unnamed Layer* 187) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.3.alpha + (Unnamed Layer* 195) [Shuffle] + 
/model/neck/neck1/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv3/conv/Conv + /model/neck/neck1/blocks/conv3/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/conv/conv/Conv + /model/neck/neck2/conv/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck2/upsample/ConvTranspose +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/reduce_skip1/act/Relu_output_0 copy +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_after_concat/conv/Conv + /model/neck/neck2/reduce_after_concat/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv2/conv/Conv + /model/neck/neck2/blocks/conv2/act/Relu || /model/neck/neck2/blocks/conv1/conv/Conv + /model/neck/neck2/blocks/conv1/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 216) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 224) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 232) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.3.alpha + (Unnamed Layer* 240) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv3/conv/Conv + /model/neck/neck2/blocks/conv3/act/Relu +[12/28/2023-15:10:21] [I] [TRT] 
[GpuLayer] CONVOLUTION: /model/heads/head1/bbox_stem/seq/conv/Conv + /model/heads/head1/bbox_stem/seq/act/Relu || /model/heads/head1/pose_stem/seq/conv/Conv + /model/heads/head1/pose_stem/seq/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/conv/conv/Conv + /model/neck/neck3/conv/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head1/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head1/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv2/conv/Conv + /model/neck/neck3/blocks/conv2/act/Relu || /model/neck/neck3/blocks/conv1/conv/Conv + /model/neck/neck3/blocks/conv1/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/cls_pred/Conv +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_pred/Conv +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_pred/Conv +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax 
+[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 271) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 294) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv1/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv2/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 302) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv1/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv2/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] 
POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 310) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv3/conv/Conv + /model/neck/neck3/blocks/conv3/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_stem/seq/conv/Conv + /model/heads/head2/pose_stem/seq/act/Relu || /model/heads/head2/bbox_stem/seq/conv/Conv + /model/heads/head2/bbox_stem/seq/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/conv/conv/Conv + /model/neck/neck4/conv/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head2/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head2/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv2/conv/Conv + /model/neck/neck4/blocks/conv2/act/Relu || /model/neck/neck4/blocks/conv1/conv/Conv + /model/neck/neck4/blocks/conv1/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/cls_pred/Conv +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_pred/Conv +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + 
/model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_pred/Conv +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 341) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 364) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.2.alpha + (Unnamed Layer* 372) [Shuffle] + 
/model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv1/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv2/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.3.alpha + (Unnamed Layer* 380) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv3/conv/Conv + /model/neck/neck4/blocks/conv3/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/bbox_stem/seq/conv/Conv + /model/heads/head3/bbox_stem/seq/act/Relu || /model/heads/head3/pose_stem/seq/conv/Conv + /model/heads/head3/pose_stem/seq/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head3/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head3/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/cls_pred/Conv +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_pred/Conv +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv + 
/model/heads/head3/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.2/seq/act/Relu +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_pred/Conv +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice_1.../post_process/Reshape_2]} +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] NMS: batched_nms_26 +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 459) [NMS]_1_output[DevicetoShapeHostCopy] +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/28/2023-15:10:21] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] +[12/28/2023-15:10:35] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +335, now: CPU 1351, GPU 3954 (MiB) +[12/28/2023-15:10:37] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +82, GPU +63, now: CPU 1433, GPU 4017 (MiB) +[12/28/2023-15:10:37] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored. +[12/28/2023-16:08:54] [I] [TRT] Total Activation Memory: 7959592448 +[12/28/2023-16:08:54] [I] [TRT] Detected 1 inputs and 1 output network tensors. 
+[12/28/2023-16:09:08] [I] [TRT] Total Host Persistent Memory: 331808 +[12/28/2023-16:09:08] [I] [TRT] Total Device Persistent Memory: 38912 +[12/28/2023-16:09:08] [I] [TRT] Total Scratch Memory: 134217728 +[12/28/2023-16:09:08] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 88 MiB, GPU 2461 MiB +[12/28/2023-16:09:08] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 160 steps to complete. +[12/28/2023-16:09:08] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 55.0722ms to assign 13 blocks to 160 nodes requiring 147361280 bytes. +[12/28/2023-16:09:08] [I] [TRT] Total Activation Memory: 147361280 +[12/28/2023-16:09:13] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU -1, now: CPU 1835, GPU 5360 (MiB) +[12/28/2023-16:09:13] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +52, GPU +64, now: CPU 52, GPU 64 (MiB) +[12/28/2023-16:09:14] [I] Engine built in 3546.25 sec. +[12/28/2023-16:09:14] [I] [TRT] Loaded engine size: 54 MiB +[12/28/2023-16:09:15] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1299, GPU 5007 (MiB) +[12/28/2023-16:09:15] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +52, now: CPU 0, GPU 52 (MiB) +[12/28/2023-16:09:15] [I] Engine deserialized in 0.128069 sec. +[12/28/2023-16:09:15] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +1, GPU +0, now: CPU 1300, GPU 5007 (MiB) +[12/28/2023-16:09:15] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +140, now: CPU 0, GPU 192 (MiB) +[12/28/2023-16:09:15] [I] Setting persistentCacheLimit to 0 bytes. 
+[12/28/2023-16:09:15] [I] Using random values for input onnx::Cast_0 +[12/28/2023-16:09:15] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/28/2023-16:09:15] [I] Using random values for output graph2_flat_predictions +[12/28/2023-16:09:15] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/28/2023-16:09:15] [I] Starting inference +[12/28/2023-16:09:30] [I] Warmup completed 3 queries over 200 ms +[12/28/2023-16:09:30] [I] Timing trace has 720 queries over 15.0291 s +[12/28/2023-16:09:30] [I] +[12/28/2023-16:09:30] [I] === Trace details === +[12/28/2023-16:09:30] [I] Trace averages of 100 runs: +[12/28/2023-16:09:30] [I] Average on 100 runs - GPU latency: 20.9331 ms - Host latency: 21.0491 ms (enqueue 20.9849 ms) +[12/28/2023-16:09:30] [I] Average on 100 runs - GPU latency: 20.3949 ms - Host latency: 20.5041 ms (enqueue 20.4581 ms) +[12/28/2023-16:09:30] [I] Average on 100 runs - GPU latency: 20.9551 ms - Host latency: 21.0733 ms (enqueue 21.0198 ms) +[12/28/2023-16:09:30] [I] Average on 100 runs - GPU latency: 20.8394 ms - Host latency: 20.9537 ms (enqueue 20.8945 ms) +[12/28/2023-16:09:30] [I] Average on 100 runs - GPU latency: 20.4666 ms - Host latency: 20.5773 ms (enqueue 20.5432 ms) +[12/28/2023-16:09:30] [I] Average on 100 runs - GPU latency: 20.689 ms - Host latency: 20.8023 ms (enqueue 20.7502 ms) +[12/28/2023-16:09:30] [I] Average on 100 runs - GPU latency: 20.7472 ms - Host latency: 20.8614 ms (enqueue 20.8134 ms) +[12/28/2023-16:09:30] [I] +[12/28/2023-16:09:30] [I] === Performance summary === +[12/28/2023-16:09:30] [I] Throughput: 47.9071 qps +[12/28/2023-16:09:30] [I] Latency: min = 19.4119 ms, max = 30.8398 ms, mean = 20.8421 ms, median = 20.7168 ms, percentile(90%) = 21.5566 ms, percentile(95%) = 22.2598 ms, percentile(99%) = 28.5723 ms +[12/28/2023-16:09:30] [I] Enqueue Time: min = 19.3838 ms, max = 30.7852 ms, mean = 20.7908 ms, median = 20.6725 ms, percentile(90%) = 21.4995 ms, 
percentile(95%) = 22.1094 ms, percentile(99%) = 28.5068 ms +[12/28/2023-16:09:30] [I] H2D Latency: min = 0.0800781 ms, max = 0.133301 ms, mean = 0.0950138 ms, median = 0.0957031 ms, percentile(90%) = 0.0981445 ms, percentile(95%) = 0.0986328 ms, percentile(99%) = 0.0996094 ms +[12/28/2023-16:09:30] [I] GPU Compute Time: min = 19.304 ms, max = 30.7158 ms, mean = 20.7286 ms, median = 20.5989 ms, percentile(90%) = 21.4419 ms, percentile(95%) = 22.1621 ms, percentile(99%) = 28.4521 ms +[12/28/2023-16:09:30] [I] D2H Latency: min = 0.00292969 ms, max = 0.0688477 ms, mean = 0.0184459 ms, median = 0.0166016 ms, percentile(90%) = 0.0273438 ms, percentile(95%) = 0.0288086 ms, percentile(99%) = 0.0444336 ms +[12/28/2023-16:09:30] [I] Total Host Walltime: 15.0291 s +[12/28/2023-16:09:30] [I] Total GPU Compute Time: 14.9246 s +[12/28/2023-16:09:30] [I] Explanations of the performance metrics are printed in the verbose logs. +[12/28/2023-16:09:30] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_l_fp32.onnx --int8 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_l_fp32.onnx.int8.engine diff --git a/yolo_nas_pose_l_fp32.onnx.usage.txt b/yolo_nas_pose_l_fp32.onnx.usage.txt new file mode 100644 index 0000000000000000000000000000000000000000..e3e9cd59106feed005d0fd166585e65c3737d315 --- /dev/null +++ b/yolo_nas_pose_l_fp32.onnx.usage.txt @@ -0,0 +1,58 @@ + +Model exported successfully to yolo_nas_pose_l_fp32.onnx +Model expects input image of shape [1, 3, 640, 640] +Input image dtype is torch.uint8 + +Exported model already contains preprocessing (normalization) step, so you don't need to do it manually. 
+Preprocessing steps to be applied to input image are: +Sequential( + (0): CastTensorTo(dtype=torch.float32) + (1): ChannelSelect(channels_indexes=tensor([2, 1, 0])) + (2): ApplyMeanStd(mean=[0.], scale=[255.]) +) + + +Exported model contains postprocessing (NMS) step with the following parameters: + num_pre_nms_predictions=1000 + max_predictions_per_image=10 + nms_threshold=0.5 + confidence_threshold=0.15 + output_predictions_format=flat + + +Exported model is in ONNX format and can be used with ONNXRuntime +To run inference with ONNXRuntime, please use the following code snippet: + + import onnxruntime + import numpy as np + session = onnxruntime.InferenceSession("yolo_nas_pose_l_fp32.onnx", providers=["CUDAExecutionProvider", "CPUExecutionProvider"]) + inputs = [o.name for o in session.get_inputs()] + outputs = [o.name for o in session.get_outputs()] + + example_input_image = np.zeros((1, 3, 640, 640)).astype(np.uint8) + predictions = session.run(outputs, {inputs[0]: example_input_image}) + +Exported model can also be used with TensorRT +To run inference with TensorRT, please see TensorRT deployment documentation +You can benchmark the model using the following code snippet: + + trtexec --onnx=yolo_nas_pose_l_fp32.onnx --fp16 --avgRuns=100 --duration=15 + + +Exported model has predictions in flat format: + +# flat_predictions is a 2D array of [N,K] shape +# Each row represents (image_index, x_min, y_min, x_max, y_max, confidence, joints...) 
+# Please note all values are floats, so you have to convert them to integers if needed + +[flat_predictions] = predictions +pred_bboxes = flat_predictions[:, 1:5] +pred_scores = flat_predictions[:, 5] +pred_joints = flat_predictions[:, 6:].reshape((len(pred_bboxes), -1, 3)) +for i in range(len(pred_bboxes)): + confidence = pred_scores[i] + x_min, y_min, x_max, y_max = pred_bboxes[i] + print(f"Detected pose with confidence={{confidence}}, x_min={{x_min}}, y_min={{y_min}}, x_max={{x_max}}, y_max={{y_max}}") + for joint_index, (x, y, confidence) in enumerate(pred_joints[i]):") + print(f"Joint {{joint_index}} has coordinates x={{x}}, y={{y}}, confidence={{confidence}}") + diff --git a/yolo_nas_pose_l_int8.onnx b/yolo_nas_pose_l_int8.onnx new file mode 100644 index 0000000000000000000000000000000000000000..1c82227b2449757639ce61d25de6c9bb0c6ff383 --- /dev/null +++ b/yolo_nas_pose_l_int8.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3acfcfd2f63c649f1a6695ff7c3a77dbfb67f654592ddf1abb8f5d32c2efd0cb +size 218735221 diff --git a/yolo_nas_pose_l_int8.onnx.best.engine b/yolo_nas_pose_l_int8.onnx.best.engine new file mode 100644 index 0000000000000000000000000000000000000000..8a0b3987d3438052dab9587c23481da2a903e0f6 --- /dev/null +++ b/yolo_nas_pose_l_int8.onnx.best.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6be26b88c524b32a7fe99eeaa6166ae80bd428e90d7f45b24317115286e1c80 +size 57269617 diff --git a/yolo_nas_pose_l_int8.onnx.best.engine.err b/yolo_nas_pose_l_int8.onnx.best.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..e3ea23faf60786a5665fb2edfee6997784f9ed37 --- /dev/null +++ b/yolo_nas_pose_l_int8.onnx.best.engine.err @@ -0,0 +1,7 @@ +[12/28/2023-19:27:37] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. 
+[12/28/2023-19:27:37] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-19:27:42] [W] [TRT] Calibrator won't be used in explicit precision mode. Use quantization aware training to generate network with Quantize/Dequantize nodes. +[12/28/2023-20:16:25] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. +[12/28/2023-20:16:25] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/28/2023-20:16:25] [W] * GPU compute time is unstable, with coefficient of variance = 6.69708%. +[12/28/2023-20:16:25] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. diff --git a/yolo_nas_pose_l_int8.onnx.best.engine.log b/yolo_nas_pose_l_int8.onnx.best.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..d28e5fb3e063c5f6e81132874bbbb966f46b70d7 --- /dev/null +++ b/yolo_nas_pose_l_int8.onnx.best.engine.log @@ -0,0 +1,357 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_l_int8.onnx --best --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_l_int8.onnx.best.engine +[12/28/2023-19:27:34] [I] === Model Options === +[12/28/2023-19:27:34] [I] Format: ONNX +[12/28/2023-19:27:34] [I] Model: yolo_nas_pose_l_int8.onnx +[12/28/2023-19:27:34] [I] Output: +[12/28/2023-19:27:34] [I] === Build Options === +[12/28/2023-19:27:34] [I] Max batch: explicit batch +[12/28/2023-19:27:34] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-19:27:34] [I] minTiming: 1 +[12/28/2023-19:27:34] [I] avgTiming: 8 +[12/28/2023-19:27:34] [I] Precision: FP32+FP16+INT8 +[12/28/2023-19:27:34] [I] LayerPrecisions: +[12/28/2023-19:27:34] [I] Calibration: Dynamic +[12/28/2023-19:27:34] [I] Refit: Disabled +[12/28/2023-19:27:34] [I] Sparsity: Disabled 
+[12/28/2023-19:27:34] [I] Safe mode: Disabled +[12/28/2023-19:27:34] [I] DirectIO mode: Disabled +[12/28/2023-19:27:34] [I] Restricted mode: Disabled +[12/28/2023-19:27:34] [I] Build only: Disabled +[12/28/2023-19:27:34] [I] Save engine: yolo_nas_pose_l_int8.onnx.best.engine +[12/28/2023-19:27:34] [I] Load engine: +[12/28/2023-19:27:34] [I] Profiling verbosity: 0 +[12/28/2023-19:27:34] [I] Tactic sources: Using default tactic sources +[12/28/2023-19:27:34] [I] timingCacheMode: local +[12/28/2023-19:27:34] [I] timingCacheFile: +[12/28/2023-19:27:34] [I] Heuristic: Disabled +[12/28/2023-19:27:34] [I] Preview Features: Use default preview flags. +[12/28/2023-19:27:34] [I] Input(s)s format: fp32:CHW +[12/28/2023-19:27:34] [I] Output(s)s format: fp32:CHW +[12/28/2023-19:27:34] [I] Input build shapes: model +[12/28/2023-19:27:34] [I] Input calibration shapes: model +[12/28/2023-19:27:34] [I] === System Options === +[12/28/2023-19:27:34] [I] Device: 0 +[12/28/2023-19:27:34] [I] DLACore: +[12/28/2023-19:27:34] [I] Plugins: +[12/28/2023-19:27:34] [I] === Inference Options === +[12/28/2023-19:27:34] [I] Batch: Explicit +[12/28/2023-19:27:34] [I] Input inference shapes: model +[12/28/2023-19:27:34] [I] Iterations: 10 +[12/28/2023-19:27:34] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-19:27:34] [I] Sleep time: 0ms +[12/28/2023-19:27:34] [I] Idle time: 0ms +[12/28/2023-19:27:34] [I] Streams: 1 +[12/28/2023-19:27:34] [I] ExposeDMA: Disabled +[12/28/2023-19:27:34] [I] Data transfers: Enabled +[12/28/2023-19:27:34] [I] Spin-wait: Disabled +[12/28/2023-19:27:34] [I] Multithreading: Disabled +[12/28/2023-19:27:34] [I] CUDA Graph: Disabled +[12/28/2023-19:27:34] [I] Separate profiling: Disabled +[12/28/2023-19:27:34] [I] Time Deserialize: Disabled +[12/28/2023-19:27:34] [I] Time Refit: Disabled +[12/28/2023-19:27:34] [I] NVTX verbosity: 0 +[12/28/2023-19:27:34] [I] Persistent Cache Ratio: 0 +[12/28/2023-19:27:34] [I] Inputs: +[12/28/2023-19:27:34] [I] === Reporting Options === 
+[12/28/2023-19:27:34] [I] Verbose: Disabled +[12/28/2023-19:27:34] [I] Averages: 100 inferences +[12/28/2023-19:27:34] [I] Percentiles: 90,95,99 +[12/28/2023-19:27:34] [I] Dump refittable layers:Disabled +[12/28/2023-19:27:34] [I] Dump output: Disabled +[12/28/2023-19:27:34] [I] Profile: Disabled +[12/28/2023-19:27:34] [I] Export timing to JSON file: +[12/28/2023-19:27:34] [I] Export output to JSON file: +[12/28/2023-19:27:34] [I] Export profile to JSON file: +[12/28/2023-19:27:34] [I] +[12/28/2023-19:27:34] [I] === Device Information === +[12/28/2023-19:27:34] [I] Selected Device: Orin +[12/28/2023-19:27:34] [I] Compute Capability: 8.7 +[12/28/2023-19:27:34] [I] SMs: 8 +[12/28/2023-19:27:34] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-19:27:34] [I] Device Global Memory: 7471 MiB +[12/28/2023-19:27:34] [I] Shared Memory per SM: 164 KiB +[12/28/2023-19:27:34] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-19:27:34] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-19:27:34] [I] +[12/28/2023-19:27:34] [I] TensorRT version: 8.5.2 +[12/28/2023-19:27:35] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 2833 (MiB) +[12/28/2023-19:27:37] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +284, now: CPU 574, GPU 3138 (MiB) +[12/28/2023-19:27:37] [I] Start parsing network model +[12/28/2023-19:27:37] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-19:27:37] [I] [TRT] Input filename: yolo_nas_pose_l_int8.onnx +[12/28/2023-19:27:37] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-19:27:37] [I] [TRT] Opset version: 17 +[12/28/2023-19:27:37] [I] [TRT] Producer name: pytorch +[12/28/2023-19:27:37] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-19:27:37] [I] [TRT] Domain: +[12/28/2023-19:27:37] [I] [TRT] Model version: 0 +[12/28/2023-19:27:37] [I] [TRT] Doc string: +[12/28/2023-19:27:37] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-19:27:42] [I] 
Finish parsing network model +[12/28/2023-19:27:47] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/28/2023-19:27:47] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 1417) [Constant] +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 1418) [Constant] +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 1419) [Constant] +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/backbone/stem/conv/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stem.conv.rbr_reparam.weight + /model/backbone/stem/conv/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stem/conv/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.downsample.rbr_reparam.weight + /model/backbone/stage1/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/downsample/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.conv2.conv.weight + /model/backbone/stage1/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/conv2/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.conv1.conv.weight + /model/backbone/stage1/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/conv1/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.0.cv1.rbr_reparam.weight + 
/model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 557) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 573) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_1 +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: 
/model/backbone/stage1/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_0 +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.conv3.conv.weight + /model/backbone/stage1/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/conv3/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.reduce_skip2.conv.weight + /model/neck/neck2/reduce_skip2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/reduce_skip2/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.downsample.rbr_reparam.weight + /model/backbone/stage2/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/downsample/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.downsample.conv.weight + /model/neck/neck2/downsample/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/downsample/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.conv2.conv.weight + /model/backbone/stage2/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/conv2/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.conv1.conv.weight + /model/backbone/stage2/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/conv1/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: 
model.backbone.stage2.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 620) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 636) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.2.cv1.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + 
/model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.2.cv2.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 652) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_2 +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_1 +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_0 +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.conv3.conv.weight + /model/backbone/stage2/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/conv3/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.reduce_skip2.conv.weight + /model/neck/neck1/reduce_skip2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/reduce_skip2/conv/Conv || model.neck.neck2.reduce_skip1.conv.weight + /model/neck/neck2/reduce_skip1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/reduce_skip1/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.downsample.rbr_reparam.weight + /model/backbone/stage3/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/downsample/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.downsample.conv.weight + 
/model/neck/neck1/downsample/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/downsample/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.conv2.conv.weight + /model/backbone/stage3/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/conv2/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.conv1.conv.weight + /model/backbone/stage3/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/conv1/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 702) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 718) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.2.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.2.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 734) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.3.cv1.rbr_reparam.weight + 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.3.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 750) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.4.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.4.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.4.alpha + (Unnamed Layer* 766) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Add) +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_4 +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: 
/model/backbone/stage3/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_2 +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_1 +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_0 +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.conv3.conv.weight + /model/backbone/stage3/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/conv3/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.reduce_skip1.conv.weight + /model/neck/neck1/reduce_skip1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/reduce_skip1/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.downsample.rbr_reparam.weight + /model/backbone/stage4/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/downsample/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.conv2.conv.weight + /model/backbone/stage4/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/conv2/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.conv1.conv.weight + /model/backbone/stage4/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/conv1/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] 
CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 806) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 822) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_1 +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_0 +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.conv3.conv.weight + 
/model/backbone/stage4/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/conv3/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.context_module.cv1.conv.weight + /model/backbone/context_module/cv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/context_module/cv1/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/m.2/MaxPool_output_0 copy +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.context_module.cv2.conv.weight + /model/backbone/context_module/cv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/context_module/cv2/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.conv.conv.weight + /model/neck/neck1/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/conv/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] DECONVOLUTION: model.neck.neck1.upsample.weight + /model/neck/neck1/upsample/_weight_quantizer/QuantizeLinear + /model/neck/neck1/upsample/ConvTranspose +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.reduce_after_concat.conv.weight + /model/neck/neck1/reduce_after_concat/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/reduce_after_concat/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.conv2.conv.weight + /model/neck/neck1/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/conv2/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.conv1.conv.weight + /model/neck/neck1/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + 
/model/neck/neck1/blocks/conv1/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 887) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 903) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, 
/model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.2.cv1.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.2.cv2.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.2.alpha + (Unnamed Layer* 919) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.3.cv1.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.3.cv2.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.3.alpha + (Unnamed Layer* 935) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/Mul, 
/model/neck/neck1/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.conv3.conv.weight + /model/neck/neck1/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/conv3/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.conv.conv.weight + /model/neck/neck2/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/conv/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] DECONVOLUTION: model.neck.neck2.upsample.weight + /model/neck/neck2/upsample/_weight_quantizer/QuantizeLinear + /model/neck/neck2/upsample/ConvTranspose +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/Concat_/model/neck/neck2/reduce_skip1/act/Relu_output_0_clone_1 copy +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.reduce_after_concat.conv.weight + /model/neck/neck2/reduce_after_concat/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/reduce_after_concat/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.conv2.conv.weight + /model/neck/neck2/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/conv2/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.conv1.conv.weight + /model/neck/neck2/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/conv1/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: 
model.neck.neck2.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 984) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 1000) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.2.cv1.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: 
model.neck.neck2.blocks.bottlenecks.2.cv2.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 1016) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.3.cv1.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.3.cv2.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.3.alpha + (Unnamed Layer* 1032) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.conv3.conv.weight + /model/neck/neck2/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/conv3/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.bbox_stem.seq.conv.weight + /model/heads/head1/bbox_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/bbox_stem/seq/conv/Conv || model.heads.head1.pose_stem.seq.conv.weight + 
/model/heads/head1/pose_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_stem/seq/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.conv.conv.weight + /model/neck/neck3/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/conv/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.reg_convs.0.seq.conv.weight + /model/heads/head1/reg_convs/reg_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv || model.heads.head1.cls_convs.0.seq.conv.weight + /model/heads/head1/cls_convs/cls_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.pose_convs.0.seq.conv.weight + /model/heads/head1/pose_convs/pose_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.conv2.conv.weight + /model/neck/neck3/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/conv2/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.conv1.conv.weight + /model/neck/neck3/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/conv1/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.cls_pred.weight + /model/heads/head1/cls_pred/_weight_quantizer/QuantizeLinear + /model/heads/head1/cls_pred/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.reg_pred.weight + /model/heads/head1/reg_pred/_weight_quantizer/QuantizeLinear + /model/heads/head1/reg_pred/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.pose_convs.1.seq.conv.weight + /model/heads/head1/pose_convs/pose_convs.1/seq/conv/_weight_quantizer/QuantizeLinear + 
/model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.pose_pred.weight + /model/heads/head1/pose_pred/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_pred/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.0.cv1.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.0.cv2.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 1097) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.1.cv1.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.1.cv2.conv.weight + 
/model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 1138) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.2.cv1.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv1/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.2.cv2.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv2/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 1154) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.3.cv1.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv1/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.3.cv2.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv2/conv/_weight_quantizer/QuantizeLinear + 
/model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv2/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 1170) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.conv3.conv.weight + /model/neck/neck3/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/conv3/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.pose_stem.seq.conv.weight + /model/heads/head2/pose_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_stem/seq/conv/Conv || model.heads.head2.bbox_stem.seq.conv.weight + /model/heads/head2/bbox_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/bbox_stem/seq/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.conv.conv.weight + /model/neck/neck4/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/conv/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.reg_convs.0.seq.conv.weight + /model/heads/head2/reg_convs/reg_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv || model.heads.head2.cls_convs.0.seq.conv.weight + /model/heads/head2/cls_convs/cls_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.pose_convs.0.seq.conv.weight + /model/heads/head2/pose_convs/pose_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.conv2.conv.weight + /model/neck/neck4/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + 
/model/neck/neck4/blocks/conv2/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.conv1.conv.weight + /model/neck/neck4/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/conv1/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.cls_pred.weight + /model/heads/head2/cls_pred/_weight_quantizer/QuantizeLinear + /model/heads/head2/cls_pred/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.reg_pred.weight + /model/heads/head2/reg_pred/_weight_quantizer/QuantizeLinear + /model/heads/head2/reg_pred/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.pose_convs.1.seq.conv.weight + /model/heads/head2/pose_convs/pose_convs.1/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.pose_pred.weight + /model/heads/head2/pose_pred/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_pred/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.0.cv1.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.0.cv2.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/heads/Conv_1 +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 1235) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.1.cv1.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.1.cv2.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 1276) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.2.cv1.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.2.cv2.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POINTWISE: 
PWN(model.neck.neck4.blocks.bottlenecks.2.alpha + (Unnamed Layer* 1292) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.3.cv1.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv1/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.3.cv2.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv2/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.3.alpha + (Unnamed Layer* 1308) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.conv3.conv.weight + /model/neck/neck4/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/conv3/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.bbox_stem.seq.conv.weight + /model/heads/head3/bbox_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/bbox_stem/seq/conv/Conv || model.heads.head3.pose_stem.seq.conv.weight + /model/heads/head3/pose_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_stem/seq/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.reg_convs.0.seq.conv.weight + /model/heads/head3/reg_convs/reg_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv || 
model.heads.head3.cls_convs.0.seq.conv.weight + /model/heads/head3/cls_convs/cls_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_convs.0.seq.conv.weight + /model/heads/head3/pose_convs/pose_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.cls_pred.weight + /model/heads/head3/cls_pred/_weight_quantizer/QuantizeLinear + /model/heads/head3/cls_pred/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.reg_pred.weight + /model/heads/head3/reg_pred/_weight_quantizer/QuantizeLinear + /model/heads/head3/reg_pred/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_convs.1.seq.conv.weight + /model/heads/head3/pose_convs/pose_convs.1/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_convs.2.seq.conv.weight + /model/heads/head3/pose_convs/pose_convs.2/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_pred.weight + /model/heads/head3/pose_pred/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_pred/Conv +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice_1.../post_process/Reshape_2]} +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] NMS: batched_nms_274 +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] 
DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 1421) [NMS]_1_output[DevicetoShapeHostCopy] +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/28/2023-19:27:47] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] +[12/28/2023-19:28:00] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +418, now: CPU 1532, GPU 3937 (MiB) +[12/28/2023-19:28:02] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +83, GPU +79, now: CPU 1615, GPU 4016 (MiB) +[12/28/2023-19:28:02] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored. +[12/28/2023-20:15:51] [I] [TRT] Total Activation Memory: 7994521088 +[12/28/2023-20:15:51] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/28/2023-20:16:04] [I] [TRT] Total Host Persistent Memory: 355104 +[12/28/2023-20:16:04] [I] [TRT] Total Device Persistent Memory: 38912 +[12/28/2023-20:16:04] [I] [TRT] Total Scratch Memory: 134217728 +[12/28/2023-20:16:04] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 264 MiB, GPU 454 MiB +[12/28/2023-20:16:04] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 188 steps to complete. +[12/28/2023-20:16:04] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 61.0389ms to assign 14 blocks to 188 nodes requiring 156376576 bytes. +[12/28/2023-20:16:04] [I] [TRT] Total Activation Memory: 156376576 +[12/28/2023-20:16:09] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 2003, GPU 5524 (MiB) +[12/28/2023-20:16:09] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +52, GPU +64, now: CPU 52, GPU 64 (MiB) +[12/28/2023-20:16:09] [I] Engine built in 2915.03 sec. 
+[12/28/2023-20:16:09] [I] [TRT] Loaded engine size: 54 MiB +[12/28/2023-20:16:10] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1288, GPU 5322 (MiB) +[12/28/2023-20:16:10] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +52, now: CPU 0, GPU 52 (MiB) +[12/28/2023-20:16:10] [I] Engine deserialized in 0.222476 sec. +[12/28/2023-20:16:10] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1289, GPU 5322 (MiB) +[12/28/2023-20:16:10] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +149, now: CPU 0, GPU 201 (MiB) +[12/28/2023-20:16:10] [I] Setting persistentCacheLimit to 0 bytes. +[12/28/2023-20:16:10] [I] Using random values for input onnx::Cast_0 +[12/28/2023-20:16:10] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/28/2023-20:16:10] [I] Using random values for output graph2_flat_predictions +[12/28/2023-20:16:10] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/28/2023-20:16:10] [I] Starting inference +[12/28/2023-20:16:25] [I] Warmup completed 7 queries over 200 ms +[12/28/2023-20:16:25] [I] Timing trace has 556 queries over 15.0394 s +[12/28/2023-20:16:25] [I] +[12/28/2023-20:16:25] [I] === Trace details === +[12/28/2023-20:16:25] [I] Trace averages of 100 runs: +[12/28/2023-20:16:25] [I] Average on 100 runs - GPU latency: 27.1478 ms - Host latency: 27.2648 ms (enqueue 27.2126 ms) +[12/28/2023-20:16:25] [I] Average on 100 runs - GPU latency: 26.6955 ms - Host latency: 26.8082 ms (enqueue 26.7744 ms) +[12/28/2023-20:16:25] [I] Average on 100 runs - GPU latency: 27.0859 ms - Host latency: 27.1971 ms (enqueue 27.1502 ms) +[12/28/2023-20:16:25] [I] Average on 100 runs - GPU latency: 26.5486 ms - Host latency: 26.6622 ms (enqueue 26.6226 ms) +[12/28/2023-20:16:25] [I] Average on 100 runs - GPU latency: 26.938 ms - Host latency: 27.049 ms (enqueue 27.01 ms) +[12/28/2023-20:16:25] [I] 
+[12/28/2023-20:16:25] [I] === Performance summary === +[12/28/2023-20:16:25] [I] Throughput: 36.9695 qps +[12/28/2023-20:16:25] [I] Latency: min = 24.9229 ms, max = 40.8822 ms, mean = 27.0196 ms, median = 26.4901 ms, percentile(90%) = 27.7676 ms, percentile(95%) = 29.9111 ms, percentile(99%) = 36.0352 ms +[12/28/2023-20:16:25] [I] Enqueue Time: min = 24.9536 ms, max = 40.8246 ms, mean = 26.9763 ms, median = 26.46 ms, percentile(90%) = 27.5393 ms, percentile(95%) = 30.0479 ms, percentile(99%) = 36.1467 ms +[12/28/2023-20:16:25] [I] H2D Latency: min = 0.0800781 ms, max = 0.117188 ms, mean = 0.0969461 ms, median = 0.0976562 ms, percentile(90%) = 0.100098 ms, percentile(95%) = 0.100586 ms, percentile(99%) = 0.101562 ms +[12/28/2023-20:16:25] [I] GPU Compute Time: min = 24.8271 ms, max = 40.7577 ms, mean = 26.9066 ms, median = 26.3784 ms, percentile(90%) = 27.6466 ms, percentile(95%) = 29.7852 ms, percentile(99%) = 35.9219 ms +[12/28/2023-20:16:25] [I] D2H Latency: min = 0.00292969 ms, max = 0.079834 ms, mean = 0.0159901 ms, median = 0.0146484 ms, percentile(90%) = 0.0258789 ms, percentile(95%) = 0.0291748 ms, percentile(99%) = 0.036377 ms +[12/28/2023-20:16:25] [I] Total Host Walltime: 15.0394 s +[12/28/2023-20:16:25] [I] Total GPU Compute Time: 14.9601 s +[12/28/2023-20:16:25] [I] Explanations of the performance metrics are printed in the verbose logs. 
+[12/28/2023-20:16:25] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_l_int8.onnx --best --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_l_int8.onnx.best.engine diff --git a/yolo_nas_pose_l_int8.onnx.engine.err b/yolo_nas_pose_l_int8.onnx.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..cdfeb04d03685e2cd12b7a415f2ded33500cbfb7 --- /dev/null +++ b/yolo_nas_pose_l_int8.onnx.engine.err @@ -0,0 +1,8 @@ +[12/28/2023-19:27:20] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/28/2023-19:27:20] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-19:27:24] [E] Error[4]: [network.cpp::validate::2922] Error Code 4: Internal Error (Int8 precision has been set for a layer or layer output, but int8 is not configured in the builder) +[12/28/2023-19:27:24] [E] Error[2]: [builder.cpp::buildSerializedNetwork::751] Error Code 2: Internal Error (Assertion engine != nullptr failed. ) +[12/28/2023-19:27:24] [E] Engine could not be created from network +[12/28/2023-19:27:24] [E] Building engine failed +[12/28/2023-19:27:24] [E] Failed to create engine from model or file. 
+[12/28/2023-19:27:24] [E] Engine set up failed diff --git a/yolo_nas_pose_l_int8.onnx.engine.log b/yolo_nas_pose_l_int8.onnx.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..24a4ba7018f2e47d7256b8aad5d18000592b87d8 --- /dev/null +++ b/yolo_nas_pose_l_int8.onnx.engine.log @@ -0,0 +1,91 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_l_int8.onnx --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_l_int8.onnx.engine +[12/28/2023-19:27:14] [I] === Model Options === +[12/28/2023-19:27:14] [I] Format: ONNX +[12/28/2023-19:27:14] [I] Model: yolo_nas_pose_l_int8.onnx +[12/28/2023-19:27:14] [I] Output: +[12/28/2023-19:27:14] [I] === Build Options === +[12/28/2023-19:27:14] [I] Max batch: explicit batch +[12/28/2023-19:27:14] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-19:27:14] [I] minTiming: 1 +[12/28/2023-19:27:14] [I] avgTiming: 8 +[12/28/2023-19:27:14] [I] Precision: FP32 +[12/28/2023-19:27:14] [I] LayerPrecisions: +[12/28/2023-19:27:14] [I] Calibration: +[12/28/2023-19:27:14] [I] Refit: Disabled +[12/28/2023-19:27:14] [I] Sparsity: Disabled +[12/28/2023-19:27:14] [I] Safe mode: Disabled +[12/28/2023-19:27:14] [I] DirectIO mode: Disabled +[12/28/2023-19:27:14] [I] Restricted mode: Disabled +[12/28/2023-19:27:14] [I] Build only: Disabled +[12/28/2023-19:27:14] [I] Save engine: yolo_nas_pose_l_int8.onnx.engine +[12/28/2023-19:27:14] [I] Load engine: +[12/28/2023-19:27:14] [I] Profiling verbosity: 0 +[12/28/2023-19:27:14] [I] Tactic sources: Using default tactic sources +[12/28/2023-19:27:14] [I] timingCacheMode: local +[12/28/2023-19:27:14] [I] timingCacheFile: +[12/28/2023-19:27:14] [I] Heuristic: Disabled +[12/28/2023-19:27:14] [I] Preview Features: Use default preview flags. 
+[12/28/2023-19:27:14] [I] Input(s)s format: fp32:CHW +[12/28/2023-19:27:14] [I] Output(s)s format: fp32:CHW +[12/28/2023-19:27:14] [I] Input build shapes: model +[12/28/2023-19:27:14] [I] Input calibration shapes: model +[12/28/2023-19:27:14] [I] === System Options === +[12/28/2023-19:27:14] [I] Device: 0 +[12/28/2023-19:27:14] [I] DLACore: +[12/28/2023-19:27:14] [I] Plugins: +[12/28/2023-19:27:14] [I] === Inference Options === +[12/28/2023-19:27:14] [I] Batch: Explicit +[12/28/2023-19:27:14] [I] Input inference shapes: model +[12/28/2023-19:27:14] [I] Iterations: 10 +[12/28/2023-19:27:14] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-19:27:14] [I] Sleep time: 0ms +[12/28/2023-19:27:14] [I] Idle time: 0ms +[12/28/2023-19:27:14] [I] Streams: 1 +[12/28/2023-19:27:14] [I] ExposeDMA: Disabled +[12/28/2023-19:27:14] [I] Data transfers: Enabled +[12/28/2023-19:27:14] [I] Spin-wait: Disabled +[12/28/2023-19:27:14] [I] Multithreading: Disabled +[12/28/2023-19:27:14] [I] CUDA Graph: Disabled +[12/28/2023-19:27:14] [I] Separate profiling: Disabled +[12/28/2023-19:27:14] [I] Time Deserialize: Disabled +[12/28/2023-19:27:14] [I] Time Refit: Disabled +[12/28/2023-19:27:14] [I] NVTX verbosity: 0 +[12/28/2023-19:27:14] [I] Persistent Cache Ratio: 0 +[12/28/2023-19:27:14] [I] Inputs: +[12/28/2023-19:27:14] [I] === Reporting Options === +[12/28/2023-19:27:14] [I] Verbose: Disabled +[12/28/2023-19:27:14] [I] Averages: 100 inferences +[12/28/2023-19:27:14] [I] Percentiles: 90,95,99 +[12/28/2023-19:27:14] [I] Dump refittable layers:Disabled +[12/28/2023-19:27:14] [I] Dump output: Disabled +[12/28/2023-19:27:14] [I] Profile: Disabled +[12/28/2023-19:27:14] [I] Export timing to JSON file: +[12/28/2023-19:27:14] [I] Export output to JSON file: +[12/28/2023-19:27:14] [I] Export profile to JSON file: +[12/28/2023-19:27:14] [I] +[12/28/2023-19:27:14] [I] === Device Information === +[12/28/2023-19:27:14] [I] Selected Device: Orin +[12/28/2023-19:27:14] [I] Compute Capability: 8.7 
+[12/28/2023-19:27:14] [I] SMs: 8 +[12/28/2023-19:27:14] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-19:27:14] [I] Device Global Memory: 7471 MiB +[12/28/2023-19:27:14] [I] Shared Memory per SM: 164 KiB +[12/28/2023-19:27:14] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-19:27:14] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-19:27:14] [I] +[12/28/2023-19:27:14] [I] TensorRT version: 8.5.2 +[12/28/2023-19:27:14] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 2835 (MiB) +[12/28/2023-19:27:17] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +286, now: CPU 574, GPU 3142 (MiB) +[12/28/2023-19:27:17] [I] Start parsing network model +[12/28/2023-19:27:20] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-19:27:20] [I] [TRT] Input filename: yolo_nas_pose_l_int8.onnx +[12/28/2023-19:27:20] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-19:27:20] [I] [TRT] Opset version: 17 +[12/28/2023-19:27:20] [I] [TRT] Producer name: pytorch +[12/28/2023-19:27:20] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-19:27:20] [I] [TRT] Domain: +[12/28/2023-19:27:20] [I] [TRT] Model version: 0 +[12/28/2023-19:27:20] [I] [TRT] Doc string: +[12/28/2023-19:27:20] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-19:27:24] [I] Finish parsing network model +&&&& FAILED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_l_int8.onnx --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_l_int8.onnx.engine diff --git a/yolo_nas_pose_l_int8.onnx.fp16.engine.err b/yolo_nas_pose_l_int8.onnx.fp16.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..4cab9039ef24f4b29fa5a24a5ef0ba041d34ba07 --- /dev/null +++ b/yolo_nas_pose_l_int8.onnx.fp16.engine.err @@ -0,0 +1,8 @@ +[12/28/2023-19:27:29] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not 
natively support INT64. Attempting to cast down to INT32. +[12/28/2023-19:27:29] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-19:27:33] [E] Error[4]: [network.cpp::validate::2922] Error Code 4: Internal Error (Int8 precision has been set for a layer or layer output, but int8 is not configured in the builder) +[12/28/2023-19:27:33] [E] Error[2]: [builder.cpp::buildSerializedNetwork::751] Error Code 2: Internal Error (Assertion engine != nullptr failed. ) +[12/28/2023-19:27:33] [E] Engine could not be created from network +[12/28/2023-19:27:33] [E] Building engine failed +[12/28/2023-19:27:33] [E] Failed to create engine from model or file. +[12/28/2023-19:27:33] [E] Engine set up failed diff --git a/yolo_nas_pose_l_int8.onnx.fp16.engine.log b/yolo_nas_pose_l_int8.onnx.fp16.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..d05e22ffbacf2720c8075f5b8b85b23d03bd0e02 --- /dev/null +++ b/yolo_nas_pose_l_int8.onnx.fp16.engine.log @@ -0,0 +1,91 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_l_int8.onnx --fp16 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_l_int8.onnx.fp16.engine +[12/28/2023-19:27:25] [I] === Model Options === +[12/28/2023-19:27:25] [I] Format: ONNX +[12/28/2023-19:27:25] [I] Model: yolo_nas_pose_l_int8.onnx +[12/28/2023-19:27:25] [I] Output: +[12/28/2023-19:27:25] [I] === Build Options === +[12/28/2023-19:27:25] [I] Max batch: explicit batch +[12/28/2023-19:27:25] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-19:27:25] [I] minTiming: 1 +[12/28/2023-19:27:25] [I] avgTiming: 8 +[12/28/2023-19:27:25] [I] Precision: FP32+FP16 +[12/28/2023-19:27:25] [I] LayerPrecisions: +[12/28/2023-19:27:25] [I] Calibration: +[12/28/2023-19:27:25] [I] Refit: Disabled +[12/28/2023-19:27:25] [I] Sparsity: Disabled +[12/28/2023-19:27:25] [I] Safe mode: 
Disabled +[12/28/2023-19:27:25] [I] DirectIO mode: Disabled +[12/28/2023-19:27:25] [I] Restricted mode: Disabled +[12/28/2023-19:27:25] [I] Build only: Disabled +[12/28/2023-19:27:25] [I] Save engine: yolo_nas_pose_l_int8.onnx.fp16.engine +[12/28/2023-19:27:25] [I] Load engine: +[12/28/2023-19:27:25] [I] Profiling verbosity: 0 +[12/28/2023-19:27:25] [I] Tactic sources: Using default tactic sources +[12/28/2023-19:27:25] [I] timingCacheMode: local +[12/28/2023-19:27:25] [I] timingCacheFile: +[12/28/2023-19:27:25] [I] Heuristic: Disabled +[12/28/2023-19:27:25] [I] Preview Features: Use default preview flags. +[12/28/2023-19:27:25] [I] Input(s)s format: fp32:CHW +[12/28/2023-19:27:25] [I] Output(s)s format: fp32:CHW +[12/28/2023-19:27:25] [I] Input build shapes: model +[12/28/2023-19:27:25] [I] Input calibration shapes: model +[12/28/2023-19:27:25] [I] === System Options === +[12/28/2023-19:27:25] [I] Device: 0 +[12/28/2023-19:27:25] [I] DLACore: +[12/28/2023-19:27:25] [I] Plugins: +[12/28/2023-19:27:25] [I] === Inference Options === +[12/28/2023-19:27:25] [I] Batch: Explicit +[12/28/2023-19:27:25] [I] Input inference shapes: model +[12/28/2023-19:27:25] [I] Iterations: 10 +[12/28/2023-19:27:25] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-19:27:25] [I] Sleep time: 0ms +[12/28/2023-19:27:25] [I] Idle time: 0ms +[12/28/2023-19:27:25] [I] Streams: 1 +[12/28/2023-19:27:25] [I] ExposeDMA: Disabled +[12/28/2023-19:27:25] [I] Data transfers: Enabled +[12/28/2023-19:27:25] [I] Spin-wait: Disabled +[12/28/2023-19:27:25] [I] Multithreading: Disabled +[12/28/2023-19:27:25] [I] CUDA Graph: Disabled +[12/28/2023-19:27:25] [I] Separate profiling: Disabled +[12/28/2023-19:27:25] [I] Time Deserialize: Disabled +[12/28/2023-19:27:25] [I] Time Refit: Disabled +[12/28/2023-19:27:25] [I] NVTX verbosity: 0 +[12/28/2023-19:27:25] [I] Persistent Cache Ratio: 0 +[12/28/2023-19:27:25] [I] Inputs: +[12/28/2023-19:27:25] [I] === Reporting Options === +[12/28/2023-19:27:25] [I] Verbose: 
Disabled +[12/28/2023-19:27:25] [I] Averages: 100 inferences +[12/28/2023-19:27:25] [I] Percentiles: 90,95,99 +[12/28/2023-19:27:25] [I] Dump refittable layers:Disabled +[12/28/2023-19:27:25] [I] Dump output: Disabled +[12/28/2023-19:27:25] [I] Profile: Disabled +[12/28/2023-19:27:25] [I] Export timing to JSON file: +[12/28/2023-19:27:25] [I] Export output to JSON file: +[12/28/2023-19:27:25] [I] Export profile to JSON file: +[12/28/2023-19:27:25] [I] +[12/28/2023-19:27:25] [I] === Device Information === +[12/28/2023-19:27:25] [I] Selected Device: Orin +[12/28/2023-19:27:25] [I] Compute Capability: 8.7 +[12/28/2023-19:27:25] [I] SMs: 8 +[12/28/2023-19:27:25] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-19:27:25] [I] Device Global Memory: 7471 MiB +[12/28/2023-19:27:25] [I] Shared Memory per SM: 164 KiB +[12/28/2023-19:27:25] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-19:27:25] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-19:27:25] [I] +[12/28/2023-19:27:25] [I] TensorRT version: 8.5.2 +[12/28/2023-19:27:26] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 2833 (MiB) +[12/28/2023-19:27:28] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +284, now: CPU 574, GPU 3139 (MiB) +[12/28/2023-19:27:28] [I] Start parsing network model +[12/28/2023-19:27:29] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-19:27:29] [I] [TRT] Input filename: yolo_nas_pose_l_int8.onnx +[12/28/2023-19:27:29] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-19:27:29] [I] [TRT] Opset version: 17 +[12/28/2023-19:27:29] [I] [TRT] Producer name: pytorch +[12/28/2023-19:27:29] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-19:27:29] [I] [TRT] Domain: +[12/28/2023-19:27:29] [I] [TRT] Model version: 0 +[12/28/2023-19:27:29] [I] [TRT] Doc string: +[12/28/2023-19:27:29] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-19:27:33] [I] Finish parsing network model +&&&& 
FAILED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_l_int8.onnx --fp16 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_l_int8.onnx.fp16.engine diff --git a/yolo_nas_pose_l_int8.onnx.int8.engine b/yolo_nas_pose_l_int8.onnx.int8.engine new file mode 100644 index 0000000000000000000000000000000000000000..4b5de8313950bc166e6147f3c15ef24bc3ad54c6 --- /dev/null +++ b/yolo_nas_pose_l_int8.onnx.int8.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0437c2287a6331143c0b7181c0ce945e117b29eae0d9755f226919acbe4a8cff +size 57433967 diff --git a/yolo_nas_pose_l_int8.onnx.int8.engine.err b/yolo_nas_pose_l_int8.onnx.int8.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..85d007509eb447ca2c313706e844b026845bf664 --- /dev/null +++ b/yolo_nas_pose_l_int8.onnx.int8.engine.err @@ -0,0 +1,7 @@ +[12/28/2023-20:16:35] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/28/2023-20:16:35] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-20:16:40] [W] [TRT] Calibrator won't be used in explicit precision mode. Use quantization aware training to generate network with Quantize/Dequantize nodes. +[12/28/2023-20:31:04] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. +[12/28/2023-20:31:04] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/28/2023-20:31:04] [W] * GPU compute time is unstable, with coefficient of variance = 6.47493%. +[12/28/2023-20:31:04] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. 
diff --git a/yolo_nas_pose_l_int8.onnx.int8.engine.log b/yolo_nas_pose_l_int8.onnx.int8.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..2c13ec4042e6026e83e0915cc37151be6fd02fcf --- /dev/null +++ b/yolo_nas_pose_l_int8.onnx.int8.engine.log @@ -0,0 +1,357 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_l_int8.onnx --int8 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_l_int8.onnx.int8.engine +[12/28/2023-20:16:29] [I] === Model Options === +[12/28/2023-20:16:29] [I] Format: ONNX +[12/28/2023-20:16:29] [I] Model: yolo_nas_pose_l_int8.onnx +[12/28/2023-20:16:29] [I] Output: +[12/28/2023-20:16:29] [I] === Build Options === +[12/28/2023-20:16:29] [I] Max batch: explicit batch +[12/28/2023-20:16:29] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-20:16:29] [I] minTiming: 1 +[12/28/2023-20:16:29] [I] avgTiming: 8 +[12/28/2023-20:16:29] [I] Precision: FP32+INT8 +[12/28/2023-20:16:29] [I] LayerPrecisions: +[12/28/2023-20:16:29] [I] Calibration: Dynamic +[12/28/2023-20:16:29] [I] Refit: Disabled +[12/28/2023-20:16:29] [I] Sparsity: Disabled +[12/28/2023-20:16:29] [I] Safe mode: Disabled +[12/28/2023-20:16:29] [I] DirectIO mode: Disabled +[12/28/2023-20:16:29] [I] Restricted mode: Disabled +[12/28/2023-20:16:29] [I] Build only: Disabled +[12/28/2023-20:16:29] [I] Save engine: yolo_nas_pose_l_int8.onnx.int8.engine +[12/28/2023-20:16:29] [I] Load engine: +[12/28/2023-20:16:29] [I] Profiling verbosity: 0 +[12/28/2023-20:16:29] [I] Tactic sources: Using default tactic sources +[12/28/2023-20:16:29] [I] timingCacheMode: local +[12/28/2023-20:16:29] [I] timingCacheFile: +[12/28/2023-20:16:29] [I] Heuristic: Disabled +[12/28/2023-20:16:29] [I] Preview Features: Use default preview flags. 
+[12/28/2023-20:16:29] [I] Input(s)s format: fp32:CHW +[12/28/2023-20:16:29] [I] Output(s)s format: fp32:CHW +[12/28/2023-20:16:29] [I] Input build shapes: model +[12/28/2023-20:16:29] [I] Input calibration shapes: model +[12/28/2023-20:16:29] [I] === System Options === +[12/28/2023-20:16:29] [I] Device: 0 +[12/28/2023-20:16:29] [I] DLACore: +[12/28/2023-20:16:29] [I] Plugins: +[12/28/2023-20:16:29] [I] === Inference Options === +[12/28/2023-20:16:29] [I] Batch: Explicit +[12/28/2023-20:16:29] [I] Input inference shapes: model +[12/28/2023-20:16:29] [I] Iterations: 10 +[12/28/2023-20:16:29] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-20:16:29] [I] Sleep time: 0ms +[12/28/2023-20:16:29] [I] Idle time: 0ms +[12/28/2023-20:16:29] [I] Streams: 1 +[12/28/2023-20:16:29] [I] ExposeDMA: Disabled +[12/28/2023-20:16:29] [I] Data transfers: Enabled +[12/28/2023-20:16:29] [I] Spin-wait: Disabled +[12/28/2023-20:16:29] [I] Multithreading: Disabled +[12/28/2023-20:16:29] [I] CUDA Graph: Disabled +[12/28/2023-20:16:29] [I] Separate profiling: Disabled +[12/28/2023-20:16:29] [I] Time Deserialize: Disabled +[12/28/2023-20:16:29] [I] Time Refit: Disabled +[12/28/2023-20:16:29] [I] NVTX verbosity: 0 +[12/28/2023-20:16:29] [I] Persistent Cache Ratio: 0 +[12/28/2023-20:16:29] [I] Inputs: +[12/28/2023-20:16:29] [I] === Reporting Options === +[12/28/2023-20:16:29] [I] Verbose: Disabled +[12/28/2023-20:16:29] [I] Averages: 100 inferences +[12/28/2023-20:16:29] [I] Percentiles: 90,95,99 +[12/28/2023-20:16:29] [I] Dump refittable layers:Disabled +[12/28/2023-20:16:29] [I] Dump output: Disabled +[12/28/2023-20:16:29] [I] Profile: Disabled +[12/28/2023-20:16:29] [I] Export timing to JSON file: +[12/28/2023-20:16:29] [I] Export output to JSON file: +[12/28/2023-20:16:29] [I] Export profile to JSON file: +[12/28/2023-20:16:29] [I] +[12/28/2023-20:16:29] [I] === Device Information === +[12/28/2023-20:16:29] [I] Selected Device: Orin +[12/28/2023-20:16:29] [I] Compute Capability: 8.7 
+[12/28/2023-20:16:29] [I] SMs: 8 +[12/28/2023-20:16:29] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-20:16:29] [I] Device Global Memory: 7471 MiB +[12/28/2023-20:16:29] [I] Shared Memory per SM: 164 KiB +[12/28/2023-20:16:29] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-20:16:29] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-20:16:29] [I] +[12/28/2023-20:16:29] [I] TensorRT version: 8.5.2 +[12/28/2023-20:16:29] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 2967 (MiB) +[12/28/2023-20:16:33] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +283, now: CPU 574, GPU 3271 (MiB) +[12/28/2023-20:16:33] [I] Start parsing network model +[12/28/2023-20:16:35] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-20:16:35] [I] [TRT] Input filename: yolo_nas_pose_l_int8.onnx +[12/28/2023-20:16:35] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-20:16:35] [I] [TRT] Opset version: 17 +[12/28/2023-20:16:35] [I] [TRT] Producer name: pytorch +[12/28/2023-20:16:35] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-20:16:35] [I] [TRT] Domain: +[12/28/2023-20:16:35] [I] [TRT] Model version: 0 +[12/28/2023-20:16:35] [I] [TRT] Doc string: +[12/28/2023-20:16:35] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-20:16:40] [I] Finish parsing network model +[12/28/2023-20:16:40] [I] FP32 and INT8 precisions have been specified - more performance might be enabled by additionally specifying --fp16 or --best +[12/28/2023-20:16:45] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/28/2023-20:16:45] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 1417) [Constant] 
+[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 1418) [Constant] +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 1419) [Constant] +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/backbone/stem/conv/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stem.conv.rbr_reparam.weight + /model/backbone/stem/conv/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stem/conv/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.downsample.rbr_reparam.weight + /model/backbone/stage1/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/downsample/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.conv2.conv.weight + /model/backbone/stage1/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/conv2/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.conv1.conv.weight + /model/backbone/stage1/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/conv1/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv 
+[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 557) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 573) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_1 +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_0 +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.conv3.conv.weight + /model/backbone/stage1/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/conv3/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.reduce_skip2.conv.weight + /model/neck/neck2/reduce_skip2/conv/_weight_quantizer/QuantizeLinear + 
/model/neck/neck2/reduce_skip2/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.downsample.rbr_reparam.weight + /model/backbone/stage2/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/downsample/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.downsample.conv.weight + /model/neck/neck2/downsample/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/downsample/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.conv2.conv.weight + /model/backbone/stage2/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/conv2/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.conv1.conv.weight + /model/backbone/stage2/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/conv1/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 620) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) 
+[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 636) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.2.cv1.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.2.cv2.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 652) [Shuffle] + 
/model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_2 +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_1 +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_0 +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.conv3.conv.weight + /model/backbone/stage2/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/conv3/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.reduce_skip2.conv.weight + /model/neck/neck1/reduce_skip2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/reduce_skip2/conv/Conv || model.neck.neck2.reduce_skip1.conv.weight + /model/neck/neck2/reduce_skip1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/reduce_skip1/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.downsample.rbr_reparam.weight + /model/backbone/stage3/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/downsample/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.downsample.conv.weight + /model/neck/neck1/downsample/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/downsample/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.conv2.conv.weight + /model/backbone/stage3/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/conv2/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.conv1.conv.weight + /model/backbone/stage3/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + 
/model/backbone/stage3/blocks/conv1/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 702) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 718) [Shuffle] + 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.2.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.2.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 734) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.3.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.3.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POINTWISE: 
PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 750) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.4.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.4.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.4.alpha + (Unnamed Layer* 766) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Add) +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_4 +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_2 +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_1 +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_0 +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.conv3.conv.weight + /model/backbone/stage3/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/conv3/conv/Conv 
+[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.reduce_skip1.conv.weight + /model/neck/neck1/reduce_skip1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/reduce_skip1/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.downsample.rbr_reparam.weight + /model/backbone/stage4/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/downsample/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.conv2.conv.weight + /model/backbone/stage4/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/conv2/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.conv1.conv.weight + /model/backbone/stage4/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/conv1/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 806) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: 
/model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 822) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_1 +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_0 +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.conv3.conv.weight + /model/backbone/stage4/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/conv3/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.context_module.cv1.conv.weight + /model/backbone/context_module/cv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/context_module/cv1/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool 
+[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/m.2/MaxPool_output_0 copy +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.context_module.cv2.conv.weight + /model/backbone/context_module/cv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/context_module/cv2/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.conv.conv.weight + /model/neck/neck1/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/conv/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] DECONVOLUTION: model.neck.neck1.upsample.weight + /model/neck/neck1/upsample/_weight_quantizer/QuantizeLinear + /model/neck/neck1/upsample/ConvTranspose +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.reduce_after_concat.conv.weight + /model/neck/neck1/reduce_after_concat/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/reduce_after_concat/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.conv2.conv.weight + /model/neck/neck1/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/conv2/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.conv1.conv.weight + /model/neck/neck1/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/conv1/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.0.cv2.rbr_reparam.weight + 
/model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 887) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 903) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.2.cv1.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.2.cv2.rbr_reparam.weight + 
/model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.2.alpha + (Unnamed Layer* 919) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.3.cv1.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.3.cv2.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.3.alpha + (Unnamed Layer* 935) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.conv3.conv.weight + /model/neck/neck1/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/conv3/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.conv.conv.weight + /model/neck/neck2/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/conv/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] DECONVOLUTION: model.neck.neck2.upsample.weight + /model/neck/neck2/upsample/_weight_quantizer/QuantizeLinear + 
/model/neck/neck2/upsample/ConvTranspose +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/Concat_/model/neck/neck2/reduce_skip1/act/Relu_output_0_clone_1 copy +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.reduce_after_concat.conv.weight + /model/neck/neck2/reduce_after_concat/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/reduce_after_concat/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.conv2.conv.weight + /model/neck/neck2/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/conv2/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.conv1.conv.weight + /model/neck/neck2/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/conv1/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 984) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] 
[I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 1000) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.2.cv1.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.2.cv2.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 1016) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] 
CONVOLUTION: model.neck.neck2.blocks.bottlenecks.3.cv1.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.3.cv2.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.3.alpha + (Unnamed Layer* 1032) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.conv3.conv.weight + /model/neck/neck2/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/conv3/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.bbox_stem.seq.conv.weight + /model/heads/head1/bbox_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/bbox_stem/seq/conv/Conv || model.heads.head1.pose_stem.seq.conv.weight + /model/heads/head1/pose_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_stem/seq/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.conv.conv.weight + /model/neck/neck3/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/conv/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.reg_convs.0.seq.conv.weight + /model/heads/head1/reg_convs/reg_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv || model.heads.head1.cls_convs.0.seq.conv.weight + /model/heads/head1/cls_convs/cls_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + 
/model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.pose_convs.0.seq.conv.weight + /model/heads/head1/pose_convs/pose_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.conv2.conv.weight + /model/neck/neck3/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/conv2/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.conv1.conv.weight + /model/neck/neck3/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/conv1/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.cls_pred.weight + /model/heads/head1/cls_pred/_weight_quantizer/QuantizeLinear + /model/heads/head1/cls_pred/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.reg_pred.weight + /model/heads/head1/reg_pred/_weight_quantizer/QuantizeLinear + /model/heads/head1/reg_pred/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.pose_convs.1.seq.conv.weight + /model/heads/head1/pose_convs/pose_convs.1/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.pose_pred.weight + /model/heads/head1/pose_pred/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_pred/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.0.cv1.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/_weight_quantizer/QuantizeLinear + 
/model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.0.cv2.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 1097) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.1.cv1.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.1.cv2.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 1138) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.2.cv1.conv.weight + 
/model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv1/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.2.cv2.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/cv2/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 1154) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.3.cv1.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv1/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.3.cv2.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/cv2/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 1170) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.conv3.conv.weight + /model/neck/neck3/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/conv3/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.pose_stem.seq.conv.weight + 
/model/heads/head2/pose_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_stem/seq/conv/Conv || model.heads.head2.bbox_stem.seq.conv.weight + /model/heads/head2/bbox_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/bbox_stem/seq/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.conv.conv.weight + /model/neck/neck4/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/conv/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.reg_convs.0.seq.conv.weight + /model/heads/head2/reg_convs/reg_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv || model.heads.head2.cls_convs.0.seq.conv.weight + /model/heads/head2/cls_convs/cls_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.pose_convs.0.seq.conv.weight + /model/heads/head2/pose_convs/pose_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.conv2.conv.weight + /model/neck/neck4/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/conv2/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.conv1.conv.weight + /model/neck/neck4/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/conv1/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.cls_pred.weight + /model/heads/head2/cls_pred/_weight_quantizer/QuantizeLinear + /model/heads/head2/cls_pred/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.reg_pred.weight + /model/heads/head2/reg_pred/_weight_quantizer/QuantizeLinear + /model/heads/head2/reg_pred/Conv +[12/28/2023-20:16:45] [I] [TRT] 
[GpuLayer] CONVOLUTION: model.heads.head2.pose_convs.1.seq.conv.weight + /model/heads/head2/pose_convs/pose_convs.1/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.pose_pred.weight + /model/heads/head2/pose_pred/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_pred/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.0.cv1.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.0.cv2.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 1235) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.1.cv1.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/_weight_quantizer/QuantizeLinear + 
/model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.1.cv2.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 1276) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.2.cv1.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.2.cv2.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.2.alpha + (Unnamed Layer* 1292) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.3.cv1.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv1/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: 
model.neck.neck4.blocks.bottlenecks.3.cv2.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/cv2/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.3.alpha + (Unnamed Layer* 1308) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.conv3.conv.weight + /model/neck/neck4/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/conv3/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.bbox_stem.seq.conv.weight + /model/heads/head3/bbox_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/bbox_stem/seq/conv/Conv || model.heads.head3.pose_stem.seq.conv.weight + /model/heads/head3/pose_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_stem/seq/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.reg_convs.0.seq.conv.weight + /model/heads/head3/reg_convs/reg_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv || model.heads.head3.cls_convs.0.seq.conv.weight + /model/heads/head3/cls_convs/cls_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_convs.0.seq.conv.weight + /model/heads/head3/pose_convs/pose_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.cls_pred.weight + /model/heads/head3/cls_pred/_weight_quantizer/QuantizeLinear + /model/heads/head3/cls_pred/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: 
model.heads.head3.reg_pred.weight + /model/heads/head3/reg_pred/_weight_quantizer/QuantizeLinear + /model/heads/head3/reg_pred/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_convs.1.seq.conv.weight + /model/heads/head3/pose_convs/pose_convs.1/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_convs.2.seq.conv.weight + /model/heads/head3/pose_convs/pose_convs.2/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_pred.weight + /model/heads/head3/pose_pred/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_pred/Conv +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice_1.../post_process/Reshape_2]} +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] NMS: batched_nms_274 +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 1421) [NMS]_1_output[DevicetoShapeHostCopy] +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/28/2023-20:16:45] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] +[12/28/2023-20:16:46] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +481, now: CPU 1532, GPU 4180 (MiB) +[12/28/2023-20:16:47] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +83, GPU +76, now: CPU 1615, GPU 4256 (MiB) +[12/28/2023-20:16:47] [I] [TRT] Local timing cache in use. 
Profiling results in this builder pass will not be stored. +[12/28/2023-20:30:40] [I] [TRT] Total Activation Memory: 8076738048 +[12/28/2023-20:30:40] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/28/2023-20:30:46] [I] [TRT] Total Host Persistent Memory: 355104 +[12/28/2023-20:30:46] [I] [TRT] Total Device Persistent Memory: 38912 +[12/28/2023-20:30:46] [I] [TRT] Total Scratch Memory: 134217728 +[12/28/2023-20:30:46] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 264 MiB, GPU 132 MiB +[12/28/2023-20:30:46] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 194 steps to complete. +[12/28/2023-20:30:46] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 71.4503ms to assign 13 blocks to 194 nodes requiring 183278080 bytes. +[12/28/2023-20:30:46] [I] [TRT] Total Activation Memory: 183278080 +[12/28/2023-20:30:48] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +1, GPU +7, now: CPU 2003, GPU 5811 (MiB) +[12/28/2023-20:30:48] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +52, GPU +64, now: CPU 52, GPU 64 (MiB) +[12/28/2023-20:30:48] [I] Engine built in 859.207 sec. +[12/28/2023-20:30:48] [I] [TRT] Loaded engine size: 54 MiB +[12/28/2023-20:30:48] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +11, now: CPU 1289, GPU 5551 (MiB) +[12/28/2023-20:30:48] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +52, now: CPU 0, GPU 52 (MiB) +[12/28/2023-20:30:48] [I] Engine deserialized in 0.183216 sec. +[12/28/2023-20:30:49] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +6, now: CPU 1289, GPU 5551 (MiB) +[12/28/2023-20:30:49] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +175, now: CPU 0, GPU 227 (MiB) +[12/28/2023-20:30:49] [I] Setting persistentCacheLimit to 0 bytes. 
+[12/28/2023-20:30:49] [I] Using random values for input onnx::Cast_0 +[12/28/2023-20:30:49] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/28/2023-20:30:49] [I] Using random values for output graph2_flat_predictions +[12/28/2023-20:30:49] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/28/2023-20:30:49] [I] Starting inference +[12/28/2023-20:31:04] [I] Warmup completed 5 queries over 200 ms +[12/28/2023-20:31:04] [I] Timing trace has 467 queries over 15.0803 s +[12/28/2023-20:31:04] [I] +[12/28/2023-20:31:04] [I] === Trace details === +[12/28/2023-20:31:04] [I] Trace averages of 100 runs: +[12/28/2023-20:31:04] [I] Average on 100 runs - GPU latency: 32.4935 ms - Host latency: 32.6124 ms (enqueue 32.5531 ms) +[12/28/2023-20:31:04] [I] Average on 100 runs - GPU latency: 32.0289 ms - Host latency: 32.1442 ms (enqueue 32.092 ms) +[12/28/2023-20:31:04] [I] Average on 100 runs - GPU latency: 31.9715 ms - Host latency: 32.0855 ms (enqueue 32.0299 ms) +[12/28/2023-20:31:04] [I] Average on 100 runs - GPU latency: 32.1741 ms - Host latency: 32.2872 ms (enqueue 32.2448 ms) +[12/28/2023-20:31:04] [I] +[12/28/2023-20:31:04] [I] === Performance summary === +[12/28/2023-20:31:04] [I] Throughput: 30.9676 qps +[12/28/2023-20:31:04] [I] Latency: min = 29.5527 ms, max = 48.669 ms, mean = 32.255 ms, median = 31.8447 ms, percentile(90%) = 33.2435 ms, percentile(95%) = 35.3904 ms, percentile(99%) = 42.4307 ms +[12/28/2023-20:31:04] [I] Enqueue Time: min = 29.5156 ms, max = 48.6032 ms, mean = 32.204 ms, median = 31.8164 ms, percentile(90%) = 32.9639 ms, percentile(95%) = 35.3633 ms, percentile(99%) = 42.3721 ms +[12/28/2023-20:31:04] [I] H2D Latency: min = 0.0800781 ms, max = 0.116211 ms, mean = 0.0943801 ms, median = 0.0952148 ms, percentile(90%) = 0.0976562 ms, percentile(95%) = 0.0981445 ms, percentile(99%) = 0.107422 ms +[12/28/2023-20:31:04] [I] GPU Compute Time: min = 29.4414 ms, max = 48.5475 ms, mean = 32.1403 ms, 
median = 31.7275 ms, percentile(90%) = 33.1393 ms, percentile(95%) = 35.2798 ms, percentile(99%) = 42.3066 ms +[12/28/2023-20:31:04] [I] D2H Latency: min = 0.00292969 ms, max = 0.0655518 ms, mean = 0.0203297 ms, median = 0.0205078 ms, percentile(90%) = 0.0288086 ms, percentile(95%) = 0.03125 ms, percentile(99%) = 0.0375977 ms +[12/28/2023-20:31:04] [I] Total Host Walltime: 15.0803 s +[12/28/2023-20:31:04] [I] Total GPU Compute Time: 15.0095 s +[12/28/2023-20:31:04] [I] Explanations of the performance metrics are printed in the verbose logs. +[12/28/2023-20:31:04] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_l_int8.onnx --int8 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_l_int8.onnx.int8.engine diff --git a/yolo_nas_pose_l_int8.onnx.usage.txt b/yolo_nas_pose_l_int8.onnx.usage.txt new file mode 100644 index 0000000000000000000000000000000000000000..aeab6984d3cc658647db0b9c1752c0183f9089e6 --- /dev/null +++ b/yolo_nas_pose_l_int8.onnx.usage.txt @@ -0,0 +1,58 @@ + +Model exported successfully to yolo_nas_pose_l_int8.onnx +Model expects input image of shape [1, 3, 640, 640] +Input image dtype is torch.uint8 + +Exported model already contains preprocessing (normalization) step, so you don't need to do it manually. 
+Preprocessing steps to be applied to input image are: +Sequential( + (0): CastTensorTo(dtype=torch.float32) + (1): ChannelSelect(channels_indexes=tensor([2, 1, 0])) + (2): ApplyMeanStd(mean=[0.], scale=[255.]) +) + + +Exported model contains postprocessing (NMS) step with the following parameters: + num_pre_nms_predictions=1000 + max_predictions_per_image=10 + nms_threshold=0.5 + confidence_threshold=0.15 + output_predictions_format=flat + + +Exported model is in ONNX format and can be used with ONNXRuntime +To run inference with ONNXRuntime, please use the following code snippet: + + import onnxruntime + import numpy as np + session = onnxruntime.InferenceSession("yolo_nas_pose_l_int8.onnx", providers=["CUDAExecutionProvider", "CPUExecutionProvider"]) + inputs = [o.name for o in session.get_inputs()] + outputs = [o.name for o in session.get_outputs()] + + example_input_image = np.zeros((1, 3, 640, 640)).astype(np.uint8) + predictions = session.run(outputs, {inputs[0]: example_input_image}) + +Exported model can also be used with TensorRT +To run inference with TensorRT, please see TensorRT deployment documentation +You can benchmark the model using the following code snippet: + + trtexec --onnx=yolo_nas_pose_l_int8.onnx --int8 --avgRuns=100 --duration=15 + + +Exported model has predictions in flat format: + +# flat_predictions is a 2D array of [N,K] shape +# Each row represents (image_index, x_min, y_min, x_max, y_max, confidence, joints...) 
+# Please note all values are floats, so you have to convert them to integers if needed +
+[flat_predictions] = predictions +pred_bboxes = flat_predictions[:, 1:5] +pred_scores = flat_predictions[:, 5] +pred_joints = flat_predictions[:, 6:].reshape((len(pred_bboxes), -1, 3)) +for i in range(len(pred_bboxes)): + confidence = pred_scores[i] + x_min, y_min, x_max, y_max = pred_bboxes[i] + print(f"Detected pose with confidence={{confidence}}, x_min={{x_min}}, y_min={{y_min}}, x_max={{x_max}}, y_max={{y_max}}") + for joint_index, (x, y, confidence) in enumerate(pred_joints[i]): + print(f"Joint {{joint_index}} has coordinates x={{x}}, y={{y}}, confidence={{confidence}}") + diff --git a/yolo_nas_pose_m_fp16.onnx b/yolo_nas_pose_m_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..129fd375c9789f0ef34b24ddd32c075822d29f62 --- /dev/null +++ b/yolo_nas_pose_m_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7674809abd50d8acbab6500f1b7ad6cb0103539e6102066d7695160b9b0f8413 +size 78063545 diff --git a/yolo_nas_pose_m_fp16.onnx.best.engine b/yolo_nas_pose_m_fp16.onnx.best.engine new file mode 100644 index 0000000000000000000000000000000000000000..f8554813ba2274979c7bfe2034e9a598c3d8928b --- /dev/null +++ b/yolo_nas_pose_m_fp16.onnx.best.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d694deef0e1f970fb285d669185a1984b38cb587324b7c380b896a02794431ae +size 41498155 diff --git a/yolo_nas_pose_m_fp16.onnx.best.engine.err b/yolo_nas_pose_m_fp16.onnx.best.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..56eec3be20e1ddce29efb760816f42200d0113ed --- /dev/null +++ b/yolo_nas_pose_m_fp16.onnx.best.engine.err @@ -0,0 +1,406 @@ +[12/28/2023-08:45:30] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. 
+[12/28/2023-08:45:30] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-08:45:30] [W] [TRT] Calibrator is not being used. Users must provide dynamic range for all tensors that are not Int32 or Bool. +[12/28/2023-09:13:06] [W] [TRT] Tactic Device request: 6262MB Available: 3108MB. Device memory is insufficient to use tactic. +[12/28/2023-09:13:07] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6262 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:13:07] [W] [TRT] Tactic Device request: 6262MB Available: 3107MB. Device memory is insufficient to use tactic. +[12/28/2023-09:13:07] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6262 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:13:07] [W] [TRT] Tactic Device request: 6262MB Available: 3106MB. Device memory is insufficient to use tactic. +[12/28/2023-09:13:07] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6262 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:13:09] [W] [TRT] Tactic Device request: 6251MB Available: 3131MB. Device memory is insufficient to use tactic. +[12/28/2023-09:13:09] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6251 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:13:09] [W] [TRT] Tactic Device request: 6251MB Available: 3131MB. Device memory is insufficient to use tactic. +[12/28/2023-09:13:09] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 6251 detected for tactic 0x000000000000003c. 
+Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:21:25] [W] [TRT] Tactic Device request: 4711MB Available: 2436MB. Device memory is insufficient to use tactic. +[12/28/2023-09:21:25] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:21:25] [W] [TRT] Tactic Device request: 4711MB Available: 2436MB. Device memory is insufficient to use tactic. +[12/28/2023-09:21:26] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4711 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:21:26] [W] [TRT] Tactic Device request: 4711MB Available: 2437MB. Device memory is insufficient to use tactic. +[12/28/2023-09:21:26] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:21:27] [W] [TRT] Tactic Device request: 4701MB Available: 2436MB. Device memory is insufficient to use tactic. +[12/28/2023-09:21:27] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4701 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:21:27] [W] [TRT] Tactic Device request: 4701MB Available: 2436MB. Device memory is insufficient to use tactic. +[12/28/2023-09:21:27] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 4701 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:21:47] [W] [TRT] Tactic Device request: 6275MB Available: 2435MB. Device memory is insufficient to use tactic. 
+[12/28/2023-09:21:47] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6275 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:21:47] [W] [TRT] Tactic Device request: 6275MB Available: 2435MB. Device memory is insufficient to use tactic. +[12/28/2023-09:21:47] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6275 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:21:47] [W] [TRT] Tactic Device request: 6275MB Available: 2435MB. Device memory is insufficient to use tactic. +[12/28/2023-09:21:47] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6275 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:21:49] [W] [TRT] Tactic Device request: 6270MB Available: 2436MB. Device memory is insufficient to use tactic. +[12/28/2023-09:21:49] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6270 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:21:49] [W] [TRT] Tactic Device request: 6270MB Available: 2436MB. Device memory is insufficient to use tactic. +[12/28/2023-09:21:49] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 6270 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:32:33] [W] [TRT] Tactic Device request: 7056MB Available: 2270MB. Device memory is insufficient to use tactic. +[12/28/2023-09:32:33] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-09:32:33] [W] [TRT] Tactic Device request: 7056MB Available: 2270MB. Device memory is insufficient to use tactic. +[12/28/2023-09:32:33] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 7056 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:32:33] [W] [TRT] Tactic Device request: 7056MB Available: 2270MB. Device memory is insufficient to use tactic. +[12/28/2023-09:32:33] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:32:35] [W] [TRT] Tactic Device request: 7050MB Available: 2269MB. Device memory is insufficient to use tactic. +[12/28/2023-09:32:35] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7050 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:32:35] [W] [TRT] Tactic Device request: 7050MB Available: 2269MB. Device memory is insufficient to use tactic. +[12/28/2023-09:32:35] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 7050 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:32:56] [W] [TRT] Tactic Device request: 6354MB Available: 2266MB. Device memory is insufficient to use tactic. +[12/28/2023-09:32:56] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6354 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:32:56] [W] [TRT] Tactic Device request: 6354MB Available: 2266MB. Device memory is insufficient to use tactic. 
+[12/28/2023-09:32:56] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6354 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:32:56] [W] [TRT] Tactic Device request: 6354MB Available: 2267MB. Device memory is insufficient to use tactic. +[12/28/2023-09:32:56] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6354 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:32:58] [W] [TRT] Tactic Device request: 6351MB Available: 2267MB. Device memory is insufficient to use tactic. +[12/28/2023-09:32:58] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6351 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:32:58] [W] [TRT] Tactic Device request: 6351MB Available: 2267MB. Device memory is insufficient to use tactic. +[12/28/2023-09:32:58] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 6351 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:47:49] [W] [TRT] Tactic Device request: 2394MB Available: 2107MB. Device memory is insufficient to use tactic. +[12/28/2023-09:47:50] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2394 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:47:50] [W] [TRT] Tactic Device request: 2394MB Available: 2107MB. Device memory is insufficient to use tactic. +[12/28/2023-09:47:50] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-09:47:50] [W] [TRT] Tactic Device request: 2392MB Available: 2105MB. Device memory is insufficient to use tactic. +[12/28/2023-09:47:50] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:47:51] [W] [TRT] Tactic Device request: 2392MB Available: 2105MB. Device memory is insufficient to use tactic. +[12/28/2023-09:47:51] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2392 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:47:51] [W] [TRT] Tactic Device request: 2392MB Available: 2105MB. Device memory is insufficient to use tactic. +[12/28/2023-09:47:51] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:47:52] [W] [TRT] Tactic Device request: 2391MB Available: 2105MB. Device memory is insufficient to use tactic. +[12/28/2023-09:47:52] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2391 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:47:52] [W] [TRT] Tactic Device request: 2391MB Available: 2105MB. Device memory is insufficient to use tactic. +[12/28/2023-09:47:52] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2391 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:47:52] [W] [TRT] Tactic Device request: 2390MB Available: 2105MB. Device memory is insufficient to use tactic. 
+[12/28/2023-09:47:52] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2390 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:47:52] [W] [TRT] Tactic Device request: 2390MB Available: 2105MB. Device memory is insufficient to use tactic. +[12/28/2023-09:47:52] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2390 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:48:10] [W] [TRT] Tactic Device request: 4906MB Available: 2087MB. Device memory is insufficient to use tactic. +[12/28/2023-09:48:10] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4906 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:48:10] [W] [TRT] Tactic Device request: 4906MB Available: 2087MB. Device memory is insufficient to use tactic. +[12/28/2023-09:48:10] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4906 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:48:10] [W] [TRT] Tactic Device request: 4906MB Available: 2087MB. Device memory is insufficient to use tactic. +[12/28/2023-09:48:10] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4906 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:48:12] [W] [TRT] Tactic Device request: 4905MB Available: 2088MB. Device memory is insufficient to use tactic. +[12/28/2023-09:48:12] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4905 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-09:48:12] [W] [TRT] Tactic Device request: 4905MB Available: 2088MB. Device memory is insufficient to use tactic. +[12/28/2023-09:48:12] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 4905 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:56:12] [W] [TRT] Tactic Device request: 4906MB Available: 1978MB. Device memory is insufficient to use tactic. +[12/28/2023-09:56:12] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4906 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:56:12] [W] [TRT] Tactic Device request: 4906MB Available: 1978MB. Device memory is insufficient to use tactic. +[12/28/2023-09:56:12] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4906 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:56:12] [W] [TRT] Tactic Device request: 4906MB Available: 1978MB. Device memory is insufficient to use tactic. +[12/28/2023-09:56:12] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4906 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:56:13] [W] [TRT] Tactic Device request: 4905MB Available: 1977MB. Device memory is insufficient to use tactic. +[12/28/2023-09:56:14] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4905 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:56:14] [W] [TRT] Tactic Device request: 4905MB Available: 1977MB. Device memory is insufficient to use tactic. 
+[12/28/2023-09:56:14] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 4905 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:56:16] [W] [TRT] Tactic Device request: 2457MB Available: 1978MB. Device memory is insufficient to use tactic. +[12/28/2023-09:56:16] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:56:16] [W] [TRT] Tactic Device request: 2457MB Available: 1977MB. Device memory is insufficient to use tactic. +[12/28/2023-09:56:16] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2457 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:56:16] [W] [TRT] Tactic Device request: 2457MB Available: 1978MB. Device memory is insufficient to use tactic. +[12/28/2023-09:56:16] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:56:18] [W] [TRT] Tactic Device request: 2456MB Available: 1978MB. Device memory is insufficient to use tactic. +[12/28/2023-09:56:18] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2456 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:56:18] [W] [TRT] Tactic Device request: 2456MB Available: 1978MB. Device memory is insufficient to use tactic. +[12/28/2023-09:56:18] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2456 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-09:56:46] [W] [TRT] Tactic Device request: 3587MB Available: 1952MB. Device memory is insufficient to use tactic. +[12/28/2023-09:56:46] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:56:46] [W] [TRT] Tactic Device request: 3587MB Available: 1952MB. Device memory is insufficient to use tactic. +[12/28/2023-09:56:46] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3587 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:56:46] [W] [TRT] Tactic Device request: 3587MB Available: 1952MB. Device memory is insufficient to use tactic. +[12/28/2023-09:56:46] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:56:47] [W] [TRT] Tactic Device request: 3585MB Available: 1951MB. Device memory is insufficient to use tactic. +[12/28/2023-09:56:47] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3585 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:56:47] [W] [TRT] Tactic Device request: 3585MB Available: 1951MB. Device memory is insufficient to use tactic. +[12/28/2023-09:56:47] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3585 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:56:49] [W] [TRT] Tactic Device request: 2385MB Available: 1949MB. Device memory is insufficient to use tactic. 
+[12/28/2023-09:56:49] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2385 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:56:49] [W] [TRT] Tactic Device request: 2385MB Available: 1949MB. Device memory is insufficient to use tactic. +[12/28/2023-09:56:49] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2385 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:56:49] [W] [TRT] Tactic Device request: 2385MB Available: 1949MB. Device memory is insufficient to use tactic. +[12/28/2023-09:56:49] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2385 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:56:50] [W] [TRT] Tactic Device request: 2384MB Available: 1950MB. Device memory is insufficient to use tactic. +[12/28/2023-09:56:50] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2384 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-09:56:50] [W] [TRT] Tactic Device request: 2384MB Available: 1950MB. Device memory is insufficient to use tactic. +[12/28/2023-09:56:50] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2384 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:05:05] [W] [TRT] Tactic Device request: 3556MB Available: 1897MB. Device memory is insufficient to use tactic. +[12/28/2023-10:05:05] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-10:05:05] [W] [TRT] Tactic Device request: 3556MB Available: 1897MB. Device memory is insufficient to use tactic. +[12/28/2023-10:05:05] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3556 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:05:05] [W] [TRT] Tactic Device request: 3556MB Available: 1897MB. Device memory is insufficient to use tactic. +[12/28/2023-10:05:05] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:05:06] [W] [TRT] Tactic Device request: 3551MB Available: 1897MB. Device memory is insufficient to use tactic. +[12/28/2023-10:05:06] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3551 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:05:06] [W] [TRT] Tactic Device request: 3551MB Available: 1897MB. Device memory is insufficient to use tactic. +[12/28/2023-10:05:06] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3551 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:15:43] [W] [TRT] Tactic Device request: 2359MB Available: 1749MB. Device memory is insufficient to use tactic. +[12/28/2023-10:15:43] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2359 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:15:43] [W] [TRT] Tactic Device request: 2359MB Available: 1749MB. Device memory is insufficient to use tactic. 
+[12/28/2023-10:15:43] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2359 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:15:43] [W] [TRT] Tactic Device request: 2359MB Available: 1749MB. Device memory is insufficient to use tactic. +[12/28/2023-10:15:43] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2359 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:15:44] [W] [TRT] Tactic Device request: 2355MB Available: 1749MB. Device memory is insufficient to use tactic. +[12/28/2023-10:15:44] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2355 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:15:44] [W] [TRT] Tactic Device request: 2355MB Available: 1749MB. Device memory is insufficient to use tactic. +[12/28/2023-10:15:44] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2355 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:15:51] [W] [TRT] Tactic Device request: 2362MB Available: 1754MB. Device memory is insufficient to use tactic. +[12/28/2023-10:15:51] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2362 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:15:51] [W] [TRT] Tactic Device request: 2362MB Available: 1754MB. Device memory is insufficient to use tactic. +[12/28/2023-10:15:51] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2362 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-10:15:51] [W] [TRT] Tactic Device request: 2362MB Available: 1754MB. Device memory is insufficient to use tactic. +[12/28/2023-10:15:51] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2362 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:15:54] [W] [TRT] Tactic Device request: 2357MB Available: 1752MB. Device memory is insufficient to use tactic. +[12/28/2023-10:15:54] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2357 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:15:54] [W] [TRT] Tactic Device request: 2357MB Available: 1752MB. Device memory is insufficient to use tactic. +[12/28/2023-10:15:54] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2357 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:15:57] [W] [TRT] Tactic Device request: 2359MB Available: 1752MB. Device memory is insufficient to use tactic. +[12/28/2023-10:15:57] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2359 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:15:57] [W] [TRT] Tactic Device request: 2359MB Available: 1752MB. Device memory is insufficient to use tactic. +[12/28/2023-10:15:57] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2359 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:15:57] [W] [TRT] Tactic Device request: 2359MB Available: 1752MB. Device memory is insufficient to use tactic. 
+[12/28/2023-10:15:58] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2359 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:16:00] [W] [TRT] Tactic Device request: 2356MB Available: 1752MB. Device memory is insufficient to use tactic. +[12/28/2023-10:16:00] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2356 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:16:00] [W] [TRT] Tactic Device request: 2356MB Available: 1752MB. Device memory is insufficient to use tactic. +[12/28/2023-10:16:00] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2356 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:24:58] [W] [TRT] Tactic Device request: 3575MB Available: 2009MB. Device memory is insufficient to use tactic. +[12/28/2023-10:24:58] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3575 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:24:58] [W] [TRT] Tactic Device request: 3575MB Available: 2009MB. Device memory is insufficient to use tactic. +[12/28/2023-10:24:58] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3575 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:24:58] [W] [TRT] Tactic Device request: 3575MB Available: 2009MB. Device memory is insufficient to use tactic. +[12/28/2023-10:24:58] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3575 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-10:24:59] [W] [TRT] Tactic Device request: 3572MB Available: 2008MB. Device memory is insufficient to use tactic. +[12/28/2023-10:24:59] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3572 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:24:59] [W] [TRT] Tactic Device request: 3572MB Available: 2008MB. Device memory is insufficient to use tactic. +[12/28/2023-10:24:59] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3572 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:25:08] [W] [TRT] Tactic Device request: 2390MB Available: 1909MB. Device memory is insufficient to use tactic. +[12/28/2023-10:25:08] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2390 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:25:08] [W] [TRT] Tactic Device request: 2390MB Available: 1824MB. Device memory is insufficient to use tactic. +[12/28/2023-10:25:08] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2390 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:25:09] [W] [TRT] Tactic Device request: 2390MB Available: 1822MB. Device memory is insufficient to use tactic. +[12/28/2023-10:25:09] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2390 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:25:12] [W] [TRT] Tactic Device request: 2387MB Available: 1824MB. Device memory is insufficient to use tactic. 
+[12/28/2023-10:25:12] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2387 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:25:12] [W] [TRT] Tactic Device request: 2387MB Available: 1800MB. Device memory is insufficient to use tactic. +[12/28/2023-10:25:12] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2387 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:25:15] [W] [TRT] Tactic Device request: 2388MB Available: 1797MB. Device memory is insufficient to use tactic. +[12/28/2023-10:25:15] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2388 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:25:16] [W] [TRT] Tactic Device request: 2388MB Available: 1753MB. Device memory is insufficient to use tactic. +[12/28/2023-10:25:16] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2388 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:25:16] [W] [TRT] Tactic Device request: 2388MB Available: 1749MB. Device memory is insufficient to use tactic. +[12/28/2023-10:25:16] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2388 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:25:19] [W] [TRT] Tactic Device request: 2386MB Available: 1749MB. Device memory is insufficient to use tactic. +[12/28/2023-10:25:19] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2386 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-10:25:19] [W] [TRT] Tactic Device request: 2386MB Available: 1729MB. Device memory is insufficient to use tactic. +[12/28/2023-10:25:19] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2386 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:25:23] [W] [TRT] Tactic Device request: 4775MB Available: 1722MB. Device memory is insufficient to use tactic. +[12/28/2023-10:25:23] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4775 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:25:24] [W] [TRT] Tactic Device request: 4775MB Available: 1600MB. Device memory is insufficient to use tactic. +[12/28/2023-10:25:24] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4775 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:25:25] [W] [TRT] Tactic Device request: 4775MB Available: 1593MB. Device memory is insufficient to use tactic. +[12/28/2023-10:25:25] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4775 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:25:30] [W] [TRT] Tactic Device request: 4772MB Available: 1593MB. Device memory is insufficient to use tactic. +[12/28/2023-10:25:30] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4772 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:25:31] [W] [TRT] Tactic Device request: 4772MB Available: 1591MB. Device memory is insufficient to use tactic. 
+[12/28/2023-10:25:31] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4772 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:25:36] [W] [TRT] Tactic Device request: 4774MB Available: 1593MB. Device memory is insufficient to use tactic. +[12/28/2023-10:25:36] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4774 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:25:37] [W] [TRT] Tactic Device request: 4774MB Available: 1592MB. Device memory is insufficient to use tactic. +[12/28/2023-10:25:37] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4774 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:25:38] [W] [TRT] Tactic Device request: 4774MB Available: 1593MB. Device memory is insufficient to use tactic. +[12/28/2023-10:25:38] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4774 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:25:43] [W] [TRT] Tactic Device request: 4771MB Available: 1592MB. Device memory is insufficient to use tactic. +[12/28/2023-10:25:43] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4771 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:25:44] [W] [TRT] Tactic Device request: 4771MB Available: 1592MB. Device memory is insufficient to use tactic. +[12/28/2023-10:25:44] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4771 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-10:25:48] [W] [TRT] Tactic Device request: 1637MB Available: 1594MB. Device memory is insufficient to use tactic. +[12/28/2023-10:25:48] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 1637 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:25:49] [W] [TRT] Tactic Device request: 1637MB Available: 1594MB. Device memory is insufficient to use tactic. +[12/28/2023-10:25:49] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 1637 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:25:49] [W] [TRT] Tactic Device request: 1637MB Available: 1594MB. Device memory is insufficient to use tactic. +[12/28/2023-10:25:49] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 1637 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:25:50] [W] [TRT] Tactic Device request: 1636MB Available: 1592MB. Device memory is insufficient to use tactic. +[12/28/2023-10:25:50] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 1636 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:25:50] [W] [TRT] Tactic Device request: 1636MB Available: 1592MB. Device memory is insufficient to use tactic. +[12/28/2023-10:25:50] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 1636 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:26:03] [W] [TRT] Tactic Device request: 4774MB Available: 1593MB. Device memory is insufficient to use tactic. 
+[12/28/2023-10:26:03] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4774 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:26:04] [W] [TRT] Tactic Device request: 4774MB Available: 1591MB. Device memory is insufficient to use tactic. +[12/28/2023-10:26:04] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4774 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:26:05] [W] [TRT] Tactic Device request: 4774MB Available: 1589MB. Device memory is insufficient to use tactic. +[12/28/2023-10:26:05] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4774 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:26:10] [W] [TRT] Tactic Device request: 4771MB Available: 1591MB. Device memory is insufficient to use tactic. +[12/28/2023-10:26:10] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4771 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:26:11] [W] [TRT] Tactic Device request: 4771MB Available: 1588MB. Device memory is insufficient to use tactic. +[12/28/2023-10:26:11] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4771 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:36:30] [W] [TRT] Tactic Device request: 1638MB Available: 1396MB. Device memory is insufficient to use tactic. +[12/28/2023-10:36:30] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 1638 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-10:36:30] [W] [TRT] Tactic Device request: 1638MB Available: 1396MB. Device memory is insufficient to use tactic. +[12/28/2023-10:36:30] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 1638 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:36:31] [W] [TRT] Tactic Device request: 1637MB Available: 1395MB. Device memory is insufficient to use tactic. +[12/28/2023-10:36:31] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 1637 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:36:31] [W] [TRT] Tactic Device request: 1637MB Available: 1396MB. Device memory is insufficient to use tactic. +[12/28/2023-10:36:31] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 1637 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:36:33] [W] [TRT] Tactic Device request: 2454MB Available: 1396MB. Device memory is insufficient to use tactic. +[12/28/2023-10:36:33] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2454 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:36:33] [W] [TRT] Tactic Device request: 2454MB Available: 1396MB. Device memory is insufficient to use tactic. +[12/28/2023-10:36:33] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2454 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:36:33] [W] [TRT] Tactic Device request: 2454MB Available: 1397MB. Device memory is insufficient to use tactic. 
+[12/28/2023-10:36:33] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2454 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:36:34] [W] [TRT] Tactic Device request: 2453MB Available: 1397MB. Device memory is insufficient to use tactic. +[12/28/2023-10:36:34] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2453 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:36:34] [W] [TRT] Tactic Device request: 2453MB Available: 1397MB. Device memory is insufficient to use tactic. +[12/28/2023-10:36:34] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2453 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:36:37] [W] [TRT] Tactic Device request: 2463MB Available: 1427MB. Device memory is insufficient to use tactic. +[12/28/2023-10:36:37] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2463 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:36:38] [W] [TRT] Tactic Device request: 2463MB Available: 1390MB. Device memory is insufficient to use tactic. +[12/28/2023-10:36:38] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2463 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:36:39] [W] [TRT] Tactic Device request: 2463MB Available: 1388MB. Device memory is insufficient to use tactic. +[12/28/2023-10:36:39] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2463 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-10:36:43] [W] [TRT] Tactic Device request: 2462MB Available: 1596MB. Device memory is insufficient to use tactic. +[12/28/2023-10:36:43] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2462 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:36:44] [W] [TRT] Tactic Device request: 2462MB Available: 1379MB. Device memory is insufficient to use tactic. +[12/28/2023-10:36:44] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2462 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:36:49] [W] [TRT] Tactic Device request: 2463MB Available: 1378MB. Device memory is insufficient to use tactic. +[12/28/2023-10:36:49] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2463 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:36:49] [W] [TRT] Tactic Device request: 2463MB Available: 1376MB. Device memory is insufficient to use tactic. +[12/28/2023-10:36:49] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2463 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:36:50] [W] [TRT] Tactic Device request: 2463MB Available: 1376MB. Device memory is insufficient to use tactic. +[12/28/2023-10:36:50] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2463 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:36:54] [W] [TRT] Tactic Device request: 2462MB Available: 1375MB. Device memory is insufficient to use tactic. 
+[12/28/2023-10:36:54] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2462 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:36:55] [W] [TRT] Tactic Device request: 2462MB Available: 1375MB. Device memory is insufficient to use tactic. +[12/28/2023-10:36:55] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2462 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-10:38:08] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. +[12/28/2023-10:38:08] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/28/2023-10:38:08] [W] * GPU compute time is unstable, with coefficient of variance = 7.66665%. +[12/28/2023-10:38:08] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. 
diff --git a/yolo_nas_pose_m_fp16.onnx.best.engine.log b/yolo_nas_pose_m_fp16.onnx.best.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..4e4f2e109e41fa236e414adba80fb59bdff2dbee --- /dev/null +++ b/yolo_nas_pose_m_fp16.onnx.best.engine.log @@ -0,0 +1,312 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_m_fp16.onnx --best --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_m_fp16.onnx.best.engine +[12/28/2023-08:45:20] [I] === Model Options === +[12/28/2023-08:45:20] [I] Format: ONNX +[12/28/2023-08:45:20] [I] Model: yolo_nas_pose_m_fp16.onnx +[12/28/2023-08:45:20] [I] Output: +[12/28/2023-08:45:20] [I] === Build Options === +[12/28/2023-08:45:20] [I] Max batch: explicit batch +[12/28/2023-08:45:20] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-08:45:20] [I] minTiming: 1 +[12/28/2023-08:45:20] [I] avgTiming: 8 +[12/28/2023-08:45:20] [I] Precision: FP32+FP16+INT8 +[12/28/2023-08:45:20] [I] LayerPrecisions: +[12/28/2023-08:45:20] [I] Calibration: Dynamic +[12/28/2023-08:45:20] [I] Refit: Disabled +[12/28/2023-08:45:20] [I] Sparsity: Disabled +[12/28/2023-08:45:20] [I] Safe mode: Disabled +[12/28/2023-08:45:20] [I] DirectIO mode: Disabled +[12/28/2023-08:45:20] [I] Restricted mode: Disabled +[12/28/2023-08:45:20] [I] Build only: Disabled +[12/28/2023-08:45:20] [I] Save engine: yolo_nas_pose_m_fp16.onnx.best.engine +[12/28/2023-08:45:20] [I] Load engine: +[12/28/2023-08:45:20] [I] Profiling verbosity: 0 +[12/28/2023-08:45:20] [I] Tactic sources: Using default tactic sources +[12/28/2023-08:45:20] [I] timingCacheMode: local +[12/28/2023-08:45:20] [I] timingCacheFile: +[12/28/2023-08:45:20] [I] Heuristic: Disabled +[12/28/2023-08:45:20] [I] Preview Features: Use default preview flags. 
+[12/28/2023-08:45:20] [I] Input(s)s format: fp32:CHW +[12/28/2023-08:45:20] [I] Output(s)s format: fp32:CHW +[12/28/2023-08:45:20] [I] Input build shapes: model +[12/28/2023-08:45:20] [I] Input calibration shapes: model +[12/28/2023-08:45:20] [I] === System Options === +[12/28/2023-08:45:20] [I] Device: 0 +[12/28/2023-08:45:20] [I] DLACore: +[12/28/2023-08:45:20] [I] Plugins: +[12/28/2023-08:45:20] [I] === Inference Options === +[12/28/2023-08:45:20] [I] Batch: Explicit +[12/28/2023-08:45:20] [I] Input inference shapes: model +[12/28/2023-08:45:20] [I] Iterations: 10 +[12/28/2023-08:45:20] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-08:45:20] [I] Sleep time: 0ms +[12/28/2023-08:45:20] [I] Idle time: 0ms +[12/28/2023-08:45:20] [I] Streams: 1 +[12/28/2023-08:45:20] [I] ExposeDMA: Disabled +[12/28/2023-08:45:20] [I] Data transfers: Enabled +[12/28/2023-08:45:20] [I] Spin-wait: Disabled +[12/28/2023-08:45:20] [I] Multithreading: Disabled +[12/28/2023-08:45:20] [I] CUDA Graph: Disabled +[12/28/2023-08:45:20] [I] Separate profiling: Disabled +[12/28/2023-08:45:20] [I] Time Deserialize: Disabled +[12/28/2023-08:45:20] [I] Time Refit: Disabled +[12/28/2023-08:45:20] [I] NVTX verbosity: 0 +[12/28/2023-08:45:20] [I] Persistent Cache Ratio: 0 +[12/28/2023-08:45:20] [I] Inputs: +[12/28/2023-08:45:20] [I] === Reporting Options === +[12/28/2023-08:45:20] [I] Verbose: Disabled +[12/28/2023-08:45:20] [I] Averages: 100 inferences +[12/28/2023-08:45:20] [I] Percentiles: 90,95,99 +[12/28/2023-08:45:20] [I] Dump refittable layers:Disabled +[12/28/2023-08:45:20] [I] Dump output: Disabled +[12/28/2023-08:45:20] [I] Profile: Disabled +[12/28/2023-08:45:20] [I] Export timing to JSON file: +[12/28/2023-08:45:20] [I] Export output to JSON file: +[12/28/2023-08:45:20] [I] Export profile to JSON file: +[12/28/2023-08:45:20] [I] +[12/28/2023-08:45:20] [I] === Device Information === +[12/28/2023-08:45:20] [I] Selected Device: Orin +[12/28/2023-08:45:20] [I] Compute Capability: 8.7 
+[12/28/2023-08:45:20] [I] SMs: 8 +[12/28/2023-08:45:20] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-08:45:20] [I] Device Global Memory: 7471 MiB +[12/28/2023-08:45:20] [I] Shared Memory per SM: 164 KiB +[12/28/2023-08:45:20] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-08:45:20] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-08:45:20] [I] +[12/28/2023-08:45:20] [I] TensorRT version: 8.5.2 +[12/28/2023-08:45:24] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 2980 (MiB) +[12/28/2023-08:45:29] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +284, now: CPU 574, GPU 3286 (MiB) +[12/28/2023-08:45:29] [I] Start parsing network model +[12/28/2023-08:45:30] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-08:45:30] [I] [TRT] Input filename: yolo_nas_pose_m_fp16.onnx +[12/28/2023-08:45:30] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-08:45:30] [I] [TRT] Opset version: 17 +[12/28/2023-08:45:30] [I] [TRT] Producer name: pytorch +[12/28/2023-08:45:30] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-08:45:30] [I] [TRT] Domain: +[12/28/2023-08:45:30] [I] [TRT] Model version: 0 +[12/28/2023-08:45:30] [I] [TRT] Doc string: +[12/28/2023-08:45:30] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-08:45:30] [I] Finish parsing network model +[12/28/2023-08:45:31] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/28/2023-08:45:31] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 410) [Constant] +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 411) [Constant] +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 412) 
[Constant] +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stem/conv/rbr_reparam/Conv + /model/backbone/stem/conv/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/downsample/rbr_reparam/Conv + /model/backbone/stage1/downsample/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv2/conv/Conv + /model/backbone/stage1/blocks/conv2/act/Relu || /model/backbone/stage1/blocks/conv1/conv/Conv + /model/backbone/stage1/blocks/conv1/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 15) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 23) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] COPY: 
/model/backbone/stage1/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv3/conv/Conv + /model/backbone/stage1/blocks/conv3/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_skip2/conv/Conv + /model/neck/neck2/reduce_skip2/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/downsample/rbr_reparam/Conv + /model/backbone/stage2/downsample/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/downsample/conv/Conv + /model/neck/neck2/downsample/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv2/conv/Conv + /model/backbone/stage2/blocks/conv2/act/Relu || /model/backbone/stage2/blocks/conv1/conv/Conv + /model/backbone/stage2/blocks/conv1/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 44) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + 
/model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 52) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 60) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv3/conv/Conv + /model/backbone/stage2/blocks/conv3/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip2/conv/Conv + 
/model/neck/neck1/reduce_skip2/act/Relu || /model/neck/neck2/reduce_skip1/conv/Conv + /model/neck/neck2/reduce_skip1/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/downsample/rbr_reparam/Conv + /model/backbone/stage3/downsample/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/downsample/conv/Conv + /model/neck/neck1/downsample/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv2/conv/Conv + /model/backbone/stage3/blocks/conv2/act/Relu || /model/backbone/stage3/blocks/conv1/conv/Conv + /model/backbone/stage3/blocks/conv1/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 83) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 91) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 99) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 107) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.4.alpha + (Unnamed Layer* 
115) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Add) +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add_output_0 copy +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add_output_0 copy +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv3/conv/Conv + /model/backbone/stage3/blocks/conv3/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip1/conv/Conv + /model/neck/neck1/reduce_skip1/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/downsample/rbr_reparam/Conv + /model/backbone/stage4/downsample/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv2/conv/Conv + /model/backbone/stage4/blocks/conv2/act/Relu || /model/backbone/stage4/blocks/conv1/conv/Conv + /model/backbone/stage4/blocks/conv1/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu 
+[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 134) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 142) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv3/conv/Conv + /model/backbone/stage4/blocks/conv3/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv1/conv/Conv + /model/backbone/context_module/cv1/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/cv1/act/Relu_output_0 copy +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv2/conv/Conv + /model/backbone/context_module/cv2/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/conv/conv/Conv 
+ /model/neck/neck1/conv/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck1/upsample/ConvTranspose +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_after_concat/conv/Conv + /model/neck/neck1/reduce_after_concat/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv2/conv/Conv + /model/neck/neck1/blocks/conv2/act/Relu || /model/neck/neck1/blocks/conv1/conv/Conv + /model/neck/neck1/blocks/conv1/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 171) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 179) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/neck/neck1/blocks/conv3/conv/Conv + /model/neck/neck1/blocks/conv3/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/conv/conv/Conv + /model/neck/neck2/conv/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck2/upsample/ConvTranspose +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/reduce_skip1/act/Relu_output_0 copy +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_after_concat/conv/Conv + /model/neck/neck2/reduce_after_concat/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv2/conv/Conv + /model/neck/neck2/blocks/conv2/act/Relu || /model/neck/neck2/blocks/conv1/conv/Conv + /model/neck/neck2/blocks/conv1/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 200) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 208) [Shuffle] + 
/model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 216) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv3/conv/Conv + /model/neck/neck2/blocks/conv3/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/bbox_stem/seq/conv/Conv + /model/heads/head1/bbox_stem/seq/act/Relu || /model/heads/head1/pose_stem/seq/conv/Conv + /model/heads/head1/pose_stem/seq/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/conv/conv/Conv + /model/neck/neck3/conv/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head1/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head1/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv2/conv/Conv + /model/neck/neck3/blocks/conv2/act/Relu || 
/model/neck/neck3/blocks/conv1/conv/Conv + /model/neck/neck3/blocks/conv1/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/cls_pred/Conv +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_pred/Conv +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_pred/Conv +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 247) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 270) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, 
/model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv3/conv/Conv + /model/neck/neck3/blocks/conv3/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_stem/seq/conv/Conv + /model/heads/head2/pose_stem/seq/act/Relu || /model/heads/head2/bbox_stem/seq/conv/Conv + /model/heads/head2/bbox_stem/seq/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/conv/conv/Conv + /model/neck/neck4/conv/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head2/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head2/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv2/conv/Conv + /model/neck/neck4/blocks/conv2/act/Relu || /model/neck/neck4/blocks/conv1/conv/Conv + /model/neck/neck4/blocks/conv1/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/cls_pred/Conv +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_pred/Conv +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 
+[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_pred/Conv +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 301) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 324) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.2.alpha + (Unnamed Layer* 332) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] COPY: 
/model/neck/neck4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv3/conv/Conv + /model/neck/neck4/blocks/conv3/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/bbox_stem/seq/conv/Conv + /model/heads/head3/bbox_stem/seq/act/Relu || /model/heads/head3/pose_stem/seq/conv/Conv + /model/heads/head3/pose_stem/seq/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head3/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head3/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/cls_pred/Conv +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_pred/Conv +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.2/seq/act/Relu +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_pred/Conv +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice_1...cast_boxes_to_fp32]} +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] NMS: batched_nms_26 +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: 
(Unnamed Layer* 414) [NMS]_1_output[DevicetoShapeHostCopy] +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/28/2023-08:45:31] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] +[12/28/2023-08:45:44] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +400, now: CPU 1196, GPU 3820 (MiB) +[12/28/2023-08:45:46] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +82, GPU +70, now: CPU 1278, GPU 3890 (MiB) +[12/28/2023-08:45:46] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored. +[12/28/2023-10:37:30] [I] [TRT] Total Activation Memory: 7941309952 +[12/28/2023-10:37:30] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/28/2023-10:37:43] [I] [TRT] Total Host Persistent Memory: 294816 +[12/28/2023-10:37:43] [I] [TRT] Total Device Persistent Memory: 38912 +[12/28/2023-10:37:43] [I] [TRT] Total Scratch Memory: 134217728 +[12/28/2023-10:37:43] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 54 MiB, GPU 2398 MiB +[12/28/2023-10:37:43] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 157 steps to complete. +[12/28/2023-10:37:43] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 76.003ms to assign 14 blocks to 157 nodes requiring 144914944 bytes. +[12/28/2023-10:37:43] [I] [TRT] Total Activation Memory: 144914944 +[12/28/2023-10:37:50] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU -1, now: CPU 1667, GPU 5526 (MiB) +[12/28/2023-10:37:50] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +38, GPU +64, now: CPU 38, GPU 64 (MiB) +[12/28/2023-10:37:51] [I] Engine built in 6750.81 sec. 
+[12/28/2023-10:37:52] [I] [TRT] Loaded engine size: 39 MiB +[12/28/2023-10:37:52] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1284, GPU 5348 (MiB) +[12/28/2023-10:37:52] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +37, now: CPU 0, GPU 37 (MiB) +[12/28/2023-10:37:52] [I] Engine deserialized in 0.265686 sec. +[12/28/2023-10:37:52] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1284, GPU 5348 (MiB) +[12/28/2023-10:37:52] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +138, now: CPU 0, GPU 175 (MiB) +[12/28/2023-10:37:52] [I] Setting persistentCacheLimit to 0 bytes. +[12/28/2023-10:37:52] [I] Using random values for input onnx::Cast_0 +[12/28/2023-10:37:52] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/28/2023-10:37:52] [I] Using random values for output graph2_flat_predictions +[12/28/2023-10:37:52] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/28/2023-10:37:52] [I] Starting inference +[12/28/2023-10:38:08] [I] Warmup completed 2 queries over 200 ms +[12/28/2023-10:38:08] [I] Timing trace has 875 queries over 15.0204 s +[12/28/2023-10:38:08] [I] +[12/28/2023-10:38:08] [I] === Trace details === +[12/28/2023-10:38:08] [I] Trace averages of 100 runs: +[12/28/2023-10:38:08] [I] Average on 100 runs - GPU latency: 17.3015 ms - Host latency: 17.42 ms (enqueue 17.373 ms) +[12/28/2023-10:38:08] [I] Average on 100 runs - GPU latency: 16.7848 ms - Host latency: 16.8974 ms (enqueue 16.8579 ms) +[12/28/2023-10:38:08] [I] Average on 100 runs - GPU latency: 16.7202 ms - Host latency: 16.8311 ms (enqueue 16.7917 ms) +[12/28/2023-10:38:08] [I] Average on 100 runs - GPU latency: 16.7189 ms - Host latency: 16.8301 ms (enqueue 16.7925 ms) +[12/28/2023-10:38:08] [I] Average on 100 runs - GPU latency: 16.8722 ms - Host latency: 16.9814 ms (enqueue 16.939 ms) +[12/28/2023-10:38:08] [I] 
Average on 100 runs - GPU latency: 17.0499 ms - Host latency: 17.165 ms (enqueue 17.1147 ms) +[12/28/2023-10:38:08] [I] Average on 100 runs - GPU latency: 17.4353 ms - Host latency: 17.5548 ms (enqueue 17.5031 ms) +[12/28/2023-10:38:08] [I] Average on 100 runs - GPU latency: 17.2635 ms - Host latency: 17.3821 ms (enqueue 17.328 ms) +[12/28/2023-10:38:08] [I] +[12/28/2023-10:38:08] [I] === Performance summary === +[12/28/2023-10:38:08] [I] Throughput: 58.254 qps +[12/28/2023-10:38:08] [I] Latency: min = 15.8867 ms, max = 26.5801 ms, mean = 17.1335 ms, median = 16.9814 ms, percentile(90%) = 17.8691 ms, percentile(95%) = 18.4963 ms, percentile(99%) = 24.3384 ms +[12/28/2023-10:38:08] [I] Enqueue Time: min = 15.8533 ms, max = 26.5361 ms, mean = 17.0885 ms, median = 16.9271 ms, percentile(90%) = 17.8125 ms, percentile(95%) = 18.4688 ms, percentile(99%) = 23.7695 ms +[12/28/2023-10:38:08] [I] H2D Latency: min = 0.0800781 ms, max = 0.148483 ms, mean = 0.0972059 ms, median = 0.0976562 ms, percentile(90%) = 0.100464 ms, percentile(95%) = 0.100586 ms, percentile(99%) = 0.110596 ms +[12/28/2023-10:38:08] [I] GPU Compute Time: min = 15.7729 ms, max = 26.4624 ms, mean = 17.019 ms, median = 16.8655 ms, percentile(90%) = 17.7568 ms, percentile(95%) = 18.3837 ms, percentile(99%) = 24.2227 ms +[12/28/2023-10:38:08] [I] D2H Latency: min = 0.00292969 ms, max = 0.0673828 ms, mean = 0.0172236 ms, median = 0.0146484 ms, percentile(90%) = 0.0263672 ms, percentile(95%) = 0.0283203 ms, percentile(99%) = 0.0407715 ms +[12/28/2023-10:38:08] [I] Total Host Walltime: 15.0204 s +[12/28/2023-10:38:08] [I] Total GPU Compute Time: 14.8917 s +[12/28/2023-10:38:08] [I] Explanations of the performance metrics are printed in the verbose logs. 
+[12/28/2023-10:38:08] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_m_fp16.onnx --best --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_m_fp16.onnx.best.engine diff --git a/yolo_nas_pose_m_fp16.onnx.engine.err b/yolo_nas_pose_m_fp16.onnx.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..9247585c11c8a22abe163b348afbfdddf4003437 --- /dev/null +++ b/yolo_nas_pose_m_fp16.onnx.engine.err @@ -0,0 +1,8 @@ +[12/28/2023-07:47:56] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/28/2023-07:47:56] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-07:47:57] [E] Error[4]: [network.cpp::validate::2925] Error Code 4: Internal Error (fp16 precision has been set for a layer or layer output, but fp16 is not configured in the builder) +[12/28/2023-07:47:57] [E] Error[2]: [builder.cpp::buildSerializedNetwork::751] Error Code 2: Internal Error (Assertion engine != nullptr failed. ) +[12/28/2023-07:47:57] [E] Engine could not be created from network +[12/28/2023-07:47:57] [E] Building engine failed +[12/28/2023-07:47:57] [E] Failed to create engine from model or file. 
+[12/28/2023-07:47:57] [E] Engine set up failed diff --git a/yolo_nas_pose_m_fp16.onnx.engine.log b/yolo_nas_pose_m_fp16.onnx.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..c25a2375ab4faf0fc84f78808d019fabe88a9f5a --- /dev/null +++ b/yolo_nas_pose_m_fp16.onnx.engine.log @@ -0,0 +1,91 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_m_fp16.onnx --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_m_fp16.onnx.engine +[12/28/2023-07:47:46] [I] === Model Options === +[12/28/2023-07:47:46] [I] Format: ONNX +[12/28/2023-07:47:46] [I] Model: yolo_nas_pose_m_fp16.onnx +[12/28/2023-07:47:46] [I] Output: +[12/28/2023-07:47:46] [I] === Build Options === +[12/28/2023-07:47:46] [I] Max batch: explicit batch +[12/28/2023-07:47:46] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-07:47:46] [I] minTiming: 1 +[12/28/2023-07:47:46] [I] avgTiming: 8 +[12/28/2023-07:47:46] [I] Precision: FP32 +[12/28/2023-07:47:46] [I] LayerPrecisions: +[12/28/2023-07:47:46] [I] Calibration: +[12/28/2023-07:47:46] [I] Refit: Disabled +[12/28/2023-07:47:46] [I] Sparsity: Disabled +[12/28/2023-07:47:46] [I] Safe mode: Disabled +[12/28/2023-07:47:46] [I] DirectIO mode: Disabled +[12/28/2023-07:47:46] [I] Restricted mode: Disabled +[12/28/2023-07:47:46] [I] Build only: Disabled +[12/28/2023-07:47:46] [I] Save engine: yolo_nas_pose_m_fp16.onnx.engine +[12/28/2023-07:47:46] [I] Load engine: +[12/28/2023-07:47:46] [I] Profiling verbosity: 0 +[12/28/2023-07:47:46] [I] Tactic sources: Using default tactic sources +[12/28/2023-07:47:46] [I] timingCacheMode: local +[12/28/2023-07:47:46] [I] timingCacheFile: +[12/28/2023-07:47:46] [I] Heuristic: Disabled +[12/28/2023-07:47:46] [I] Preview Features: Use default preview flags. 
+[12/28/2023-07:47:46] [I] Input(s)s format: fp32:CHW +[12/28/2023-07:47:46] [I] Output(s)s format: fp32:CHW +[12/28/2023-07:47:46] [I] Input build shapes: model +[12/28/2023-07:47:46] [I] Input calibration shapes: model +[12/28/2023-07:47:46] [I] === System Options === +[12/28/2023-07:47:46] [I] Device: 0 +[12/28/2023-07:47:46] [I] DLACore: +[12/28/2023-07:47:46] [I] Plugins: +[12/28/2023-07:47:46] [I] === Inference Options === +[12/28/2023-07:47:46] [I] Batch: Explicit +[12/28/2023-07:47:46] [I] Input inference shapes: model +[12/28/2023-07:47:46] [I] Iterations: 10 +[12/28/2023-07:47:46] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-07:47:46] [I] Sleep time: 0ms +[12/28/2023-07:47:46] [I] Idle time: 0ms +[12/28/2023-07:47:46] [I] Streams: 1 +[12/28/2023-07:47:46] [I] ExposeDMA: Disabled +[12/28/2023-07:47:46] [I] Data transfers: Enabled +[12/28/2023-07:47:46] [I] Spin-wait: Disabled +[12/28/2023-07:47:46] [I] Multithreading: Disabled +[12/28/2023-07:47:46] [I] CUDA Graph: Disabled +[12/28/2023-07:47:46] [I] Separate profiling: Disabled +[12/28/2023-07:47:46] [I] Time Deserialize: Disabled +[12/28/2023-07:47:46] [I] Time Refit: Disabled +[12/28/2023-07:47:46] [I] NVTX verbosity: 0 +[12/28/2023-07:47:46] [I] Persistent Cache Ratio: 0 +[12/28/2023-07:47:46] [I] Inputs: +[12/28/2023-07:47:46] [I] === Reporting Options === +[12/28/2023-07:47:46] [I] Verbose: Disabled +[12/28/2023-07:47:46] [I] Averages: 100 inferences +[12/28/2023-07:47:46] [I] Percentiles: 90,95,99 +[12/28/2023-07:47:46] [I] Dump refittable layers:Disabled +[12/28/2023-07:47:46] [I] Dump output: Disabled +[12/28/2023-07:47:46] [I] Profile: Disabled +[12/28/2023-07:47:46] [I] Export timing to JSON file: +[12/28/2023-07:47:46] [I] Export output to JSON file: +[12/28/2023-07:47:46] [I] Export profile to JSON file: +[12/28/2023-07:47:46] [I] +[12/28/2023-07:47:46] [I] === Device Information === +[12/28/2023-07:47:46] [I] Selected Device: Orin +[12/28/2023-07:47:46] [I] Compute Capability: 8.7 
+[12/28/2023-07:47:46] [I] SMs: 8 +[12/28/2023-07:47:46] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-07:47:46] [I] Device Global Memory: 7471 MiB +[12/28/2023-07:47:46] [I] Shared Memory per SM: 164 KiB +[12/28/2023-07:47:46] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-07:47:46] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-07:47:46] [I] +[12/28/2023-07:47:46] [I] TensorRT version: 8.5.2 +[12/28/2023-07:47:51] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 3024 (MiB) +[12/28/2023-07:47:55] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +284, now: CPU 574, GPU 3327 (MiB) +[12/28/2023-07:47:55] [I] Start parsing network model +[12/28/2023-07:47:56] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-07:47:56] [I] [TRT] Input filename: yolo_nas_pose_m_fp16.onnx +[12/28/2023-07:47:56] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-07:47:56] [I] [TRT] Opset version: 17 +[12/28/2023-07:47:56] [I] [TRT] Producer name: pytorch +[12/28/2023-07:47:56] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-07:47:56] [I] [TRT] Domain: +[12/28/2023-07:47:56] [I] [TRT] Model version: 0 +[12/28/2023-07:47:56] [I] [TRT] Doc string: +[12/28/2023-07:47:56] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-07:47:57] [I] Finish parsing network model +&&&& FAILED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_m_fp16.onnx --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_m_fp16.onnx.engine diff --git a/yolo_nas_pose_m_fp16.onnx.fp16.engine b/yolo_nas_pose_m_fp16.onnx.fp16.engine new file mode 100644 index 0000000000000000000000000000000000000000..b85694de3fe715c30413bca23d3fcdfaf770ad3e --- /dev/null +++ b/yolo_nas_pose_m_fp16.onnx.fp16.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88b66cc2f44220a42e474a86ccc143cd0982f5549e47a979486545e1657fbbc8 +size 79706512 diff --git 
a/yolo_nas_pose_m_fp16.onnx.fp16.engine.err b/yolo_nas_pose_m_fp16.onnx.fp16.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..9f0785c8a9e4e2e8daa939be49fb0e7ae2c83037 --- /dev/null +++ b/yolo_nas_pose_m_fp16.onnx.fp16.engine.err @@ -0,0 +1,348 @@ +[12/28/2023-07:48:01] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/28/2023-07:48:01] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-07:52:40] [W] [TRT] Tactic Device request: 6262MB Available: 3207MB. Device memory is insufficient to use tactic. +[12/28/2023-07:52:40] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6262 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:52:40] [W] [TRT] Tactic Device request: 6262MB Available: 3207MB. Device memory is insufficient to use tactic. +[12/28/2023-07:52:40] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6262 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:52:40] [W] [TRT] Tactic Device request: 6262MB Available: 3207MB. Device memory is insufficient to use tactic. +[12/28/2023-07:52:40] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6262 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:52:42] [W] [TRT] Tactic Device request: 6251MB Available: 3210MB. Device memory is insufficient to use tactic. +[12/28/2023-07:52:42] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6251 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-07:52:42] [W] [TRT] Tactic Device request: 6251MB Available: 3210MB. Device memory is insufficient to use tactic. +[12/28/2023-07:52:43] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 6251 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:57:47] [W] [TRT] Tactic Device request: 4711MB Available: 2426MB. Device memory is insufficient to use tactic. +[12/28/2023-07:57:47] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:57:47] [W] [TRT] Tactic Device request: 4711MB Available: 2426MB. Device memory is insufficient to use tactic. +[12/28/2023-07:57:47] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4711 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:57:47] [W] [TRT] Tactic Device request: 4711MB Available: 2426MB. Device memory is insufficient to use tactic. +[12/28/2023-07:57:47] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:57:49] [W] [TRT] Tactic Device request: 4701MB Available: 2426MB. Device memory is insufficient to use tactic. +[12/28/2023-07:57:49] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4701 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:57:49] [W] [TRT] Tactic Device request: 4701MB Available: 2426MB. Device memory is insufficient to use tactic. 
+[12/28/2023-07:57:49] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 4701 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:58:04] [W] [TRT] Tactic Device request: 6275MB Available: 2426MB. Device memory is insufficient to use tactic. +[12/28/2023-07:58:04] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6275 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:58:05] [W] [TRT] Tactic Device request: 6275MB Available: 2426MB. Device memory is insufficient to use tactic. +[12/28/2023-07:58:05] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6275 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:58:05] [W] [TRT] Tactic Device request: 6275MB Available: 2426MB. Device memory is insufficient to use tactic. +[12/28/2023-07:58:05] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6275 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:58:06] [W] [TRT] Tactic Device request: 6270MB Available: 2425MB. Device memory is insufficient to use tactic. +[12/28/2023-07:58:06] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6270 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:58:06] [W] [TRT] Tactic Device request: 6270MB Available: 2426MB. Device memory is insufficient to use tactic. +[12/28/2023-07:58:06] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 6270 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-08:04:38] [W] [TRT] Tactic Device request: 7056MB Available: 2305MB. Device memory is insufficient to use tactic. +[12/28/2023-08:04:38] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:04:38] [W] [TRT] Tactic Device request: 7056MB Available: 2305MB. Device memory is insufficient to use tactic. +[12/28/2023-08:04:38] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 7056 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:04:38] [W] [TRT] Tactic Device request: 7056MB Available: 2302MB. Device memory is insufficient to use tactic. +[12/28/2023-08:04:38] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:04:40] [W] [TRT] Tactic Device request: 7050MB Available: 2302MB. Device memory is insufficient to use tactic. +[12/28/2023-08:04:40] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7050 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:04:40] [W] [TRT] Tactic Device request: 7050MB Available: 2302MB. Device memory is insufficient to use tactic. +[12/28/2023-08:04:40] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 7050 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:04:56] [W] [TRT] Tactic Device request: 6354MB Available: 2299MB. Device memory is insufficient to use tactic. 
+[12/28/2023-08:04:56] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6354 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:04:56] [W] [TRT] Tactic Device request: 6354MB Available: 2299MB. Device memory is insufficient to use tactic. +[12/28/2023-08:04:56] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6354 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:04:57] [W] [TRT] Tactic Device request: 6354MB Available: 2308MB. Device memory is insufficient to use tactic. +[12/28/2023-08:04:57] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6354 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:04:58] [W] [TRT] Tactic Device request: 6351MB Available: 2305MB. Device memory is insufficient to use tactic. +[12/28/2023-08:04:58] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6351 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:04:58] [W] [TRT] Tactic Device request: 6351MB Available: 2306MB. Device memory is insufficient to use tactic. +[12/28/2023-08:04:58] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 6351 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:13:43] [W] [TRT] Tactic Device request: 2394MB Available: 2369MB. Device memory is insufficient to use tactic. +[12/28/2023-08:13:43] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2394 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-08:13:43] [W] [TRT] Tactic Device request: 2394MB Available: 2369MB. Device memory is insufficient to use tactic. +[12/28/2023-08:13:43] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:13:44] [W] [TRT] Tactic Device request: 2392MB Available: 2366MB. Device memory is insufficient to use tactic. +[12/28/2023-08:13:44] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:13:44] [W] [TRT] Tactic Device request: 2392MB Available: 2366MB. Device memory is insufficient to use tactic. +[12/28/2023-08:13:44] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2392 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:13:44] [W] [TRT] Tactic Device request: 2392MB Available: 2366MB. Device memory is insufficient to use tactic. +[12/28/2023-08:13:44] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:13:45] [W] [TRT] Tactic Device request: 2391MB Available: 2367MB. Device memory is insufficient to use tactic. +[12/28/2023-08:13:45] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2391 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:13:45] [W] [TRT] Tactic Device request: 2391MB Available: 2367MB. Device memory is insufficient to use tactic. 
+[12/28/2023-08:13:45] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2391 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:13:45] [W] [TRT] Tactic Device request: 2390MB Available: 2367MB. Device memory is insufficient to use tactic. +[12/28/2023-08:13:45] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2390 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:13:45] [W] [TRT] Tactic Device request: 2390MB Available: 2365MB. Device memory is insufficient to use tactic. +[12/28/2023-08:13:45] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2390 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:13:59] [W] [TRT] Tactic Device request: 4906MB Available: 2349MB. Device memory is insufficient to use tactic. +[12/28/2023-08:13:59] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4906 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:13:59] [W] [TRT] Tactic Device request: 4906MB Available: 2348MB. Device memory is insufficient to use tactic. +[12/28/2023-08:13:59] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4906 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:14:00] [W] [TRT] Tactic Device request: 4906MB Available: 2349MB. Device memory is insufficient to use tactic. +[12/28/2023-08:14:00] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4906 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-08:14:01] [W] [TRT] Tactic Device request: 4905MB Available: 2337MB. Device memory is insufficient to use tactic. +[12/28/2023-08:14:01] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4905 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:14:01] [W] [TRT] Tactic Device request: 4905MB Available: 2336MB. Device memory is insufficient to use tactic. +[12/28/2023-08:14:01] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 4905 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:18:56] [W] [TRT] Tactic Device request: 2457MB Available: 2273MB. Device memory is insufficient to use tactic. +[12/28/2023-08:18:56] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:18:56] [W] [TRT] Tactic Device request: 2457MB Available: 2273MB. Device memory is insufficient to use tactic. +[12/28/2023-08:18:56] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2457 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:18:56] [W] [TRT] Tactic Device request: 2457MB Available: 2273MB. Device memory is insufficient to use tactic. +[12/28/2023-08:18:56] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:18:57] [W] [TRT] Tactic Device request: 2456MB Available: 2272MB. Device memory is insufficient to use tactic. 
+[12/28/2023-08:18:57] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2456 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:18:57] [W] [TRT] Tactic Device request: 2456MB Available: 2272MB. Device memory is insufficient to use tactic. +[12/28/2023-08:18:57] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2456 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:19:19] [W] [TRT] Tactic Device request: 3587MB Available: 2263MB. Device memory is insufficient to use tactic. +[12/28/2023-08:19:19] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:19:19] [W] [TRT] Tactic Device request: 3587MB Available: 2263MB. Device memory is insufficient to use tactic. +[12/28/2023-08:19:19] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3587 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:19:19] [W] [TRT] Tactic Device request: 3587MB Available: 2263MB. Device memory is insufficient to use tactic. +[12/28/2023-08:19:19] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:19:20] [W] [TRT] Tactic Device request: 3585MB Available: 2263MB. Device memory is insufficient to use tactic. +[12/28/2023-08:19:20] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3585 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-08:19:20] [W] [TRT] Tactic Device request: 3585MB Available: 2263MB. Device memory is insufficient to use tactic. +[12/28/2023-08:19:20] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3585 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:19:21] [W] [TRT] Tactic Device request: 2385MB Available: 2264MB. Device memory is insufficient to use tactic. +[12/28/2023-08:19:21] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2385 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:19:21] [W] [TRT] Tactic Device request: 2385MB Available: 2264MB. Device memory is insufficient to use tactic. +[12/28/2023-08:19:21] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2385 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:19:21] [W] [TRT] Tactic Device request: 2385MB Available: 2264MB. Device memory is insufficient to use tactic. +[12/28/2023-08:19:21] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2385 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:19:22] [W] [TRT] Tactic Device request: 2384MB Available: 2262MB. Device memory is insufficient to use tactic. +[12/28/2023-08:19:22] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2384 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:19:22] [W] [TRT] Tactic Device request: 2384MB Available: 2262MB. Device memory is insufficient to use tactic. 
+[12/28/2023-08:19:22] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2384 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:24:28] [W] [TRT] Tactic Device request: 3556MB Available: 2243MB. Device memory is insufficient to use tactic. +[12/28/2023-08:24:28] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:24:28] [W] [TRT] Tactic Device request: 3556MB Available: 2243MB. Device memory is insufficient to use tactic. +[12/28/2023-08:24:28] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3556 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:24:28] [W] [TRT] Tactic Device request: 3556MB Available: 2243MB. Device memory is insufficient to use tactic. +[12/28/2023-08:24:28] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:24:29] [W] [TRT] Tactic Device request: 3551MB Available: 2243MB. Device memory is insufficient to use tactic. +[12/28/2023-08:24:30] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3551 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:24:30] [W] [TRT] Tactic Device request: 3551MB Available: 2243MB. Device memory is insufficient to use tactic. +[12/28/2023-08:24:30] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3551 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-08:31:02] [W] [TRT] Tactic Device request: 2359MB Available: 2128MB. Device memory is insufficient to use tactic. +[12/28/2023-08:31:02] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2359 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:31:02] [W] [TRT] Tactic Device request: 2359MB Available: 2128MB. Device memory is insufficient to use tactic. +[12/28/2023-08:31:02] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2359 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:31:02] [W] [TRT] Tactic Device request: 2359MB Available: 2128MB. Device memory is insufficient to use tactic. +[12/28/2023-08:31:02] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2359 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:31:03] [W] [TRT] Tactic Device request: 2355MB Available: 2129MB. Device memory is insufficient to use tactic. +[12/28/2023-08:31:03] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2355 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:31:03] [W] [TRT] Tactic Device request: 2355MB Available: 2129MB. Device memory is insufficient to use tactic. +[12/28/2023-08:31:03] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2355 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:31:07] [W] [TRT] Tactic Device request: 2362MB Available: 2128MB. Device memory is insufficient to use tactic. 
+[12/28/2023-08:31:07] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2362 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:31:08] [W] [TRT] Tactic Device request: 2362MB Available: 2128MB. Device memory is insufficient to use tactic. +[12/28/2023-08:31:08] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2362 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:31:08] [W] [TRT] Tactic Device request: 2362MB Available: 2128MB. Device memory is insufficient to use tactic. +[12/28/2023-08:31:08] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2362 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:31:10] [W] [TRT] Tactic Device request: 2357MB Available: 2129MB. Device memory is insufficient to use tactic. +[12/28/2023-08:31:10] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2357 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:31:11] [W] [TRT] Tactic Device request: 2357MB Available: 2129MB. Device memory is insufficient to use tactic. +[12/28/2023-08:31:11] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2357 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:31:12] [W] [TRT] Tactic Device request: 2359MB Available: 2129MB. Device memory is insufficient to use tactic. +[12/28/2023-08:31:12] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2359 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-08:31:13] [W] [TRT] Tactic Device request: 2359MB Available: 2128MB. Device memory is insufficient to use tactic. +[12/28/2023-08:31:13] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2359 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:31:13] [W] [TRT] Tactic Device request: 2359MB Available: 2128MB. Device memory is insufficient to use tactic. +[12/28/2023-08:31:13] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2359 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:31:15] [W] [TRT] Tactic Device request: 2356MB Available: 2128MB. Device memory is insufficient to use tactic. +[12/28/2023-08:31:16] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2356 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:31:16] [W] [TRT] Tactic Device request: 2356MB Available: 2128MB. Device memory is insufficient to use tactic. +[12/28/2023-08:31:16] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2356 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:36:53] [W] [TRT] Tactic Device request: 3575MB Available: 2474MB. Device memory is insufficient to use tactic. +[12/28/2023-08:36:53] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3575 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:36:53] [W] [TRT] Tactic Device request: 3575MB Available: 2475MB. Device memory is insufficient to use tactic. 
+[12/28/2023-08:36:53] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3575 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:36:53] [W] [TRT] Tactic Device request: 3575MB Available: 2475MB. Device memory is insufficient to use tactic. +[12/28/2023-08:36:53] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3575 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:36:54] [W] [TRT] Tactic Device request: 3572MB Available: 2475MB. Device memory is insufficient to use tactic. +[12/28/2023-08:36:54] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3572 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:36:54] [W] [TRT] Tactic Device request: 3572MB Available: 2475MB. Device memory is insufficient to use tactic. +[12/28/2023-08:36:54] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3572 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:37:00] [W] [TRT] Tactic Device request: 2390MB Available: 2347MB. Device memory is insufficient to use tactic. +[12/28/2023-08:37:00] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2390 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:37:00] [W] [TRT] Tactic Device request: 2390MB Available: 2269MB. Device memory is insufficient to use tactic. +[12/28/2023-08:37:00] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2390 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-08:37:01] [W] [TRT] Tactic Device request: 2390MB Available: 2265MB. Device memory is insufficient to use tactic. +[12/28/2023-08:37:01] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2390 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:37:04] [W] [TRT] Tactic Device request: 2387MB Available: 2263MB. Device memory is insufficient to use tactic. +[12/28/2023-08:37:04] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2387 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:37:04] [W] [TRT] Tactic Device request: 2387MB Available: 2239MB. Device memory is insufficient to use tactic. +[12/28/2023-08:37:04] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2387 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:37:06] [W] [TRT] Tactic Device request: 2388MB Available: 2234MB. Device memory is insufficient to use tactic. +[12/28/2023-08:37:06] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2388 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:37:07] [W] [TRT] Tactic Device request: 2388MB Available: 2205MB. Device memory is insufficient to use tactic. +[12/28/2023-08:37:07] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2388 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:37:07] [W] [TRT] Tactic Device request: 2388MB Available: 2203MB. Device memory is insufficient to use tactic. 
+[12/28/2023-08:37:07] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2388 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:37:10] [W] [TRT] Tactic Device request: 2386MB Available: 2203MB. Device memory is insufficient to use tactic. +[12/28/2023-08:37:10] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2386 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:37:10] [W] [TRT] Tactic Device request: 2386MB Available: 2176MB. Device memory is insufficient to use tactic. +[12/28/2023-08:37:10] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2386 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:37:13] [W] [TRT] Tactic Device request: 4775MB Available: 2174MB. Device memory is insufficient to use tactic. +[12/28/2023-08:37:13] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4775 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:37:13] [W] [TRT] Tactic Device request: 4775MB Available: 2034MB. Device memory is insufficient to use tactic. +[12/28/2023-08:37:13] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4775 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:37:14] [W] [TRT] Tactic Device request: 4775MB Available: 2034MB. Device memory is insufficient to use tactic. +[12/28/2023-08:37:14] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4775 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-08:37:19] [W] [TRT] Tactic Device request: 4772MB Available: 2034MB. Device memory is insufficient to use tactic. +[12/28/2023-08:37:19] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4772 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:37:20] [W] [TRT] Tactic Device request: 4772MB Available: 2034MB. Device memory is insufficient to use tactic. +[12/28/2023-08:37:20] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4772 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:37:24] [W] [TRT] Tactic Device request: 4774MB Available: 2033MB. Device memory is insufficient to use tactic. +[12/28/2023-08:37:24] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4774 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:37:25] [W] [TRT] Tactic Device request: 4774MB Available: 2033MB. Device memory is insufficient to use tactic. +[12/28/2023-08:37:25] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4774 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:37:26] [W] [TRT] Tactic Device request: 4774MB Available: 2034MB. Device memory is insufficient to use tactic. +[12/28/2023-08:37:26] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4774 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:37:31] [W] [TRT] Tactic Device request: 4771MB Available: 2034MB. Device memory is insufficient to use tactic. 
+[12/28/2023-08:37:31] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4771 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:37:31] [W] [TRT] Tactic Device request: 4771MB Available: 2034MB. Device memory is insufficient to use tactic. +[12/28/2023-08:37:32] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4771 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:43:58] [W] [TRT] Tactic Device request: 2454MB Available: 1908MB. Device memory is insufficient to use tactic. +[12/28/2023-08:43:58] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2454 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:43:58] [W] [TRT] Tactic Device request: 2454MB Available: 1908MB. Device memory is insufficient to use tactic. +[12/28/2023-08:43:58] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2454 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:43:58] [W] [TRT] Tactic Device request: 2454MB Available: 1907MB. Device memory is insufficient to use tactic. +[12/28/2023-08:43:58] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2454 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:43:59] [W] [TRT] Tactic Device request: 2453MB Available: 1907MB. Device memory is insufficient to use tactic. +[12/28/2023-08:43:59] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2453 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-08:43:59] [W] [TRT] Tactic Device request: 2453MB Available: 1906MB. Device memory is insufficient to use tactic. +[12/28/2023-08:43:59] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2453 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:44:01] [W] [TRT] Tactic Device request: 2463MB Available: 1901MB. Device memory is insufficient to use tactic. +[12/28/2023-08:44:01] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2463 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:44:02] [W] [TRT] Tactic Device request: 2463MB Available: 1909MB. Device memory is insufficient to use tactic. +[12/28/2023-08:44:02] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2463 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:44:03] [W] [TRT] Tactic Device request: 2463MB Available: 1910MB. Device memory is insufficient to use tactic. +[12/28/2023-08:44:03] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2463 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:44:07] [W] [TRT] Tactic Device request: 2462MB Available: 1898MB. Device memory is insufficient to use tactic. +[12/28/2023-08:44:07] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2462 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:44:07] [W] [TRT] Tactic Device request: 2462MB Available: 1898MB. Device memory is insufficient to use tactic. 
+[12/28/2023-08:44:07] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2462 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:44:10] [W] [TRT] Tactic Device request: 2463MB Available: 1898MB. Device memory is insufficient to use tactic. +[12/28/2023-08:44:10] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2463 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:44:11] [W] [TRT] Tactic Device request: 2463MB Available: 1898MB. Device memory is insufficient to use tactic. +[12/28/2023-08:44:11] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2463 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:44:12] [W] [TRT] Tactic Device request: 2463MB Available: 1898MB. Device memory is insufficient to use tactic. +[12/28/2023-08:44:12] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2463 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:44:16] [W] [TRT] Tactic Device request: 2462MB Available: 1899MB. Device memory is insufficient to use tactic. +[12/28/2023-08:44:16] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2462 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-08:44:17] [W] [TRT] Tactic Device request: 2462MB Available: 1898MB. Device memory is insufficient to use tactic. +[12/28/2023-08:44:17] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2462 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-08:45:16] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. +[12/28/2023-08:45:16] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/28/2023-08:45:16] [W] * GPU compute time is unstable, with coefficient of variance = 2.78032%. +[12/28/2023-08:45:16] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. diff --git a/yolo_nas_pose_m_fp16.onnx.fp16.engine.log b/yolo_nas_pose_m_fp16.onnx.fp16.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..2c14b9897f924df047b173f37afe96d283fb38e4 --- /dev/null +++ b/yolo_nas_pose_m_fp16.onnx.fp16.engine.log @@ -0,0 +1,309 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_m_fp16.onnx --fp16 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_m_fp16.onnx.fp16.engine +[12/28/2023-07:47:57] [I] === Model Options === +[12/28/2023-07:47:57] [I] Format: ONNX +[12/28/2023-07:47:57] [I] Model: yolo_nas_pose_m_fp16.onnx +[12/28/2023-07:47:57] [I] Output: +[12/28/2023-07:47:57] [I] === Build Options === +[12/28/2023-07:47:57] [I] Max batch: explicit batch +[12/28/2023-07:47:57] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-07:47:57] [I] minTiming: 1 +[12/28/2023-07:47:57] [I] avgTiming: 8 +[12/28/2023-07:47:57] [I] Precision: FP32+FP16 +[12/28/2023-07:47:57] [I] LayerPrecisions: +[12/28/2023-07:47:57] [I] Calibration: +[12/28/2023-07:47:57] [I] Refit: Disabled +[12/28/2023-07:47:57] [I] Sparsity: Disabled +[12/28/2023-07:47:57] [I] Safe mode: Disabled +[12/28/2023-07:47:57] [I] DirectIO mode: Disabled +[12/28/2023-07:47:57] [I] Restricted mode: Disabled +[12/28/2023-07:47:57] [I] Build only: Disabled +[12/28/2023-07:47:57] [I] Save engine: yolo_nas_pose_m_fp16.onnx.fp16.engine +[12/28/2023-07:47:57] [I] Load 
engine: +[12/28/2023-07:47:57] [I] Profiling verbosity: 0 +[12/28/2023-07:47:57] [I] Tactic sources: Using default tactic sources +[12/28/2023-07:47:57] [I] timingCacheMode: local +[12/28/2023-07:47:57] [I] timingCacheFile: +[12/28/2023-07:47:57] [I] Heuristic: Disabled +[12/28/2023-07:47:57] [I] Preview Features: Use default preview flags. +[12/28/2023-07:47:57] [I] Input(s)s format: fp32:CHW +[12/28/2023-07:47:57] [I] Output(s)s format: fp32:CHW +[12/28/2023-07:47:57] [I] Input build shapes: model +[12/28/2023-07:47:57] [I] Input calibration shapes: model +[12/28/2023-07:47:57] [I] === System Options === +[12/28/2023-07:47:57] [I] Device: 0 +[12/28/2023-07:47:57] [I] DLACore: +[12/28/2023-07:47:57] [I] Plugins: +[12/28/2023-07:47:57] [I] === Inference Options === +[12/28/2023-07:47:57] [I] Batch: Explicit +[12/28/2023-07:47:57] [I] Input inference shapes: model +[12/28/2023-07:47:57] [I] Iterations: 10 +[12/28/2023-07:47:57] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-07:47:57] [I] Sleep time: 0ms +[12/28/2023-07:47:57] [I] Idle time: 0ms +[12/28/2023-07:47:57] [I] Streams: 1 +[12/28/2023-07:47:57] [I] ExposeDMA: Disabled +[12/28/2023-07:47:57] [I] Data transfers: Enabled +[12/28/2023-07:47:57] [I] Spin-wait: Disabled +[12/28/2023-07:47:57] [I] Multithreading: Disabled +[12/28/2023-07:47:57] [I] CUDA Graph: Disabled +[12/28/2023-07:47:57] [I] Separate profiling: Disabled +[12/28/2023-07:47:57] [I] Time Deserialize: Disabled +[12/28/2023-07:47:57] [I] Time Refit: Disabled +[12/28/2023-07:47:57] [I] NVTX verbosity: 0 +[12/28/2023-07:47:57] [I] Persistent Cache Ratio: 0 +[12/28/2023-07:47:57] [I] Inputs: +[12/28/2023-07:47:57] [I] === Reporting Options === +[12/28/2023-07:47:57] [I] Verbose: Disabled +[12/28/2023-07:47:57] [I] Averages: 100 inferences +[12/28/2023-07:47:57] [I] Percentiles: 90,95,99 +[12/28/2023-07:47:57] [I] Dump refittable layers:Disabled +[12/28/2023-07:47:57] [I] Dump output: Disabled +[12/28/2023-07:47:57] [I] Profile: Disabled 
+[12/28/2023-07:47:57] [I] Export timing to JSON file: +[12/28/2023-07:47:57] [I] Export output to JSON file: +[12/28/2023-07:47:57] [I] Export profile to JSON file: +[12/28/2023-07:47:57] [I] +[12/28/2023-07:47:57] [I] === Device Information === +[12/28/2023-07:47:57] [I] Selected Device: Orin +[12/28/2023-07:47:57] [I] Compute Capability: 8.7 +[12/28/2023-07:47:57] [I] SMs: 8 +[12/28/2023-07:47:57] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-07:47:57] [I] Device Global Memory: 7471 MiB +[12/28/2023-07:47:57] [I] Shared Memory per SM: 164 KiB +[12/28/2023-07:47:57] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-07:47:57] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-07:47:57] [I] +[12/28/2023-07:47:57] [I] TensorRT version: 8.5.2 +[12/28/2023-07:47:58] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 3022 (MiB) +[12/28/2023-07:48:00] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +284, now: CPU 574, GPU 3325 (MiB) +[12/28/2023-07:48:00] [I] Start parsing network model +[12/28/2023-07:48:01] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-07:48:01] [I] [TRT] Input filename: yolo_nas_pose_m_fp16.onnx +[12/28/2023-07:48:01] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-07:48:01] [I] [TRT] Opset version: 17 +[12/28/2023-07:48:01] [I] [TRT] Producer name: pytorch +[12/28/2023-07:48:01] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-07:48:01] [I] [TRT] Domain: +[12/28/2023-07:48:01] [I] [TRT] Model version: 0 +[12/28/2023-07:48:01] [I] [TRT] Doc string: +[12/28/2023-07:48:01] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-07:48:01] [I] Finish parsing network model +[12/28/2023-07:48:01] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/28/2023-07:48:01] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/28/2023-07:48:01] [I] [TRT] 
[GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 410) [Constant] +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 411) [Constant] +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 412) [Constant] +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stem/conv/rbr_reparam/Conv + /model/backbone/stem/conv/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/downsample/rbr_reparam/Conv + /model/backbone/stage1/downsample/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv2/conv/Conv + /model/backbone/stage1/blocks/conv2/act/Relu || /model/backbone/stage1/blocks/conv1/conv/Conv + /model/backbone/stage1/blocks/conv1/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 15) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + 
/model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 23) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv3/conv/Conv + /model/backbone/stage1/blocks/conv3/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_skip2/conv/Conv + /model/neck/neck2/reduce_skip2/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/downsample/rbr_reparam/Conv + /model/backbone/stage2/downsample/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/downsample/conv/Conv + /model/neck/neck2/downsample/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv2/conv/Conv + /model/backbone/stage2/blocks/conv2/act/Relu || /model/backbone/stage2/blocks/conv1/conv/Conv + /model/backbone/stage2/blocks/conv1/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] 
POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 44) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 52) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 60) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/28/2023-07:48:01] [I] [TRT] 
[GpuLayer] COPY: /model/backbone/stage2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv3/conv/Conv + /model/backbone/stage2/blocks/conv3/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip2/conv/Conv + /model/neck/neck1/reduce_skip2/act/Relu || /model/neck/neck2/reduce_skip1/conv/Conv + /model/neck/neck2/reduce_skip1/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/downsample/rbr_reparam/Conv + /model/backbone/stage3/downsample/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/downsample/conv/Conv + /model/neck/neck1/downsample/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv2/conv/Conv + /model/backbone/stage3/blocks/conv2/act/Relu || /model/backbone/stage3/blocks/conv1/conv/Conv + /model/backbone/stage3/blocks/conv1/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 83) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 91) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 99) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 107) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/Conv + 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.4.alpha + (Unnamed Layer* 115) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Add) +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add_output_0 copy +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add_output_0 copy +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv3/conv/Conv + /model/backbone/stage3/blocks/conv3/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip1/conv/Conv + /model/neck/neck1/reduce_skip1/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/downsample/rbr_reparam/Conv + /model/backbone/stage4/downsample/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv2/conv/Conv + /model/backbone/stage4/blocks/conv2/act/Relu || /model/backbone/stage4/blocks/conv1/conv/Conv + /model/backbone/stage4/blocks/conv1/act/Relu +[12/28/2023-07:48:01] [I] 
[TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 134) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 142) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv3/conv/Conv + /model/backbone/stage4/blocks/conv3/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv1/conv/Conv + /model/backbone/context_module/cv1/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/28/2023-07:48:01] [I] [TRT] 
[GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/cv1/act/Relu_output_0 copy +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv2/conv/Conv + /model/backbone/context_module/cv2/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/conv/conv/Conv + /model/neck/neck1/conv/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck1/upsample/ConvTranspose +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_after_concat/conv/Conv + /model/neck/neck1/reduce_after_concat/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv2/conv/Conv + /model/neck/neck1/blocks/conv2/act/Relu || /model/neck/neck1/blocks/conv1/conv/Conv + /model/neck/neck1/blocks/conv1/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 171) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu 
+[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 179) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv3/conv/Conv + /model/neck/neck1/blocks/conv3/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/conv/conv/Conv + /model/neck/neck2/conv/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck2/upsample/ConvTranspose +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/reduce_skip1/act/Relu_output_0 copy +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_after_concat/conv/Conv + /model/neck/neck2/reduce_after_concat/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv2/conv/Conv + /model/neck/neck2/blocks/conv2/act/Relu || /model/neck/neck2/blocks/conv1/conv/Conv + /model/neck/neck2/blocks/conv1/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 200) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + 
/model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 208) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 216) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv3/conv/Conv + /model/neck/neck2/blocks/conv3/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/bbox_stem/seq/conv/Conv + /model/heads/head1/bbox_stem/seq/act/Relu || /model/heads/head1/pose_stem/seq/conv/Conv + /model/heads/head1/pose_stem/seq/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/conv/conv/Conv + /model/neck/neck3/conv/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head1/reg_convs/reg_convs.0/seq/act/Relu || 
/model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head1/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv2/conv/Conv + /model/neck/neck3/blocks/conv2/act/Relu || /model/neck/neck3/blocks/conv1/conv/Conv + /model/neck/neck3/blocks/conv1/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/cls_pred/Conv +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_pred/Conv +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_pred/Conv +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 247) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + 
/model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 270) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv3/conv/Conv + /model/neck/neck3/blocks/conv3/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_stem/seq/conv/Conv + /model/heads/head2/pose_stem/seq/act/Relu || /model/heads/head2/bbox_stem/seq/conv/Conv + /model/heads/head2/bbox_stem/seq/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/conv/conv/Conv + /model/neck/neck4/conv/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head2/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head2/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv2/conv/Conv + /model/neck/neck4/blocks/conv2/act/Relu || /model/neck/neck4/blocks/conv1/conv/Conv + /model/neck/neck4/blocks/conv1/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/cls_pred/Conv +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_pred/Conv +[12/28/2023-07:48:01] [I] [TRT] 
[GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_pred/Conv +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 301) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 324) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.2.alpha + (Unnamed Layer* 332) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv3/conv/Conv + /model/neck/neck4/blocks/conv3/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/bbox_stem/seq/conv/Conv + /model/heads/head3/bbox_stem/seq/act/Relu || /model/heads/head3/pose_stem/seq/conv/Conv + /model/heads/head3/pose_stem/seq/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head3/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head3/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/cls_pred/Conv +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_pred/Conv +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.2/seq/act/Relu +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] SOFTMAX: 
/model/heads/Softmax_2 +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_pred/Conv +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice_1...cast_boxes_to_fp32]} +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] NMS: batched_nms_26 +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 414) [NMS]_1_output[DevicetoShapeHostCopy] +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/28/2023-07:48:01] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] +[12/28/2023-07:48:12] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +430, now: CPU 1195, GPU 3835 (MiB) +[12/28/2023-07:48:14] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +83, GPU +64, now: CPU 1278, GPU 3899 (MiB) +[12/28/2023-07:48:14] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored. +[12/28/2023-08:44:43] [I] [TRT] Total Activation Memory: 8017417728 +[12/28/2023-08:44:44] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/28/2023-08:44:54] [I] [TRT] Total Host Persistent Memory: 330944 +[12/28/2023-08:44:54] [I] [TRT] Total Device Persistent Memory: 115712 +[12/28/2023-08:44:54] [I] [TRT] Total Scratch Memory: 134217728 +[12/28/2023-08:44:54] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 40 MiB, GPU 2398 MiB +[12/28/2023-08:44:54] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 181 steps to complete. +[12/28/2023-08:44:54] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 85.1996ms to assign 14 blocks to 181 nodes requiring 155574784 bytes. 
+[12/28/2023-08:44:54] [I] [TRT] Total Activation Memory: 155574784 +[12/28/2023-08:44:59] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1625, GPU 5501 (MiB) +[12/28/2023-08:44:59] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +10, GPU +128, now: CPU 10, GPU 128 (MiB) +[12/28/2023-08:45:00] [I] Engine built in 3422.28 sec. +[12/28/2023-08:45:00] [I] [TRT] Loaded engine size: 76 MiB +[12/28/2023-08:45:00] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1317, GPU 5134 (MiB) +[12/28/2023-08:45:00] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +74, now: CPU 0, GPU 74 (MiB) +[12/28/2023-08:45:00] [I] Engine deserialized in 0.264761 sec. +[12/28/2023-08:45:00] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1317, GPU 5134 (MiB) +[12/28/2023-08:45:01] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +148, now: CPU 0, GPU 222 (MiB) +[12/28/2023-08:45:01] [I] Setting persistentCacheLimit to 0 bytes. 
+[12/28/2023-08:45:01] [I] Using random values for input onnx::Cast_0 +[12/28/2023-08:45:01] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/28/2023-08:45:01] [I] Using random values for output graph2_flat_predictions +[12/28/2023-08:45:01] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/28/2023-08:45:01] [I] Starting inference +[12/28/2023-08:45:16] [I] Warmup completed 2 queries over 200 ms +[12/28/2023-08:45:16] [I] Timing trace has 569 queries over 15.0311 s +[12/28/2023-08:45:16] [I] +[12/28/2023-08:45:16] [I] === Trace details === +[12/28/2023-08:45:16] [I] Trace averages of 100 runs: +[12/28/2023-08:45:16] [I] Average on 100 runs - GPU latency: 26.4061 ms - Host latency: 26.5181 ms (enqueue 26.4696 ms) +[12/28/2023-08:45:16] [I] Average on 100 runs - GPU latency: 26.2062 ms - Host latency: 26.3239 ms (enqueue 26.2673 ms) +[12/28/2023-08:45:16] [I] Average on 100 runs - GPU latency: 26.2202 ms - Host latency: 26.3345 ms (enqueue 26.2829 ms) +[12/28/2023-08:45:16] [I] Average on 100 runs - GPU latency: 26.1667 ms - Host latency: 26.2764 ms (enqueue 26.2337 ms) +[12/28/2023-08:45:16] [I] Average on 100 runs - GPU latency: 26.4629 ms - Host latency: 26.5727 ms (enqueue 26.5274 ms) +[12/28/2023-08:45:16] [I] +[12/28/2023-08:45:16] [I] === Performance summary === +[12/28/2023-08:45:16] [I] Throughput: 37.8547 qps +[12/28/2023-08:45:16] [I] Latency: min = 25.1152 ms, max = 32.1547 ms, mean = 26.393 ms, median = 26.3442 ms, percentile(90%) = 27.0601 ms, percentile(95%) = 27.4189 ms, percentile(99%) = 29.4863 ms +[12/28/2023-08:45:16] [I] Enqueue Time: min = 25.0762 ms, max = 32.1133 ms, mean = 26.3436 ms, median = 26.2896 ms, percentile(90%) = 26.9199 ms, percentile(95%) = 27.0813 ms, percentile(99%) = 29.4521 ms +[12/28/2023-08:45:16] [I] H2D Latency: min = 0.0800781 ms, max = 0.117188 ms, mean = 0.0960489 ms, median = 0.097168 ms, percentile(90%) = 0.0996094 ms, percentile(95%) = 0.100098 ms, 
percentile(99%) = 0.101318 ms +[12/28/2023-08:45:16] [I] GPU Compute Time: min = 25.0037 ms, max = 32.0377 ms, mean = 26.281 ms, median = 26.2241 ms, percentile(90%) = 26.9429 ms, percentile(95%) = 27.3164 ms, percentile(99%) = 29.3721 ms +[12/28/2023-08:45:16] [I] D2H Latency: min = 0.00292969 ms, max = 0.0490723 ms, mean = 0.0159815 ms, median = 0.0146484 ms, percentile(90%) = 0.0253906 ms, percentile(95%) = 0.0271912 ms, percentile(99%) = 0.0324707 ms +[12/28/2023-08:45:16] [I] Total Host Walltime: 15.0311 s +[12/28/2023-08:45:16] [I] Total GPU Compute Time: 14.9539 s +[12/28/2023-08:45:16] [I] Explanations of the performance metrics are printed in the verbose logs. +[12/28/2023-08:45:16] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_m_fp16.onnx --fp16 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_m_fp16.onnx.fp16.engine diff --git a/yolo_nas_pose_m_fp16.onnx.int8.engine.err b/yolo_nas_pose_m_fp16.onnx.int8.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..ebb955a13d194134630592a441850c62eb80406b --- /dev/null +++ b/yolo_nas_pose_m_fp16.onnx.int8.engine.err @@ -0,0 +1,8 @@ +[12/28/2023-10:38:24] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/28/2023-10:38:24] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-10:38:24] [E] Error[4]: [network.cpp::validate::2925] Error Code 4: Internal Error (fp16 precision has been set for a layer or layer output, but fp16 is not configured in the builder) +[12/28/2023-10:38:24] [E] Error[2]: [builder.cpp::buildSerializedNetwork::751] Error Code 2: Internal Error (Assertion engine != nullptr failed. 
) +[12/28/2023-10:38:24] [E] Engine could not be created from network +[12/28/2023-10:38:24] [E] Building engine failed +[12/28/2023-10:38:24] [E] Failed to create engine from model or file. +[12/28/2023-10:38:24] [E] Engine set up failed diff --git a/yolo_nas_pose_m_fp16.onnx.int8.engine.log b/yolo_nas_pose_m_fp16.onnx.int8.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..a393f445e74fd9e6665f656c6974ce2088ec8ea5 --- /dev/null +++ b/yolo_nas_pose_m_fp16.onnx.int8.engine.log @@ -0,0 +1,92 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_m_fp16.onnx --int8 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_m_fp16.onnx.int8.engine +[12/28/2023-10:38:12] [I] === Model Options === +[12/28/2023-10:38:12] [I] Format: ONNX +[12/28/2023-10:38:12] [I] Model: yolo_nas_pose_m_fp16.onnx +[12/28/2023-10:38:12] [I] Output: +[12/28/2023-10:38:12] [I] === Build Options === +[12/28/2023-10:38:12] [I] Max batch: explicit batch +[12/28/2023-10:38:12] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-10:38:12] [I] minTiming: 1 +[12/28/2023-10:38:12] [I] avgTiming: 8 +[12/28/2023-10:38:12] [I] Precision: FP32+INT8 +[12/28/2023-10:38:12] [I] LayerPrecisions: +[12/28/2023-10:38:12] [I] Calibration: Dynamic +[12/28/2023-10:38:12] [I] Refit: Disabled +[12/28/2023-10:38:12] [I] Sparsity: Disabled +[12/28/2023-10:38:12] [I] Safe mode: Disabled +[12/28/2023-10:38:12] [I] DirectIO mode: Disabled +[12/28/2023-10:38:12] [I] Restricted mode: Disabled +[12/28/2023-10:38:12] [I] Build only: Disabled +[12/28/2023-10:38:12] [I] Save engine: yolo_nas_pose_m_fp16.onnx.int8.engine +[12/28/2023-10:38:12] [I] Load engine: +[12/28/2023-10:38:12] [I] Profiling verbosity: 0 +[12/28/2023-10:38:12] [I] Tactic sources: Using default tactic sources +[12/28/2023-10:38:12] [I] timingCacheMode: local +[12/28/2023-10:38:12] [I] timingCacheFile: 
+[12/28/2023-10:38:12] [I] Heuristic: Disabled +[12/28/2023-10:38:12] [I] Preview Features: Use default preview flags. +[12/28/2023-10:38:12] [I] Input(s)s format: fp32:CHW +[12/28/2023-10:38:12] [I] Output(s)s format: fp32:CHW +[12/28/2023-10:38:12] [I] Input build shapes: model +[12/28/2023-10:38:12] [I] Input calibration shapes: model +[12/28/2023-10:38:12] [I] === System Options === +[12/28/2023-10:38:12] [I] Device: 0 +[12/28/2023-10:38:12] [I] DLACore: +[12/28/2023-10:38:12] [I] Plugins: +[12/28/2023-10:38:12] [I] === Inference Options === +[12/28/2023-10:38:12] [I] Batch: Explicit +[12/28/2023-10:38:12] [I] Input inference shapes: model +[12/28/2023-10:38:12] [I] Iterations: 10 +[12/28/2023-10:38:12] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-10:38:12] [I] Sleep time: 0ms +[12/28/2023-10:38:12] [I] Idle time: 0ms +[12/28/2023-10:38:12] [I] Streams: 1 +[12/28/2023-10:38:12] [I] ExposeDMA: Disabled +[12/28/2023-10:38:12] [I] Data transfers: Enabled +[12/28/2023-10:38:12] [I] Spin-wait: Disabled +[12/28/2023-10:38:12] [I] Multithreading: Disabled +[12/28/2023-10:38:12] [I] CUDA Graph: Disabled +[12/28/2023-10:38:12] [I] Separate profiling: Disabled +[12/28/2023-10:38:12] [I] Time Deserialize: Disabled +[12/28/2023-10:38:12] [I] Time Refit: Disabled +[12/28/2023-10:38:12] [I] NVTX verbosity: 0 +[12/28/2023-10:38:12] [I] Persistent Cache Ratio: 0 +[12/28/2023-10:38:12] [I] Inputs: +[12/28/2023-10:38:12] [I] === Reporting Options === +[12/28/2023-10:38:12] [I] Verbose: Disabled +[12/28/2023-10:38:12] [I] Averages: 100 inferences +[12/28/2023-10:38:12] [I] Percentiles: 90,95,99 +[12/28/2023-10:38:12] [I] Dump refittable layers:Disabled +[12/28/2023-10:38:12] [I] Dump output: Disabled +[12/28/2023-10:38:12] [I] Profile: Disabled +[12/28/2023-10:38:12] [I] Export timing to JSON file: +[12/28/2023-10:38:12] [I] Export output to JSON file: +[12/28/2023-10:38:12] [I] Export profile to JSON file: +[12/28/2023-10:38:12] [I] +[12/28/2023-10:38:12] [I] === Device 
Information === +[12/28/2023-10:38:12] [I] Selected Device: Orin +[12/28/2023-10:38:12] [I] Compute Capability: 8.7 +[12/28/2023-10:38:12] [I] SMs: 8 +[12/28/2023-10:38:12] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-10:38:12] [I] Device Global Memory: 7471 MiB +[12/28/2023-10:38:12] [I] Shared Memory per SM: 164 KiB +[12/28/2023-10:38:12] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-10:38:12] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-10:38:12] [I] +[12/28/2023-10:38:12] [I] TensorRT version: 8.5.2 +[12/28/2023-10:38:18] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 2980 (MiB) +[12/28/2023-10:38:23] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +286, now: CPU 574, GPU 3285 (MiB) +[12/28/2023-10:38:23] [I] Start parsing network model +[12/28/2023-10:38:24] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-10:38:24] [I] [TRT] Input filename: yolo_nas_pose_m_fp16.onnx +[12/28/2023-10:38:24] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-10:38:24] [I] [TRT] Opset version: 17 +[12/28/2023-10:38:24] [I] [TRT] Producer name: pytorch +[12/28/2023-10:38:24] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-10:38:24] [I] [TRT] Domain: +[12/28/2023-10:38:24] [I] [TRT] Model version: 0 +[12/28/2023-10:38:24] [I] [TRT] Doc string: +[12/28/2023-10:38:24] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-10:38:24] [I] Finish parsing network model +[12/28/2023-10:38:24] [I] FP32 and INT8 precisions have been specified - more performance might be enabled by additionally specifying --fp16 or --best +&&&& FAILED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_m_fp16.onnx --int8 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_m_fp16.onnx.int8.engine diff --git a/yolo_nas_pose_m_fp16.onnx.usage.txt b/yolo_nas_pose_m_fp16.onnx.usage.txt new file mode 100644 index 
0000000000000000000000000000000000000000..6a54b1cb4dc7d19c5575cabdc77ebdce5ecd9b45 --- /dev/null +++ b/yolo_nas_pose_m_fp16.onnx.usage.txt @@ -0,0 +1,58 @@ + +Model exported successfully to yolo_nas_pose_m_fp16.onnx +Model expects input image of shape [1, 3, 640, 640] +Input image dtype is torch.uint8 + +Exported model already contains preprocessing (normalization) step, so you don't need to do it manually. +Preprocessing steps to be applied to input image are: +Sequential( + (0): CastTensorTo(dtype=torch.float16) + (1): ChannelSelect(channels_indexes=tensor([2, 1, 0], device='cuda:0')) + (2): ApplyMeanStd(mean=[0.], scale=[255.]) +) + + +Exported model contains postprocessing (NMS) step with the following parameters: + num_pre_nms_predictions=1000 + max_predictions_per_image=10 + nms_threshold=0.5 + confidence_threshold=0.15 + output_predictions_format=flat + + +Exported model is in ONNX format and can be used with ONNXRuntime +To run inference with ONNXRuntime, please use the following code snippet: + + import onnxruntime + import numpy as np + session = onnxruntime.InferenceSession("yolo_nas_pose_m_fp16.onnx", providers=["CUDAExecutionProvider", "CPUExecutionProvider"]) + inputs = [o.name for o in session.get_inputs()] + outputs = [o.name for o in session.get_outputs()] + + example_input_image = np.zeros((1, 3, 640, 640)).astype(np.uint8) + predictions = session.run(outputs, {inputs[0]: example_input_image}) + +Exported model can also be used with TensorRT +To run inference with TensorRT, please see TensorRT deployment documentation +You can benchmark the model using the following code snippet: + + trtexec --onnx=yolo_nas_pose_m_fp16.onnx --fp16 --avgRuns=100 --duration=15 + + +Exported model has predictions in flat format: + +# flat_predictions is a 2D array of [N,K] shape +# Each row represents (image_index, x_min, y_min, x_max, y_max, confidence, joints...) 
+# Please note all values are floats, so you have to convert them to integers if needed + +[flat_predictions] = predictions +pred_bboxes = flat_predictions[:, 1:5] +pred_scores = flat_predictions[:, 5] +pred_joints = flat_predictions[:, 6:].reshape((len(pred_bboxes), -1, 3)) +for i in range(len(pred_bboxes)): + confidence = pred_scores[i] + x_min, y_min, x_max, y_max = pred_bboxes[i] + print(f"Detected pose with confidence={{confidence}}, x_min={{x_min}}, y_min={{y_min}}, x_max={{x_max}}, y_max={{y_max}}") + for joint_index, (x, y, confidence) in enumerate(pred_joints[i]):") + print(f"Joint {{joint_index}} has coordinates x={{x}}, y={{y}}, confidence={{confidence}}") + diff --git a/yolo_nas_pose_m_fp32.onnx b/yolo_nas_pose_m_fp32.onnx new file mode 100644 index 0000000000000000000000000000000000000000..6fce8263a55fe6746c671f061d5274a2f40c6ea9 --- /dev/null +++ b/yolo_nas_pose_m_fp32.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d535c1b16741c82edb81d0eaf11604ed2c4cd4e094a3cedb0ac4d175885b0502 +size 155996929 diff --git a/yolo_nas_pose_m_fp32.onnx.best.engine b/yolo_nas_pose_m_fp32.onnx.best.engine new file mode 100644 index 0000000000000000000000000000000000000000..a4514eca203ff4fc24c5aa7a637d2e3cf4d75bb4 --- /dev/null +++ b/yolo_nas_pose_m_fp32.onnx.best.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aacf81075603bac1a47cc9877eee270592301a5238478d61a6d8b5f144825ad +size 41362745 diff --git a/yolo_nas_pose_m_fp32.onnx.best.engine.err b/yolo_nas_pose_m_fp32.onnx.best.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..9a25904dfb63000ca21be466805e2fedfa53b6f9 --- /dev/null +++ b/yolo_nas_pose_m_fp32.onnx.best.engine.err @@ -0,0 +1,495 @@ +[12/28/2023-05:05:18] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. 
+[12/28/2023-05:05:18] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-05:05:19] [W] [TRT] Calibrator is not being used. Users must provide dynamic range for all tensors that are not Int32 or Bool. +[12/28/2023-05:32:46] [W] [TRT] Tactic Device request: 6262MB Available: 3151MB. Device memory is insufficient to use tactic. +[12/28/2023-05:32:46] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6262 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:32:46] [W] [TRT] Tactic Device request: 6262MB Available: 3152MB. Device memory is insufficient to use tactic. +[12/28/2023-05:32:46] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6262 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:32:47] [W] [TRT] Tactic Device request: 6262MB Available: 3152MB. Device memory is insufficient to use tactic. +[12/28/2023-05:32:47] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6262 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:32:49] [W] [TRT] Tactic Device request: 6251MB Available: 3152MB. Device memory is insufficient to use tactic. +[12/28/2023-05:32:49] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6251 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:32:49] [W] [TRT] Tactic Device request: 6251MB Available: 3152MB. Device memory is insufficient to use tactic. +[12/28/2023-05:32:49] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 6251 detected for tactic 0x000000000000003c. 
+Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:41:04] [W] [TRT] Tactic Device request: 4711MB Available: 2293MB. Device memory is insufficient to use tactic. +[12/28/2023-05:41:04] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:41:04] [W] [TRT] Tactic Device request: 4711MB Available: 2293MB. Device memory is insufficient to use tactic. +[12/28/2023-05:41:04] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4711 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:41:04] [W] [TRT] Tactic Device request: 4711MB Available: 2293MB. Device memory is insufficient to use tactic. +[12/28/2023-05:41:04] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:41:06] [W] [TRT] Tactic Device request: 4701MB Available: 2293MB. Device memory is insufficient to use tactic. +[12/28/2023-05:41:06] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4701 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:41:06] [W] [TRT] Tactic Device request: 4701MB Available: 2293MB. Device memory is insufficient to use tactic. +[12/28/2023-05:41:06] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 4701 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:41:25] [W] [TRT] Tactic Device request: 6275MB Available: 2292MB. Device memory is insufficient to use tactic. 
+[12/28/2023-05:41:25] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6275 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:41:26] [W] [TRT] Tactic Device request: 6275MB Available: 2292MB. Device memory is insufficient to use tactic. +[12/28/2023-05:41:26] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6275 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:41:26] [W] [TRT] Tactic Device request: 6275MB Available: 2292MB. Device memory is insufficient to use tactic. +[12/28/2023-05:41:26] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6275 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:41:27] [W] [TRT] Tactic Device request: 6270MB Available: 2292MB. Device memory is insufficient to use tactic. +[12/28/2023-05:41:27] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6270 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:41:27] [W] [TRT] Tactic Device request: 6270MB Available: 2292MB. Device memory is insufficient to use tactic. +[12/28/2023-05:41:27] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 6270 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:52:11] [W] [TRT] Tactic Device request: 7056MB Available: 2129MB. Device memory is insufficient to use tactic. +[12/28/2023-05:52:11] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-05:52:11] [W] [TRT] Tactic Device request: 7056MB Available: 2129MB. Device memory is insufficient to use tactic. +[12/28/2023-05:52:11] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 7056 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:52:12] [W] [TRT] Tactic Device request: 7056MB Available: 2129MB. Device memory is insufficient to use tactic. +[12/28/2023-05:52:12] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:52:13] [W] [TRT] Tactic Device request: 7050MB Available: 2128MB. Device memory is insufficient to use tactic. +[12/28/2023-05:52:13] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7050 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:52:14] [W] [TRT] Tactic Device request: 7050MB Available: 2128MB. Device memory is insufficient to use tactic. +[12/28/2023-05:52:14] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 7050 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:52:34] [W] [TRT] Tactic Device request: 6354MB Available: 2129MB. Device memory is insufficient to use tactic. +[12/28/2023-05:52:35] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6354 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:52:35] [W] [TRT] Tactic Device request: 6354MB Available: 2129MB. Device memory is insufficient to use tactic. 
+[12/28/2023-05:52:35] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6354 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:52:35] [W] [TRT] Tactic Device request: 6354MB Available: 2127MB. Device memory is insufficient to use tactic. +[12/28/2023-05:52:35] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6354 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:52:37] [W] [TRT] Tactic Device request: 6351MB Available: 2129MB. Device memory is insufficient to use tactic. +[12/28/2023-05:52:37] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6351 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:52:37] [W] [TRT] Tactic Device request: 6351MB Available: 2129MB. Device memory is insufficient to use tactic. +[12/28/2023-05:52:37] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 6351 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:52:41] [W] [TRT] Tactic Device request: 2127MB Available: 2101MB. Device memory is insufficient to use tactic. +[12/28/2023-05:52:41] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2127 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:52:42] [W] [TRT] Tactic Device request: 2127MB Available: 2101MB. Device memory is insufficient to use tactic. +[12/28/2023-05:52:42] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2127 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-05:52:44] [W] [TRT] Tactic Device request: 2124MB Available: 2100MB. Device memory is insufficient to use tactic. +[12/28/2023-05:52:44] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2124 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:52:45] [W] [TRT] Tactic Device request: 2124MB Available: 2100MB. Device memory is insufficient to use tactic. +[12/28/2023-05:52:45] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2124 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:52:48] [W] [TRT] Tactic Device request: 2125MB Available: 2100MB. Device memory is insufficient to use tactic. +[12/28/2023-05:52:48] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2125 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:52:48] [W] [TRT] Tactic Device request: 2125MB Available: 2099MB. Device memory is insufficient to use tactic. +[12/28/2023-05:52:48] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2125 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:52:48] [W] [TRT] Tactic Device request: 2125MB Available: 2099MB. Device memory is insufficient to use tactic. +[12/28/2023-05:52:49] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2125 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:52:51] [W] [TRT] Tactic Device request: 2124MB Available: 2100MB. Device memory is insufficient to use tactic. 
+[12/28/2023-05:52:51] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2124 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:52:51] [W] [TRT] Tactic Device request: 2124MB Available: 2100MB. Device memory is insufficient to use tactic. +[12/28/2023-05:52:51] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2124 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:52:54] [W] [TRT] Tactic Device request: 2125MB Available: 2100MB. Device memory is insufficient to use tactic. +[12/28/2023-05:52:54] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2125 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:52:55] [W] [TRT] Tactic Device request: 2125MB Available: 2100MB. Device memory is insufficient to use tactic. +[12/28/2023-05:52:55] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2125 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:52:55] [W] [TRT] Tactic Device request: 2125MB Available: 2100MB. Device memory is insufficient to use tactic. +[12/28/2023-05:52:55] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2125 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:52:57] [W] [TRT] Tactic Device request: 2124MB Available: 2099MB. Device memory is insufficient to use tactic. +[12/28/2023-05:52:57] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2124 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-05:52:58] [W] [TRT] Tactic Device request: 2124MB Available: 2100MB. Device memory is insufficient to use tactic. +[12/28/2023-05:52:58] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2124 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:07:08] [W] [TRT] Tactic Device request: 2394MB Available: 2002MB. Device memory is insufficient to use tactic. +[12/28/2023-06:07:08] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2394 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:07:08] [W] [TRT] Tactic Device request: 2394MB Available: 2001MB. Device memory is insufficient to use tactic. +[12/28/2023-06:07:08] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:07:09] [W] [TRT] Tactic Device request: 2392MB Available: 1999MB. Device memory is insufficient to use tactic. +[12/28/2023-06:07:09] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:07:09] [W] [TRT] Tactic Device request: 2392MB Available: 2000MB. Device memory is insufficient to use tactic. +[12/28/2023-06:07:09] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2392 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:07:09] [W] [TRT] Tactic Device request: 2392MB Available: 2000MB. Device memory is insufficient to use tactic. 
+[12/28/2023-06:07:09] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:07:10] [W] [TRT] Tactic Device request: 2391MB Available: 2020MB. Device memory is insufficient to use tactic. +[12/28/2023-06:07:10] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2391 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:07:11] [W] [TRT] Tactic Device request: 2391MB Available: 2020MB. Device memory is insufficient to use tactic. +[12/28/2023-06:07:11] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2391 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:07:11] [W] [TRT] Tactic Device request: 2390MB Available: 2020MB. Device memory is insufficient to use tactic. +[12/28/2023-06:07:11] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2390 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:07:11] [W] [TRT] Tactic Device request: 2390MB Available: 2018MB. Device memory is insufficient to use tactic. +[12/28/2023-06:07:11] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2390 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:07:28] [W] [TRT] Tactic Device request: 4906MB Available: 2020MB. Device memory is insufficient to use tactic. +[12/28/2023-06:07:28] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4906 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-06:07:28] [W] [TRT] Tactic Device request: 4906MB Available: 2020MB. Device memory is insufficient to use tactic. +[12/28/2023-06:07:28] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4906 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:07:28] [W] [TRT] Tactic Device request: 4906MB Available: 2020MB. Device memory is insufficient to use tactic. +[12/28/2023-06:07:28] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4906 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:07:30] [W] [TRT] Tactic Device request: 4905MB Available: 2021MB. Device memory is insufficient to use tactic. +[12/28/2023-06:07:30] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4905 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:07:30] [W] [TRT] Tactic Device request: 4905MB Available: 2021MB. Device memory is insufficient to use tactic. +[12/28/2023-06:07:30] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 4905 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:15:33] [W] [TRT] Tactic Device request: 4906MB Available: 1883MB. Device memory is insufficient to use tactic. +[12/28/2023-06:15:33] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4906 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:15:33] [W] [TRT] Tactic Device request: 4906MB Available: 1894MB. Device memory is insufficient to use tactic. 
+[12/28/2023-06:15:33] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4906 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:15:33] [W] [TRT] Tactic Device request: 4906MB Available: 1894MB. Device memory is insufficient to use tactic. +[12/28/2023-06:15:33] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4906 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:15:34] [W] [TRT] Tactic Device request: 4905MB Available: 1887MB. Device memory is insufficient to use tactic. +[12/28/2023-06:15:34] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4905 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:15:34] [W] [TRT] Tactic Device request: 4905MB Available: 1887MB. Device memory is insufficient to use tactic. +[12/28/2023-06:15:35] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 4905 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:15:37] [W] [TRT] Tactic Device request: 2457MB Available: 1889MB. Device memory is insufficient to use tactic. +[12/28/2023-06:15:37] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:15:37] [W] [TRT] Tactic Device request: 2457MB Available: 1889MB. Device memory is insufficient to use tactic. +[12/28/2023-06:15:37] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2457 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-06:15:37] [W] [TRT] Tactic Device request: 2457MB Available: 1888MB. Device memory is insufficient to use tactic. +[12/28/2023-06:15:37] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:15:38] [W] [TRT] Tactic Device request: 2456MB Available: 1888MB. Device memory is insufficient to use tactic. +[12/28/2023-06:15:38] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2456 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:15:38] [W] [TRT] Tactic Device request: 2456MB Available: 1888MB. Device memory is insufficient to use tactic. +[12/28/2023-06:15:38] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2456 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:16:06] [W] [TRT] Tactic Device request: 3587MB Available: 1811MB. Device memory is insufficient to use tactic. +[12/28/2023-06:16:06] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:16:06] [W] [TRT] Tactic Device request: 3587MB Available: 1810MB. Device memory is insufficient to use tactic. +[12/28/2023-06:16:06] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3587 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:16:06] [W] [TRT] Tactic Device request: 3587MB Available: 1810MB. Device memory is insufficient to use tactic. 
+[12/28/2023-06:16:06] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:16:08] [W] [TRT] Tactic Device request: 3585MB Available: 1810MB. Device memory is insufficient to use tactic. +[12/28/2023-06:16:08] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3585 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:16:08] [W] [TRT] Tactic Device request: 3585MB Available: 1810MB. Device memory is insufficient to use tactic. +[12/28/2023-06:16:08] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3585 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:16:10] [W] [TRT] Tactic Device request: 2385MB Available: 1811MB. Device memory is insufficient to use tactic. +[12/28/2023-06:16:10] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2385 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:16:10] [W] [TRT] Tactic Device request: 2385MB Available: 1811MB. Device memory is insufficient to use tactic. +[12/28/2023-06:16:10] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2385 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:16:10] [W] [TRT] Tactic Device request: 2385MB Available: 1811MB. Device memory is insufficient to use tactic. +[12/28/2023-06:16:10] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2385 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-06:16:11] [W] [TRT] Tactic Device request: 2384MB Available: 1810MB. Device memory is insufficient to use tactic. +[12/28/2023-06:16:11] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2384 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:16:11] [W] [TRT] Tactic Device request: 2384MB Available: 1810MB. Device memory is insufficient to use tactic. +[12/28/2023-06:16:11] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2384 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:24:26] [W] [TRT] Tactic Device request: 3556MB Available: 1659MB. Device memory is insufficient to use tactic. +[12/28/2023-06:24:26] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:24:26] [W] [TRT] Tactic Device request: 3556MB Available: 1659MB. Device memory is insufficient to use tactic. +[12/28/2023-06:24:26] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3556 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:24:27] [W] [TRT] Tactic Device request: 3556MB Available: 1659MB. Device memory is insufficient to use tactic. +[12/28/2023-06:24:27] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:24:28] [W] [TRT] Tactic Device request: 3551MB Available: 1659MB. Device memory is insufficient to use tactic. 
+[12/28/2023-06:24:28] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3551 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:24:28] [W] [TRT] Tactic Device request: 3551MB Available: 1659MB. Device memory is insufficient to use tactic. +[12/28/2023-06:24:28] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3551 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:34:52] [W] [TRT] Tactic Device request: 1581MB Available: 1492MB. Device memory is insufficient to use tactic. +[12/28/2023-06:34:52] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 1581 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:34:52] [W] [TRT] Tactic Device request: 1581MB Available: 1492MB. Device memory is insufficient to use tactic. +[12/28/2023-06:34:52] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 1581 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:34:53] [W] [TRT] Tactic Device request: 1579MB Available: 1493MB. Device memory is insufficient to use tactic. +[12/28/2023-06:34:53] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 1579 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:34:53] [W] [TRT] Tactic Device request: 1579MB Available: 1493MB. Device memory is insufficient to use tactic. +[12/28/2023-06:34:53] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 1579 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-06:34:54] [W] [TRT] Tactic Device request: 2359MB Available: 1494MB. Device memory is insufficient to use tactic. +[12/28/2023-06:34:54] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2359 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:34:55] [W] [TRT] Tactic Device request: 2359MB Available: 1494MB. Device memory is insufficient to use tactic. +[12/28/2023-06:34:55] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2359 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:34:55] [W] [TRT] Tactic Device request: 2359MB Available: 1493MB. Device memory is insufficient to use tactic. +[12/28/2023-06:34:55] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2359 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:34:56] [W] [TRT] Tactic Device request: 2355MB Available: 1494MB. Device memory is insufficient to use tactic. +[12/28/2023-06:34:56] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2355 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:34:56] [W] [TRT] Tactic Device request: 2355MB Available: 1494MB. Device memory is insufficient to use tactic. +[12/28/2023-06:34:56] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2355 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:35:03] [W] [TRT] Tactic Device request: 2362MB Available: 1565MB. Device memory is insufficient to use tactic. 
+[12/28/2023-06:35:03] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2362 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:35:03] [W] [TRT] Tactic Device request: 2362MB Available: 1564MB. Device memory is insufficient to use tactic. +[12/28/2023-06:35:03] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2362 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:35:03] [W] [TRT] Tactic Device request: 2362MB Available: 1564MB. Device memory is insufficient to use tactic. +[12/28/2023-06:35:03] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2362 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:35:06] [W] [TRT] Tactic Device request: 2357MB Available: 1724MB. Device memory is insufficient to use tactic. +[12/28/2023-06:35:06] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2357 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:35:06] [W] [TRT] Tactic Device request: 2357MB Available: 1723MB. Device memory is insufficient to use tactic. +[12/28/2023-06:35:06] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2357 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:35:09] [W] [TRT] Tactic Device request: 2359MB Available: 1742MB. Device memory is insufficient to use tactic. +[12/28/2023-06:35:09] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2359 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-06:35:09] [W] [TRT] Tactic Device request: 2359MB Available: 1742MB. Device memory is insufficient to use tactic. +[12/28/2023-06:35:09] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2359 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:35:10] [W] [TRT] Tactic Device request: 2359MB Available: 1741MB. Device memory is insufficient to use tactic. +[12/28/2023-06:35:10] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2359 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:35:12] [W] [TRT] Tactic Device request: 2356MB Available: 1743MB. Device memory is insufficient to use tactic. +[12/28/2023-06:35:12] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2356 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:35:12] [W] [TRT] Tactic Device request: 2356MB Available: 1743MB. Device memory is insufficient to use tactic. +[12/28/2023-06:35:13] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2356 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:44:04] [W] [TRT] Tactic Device request: 3575MB Available: 2161MB. Device memory is insufficient to use tactic. +[12/28/2023-06:44:04] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3575 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:44:04] [W] [TRT] Tactic Device request: 3575MB Available: 2161MB. Device memory is insufficient to use tactic. 
+[12/28/2023-06:44:04] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3575 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:44:04] [W] [TRT] Tactic Device request: 3575MB Available: 2162MB. Device memory is insufficient to use tactic. +[12/28/2023-06:44:04] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3575 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:44:05] [W] [TRT] Tactic Device request: 3572MB Available: 2160MB. Device memory is insufficient to use tactic. +[12/28/2023-06:44:05] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3572 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:44:06] [W] [TRT] Tactic Device request: 3572MB Available: 2160MB. Device memory is insufficient to use tactic. +[12/28/2023-06:44:06] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3572 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:44:14] [W] [TRT] Tactic Device request: 2390MB Available: 1984MB. Device memory is insufficient to use tactic. +[12/28/2023-06:44:14] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2390 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:44:14] [W] [TRT] Tactic Device request: 2390MB Available: 1901MB. Device memory is insufficient to use tactic. +[12/28/2023-06:44:14] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2390 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-06:44:15] [W] [TRT] Tactic Device request: 2390MB Available: 1897MB. Device memory is insufficient to use tactic. +[12/28/2023-06:44:15] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2390 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:44:18] [W] [TRT] Tactic Device request: 2387MB Available: 1895MB. Device memory is insufficient to use tactic. +[12/28/2023-06:44:18] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2387 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:44:18] [W] [TRT] Tactic Device request: 2387MB Available: 1878MB. Device memory is insufficient to use tactic. +[12/28/2023-06:44:18] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2387 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:44:21] [W] [TRT] Tactic Device request: 2388MB Available: 1869MB. Device memory is insufficient to use tactic. +[12/28/2023-06:44:21] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2388 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:44:22] [W] [TRT] Tactic Device request: 2388MB Available: 1826MB. Device memory is insufficient to use tactic. +[12/28/2023-06:44:22] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2388 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:44:22] [W] [TRT] Tactic Device request: 2388MB Available: 1822MB. Device memory is insufficient to use tactic. 
+[12/28/2023-06:44:22] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2388 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:44:25] [W] [TRT] Tactic Device request: 2386MB Available: 1820MB. Device memory is insufficient to use tactic. +[12/28/2023-06:44:25] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2386 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:44:25] [W] [TRT] Tactic Device request: 2386MB Available: 1796MB. Device memory is insufficient to use tactic. +[12/28/2023-06:44:25] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2386 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:44:29] [W] [TRT] Tactic Device request: 4775MB Available: 1792MB. Device memory is insufficient to use tactic. +[12/28/2023-06:44:29] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4775 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:44:30] [W] [TRT] Tactic Device request: 4775MB Available: 1460MB. Device memory is insufficient to use tactic. +[12/28/2023-06:44:30] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4775 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:44:31] [W] [TRT] Tactic Device request: 4775MB Available: 1452MB. Device memory is insufficient to use tactic. +[12/28/2023-06:44:31] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4775 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-06:44:36] [W] [TRT] Tactic Device request: 4772MB Available: 1450MB. Device memory is insufficient to use tactic. +[12/28/2023-06:44:36] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4772 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:44:36] [W] [TRT] Tactic Device request: 4772MB Available: 1402MB. Device memory is insufficient to use tactic. +[12/28/2023-06:44:36] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4772 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:44:42] [W] [TRT] Tactic Device request: 4774MB Available: 1429MB. Device memory is insufficient to use tactic. +[12/28/2023-06:44:42] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4774 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:44:42] [W] [TRT] Tactic Device request: 4774MB Available: 1387MB. Device memory is insufficient to use tactic. +[12/28/2023-06:44:42] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4774 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:44:43] [W] [TRT] Tactic Device request: 4774MB Available: 1382MB. Device memory is insufficient to use tactic. +[12/28/2023-06:44:43] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4774 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:44:48] [W] [TRT] Tactic Device request: 4771MB Available: 1378MB. Device memory is insufficient to use tactic. 
+[12/28/2023-06:44:48] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4771 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:44:49] [W] [TRT] Tactic Device request: 4771MB Available: 1372MB. Device memory is insufficient to use tactic. +[12/28/2023-06:44:49] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4771 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:44:54] [W] [TRT] Tactic Device request: 1637MB Available: 1372MB. Device memory is insufficient to use tactic. +[12/28/2023-06:44:54] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 1637 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:44:54] [W] [TRT] Tactic Device request: 1637MB Available: 1372MB. Device memory is insufficient to use tactic. +[12/28/2023-06:44:54] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 1637 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:44:54] [W] [TRT] Tactic Device request: 1637MB Available: 1372MB. Device memory is insufficient to use tactic. +[12/28/2023-06:44:54] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 1637 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:44:55] [W] [TRT] Tactic Device request: 1636MB Available: 1372MB. Device memory is insufficient to use tactic. +[12/28/2023-06:44:55] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 1636 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-06:44:55] [W] [TRT] Tactic Device request: 1636MB Available: 1371MB. Device memory is insufficient to use tactic. +[12/28/2023-06:44:55] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 1636 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:45:07] [W] [TRT] Tactic Device request: 4774MB Available: 1354MB. Device memory is insufficient to use tactic. +[12/28/2023-06:45:07] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4774 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:45:08] [W] [TRT] Tactic Device request: 4774MB Available: 1364MB. Device memory is insufficient to use tactic. +[12/28/2023-06:45:08] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4774 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:45:09] [W] [TRT] Tactic Device request: 4774MB Available: 1364MB. Device memory is insufficient to use tactic. +[12/28/2023-06:45:09] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4774 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:45:14] [W] [TRT] Tactic Device request: 4771MB Available: 1366MB. Device memory is insufficient to use tactic. +[12/28/2023-06:45:14] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4771 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:45:15] [W] [TRT] Tactic Device request: 4771MB Available: 1377MB. Device memory is insufficient to use tactic. 
+[12/28/2023-06:45:15] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4771 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:55:29] [W] [TRT] Tactic Device request: 1638MB Available: 1160MB. Device memory is insufficient to use tactic. +[12/28/2023-06:55:29] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 1638 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:55:29] [W] [TRT] Tactic Device request: 1638MB Available: 1157MB. Device memory is insufficient to use tactic. +[12/28/2023-06:55:29] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 1638 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:55:29] [W] [TRT] Tactic Device request: 1637MB Available: 1155MB. Device memory is insufficient to use tactic. +[12/28/2023-06:55:29] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 1637 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:55:29] [W] [TRT] Tactic Device request: 1637MB Available: 1156MB. Device memory is insufficient to use tactic. +[12/28/2023-06:55:29] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 1637 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:55:31] [W] [TRT] Tactic Device request: 2454MB Available: 1154MB. Device memory is insufficient to use tactic. +[12/28/2023-06:55:31] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2454 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-06:55:31] [W] [TRT] Tactic Device request: 2454MB Available: 1154MB. Device memory is insufficient to use tactic. +[12/28/2023-06:55:31] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2454 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:55:32] [W] [TRT] Tactic Device request: 2454MB Available: 1154MB. Device memory is insufficient to use tactic. +[12/28/2023-06:55:32] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2454 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:55:33] [W] [TRT] Tactic Device request: 2453MB Available: 1154MB. Device memory is insufficient to use tactic. +[12/28/2023-06:55:33] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2453 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:55:33] [W] [TRT] Tactic Device request: 2453MB Available: 1154MB. Device memory is insufficient to use tactic. +[12/28/2023-06:55:33] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2453 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:55:35] [W] [TRT] Tactic Device request: 2463MB Available: 1154MB. Device memory is insufficient to use tactic. +[12/28/2023-06:55:35] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2463 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:55:35] [W] [TRT] Tactic Device request: 1231MB Available: 1154MB. Device memory is insufficient to use tactic. 
+[12/28/2023-06:55:35] [W] [TRT] Skipping tactic 4 due to insufficient memory on requested size of 1231 detected for tactic 0x0000000000000005. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:55:35] [W] [TRT] Tactic Device request: 2463MB Available: 1154MB. Device memory is insufficient to use tactic. +[12/28/2023-06:55:35] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2463 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:55:35] [W] [TRT] Tactic Device request: 1231MB Available: 1154MB. Device memory is insufficient to use tactic. +[12/28/2023-06:55:35] [W] [TRT] Skipping tactic 10 due to insufficient memory on requested size of 1231 detected for tactic 0x000000000000003d. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:55:36] [W] [TRT] Tactic Device request: 2463MB Available: 1152MB. Device memory is insufficient to use tactic. +[12/28/2023-06:55:36] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2463 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:55:36] [W] [TRT] Tactic Device request: 1231MB Available: 1152MB. Device memory is insufficient to use tactic. +[12/28/2023-06:55:36] [W] [TRT] Skipping tactic 16 due to insufficient memory on requested size of 1231 detected for tactic 0x0000000000000075. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:55:39] [W] [TRT] Tactic Device request: 2462MB Available: 1152MB. Device memory is insufficient to use tactic. +[12/28/2023-06:55:39] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2462 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-06:55:39] [W] [TRT] Tactic Device request: 1229MB Available: 1152MB. Device memory is insufficient to use tactic. +[12/28/2023-06:55:39] [W] [TRT] Skipping tactic 4 due to insufficient memory on requested size of 1229 detected for tactic 0x0000000000000005. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:55:39] [W] [TRT] Tactic Device request: 2462MB Available: 1152MB. Device memory is insufficient to use tactic. +[12/28/2023-06:55:39] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2462 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:55:39] [W] [TRT] Tactic Device request: 1229MB Available: 1152MB. Device memory is insufficient to use tactic. +[12/28/2023-06:55:39] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 1229 detected for tactic 0x000000000000003d. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:55:43] [W] [TRT] Tactic Device request: 2463MB Available: 1153MB. Device memory is insufficient to use tactic. +[12/28/2023-06:55:43] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2463 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:55:43] [W] [TRT] Tactic Device request: 1230MB Available: 1153MB. Device memory is insufficient to use tactic. +[12/28/2023-06:55:43] [W] [TRT] Skipping tactic 4 due to insufficient memory on requested size of 1230 detected for tactic 0x0000000000000005. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:55:43] [W] [TRT] Tactic Device request: 2463MB Available: 1153MB. Device memory is insufficient to use tactic. 
+[12/28/2023-06:55:44] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2463 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:55:44] [W] [TRT] Tactic Device request: 1230MB Available: 1153MB. Device memory is insufficient to use tactic. +[12/28/2023-06:55:44] [W] [TRT] Skipping tactic 10 due to insufficient memory on requested size of 1230 detected for tactic 0x000000000000003d. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:55:44] [W] [TRT] Tactic Device request: 2463MB Available: 1152MB. Device memory is insufficient to use tactic. +[12/28/2023-06:55:44] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2463 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:55:44] [W] [TRT] Tactic Device request: 1230MB Available: 1152MB. Device memory is insufficient to use tactic. +[12/28/2023-06:55:44] [W] [TRT] Skipping tactic 16 due to insufficient memory on requested size of 1230 detected for tactic 0x0000000000000075. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:55:47] [W] [TRT] Tactic Device request: 2462MB Available: 1152MB. Device memory is insufficient to use tactic. +[12/28/2023-06:55:47] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2462 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:55:47] [W] [TRT] Tactic Device request: 1229MB Available: 1152MB. Device memory is insufficient to use tactic. +[12/28/2023-06:55:47] [W] [TRT] Skipping tactic 4 due to insufficient memory on requested size of 1229 detected for tactic 0x0000000000000005. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-06:55:47] [W] [TRT] Tactic Device request: 2462MB Available: 1152MB. Device memory is insufficient to use tactic. +[12/28/2023-06:55:47] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2462 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:55:47] [W] [TRT] Tactic Device request: 1229MB Available: 1152MB. Device memory is insufficient to use tactic. +[12/28/2023-06:55:47] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 1229 detected for tactic 0x000000000000003d. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-06:56:41] [W] [TRT] TensorRT encountered issues when converting weights between types and that could affect accuracy. +[12/28/2023-06:56:41] [W] [TRT] If this is not the desired behavior, please modify the weights or retrain with regularization to adjust the magnitude of the weights. +[12/28/2023-06:56:41] [W] [TRT] Check verbose logs for the list of affected weights. +[12/28/2023-06:56:41] [W] [TRT] - 103 weights are affected by this issue: Detected subnormal FP16 values. +[12/28/2023-06:56:41] [W] [TRT] - 33 weights are affected by this issue: Detected values less than smallest positive FP16 subnormal value and converted them to the FP16 minimum subnormalized value. +[12/28/2023-06:56:58] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. +[12/28/2023-06:56:58] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/28/2023-06:56:58] [W] * GPU compute time is unstable, with coefficient of variance = 4.26897%. +[12/28/2023-06:56:58] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. 
diff --git a/yolo_nas_pose_m_fp32.onnx.best.engine.log b/yolo_nas_pose_m_fp32.onnx.best.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..41a5934211a49f295de1b8b858bb439aa31f23b9 --- /dev/null +++ b/yolo_nas_pose_m_fp32.onnx.best.engine.log @@ -0,0 +1,312 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_m_fp32.onnx --best --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_m_fp32.onnx.best.engine +[12/28/2023-05:05:07] [I] === Model Options === +[12/28/2023-05:05:07] [I] Format: ONNX +[12/28/2023-05:05:07] [I] Model: yolo_nas_pose_m_fp32.onnx +[12/28/2023-05:05:07] [I] Output: +[12/28/2023-05:05:07] [I] === Build Options === +[12/28/2023-05:05:07] [I] Max batch: explicit batch +[12/28/2023-05:05:07] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-05:05:07] [I] minTiming: 1 +[12/28/2023-05:05:07] [I] avgTiming: 8 +[12/28/2023-05:05:07] [I] Precision: FP32+FP16+INT8 +[12/28/2023-05:05:07] [I] LayerPrecisions: +[12/28/2023-05:05:07] [I] Calibration: Dynamic +[12/28/2023-05:05:07] [I] Refit: Disabled +[12/28/2023-05:05:07] [I] Sparsity: Disabled +[12/28/2023-05:05:07] [I] Safe mode: Disabled +[12/28/2023-05:05:07] [I] DirectIO mode: Disabled +[12/28/2023-05:05:07] [I] Restricted mode: Disabled +[12/28/2023-05:05:07] [I] Build only: Disabled +[12/28/2023-05:05:07] [I] Save engine: yolo_nas_pose_m_fp32.onnx.best.engine +[12/28/2023-05:05:07] [I] Load engine: +[12/28/2023-05:05:07] [I] Profiling verbosity: 0 +[12/28/2023-05:05:07] [I] Tactic sources: Using default tactic sources +[12/28/2023-05:05:07] [I] timingCacheMode: local +[12/28/2023-05:05:07] [I] timingCacheFile: +[12/28/2023-05:05:07] [I] Heuristic: Disabled +[12/28/2023-05:05:07] [I] Preview Features: Use default preview flags. 
+[12/28/2023-05:05:07] [I] Input(s)s format: fp32:CHW +[12/28/2023-05:05:07] [I] Output(s)s format: fp32:CHW +[12/28/2023-05:05:07] [I] Input build shapes: model +[12/28/2023-05:05:07] [I] Input calibration shapes: model +[12/28/2023-05:05:07] [I] === System Options === +[12/28/2023-05:05:07] [I] Device: 0 +[12/28/2023-05:05:07] [I] DLACore: +[12/28/2023-05:05:07] [I] Plugins: +[12/28/2023-05:05:07] [I] === Inference Options === +[12/28/2023-05:05:07] [I] Batch: Explicit +[12/28/2023-05:05:07] [I] Input inference shapes: model +[12/28/2023-05:05:07] [I] Iterations: 10 +[12/28/2023-05:05:07] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-05:05:07] [I] Sleep time: 0ms +[12/28/2023-05:05:07] [I] Idle time: 0ms +[12/28/2023-05:05:07] [I] Streams: 1 +[12/28/2023-05:05:07] [I] ExposeDMA: Disabled +[12/28/2023-05:05:07] [I] Data transfers: Enabled +[12/28/2023-05:05:07] [I] Spin-wait: Disabled +[12/28/2023-05:05:07] [I] Multithreading: Disabled +[12/28/2023-05:05:07] [I] CUDA Graph: Disabled +[12/28/2023-05:05:07] [I] Separate profiling: Disabled +[12/28/2023-05:05:07] [I] Time Deserialize: Disabled +[12/28/2023-05:05:07] [I] Time Refit: Disabled +[12/28/2023-05:05:07] [I] NVTX verbosity: 0 +[12/28/2023-05:05:07] [I] Persistent Cache Ratio: 0 +[12/28/2023-05:05:07] [I] Inputs: +[12/28/2023-05:05:07] [I] === Reporting Options === +[12/28/2023-05:05:07] [I] Verbose: Disabled +[12/28/2023-05:05:07] [I] Averages: 100 inferences +[12/28/2023-05:05:07] [I] Percentiles: 90,95,99 +[12/28/2023-05:05:07] [I] Dump refittable layers:Disabled +[12/28/2023-05:05:07] [I] Dump output: Disabled +[12/28/2023-05:05:07] [I] Profile: Disabled +[12/28/2023-05:05:07] [I] Export timing to JSON file: +[12/28/2023-05:05:07] [I] Export output to JSON file: +[12/28/2023-05:05:07] [I] Export profile to JSON file: +[12/28/2023-05:05:07] [I] +[12/28/2023-05:05:07] [I] === Device Information === +[12/28/2023-05:05:07] [I] Selected Device: Orin +[12/28/2023-05:05:07] [I] Compute Capability: 8.7 
+[12/28/2023-05:05:07] [I] SMs: 8 +[12/28/2023-05:05:07] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-05:05:07] [I] Device Global Memory: 7471 MiB +[12/28/2023-05:05:07] [I] Shared Memory per SM: 164 KiB +[12/28/2023-05:05:07] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-05:05:07] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-05:05:07] [I] +[12/28/2023-05:05:07] [I] TensorRT version: 8.5.2 +[12/28/2023-05:05:12] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 2997 (MiB) +[12/28/2023-05:05:16] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +284, now: CPU 574, GPU 3304 (MiB) +[12/28/2023-05:05:16] [I] Start parsing network model +[12/28/2023-05:05:18] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-05:05:18] [I] [TRT] Input filename: yolo_nas_pose_m_fp32.onnx +[12/28/2023-05:05:18] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-05:05:18] [I] [TRT] Opset version: 17 +[12/28/2023-05:05:18] [I] [TRT] Producer name: pytorch +[12/28/2023-05:05:18] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-05:05:18] [I] [TRT] Domain: +[12/28/2023-05:05:18] [I] [TRT] Model version: 0 +[12/28/2023-05:05:18] [I] [TRT] Doc string: +[12/28/2023-05:05:18] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-05:05:19] [I] Finish parsing network model +[12/28/2023-05:05:19] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/28/2023-05:05:19] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 407) [Constant] +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 408) [Constant] +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 409) 
[Constant] +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stem/conv/rbr_reparam/Conv + /model/backbone/stem/conv/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/downsample/rbr_reparam/Conv + /model/backbone/stage1/downsample/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv2/conv/Conv + /model/backbone/stage1/blocks/conv2/act/Relu || /model/backbone/stage1/blocks/conv1/conv/Conv + /model/backbone/stage1/blocks/conv1/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 15) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 23) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] COPY: 
/model/backbone/stage1/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv3/conv/Conv + /model/backbone/stage1/blocks/conv3/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_skip2/conv/Conv + /model/neck/neck2/reduce_skip2/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/downsample/rbr_reparam/Conv + /model/backbone/stage2/downsample/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/downsample/conv/Conv + /model/neck/neck2/downsample/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv2/conv/Conv + /model/backbone/stage2/blocks/conv2/act/Relu || /model/backbone/stage2/blocks/conv1/conv/Conv + /model/backbone/stage2/blocks/conv1/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 44) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + 
/model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 52) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 60) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv3/conv/Conv + /model/backbone/stage2/blocks/conv3/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip2/conv/Conv + 
/model/neck/neck1/reduce_skip2/act/Relu || /model/neck/neck2/reduce_skip1/conv/Conv + /model/neck/neck2/reduce_skip1/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/downsample/rbr_reparam/Conv + /model/backbone/stage3/downsample/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/downsample/conv/Conv + /model/neck/neck1/downsample/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv2/conv/Conv + /model/backbone/stage3/blocks/conv2/act/Relu || /model/backbone/stage3/blocks/conv1/conv/Conv + /model/backbone/stage3/blocks/conv1/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 83) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 91) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 99) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 107) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.4.alpha + (Unnamed Layer* 
115) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Add) +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add_output_0 copy +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add_output_0 copy +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv3/conv/Conv + /model/backbone/stage3/blocks/conv3/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip1/conv/Conv + /model/neck/neck1/reduce_skip1/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/downsample/rbr_reparam/Conv + /model/backbone/stage4/downsample/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv2/conv/Conv + /model/backbone/stage4/blocks/conv2/act/Relu || /model/backbone/stage4/blocks/conv1/conv/Conv + /model/backbone/stage4/blocks/conv1/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu 
+[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 134) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 142) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv3/conv/Conv + /model/backbone/stage4/blocks/conv3/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv1/conv/Conv + /model/backbone/context_module/cv1/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/cv1/act/Relu_output_0 copy +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv2/conv/Conv + /model/backbone/context_module/cv2/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/conv/conv/Conv 
+ /model/neck/neck1/conv/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck1/upsample/ConvTranspose +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_after_concat/conv/Conv + /model/neck/neck1/reduce_after_concat/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv2/conv/Conv + /model/neck/neck1/blocks/conv2/act/Relu || /model/neck/neck1/blocks/conv1/conv/Conv + /model/neck/neck1/blocks/conv1/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 171) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 179) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/neck/neck1/blocks/conv3/conv/Conv + /model/neck/neck1/blocks/conv3/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/conv/conv/Conv + /model/neck/neck2/conv/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck2/upsample/ConvTranspose +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/reduce_skip1/act/Relu_output_0 copy +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_after_concat/conv/Conv + /model/neck/neck2/reduce_after_concat/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv2/conv/Conv + /model/neck/neck2/blocks/conv2/act/Relu || /model/neck/neck2/blocks/conv1/conv/Conv + /model/neck/neck2/blocks/conv1/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 200) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 208) [Shuffle] + 
/model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 216) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv3/conv/Conv + /model/neck/neck2/blocks/conv3/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/bbox_stem/seq/conv/Conv + /model/heads/head1/bbox_stem/seq/act/Relu || /model/heads/head1/pose_stem/seq/conv/Conv + /model/heads/head1/pose_stem/seq/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/conv/conv/Conv + /model/neck/neck3/conv/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head1/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head1/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv2/conv/Conv + /model/neck/neck3/blocks/conv2/act/Relu || 
/model/neck/neck3/blocks/conv1/conv/Conv + /model/neck/neck3/blocks/conv1/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/cls_pred/Conv +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_pred/Conv +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_pred/Conv +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 247) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 270) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, 
/model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv3/conv/Conv + /model/neck/neck3/blocks/conv3/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_stem/seq/conv/Conv + /model/heads/head2/pose_stem/seq/act/Relu || /model/heads/head2/bbox_stem/seq/conv/Conv + /model/heads/head2/bbox_stem/seq/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/conv/conv/Conv + /model/neck/neck4/conv/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head2/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head2/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv2/conv/Conv + /model/neck/neck4/blocks/conv2/act/Relu || /model/neck/neck4/blocks/conv1/conv/Conv + /model/neck/neck4/blocks/conv1/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/cls_pred/Conv +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_pred/Conv +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 
+[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_pred/Conv +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 301) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 324) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.2.alpha + (Unnamed Layer* 332) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] COPY: 
/model/neck/neck4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv3/conv/Conv + /model/neck/neck4/blocks/conv3/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/bbox_stem/seq/conv/Conv + /model/heads/head3/bbox_stem/seq/act/Relu || /model/heads/head3/pose_stem/seq/conv/Conv + /model/heads/head3/pose_stem/seq/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head3/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head3/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/cls_pred/Conv +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_pred/Conv +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.2/seq/act/Relu +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_pred/Conv +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice_1.../post_process/Reshape_2]} +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] NMS: batched_nms_26 +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] 
DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 411) [NMS]_1_output[DevicetoShapeHostCopy] +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/28/2023-05:05:19] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] +[12/28/2023-05:05:29] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +388, now: CPU 1280, GPU 3896 (MiB) +[12/28/2023-05:05:30] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +83, GPU +81, now: CPU 1363, GPU 3977 (MiB) +[12/28/2023-05:05:30] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored. +[12/28/2023-06:56:20] [I] [TRT] Total Activation Memory: 7941884416 +[12/28/2023-06:56:20] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/28/2023-06:56:33] [I] [TRT] Total Host Persistent Memory: 294816 +[12/28/2023-06:56:33] [I] [TRT] Total Device Persistent Memory: 38912 +[12/28/2023-06:56:33] [I] [TRT] Total Scratch Memory: 134217728 +[12/28/2023-06:56:33] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 60 MiB, GPU 2398 MiB +[12/28/2023-06:56:33] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 158 steps to complete. +[12/28/2023-06:56:33] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 79.8205ms to assign 13 blocks to 158 nodes requiring 144903168 bytes. +[12/28/2023-06:56:33] [I] [TRT] Total Activation Memory: 144903168 +[12/28/2023-06:56:41] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU -19, now: CPU 1751, GPU 5827 (MiB) +[12/28/2023-06:56:41] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +38, GPU +64, now: CPU 38, GPU 64 (MiB) +[12/28/2023-06:56:41] [I] Engine built in 6693.98 sec. 
+[12/28/2023-06:56:42] [I] [TRT] Loaded engine size: 39 MiB +[12/28/2023-06:56:43] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1283, GPU 5413 (MiB) +[12/28/2023-06:56:43] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +37, now: CPU 0, GPU 37 (MiB) +[12/28/2023-06:56:43] [I] Engine deserialized in 0.263232 sec. +[12/28/2023-06:56:43] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1283, GPU 5413 (MiB) +[12/28/2023-06:56:43] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +138, now: CPU 0, GPU 175 (MiB) +[12/28/2023-06:56:43] [I] Setting persistentCacheLimit to 0 bytes. +[12/28/2023-06:56:43] [I] Using random values for input onnx::Cast_0 +[12/28/2023-06:56:43] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/28/2023-06:56:43] [I] Using random values for output graph2_flat_predictions +[12/28/2023-06:56:43] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/28/2023-06:56:43] [I] Starting inference +[12/28/2023-06:56:58] [I] Warmup completed 2 queries over 200 ms +[12/28/2023-06:56:58] [I] Timing trace has 872 queries over 15.0266 s +[12/28/2023-06:56:58] [I] +[12/28/2023-06:56:58] [I] === Trace details === +[12/28/2023-06:56:58] [I] Trace averages of 100 runs: +[12/28/2023-06:56:58] [I] Average on 100 runs - GPU latency: 17.3465 ms - Host latency: 17.467 ms (enqueue 17.4036 ms) +[12/28/2023-06:56:58] [I] Average on 100 runs - GPU latency: 17.4154 ms - Host latency: 17.5365 ms (enqueue 17.4757 ms) +[12/28/2023-06:56:58] [I] Average on 100 runs - GPU latency: 17.3679 ms - Host latency: 17.4875 ms (enqueue 17.4262 ms) +[12/28/2023-06:56:58] [I] Average on 100 runs - GPU latency: 17.5258 ms - Host latency: 17.6474 ms (enqueue 17.5855 ms) +[12/28/2023-06:56:58] [I] Average on 100 runs - GPU latency: 17.315 ms - Host latency: 17.4354 ms (enqueue 17.3775 ms) +[12/28/2023-06:56:58] [I] 
Average on 100 runs - GPU latency: 16.9008 ms - Host latency: 17.0146 ms (enqueue 16.9691 ms) +[12/28/2023-06:56:58] [I] Average on 100 runs - GPU latency: 16.5099 ms - Host latency: 16.6213 ms (enqueue 16.5847 ms) +[12/28/2023-06:56:58] [I] Average on 100 runs - GPU latency: 16.4721 ms - Host latency: 16.5843 ms (enqueue 16.5453 ms) +[12/28/2023-06:56:58] [I] +[12/28/2023-06:56:58] [I] === Performance summary === +[12/28/2023-06:56:58] [I] Throughput: 58.0306 qps +[12/28/2023-06:56:58] [I] Latency: min = 15.9121 ms, max = 21.7354 ms, mean = 17.1982 ms, median = 17.2047 ms, percentile(90%) = 17.9839 ms, percentile(95%) = 18.5039 ms, percentile(99%) = 19.7083 ms +[12/28/2023-06:56:58] [I] Enqueue Time: min = 15.8809 ms, max = 21.6855 ms, mean = 17.1468 ms, median = 17.1523 ms, percentile(90%) = 17.9199 ms, percentile(95%) = 18.2314 ms, percentile(99%) = 20.02 ms +[12/28/2023-06:56:58] [I] H2D Latency: min = 0.0800781 ms, max = 0.153687 ms, mean = 0.0979622 ms, median = 0.0981445 ms, percentile(90%) = 0.100586 ms, percentile(95%) = 0.101562 ms, percentile(99%) = 0.110352 ms +[12/28/2023-06:56:58] [I] GPU Compute Time: min = 15.8008 ms, max = 21.6191 ms, mean = 17.0811 ms, median = 17.0859 ms, percentile(90%) = 17.8599 ms, percentile(95%) = 18.3804 ms, percentile(99%) = 19.5974 ms +[12/28/2023-06:56:58] [I] D2H Latency: min = 0.00292969 ms, max = 0.0549316 ms, mean = 0.019114 ms, median = 0.0200195 ms, percentile(90%) = 0.0280762 ms, percentile(95%) = 0.0292969 ms, percentile(99%) = 0.0395508 ms +[12/28/2023-06:56:58] [I] Total Host Walltime: 15.0266 s +[12/28/2023-06:56:58] [I] Total GPU Compute Time: 14.8947 s +[12/28/2023-06:56:58] [I] Explanations of the performance metrics are printed in the verbose logs. 
+[12/28/2023-06:56:58] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_m_fp32.onnx --best --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_m_fp32.onnx.best.engine diff --git a/yolo_nas_pose_m_fp32.onnx.engine b/yolo_nas_pose_m_fp32.onnx.engine new file mode 100644 index 0000000000000000000000000000000000000000..cdde75c97064cef77dc7d1bee1d3fafc3f953fcb --- /dev/null +++ b/yolo_nas_pose_m_fp32.onnx.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee50960125929065820108740b009a002d8e0a539c52faa537c2f9f875038893 +size 157910277 diff --git a/yolo_nas_pose_m_fp32.onnx.engine.err b/yolo_nas_pose_m_fp32.onnx.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..d2b87e8b079dfaaf86c68c523824824ea8e36651 --- /dev/null +++ b/yolo_nas_pose_m_fp32.onnx.engine.err @@ -0,0 +1,213 @@ +[12/28/2023-03:46:53] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/28/2023-03:46:53] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-03:48:16] [W] [TRT] Tactic Device request: 6262MB Available: 2381MB. Device memory is insufficient to use tactic. +[12/28/2023-03:48:16] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6262 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:48:16] [W] [TRT] Tactic Device request: 6262MB Available: 2380MB. Device memory is insufficient to use tactic. +[12/28/2023-03:48:16] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6262 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:48:16] [W] [TRT] Tactic Device request: 6262MB Available: 2380MB. 
Device memory is insufficient to use tactic. +[12/28/2023-03:48:16] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6262 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:50:09] [W] [TRT] Tactic Device request: 4711MB Available: 2450MB. Device memory is insufficient to use tactic. +[12/28/2023-03:50:09] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:50:09] [W] [TRT] Tactic Device request: 4711MB Available: 2451MB. Device memory is insufficient to use tactic. +[12/28/2023-03:50:09] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4711 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:50:09] [W] [TRT] Tactic Device request: 4711MB Available: 2451MB. Device memory is insufficient to use tactic. +[12/28/2023-03:50:09] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:50:20] [W] [TRT] Tactic Device request: 6275MB Available: 2450MB. Device memory is insufficient to use tactic. +[12/28/2023-03:50:20] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6275 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:50:20] [W] [TRT] Tactic Device request: 6275MB Available: 2450MB. Device memory is insufficient to use tactic. +[12/28/2023-03:50:20] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6275 detected for tactic 0x000000000000003c. 
+Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:50:20] [W] [TRT] Tactic Device request: 6275MB Available: 2449MB. Device memory is insufficient to use tactic. +[12/28/2023-03:50:20] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6275 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:52:42] [W] [TRT] Tactic Device request: 7056MB Available: 2415MB. Device memory is insufficient to use tactic. +[12/28/2023-03:52:42] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:52:42] [W] [TRT] Tactic Device request: 7056MB Available: 2415MB. Device memory is insufficient to use tactic. +[12/28/2023-03:52:42] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 7056 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:52:43] [W] [TRT] Tactic Device request: 7056MB Available: 2415MB. Device memory is insufficient to use tactic. +[12/28/2023-03:52:43] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:52:55] [W] [TRT] Tactic Device request: 6354MB Available: 2419MB. Device memory is insufficient to use tactic. +[12/28/2023-03:52:55] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6354 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:52:55] [W] [TRT] Tactic Device request: 6354MB Available: 2419MB. Device memory is insufficient to use tactic. 
+[12/28/2023-03:52:55] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6354 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:52:55] [W] [TRT] Tactic Device request: 6354MB Available: 2419MB. Device memory is insufficient to use tactic. +[12/28/2023-03:52:55] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6354 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:55:57] [W] [TRT] Tactic Device request: 2394MB Available: 2362MB. Device memory is insufficient to use tactic. +[12/28/2023-03:55:57] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:55:57] [W] [TRT] Tactic Device request: 2394MB Available: 2362MB. Device memory is insufficient to use tactic. +[12/28/2023-03:55:57] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2394 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:55:57] [W] [TRT] Tactic Device request: 2394MB Available: 2361MB. Device memory is insufficient to use tactic. +[12/28/2023-03:55:57] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:55:58] [W] [TRT] Tactic Device request: 2392MB Available: 2362MB. Device memory is insufficient to use tactic. +[12/28/2023-03:55:58] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-03:55:58] [W] [TRT] Tactic Device request: 2392MB Available: 2362MB. Device memory is insufficient to use tactic. +[12/28/2023-03:55:58] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2392 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:55:58] [W] [TRT] Tactic Device request: 2392MB Available: 2362MB. Device memory is insufficient to use tactic. +[12/28/2023-03:55:58] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:56:08] [W] [TRT] Tactic Device request: 4906MB Available: 2351MB. Device memory is insufficient to use tactic. +[12/28/2023-03:56:08] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4906 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:56:08] [W] [TRT] Tactic Device request: 4906MB Available: 2351MB. Device memory is insufficient to use tactic. +[12/28/2023-03:56:08] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4906 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:56:08] [W] [TRT] Tactic Device request: 4906MB Available: 2351MB. Device memory is insufficient to use tactic. +[12/28/2023-03:56:08] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4906 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:57:49] [W] [TRT] Tactic Device request: 2457MB Available: 2324MB. Device memory is insufficient to use tactic. 
+[12/28/2023-03:57:49] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:57:49] [W] [TRT] Tactic Device request: 2457MB Available: 2324MB. Device memory is insufficient to use tactic. +[12/28/2023-03:57:49] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2457 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:57:49] [W] [TRT] Tactic Device request: 2457MB Available: 2324MB. Device memory is insufficient to use tactic. +[12/28/2023-03:57:49] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:58:03] [W] [TRT] Tactic Device request: 3587MB Available: 2315MB. Device memory is insufficient to use tactic. +[12/28/2023-03:58:03] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:58:03] [W] [TRT] Tactic Device request: 3587MB Available: 2315MB. Device memory is insufficient to use tactic. +[12/28/2023-03:58:03] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3587 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:58:03] [W] [TRT] Tactic Device request: 3587MB Available: 2315MB. Device memory is insufficient to use tactic. +[12/28/2023-03:58:03] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-03:58:05] [W] [TRT] Tactic Device request: 2385MB Available: 2315MB. Device memory is insufficient to use tactic. +[12/28/2023-03:58:05] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2385 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:58:05] [W] [TRT] Tactic Device request: 2385MB Available: 2315MB. Device memory is insufficient to use tactic. +[12/28/2023-03:58:05] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2385 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:58:05] [W] [TRT] Tactic Device request: 2385MB Available: 2315MB. Device memory is insufficient to use tactic. +[12/28/2023-03:58:05] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2385 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:59:52] [W] [TRT] Tactic Device request: 3556MB Available: 2283MB. Device memory is insufficient to use tactic. +[12/28/2023-03:59:52] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:59:52] [W] [TRT] Tactic Device request: 3556MB Available: 2283MB. Device memory is insufficient to use tactic. +[12/28/2023-03:59:52] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3556 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:59:52] [W] [TRT] Tactic Device request: 3556MB Available: 2283MB. Device memory is insufficient to use tactic. 
+[12/28/2023-03:59:52] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:02:10] [W] [TRT] Tactic Device request: 2359MB Available: 2265MB. Device memory is insufficient to use tactic. +[12/28/2023-04:02:10] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2359 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:02:10] [W] [TRT] Tactic Device request: 2359MB Available: 2265MB. Device memory is insufficient to use tactic. +[12/28/2023-04:02:10] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2359 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:02:10] [W] [TRT] Tactic Device request: 2359MB Available: 2265MB. Device memory is insufficient to use tactic. +[12/28/2023-04:02:10] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2359 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:02:13] [W] [TRT] Tactic Device request: 2362MB Available: 2265MB. Device memory is insufficient to use tactic. +[12/28/2023-04:02:13] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2362 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:02:13] [W] [TRT] Tactic Device request: 2362MB Available: 2264MB. Device memory is insufficient to use tactic. +[12/28/2023-04:02:13] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2362 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-04:02:13] [W] [TRT] Tactic Device request: 2362MB Available: 2264MB. Device memory is insufficient to use tactic. +[12/28/2023-04:02:13] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2362 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:02:16] [W] [TRT] Tactic Device request: 2359MB Available: 2264MB. Device memory is insufficient to use tactic. +[12/28/2023-04:02:16] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2359 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:02:16] [W] [TRT] Tactic Device request: 2359MB Available: 2264MB. Device memory is insufficient to use tactic. +[12/28/2023-04:02:16] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2359 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:02:17] [W] [TRT] Tactic Device request: 2359MB Available: 2264MB. Device memory is insufficient to use tactic. +[12/28/2023-04:02:17] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2359 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:04:22] [W] [TRT] Tactic Device request: 3575MB Available: 2232MB. Device memory is insufficient to use tactic. +[12/28/2023-04:04:22] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3575 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:04:22] [W] [TRT] Tactic Device request: 3575MB Available: 2232MB. Device memory is insufficient to use tactic. 
+[12/28/2023-04:04:22] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3575 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:04:23] [W] [TRT] Tactic Device request: 3575MB Available: 2232MB. Device memory is insufficient to use tactic. +[12/28/2023-04:04:23] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3575 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:04:27] [W] [TRT] Tactic Device request: 2390MB Available: 2231MB. Device memory is insufficient to use tactic. +[12/28/2023-04:04:27] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2390 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:04:27] [W] [TRT] Tactic Device request: 2390MB Available: 2231MB. Device memory is insufficient to use tactic. +[12/28/2023-04:04:27] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2390 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:04:28] [W] [TRT] Tactic Device request: 2390MB Available: 2231MB. Device memory is insufficient to use tactic. +[12/28/2023-04:04:28] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2390 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:04:31] [W] [TRT] Tactic Device request: 2388MB Available: 2231MB. Device memory is insufficient to use tactic. +[12/28/2023-04:04:31] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2388 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-04:04:32] [W] [TRT] Tactic Device request: 2388MB Available: 2231MB. Device memory is insufficient to use tactic. +[12/28/2023-04:04:32] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2388 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:04:32] [W] [TRT] Tactic Device request: 2388MB Available: 2232MB. Device memory is insufficient to use tactic. +[12/28/2023-04:04:32] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2388 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:04:36] [W] [TRT] Tactic Device request: 4775MB Available: 2230MB. Device memory is insufficient to use tactic. +[12/28/2023-04:04:36] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4775 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:04:37] [W] [TRT] Tactic Device request: 4775MB Available: 2230MB. Device memory is insufficient to use tactic. +[12/28/2023-04:04:37] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4775 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:04:38] [W] [TRT] Tactic Device request: 4775MB Available: 2230MB. Device memory is insufficient to use tactic. +[12/28/2023-04:04:38] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4775 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:04:44] [W] [TRT] Tactic Device request: 4774MB Available: 2230MB. Device memory is insufficient to use tactic. 
+[12/28/2023-04:04:44] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4774 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:04:45] [W] [TRT] Tactic Device request: 4774MB Available: 2231MB. Device memory is insufficient to use tactic. +[12/28/2023-04:04:45] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4774 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:04:46] [W] [TRT] Tactic Device request: 4774MB Available: 2229MB. Device memory is insufficient to use tactic. +[12/28/2023-04:04:47] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4774 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:07:03] [W] [TRT] Tactic Device request: 2454MB Available: 2211MB. Device memory is insufficient to use tactic. +[12/28/2023-04:07:03] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2454 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:07:03] [W] [TRT] Tactic Device request: 2454MB Available: 2211MB. Device memory is insufficient to use tactic. +[12/28/2023-04:07:03] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2454 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:07:03] [W] [TRT] Tactic Device request: 2454MB Available: 2211MB. Device memory is insufficient to use tactic. +[12/28/2023-04:07:03] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2454 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-04:07:05] [W] [TRT] Tactic Device request: 2463MB Available: 2213MB. Device memory is insufficient to use tactic. +[12/28/2023-04:07:05] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2463 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:07:06] [W] [TRT] Tactic Device request: 2463MB Available: 2211MB. Device memory is insufficient to use tactic. +[12/28/2023-04:07:06] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2463 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:07:07] [W] [TRT] Tactic Device request: 2463MB Available: 2191MB. Device memory is insufficient to use tactic. +[12/28/2023-04:07:07] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2463 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:07:12] [W] [TRT] Tactic Device request: 2463MB Available: 2211MB. Device memory is insufficient to use tactic. +[12/28/2023-04:07:12] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2463 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:07:13] [W] [TRT] Tactic Device request: 2463MB Available: 2217MB. Device memory is insufficient to use tactic. +[12/28/2023-04:07:13] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2463 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:07:14] [W] [TRT] Tactic Device request: 2463MB Available: 2217MB. Device memory is insufficient to use tactic. 
+[12/28/2023-04:07:14] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2463 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:07:52] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. +[12/28/2023-04:07:52] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/28/2023-04:07:52] [W] * GPU compute time is unstable, with coefficient of variance = 1.7282%. +[12/28/2023-04:07:52] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. diff --git a/yolo_nas_pose_m_fp32.onnx.engine.log b/yolo_nas_pose_m_fp32.onnx.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..f246cabb801358cd51eb8ed42021a6fc55e0183a --- /dev/null +++ b/yolo_nas_pose_m_fp32.onnx.engine.log @@ -0,0 +1,306 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_m_fp32.onnx --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_m_fp32.onnx.engine +[12/28/2023-03:46:47] [I] === Model Options === +[12/28/2023-03:46:47] [I] Format: ONNX +[12/28/2023-03:46:47] [I] Model: yolo_nas_pose_m_fp32.onnx +[12/28/2023-03:46:47] [I] Output: +[12/28/2023-03:46:47] [I] === Build Options === +[12/28/2023-03:46:47] [I] Max batch: explicit batch +[12/28/2023-03:46:47] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-03:46:47] [I] minTiming: 1 +[12/28/2023-03:46:47] [I] avgTiming: 8 +[12/28/2023-03:46:47] [I] Precision: FP32 +[12/28/2023-03:46:47] [I] LayerPrecisions: +[12/28/2023-03:46:47] [I] Calibration: +[12/28/2023-03:46:47] [I] Refit: Disabled +[12/28/2023-03:46:47] [I] Sparsity: Disabled +[12/28/2023-03:46:47] [I] Safe mode: Disabled +[12/28/2023-03:46:47] [I] DirectIO mode: Disabled +[12/28/2023-03:46:47] 
[I] Restricted mode: Disabled +[12/28/2023-03:46:47] [I] Build only: Disabled +[12/28/2023-03:46:47] [I] Save engine: yolo_nas_pose_m_fp32.onnx.engine +[12/28/2023-03:46:47] [I] Load engine: +[12/28/2023-03:46:47] [I] Profiling verbosity: 0 +[12/28/2023-03:46:47] [I] Tactic sources: Using default tactic sources +[12/28/2023-03:46:47] [I] timingCacheMode: local +[12/28/2023-03:46:47] [I] timingCacheFile: +[12/28/2023-03:46:47] [I] Heuristic: Disabled +[12/28/2023-03:46:47] [I] Preview Features: Use default preview flags. +[12/28/2023-03:46:47] [I] Input(s)s format: fp32:CHW +[12/28/2023-03:46:47] [I] Output(s)s format: fp32:CHW +[12/28/2023-03:46:47] [I] Input build shapes: model +[12/28/2023-03:46:47] [I] Input calibration shapes: model +[12/28/2023-03:46:47] [I] === System Options === +[12/28/2023-03:46:47] [I] Device: 0 +[12/28/2023-03:46:47] [I] DLACore: +[12/28/2023-03:46:47] [I] Plugins: +[12/28/2023-03:46:47] [I] === Inference Options === +[12/28/2023-03:46:47] [I] Batch: Explicit +[12/28/2023-03:46:47] [I] Input inference shapes: model +[12/28/2023-03:46:47] [I] Iterations: 10 +[12/28/2023-03:46:47] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-03:46:47] [I] Sleep time: 0ms +[12/28/2023-03:46:47] [I] Idle time: 0ms +[12/28/2023-03:46:47] [I] Streams: 1 +[12/28/2023-03:46:47] [I] ExposeDMA: Disabled +[12/28/2023-03:46:47] [I] Data transfers: Enabled +[12/28/2023-03:46:47] [I] Spin-wait: Disabled +[12/28/2023-03:46:47] [I] Multithreading: Disabled +[12/28/2023-03:46:47] [I] CUDA Graph: Disabled +[12/28/2023-03:46:47] [I] Separate profiling: Disabled +[12/28/2023-03:46:47] [I] Time Deserialize: Disabled +[12/28/2023-03:46:47] [I] Time Refit: Disabled +[12/28/2023-03:46:47] [I] NVTX verbosity: 0 +[12/28/2023-03:46:47] [I] Persistent Cache Ratio: 0 +[12/28/2023-03:46:47] [I] Inputs: +[12/28/2023-03:46:47] [I] === Reporting Options === +[12/28/2023-03:46:47] [I] Verbose: Disabled +[12/28/2023-03:46:47] [I] Averages: 100 inferences +[12/28/2023-03:46:47] [I] 
Percentiles: 90,95,99 +[12/28/2023-03:46:47] [I] Dump refittable layers:Disabled +[12/28/2023-03:46:47] [I] Dump output: Disabled +[12/28/2023-03:46:47] [I] Profile: Disabled +[12/28/2023-03:46:47] [I] Export timing to JSON file: +[12/28/2023-03:46:47] [I] Export output to JSON file: +[12/28/2023-03:46:47] [I] Export profile to JSON file: +[12/28/2023-03:46:47] [I] +[12/28/2023-03:46:47] [I] === Device Information === +[12/28/2023-03:46:47] [I] Selected Device: Orin +[12/28/2023-03:46:47] [I] Compute Capability: 8.7 +[12/28/2023-03:46:47] [I] SMs: 8 +[12/28/2023-03:46:47] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-03:46:47] [I] Device Global Memory: 7471 MiB +[12/28/2023-03:46:47] [I] Shared Memory per SM: 164 KiB +[12/28/2023-03:46:47] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-03:46:47] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-03:46:47] [I] +[12/28/2023-03:46:47] [I] TensorRT version: 8.5.2 +[12/28/2023-03:46:48] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 3179 (MiB) +[12/28/2023-03:46:50] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +428, now: CPU 574, GPU 3629 (MiB) +[12/28/2023-03:46:51] [I] Start parsing network model +[12/28/2023-03:46:53] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-03:46:53] [I] [TRT] Input filename: yolo_nas_pose_m_fp32.onnx +[12/28/2023-03:46:53] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-03:46:53] [I] [TRT] Opset version: 17 +[12/28/2023-03:46:53] [I] [TRT] Producer name: pytorch +[12/28/2023-03:46:53] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-03:46:53] [I] [TRT] Domain: +[12/28/2023-03:46:53] [I] [TRT] Model version: 0 +[12/28/2023-03:46:53] [I] [TRT] Doc string: +[12/28/2023-03:46:53] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-03:46:53] [I] Finish parsing network model +[12/28/2023-03:46:53] [I] [TRT] ---------- Layers Running on DLA ---------- 
+[12/28/2023-03:46:53] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 407) [Constant] +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 408) [Constant] +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 409) [Constant] +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stem/conv/rbr_reparam/Conv + /model/backbone/stem/conv/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/downsample/rbr_reparam/Conv + /model/backbone/stage1/downsample/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv2/conv/Conv + /model/backbone/stage1/blocks/conv2/act/Relu || /model/backbone/stage1/blocks/conv1/conv/Conv + /model/backbone/stage1/blocks/conv1/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 15) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + 
/model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 23) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv3/conv/Conv + /model/backbone/stage1/blocks/conv3/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_skip2/conv/Conv + /model/neck/neck2/reduce_skip2/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/downsample/rbr_reparam/Conv + /model/backbone/stage2/downsample/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/downsample/conv/Conv + /model/neck/neck2/downsample/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv2/conv/Conv + /model/backbone/stage2/blocks/conv2/act/Relu || /model/backbone/stage2/blocks/conv1/conv/Conv + /model/backbone/stage2/blocks/conv1/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] 
CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 44) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 52) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 60) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] COPY: 
/model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv3/conv/Conv + /model/backbone/stage2/blocks/conv3/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip2/conv/Conv + /model/neck/neck1/reduce_skip2/act/Relu || /model/neck/neck2/reduce_skip1/conv/Conv + /model/neck/neck2/reduce_skip1/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/downsample/rbr_reparam/Conv + /model/backbone/stage3/downsample/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/downsample/conv/Conv + /model/neck/neck1/downsample/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv2/conv/Conv + /model/backbone/stage3/blocks/conv2/act/Relu || /model/backbone/stage3/blocks/conv1/conv/Conv + /model/backbone/stage3/blocks/conv1/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 83) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 91) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 99) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 107) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.4.alpha + (Unnamed Layer* 115) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Add) +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add_output_0 copy +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add_output_0 copy +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv3/conv/Conv + /model/backbone/stage3/blocks/conv3/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip1/conv/Conv + /model/neck/neck1/reduce_skip1/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/downsample/rbr_reparam/Conv + /model/backbone/stage4/downsample/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/backbone/stage4/blocks/conv2/conv/Conv + /model/backbone/stage4/blocks/conv2/act/Relu || /model/backbone/stage4/blocks/conv1/conv/Conv + /model/backbone/stage4/blocks/conv1/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 134) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 142) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv3/conv/Conv + /model/backbone/stage4/blocks/conv3/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv1/conv/Conv + /model/backbone/context_module/cv1/act/Relu 
+[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/cv1/act/Relu_output_0 copy +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv2/conv/Conv + /model/backbone/context_module/cv2/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/conv/conv/Conv + /model/neck/neck1/conv/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck1/upsample/ConvTranspose +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_after_concat/conv/Conv + /model/neck/neck1/reduce_after_concat/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv2/conv/Conv + /model/neck/neck1/blocks/conv2/act/Relu || /model/neck/neck1/blocks/conv1/conv/Conv + /model/neck/neck1/blocks/conv1/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 171) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + 
/model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 179) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv3/conv/Conv + /model/neck/neck1/blocks/conv3/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/conv/conv/Conv + /model/neck/neck2/conv/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck2/upsample/ConvTranspose +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/reduce_skip1/act/Relu_output_0 copy +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_after_concat/conv/Conv + /model/neck/neck2/reduce_after_concat/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv2/conv/Conv + /model/neck/neck2/blocks/conv2/act/Relu || /model/neck/neck2/blocks/conv1/conv/Conv + /model/neck/neck2/blocks/conv1/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + 
(Unnamed Layer* 200) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 208) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 216) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv3/conv/Conv + /model/neck/neck2/blocks/conv3/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/bbox_stem/seq/conv/Conv + /model/heads/head1/bbox_stem/seq/act/Relu || /model/heads/head1/pose_stem/seq/conv/Conv + /model/heads/head1/pose_stem/seq/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/neck/neck3/conv/conv/Conv + /model/neck/neck3/conv/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head1/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head1/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv2/conv/Conv + /model/neck/neck3/blocks/conv2/act/Relu || /model/neck/neck3/blocks/conv1/conv/Conv + /model/neck/neck3/blocks/conv1/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/cls_pred/Conv +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_pred/Conv +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_pred/Conv +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 247) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) 
+[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 270) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv3/conv/Conv + /model/neck/neck3/blocks/conv3/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_stem/seq/conv/Conv + /model/heads/head2/pose_stem/seq/act/Relu || /model/heads/head2/bbox_stem/seq/conv/Conv + /model/heads/head2/bbox_stem/seq/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/conv/conv/Conv + /model/neck/neck4/conv/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head2/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head2/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv2/conv/Conv + /model/neck/neck4/blocks/conv2/act/Relu || /model/neck/neck4/blocks/conv1/conv/Conv + /model/neck/neck4/blocks/conv1/act/Relu 
+[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/cls_pred/Conv +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_pred/Conv +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_pred/Conv +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 301) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 324) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] 
CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.2.alpha + (Unnamed Layer* 332) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv3/conv/Conv + /model/neck/neck4/blocks/conv3/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/bbox_stem/seq/conv/Conv + /model/heads/head3/bbox_stem/seq/act/Relu || /model/heads/head3/pose_stem/seq/conv/Conv + /model/heads/head3/pose_stem/seq/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head3/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head3/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/cls_pred/Conv +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_pred/Conv +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/28/2023-03:46:53] [I] 
[TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.2/seq/act/Relu +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_pred/Conv +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice_1.../post_process/Reshape_2]} +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] NMS: batched_nms_26 +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 411) [NMS]_1_output[DevicetoShapeHostCopy] +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/28/2023-03:46:53] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] +[12/28/2023-03:46:55] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +715, now: CPU 1279, GPU 4619 (MiB) +[12/28/2023-03:46:55] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +83, GPU +112, now: CPU 1362, GPU 4731 (MiB) +[12/28/2023-03:46:55] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored. +[12/28/2023-04:07:26] [I] [TRT] Total Activation Memory: 8204060160 +[12/28/2023-04:07:26] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/28/2023-04:07:32] [I] [TRT] Total Host Persistent Memory: 339472 +[12/28/2023-04:07:32] [I] [TRT] Total Device Persistent Memory: 41472 +[12/28/2023-04:07:32] [I] [TRT] Total Scratch Memory: 134217728 +[12/28/2023-04:07:32] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 30 MiB, GPU 2131 MiB +[12/28/2023-04:07:32] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 192 steps to complete. 
+[12/28/2023-04:07:32] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 116.441ms to assign 16 blocks to 192 nodes requiring 178505216 bytes. +[12/28/2023-04:07:32] [I] [TRT] Total Activation Memory: 178505216 +[12/28/2023-04:07:34] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1706, GPU 5423 (MiB) +[12/28/2023-04:07:34] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +20, GPU +256, now: CPU 20, GPU 256 (MiB) +[12/28/2023-04:07:35] [I] Engine built in 1247.53 sec. +[12/28/2023-04:07:36] [I] [TRT] Loaded engine size: 150 MiB +[12/28/2023-04:07:36] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1392, GPU 4815 (MiB) +[12/28/2023-04:07:36] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +148, now: CPU 0, GPU 148 (MiB) +[12/28/2023-04:07:36] [I] Engine deserialized in 0.27583 sec. +[12/28/2023-04:07:36] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1392, GPU 4815 (MiB) +[12/28/2023-04:07:36] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +170, now: CPU 0, GPU 318 (MiB) +[12/28/2023-04:07:36] [I] Setting persistentCacheLimit to 0 bytes. 
+[12/28/2023-04:07:36] [I] Using random values for input onnx::Cast_0 +[12/28/2023-04:07:36] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/28/2023-04:07:36] [I] Using random values for output graph2_flat_predictions +[12/28/2023-04:07:36] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/28/2023-04:07:36] [I] Starting inference +[12/28/2023-04:07:52] [I] Warmup completed 4 queries over 200 ms +[12/28/2023-04:07:52] [I] Timing trace has 286 queries over 15.0841 s +[12/28/2023-04:07:52] [I] +[12/28/2023-04:07:52] [I] === Trace details === +[12/28/2023-04:07:52] [I] Trace averages of 100 runs: +[12/28/2023-04:07:52] [I] Average on 100 runs - GPU latency: 52.6677 ms - Host latency: 52.7709 ms (enqueue 52.6762 ms) +[12/28/2023-04:07:52] [I] Average on 100 runs - GPU latency: 52.6271 ms - Host latency: 52.7354 ms (enqueue 52.6689 ms) +[12/28/2023-04:07:52] [I] +[12/28/2023-04:07:52] [I] === Performance summary === +[12/28/2023-04:07:52] [I] Throughput: 18.9603 qps +[12/28/2023-04:07:52] [I] Latency: min = 50.0151 ms, max = 56.8789 ms, mean = 52.7465 ms, median = 52.6514 ms, percentile(90%) = 53.687 ms, percentile(95%) = 54.0391 ms, percentile(99%) = 56.5283 ms +[12/28/2023-04:07:52] [I] Enqueue Time: min = 49.9697 ms, max = 57.2051 ms, mean = 52.6699 ms, median = 52.5957 ms, percentile(90%) = 53.8911 ms, percentile(95%) = 54.0244 ms, percentile(99%) = 56.3145 ms +[12/28/2023-04:07:52] [I] H2D Latency: min = 0.0810547 ms, max = 0.110352 ms, mean = 0.0939393 ms, median = 0.097168 ms, percentile(90%) = 0.101562 ms, percentile(95%) = 0.101562 ms, percentile(99%) = 0.109375 ms +[12/28/2023-04:07:52] [I] GPU Compute Time: min = 49.8867 ms, max = 56.7793 ms, mean = 52.6403 ms, median = 52.5444 ms, percentile(90%) = 53.5859 ms, percentile(95%) = 53.9502 ms, percentile(99%) = 56.4229 ms +[12/28/2023-04:07:52] [I] D2H Latency: min = 0.00292969 ms, max = 0.0556641 ms, mean = 0.0122266 ms, median = 0.0126953 ms, 
percentile(90%) = 0.0205078 ms, percentile(95%) = 0.0234375 ms, percentile(99%) = 0.0366211 ms +[12/28/2023-04:07:52] [I] Total Host Walltime: 15.0841 s +[12/28/2023-04:07:52] [I] Total GPU Compute Time: 15.0551 s +[12/28/2023-04:07:52] [I] Explanations of the performance metrics are printed in the verbose logs. +[12/28/2023-04:07:52] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_m_fp32.onnx --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_m_fp32.onnx.engine diff --git a/yolo_nas_pose_m_fp32.onnx.fp16.engine b/yolo_nas_pose_m_fp32.onnx.fp16.engine new file mode 100644 index 0000000000000000000000000000000000000000..c400a4e423c66fbc1c49a764f12af003fabcaecb --- /dev/null +++ b/yolo_nas_pose_m_fp32.onnx.fp16.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:812d9036a993eb2f87be02d6a9490e6b4e756f5b04f960a44dfcec3ea9fff6a5 +size 79680845 diff --git a/yolo_nas_pose_m_fp32.onnx.fp16.engine.err b/yolo_nas_pose_m_fp32.onnx.fp16.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..76eb12d0fc697104a8252b802d03bcf60eb2019b --- /dev/null +++ b/yolo_nas_pose_m_fp32.onnx.fp16.engine.err @@ -0,0 +1,356 @@ +[12/28/2023-04:08:05] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/28/2023-04:08:05] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-04:12:32] [W] [TRT] Tactic Device request: 6262MB Available: 2837MB. Device memory is insufficient to use tactic. +[12/28/2023-04:12:32] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6262 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:12:32] [W] [TRT] Tactic Device request: 6262MB Available: 2836MB. 
Device memory is insufficient to use tactic. +[12/28/2023-04:12:32] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6262 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:12:32] [W] [TRT] Tactic Device request: 6262MB Available: 2836MB. Device memory is insufficient to use tactic. +[12/28/2023-04:12:32] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6262 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:12:34] [W] [TRT] Tactic Device request: 6251MB Available: 2835MB. Device memory is insufficient to use tactic. +[12/28/2023-04:12:34] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6251 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:12:35] [W] [TRT] Tactic Device request: 6251MB Available: 2835MB. Device memory is insufficient to use tactic. +[12/28/2023-04:12:35] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 6251 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:17:37] [W] [TRT] Tactic Device request: 4711MB Available: 2387MB. Device memory is insufficient to use tactic. +[12/28/2023-04:17:37] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:17:37] [W] [TRT] Tactic Device request: 4711MB Available: 2387MB. Device memory is insufficient to use tactic. +[12/28/2023-04:17:37] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4711 detected for tactic 0x000000000000003c. 
+Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:17:37] [W] [TRT] Tactic Device request: 4711MB Available: 2387MB. Device memory is insufficient to use tactic. +[12/28/2023-04:17:37] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:17:39] [W] [TRT] Tactic Device request: 4701MB Available: 2386MB. Device memory is insufficient to use tactic. +[12/28/2023-04:17:39] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4701 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:17:39] [W] [TRT] Tactic Device request: 4701MB Available: 2386MB. Device memory is insufficient to use tactic. +[12/28/2023-04:17:39] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 4701 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:17:54] [W] [TRT] Tactic Device request: 6275MB Available: 2383MB. Device memory is insufficient to use tactic. +[12/28/2023-04:17:54] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6275 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:17:54] [W] [TRT] Tactic Device request: 6275MB Available: 2383MB. Device memory is insufficient to use tactic. +[12/28/2023-04:17:54] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6275 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:17:54] [W] [TRT] Tactic Device request: 6275MB Available: 2383MB. Device memory is insufficient to use tactic. 
+[12/28/2023-04:17:54] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6275 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:17:56] [W] [TRT] Tactic Device request: 6270MB Available: 2384MB. Device memory is insufficient to use tactic. +[12/28/2023-04:17:56] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6270 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:17:56] [W] [TRT] Tactic Device request: 6270MB Available: 2384MB. Device memory is insufficient to use tactic. +[12/28/2023-04:17:56] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 6270 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:24:25] [W] [TRT] Tactic Device request: 7056MB Available: 2283MB. Device memory is insufficient to use tactic. +[12/28/2023-04:24:25] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:24:25] [W] [TRT] Tactic Device request: 7056MB Available: 2283MB. Device memory is insufficient to use tactic. +[12/28/2023-04:24:25] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 7056 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:24:25] [W] [TRT] Tactic Device request: 7056MB Available: 2283MB. Device memory is insufficient to use tactic. +[12/28/2023-04:24:25] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-04:24:27] [W] [TRT] Tactic Device request: 7050MB Available: 2284MB. Device memory is insufficient to use tactic. +[12/28/2023-04:24:27] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7050 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:24:27] [W] [TRT] Tactic Device request: 7050MB Available: 2284MB. Device memory is insufficient to use tactic. +[12/28/2023-04:24:27] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 7050 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:24:44] [W] [TRT] Tactic Device request: 6354MB Available: 2284MB. Device memory is insufficient to use tactic. +[12/28/2023-04:24:44] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6354 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:24:44] [W] [TRT] Tactic Device request: 6354MB Available: 2284MB. Device memory is insufficient to use tactic. +[12/28/2023-04:24:44] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6354 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:24:44] [W] [TRT] Tactic Device request: 6354MB Available: 2284MB. Device memory is insufficient to use tactic. +[12/28/2023-04:24:44] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6354 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:24:46] [W] [TRT] Tactic Device request: 6351MB Available: 2283MB. Device memory is insufficient to use tactic. 
+[12/28/2023-04:24:46] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6351 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:24:46] [W] [TRT] Tactic Device request: 6351MB Available: 2283MB. Device memory is insufficient to use tactic. +[12/28/2023-04:24:46] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 6351 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:33:29] [W] [TRT] Tactic Device request: 2394MB Available: 2366MB. Device memory is insufficient to use tactic. +[12/28/2023-04:33:29] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:33:29] [W] [TRT] Tactic Device request: 2394MB Available: 2366MB. Device memory is insufficient to use tactic. +[12/28/2023-04:33:29] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2394 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:33:29] [W] [TRT] Tactic Device request: 2394MB Available: 2366MB. Device memory is insufficient to use tactic. +[12/28/2023-04:33:29] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:33:30] [W] [TRT] Tactic Device request: 2392MB Available: 2366MB. Device memory is insufficient to use tactic. +[12/28/2023-04:33:30] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-04:33:30] [W] [TRT] Tactic Device request: 2392MB Available: 2366MB. Device memory is insufficient to use tactic. +[12/28/2023-04:33:30] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2392 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:33:30] [W] [TRT] Tactic Device request: 2392MB Available: 2366MB. Device memory is insufficient to use tactic. +[12/28/2023-04:33:30] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:33:31] [W] [TRT] Tactic Device request: 2391MB Available: 2366MB. Device memory is insufficient to use tactic. +[12/28/2023-04:33:31] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2391 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:33:31] [W] [TRT] Tactic Device request: 2391MB Available: 2367MB. Device memory is insufficient to use tactic. +[12/28/2023-04:33:31] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2391 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:33:31] [W] [TRT] Tactic Device request: 2390MB Available: 2367MB. Device memory is insufficient to use tactic. +[12/28/2023-04:33:31] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2390 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:33:31] [W] [TRT] Tactic Device request: 2390MB Available: 2367MB. Device memory is insufficient to use tactic. 
+[12/28/2023-04:33:31] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2390 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:33:45] [W] [TRT] Tactic Device request: 4906MB Available: 2123MB. Device memory is insufficient to use tactic. +[12/28/2023-04:33:45] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4906 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:33:45] [W] [TRT] Tactic Device request: 4906MB Available: 2122MB. Device memory is insufficient to use tactic. +[12/28/2023-04:33:45] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4906 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:33:45] [W] [TRT] Tactic Device request: 4906MB Available: 2123MB. Device memory is insufficient to use tactic. +[12/28/2023-04:33:45] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4906 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:33:47] [W] [TRT] Tactic Device request: 4905MB Available: 2125MB. Device memory is insufficient to use tactic. +[12/28/2023-04:33:47] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4905 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:33:47] [W] [TRT] Tactic Device request: 4905MB Available: 2123MB. Device memory is insufficient to use tactic. +[12/28/2023-04:33:47] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 4905 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-04:38:43] [W] [TRT] Tactic Device request: 2457MB Available: 2422MB. Device memory is insufficient to use tactic. +[12/28/2023-04:38:43] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:38:43] [W] [TRT] Tactic Device request: 2457MB Available: 2422MB. Device memory is insufficient to use tactic. +[12/28/2023-04:38:43] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2457 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:38:43] [W] [TRT] Tactic Device request: 2457MB Available: 2422MB. Device memory is insufficient to use tactic. +[12/28/2023-04:38:43] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:38:44] [W] [TRT] Tactic Device request: 2456MB Available: 2423MB. Device memory is insufficient to use tactic. +[12/28/2023-04:38:44] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2456 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:38:44] [W] [TRT] Tactic Device request: 2456MB Available: 2423MB. Device memory is insufficient to use tactic. +[12/28/2023-04:38:44] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2456 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:39:04] [W] [TRT] Tactic Device request: 3587MB Available: 2036MB. Device memory is insufficient to use tactic. 
+[12/28/2023-04:39:05] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:39:05] [W] [TRT] Tactic Device request: 3587MB Available: 2036MB. Device memory is insufficient to use tactic. +[12/28/2023-04:39:05] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3587 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:39:05] [W] [TRT] Tactic Device request: 3587MB Available: 2036MB. Device memory is insufficient to use tactic. +[12/28/2023-04:39:05] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:39:06] [W] [TRT] Tactic Device request: 3585MB Available: 2078MB. Device memory is insufficient to use tactic. +[12/28/2023-04:39:06] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3585 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:39:06] [W] [TRT] Tactic Device request: 3585MB Available: 2078MB. Device memory is insufficient to use tactic. +[12/28/2023-04:39:06] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3585 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:39:07] [W] [TRT] Tactic Device request: 2385MB Available: 2077MB. Device memory is insufficient to use tactic. +[12/28/2023-04:39:07] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2385 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-04:39:07] [W] [TRT] Tactic Device request: 2385MB Available: 2077MB. Device memory is insufficient to use tactic. +[12/28/2023-04:39:07] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2385 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:39:07] [W] [TRT] Tactic Device request: 2385MB Available: 2075MB. Device memory is insufficient to use tactic. +[12/28/2023-04:39:07] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2385 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:39:08] [W] [TRT] Tactic Device request: 2384MB Available: 2075MB. Device memory is insufficient to use tactic. +[12/28/2023-04:39:08] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2384 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:39:08] [W] [TRT] Tactic Device request: 2384MB Available: 2075MB. Device memory is insufficient to use tactic. +[12/28/2023-04:39:08] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2384 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:44:13] [W] [TRT] Tactic Device request: 3556MB Available: 1938MB. Device memory is insufficient to use tactic. +[12/28/2023-04:44:13] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:44:13] [W] [TRT] Tactic Device request: 3556MB Available: 1938MB. Device memory is insufficient to use tactic. 
+[12/28/2023-04:44:13] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3556 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:44:14] [W] [TRT] Tactic Device request: 3556MB Available: 1938MB. Device memory is insufficient to use tactic. +[12/28/2023-04:44:14] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:44:15] [W] [TRT] Tactic Device request: 3551MB Available: 1939MB. Device memory is insufficient to use tactic. +[12/28/2023-04:44:15] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3551 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:44:15] [W] [TRT] Tactic Device request: 3551MB Available: 1939MB. Device memory is insufficient to use tactic. +[12/28/2023-04:44:15] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3551 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:50:47] [W] [TRT] Tactic Device request: 2359MB Available: 2030MB. Device memory is insufficient to use tactic. +[12/28/2023-04:50:47] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2359 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:50:47] [W] [TRT] Tactic Device request: 2359MB Available: 2030MB. Device memory is insufficient to use tactic. +[12/28/2023-04:50:47] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2359 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-04:50:47] [W] [TRT] Tactic Device request: 2359MB Available: 2030MB. Device memory is insufficient to use tactic. +[12/28/2023-04:50:47] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2359 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:50:48] [W] [TRT] Tactic Device request: 2355MB Available: 2031MB. Device memory is insufficient to use tactic. +[12/28/2023-04:50:48] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2355 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:50:48] [W] [TRT] Tactic Device request: 2355MB Available: 2031MB. Device memory is insufficient to use tactic. +[12/28/2023-04:50:48] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2355 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:50:52] [W] [TRT] Tactic Device request: 2362MB Available: 2030MB. Device memory is insufficient to use tactic. +[12/28/2023-04:50:52] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2362 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:50:53] [W] [TRT] Tactic Device request: 2362MB Available: 2030MB. Device memory is insufficient to use tactic. +[12/28/2023-04:50:53] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2362 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:50:53] [W] [TRT] Tactic Device request: 2362MB Available: 2029MB. Device memory is insufficient to use tactic. 
+[12/28/2023-04:50:53] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2362 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:50:56] [W] [TRT] Tactic Device request: 2357MB Available: 2031MB. Device memory is insufficient to use tactic. +[12/28/2023-04:50:56] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2357 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:50:56] [W] [TRT] Tactic Device request: 2357MB Available: 2031MB. Device memory is insufficient to use tactic. +[12/28/2023-04:50:56] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2357 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:50:57] [W] [TRT] Tactic Device request: 2359MB Available: 2032MB. Device memory is insufficient to use tactic. +[12/28/2023-04:50:57] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2359 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:50:58] [W] [TRT] Tactic Device request: 2359MB Available: 2032MB. Device memory is insufficient to use tactic. +[12/28/2023-04:50:58] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2359 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:50:58] [W] [TRT] Tactic Device request: 2359MB Available: 2032MB. Device memory is insufficient to use tactic. +[12/28/2023-04:50:58] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2359 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-04:51:01] [W] [TRT] Tactic Device request: 2356MB Available: 2031MB. Device memory is insufficient to use tactic. +[12/28/2023-04:51:01] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2356 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:51:01] [W] [TRT] Tactic Device request: 2356MB Available: 2031MB. Device memory is insufficient to use tactic. +[12/28/2023-04:51:01] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2356 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:56:38] [W] [TRT] Tactic Device request: 3575MB Available: 1948MB. Device memory is insufficient to use tactic. +[12/28/2023-04:56:38] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3575 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:56:38] [W] [TRT] Tactic Device request: 3575MB Available: 1948MB. Device memory is insufficient to use tactic. +[12/28/2023-04:56:38] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3575 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:56:38] [W] [TRT] Tactic Device request: 3575MB Available: 1948MB. Device memory is insufficient to use tactic. +[12/28/2023-04:56:39] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3575 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:56:40] [W] [TRT] Tactic Device request: 3572MB Available: 1948MB. Device memory is insufficient to use tactic. 
+[12/28/2023-04:56:40] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3572 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:56:40] [W] [TRT] Tactic Device request: 3572MB Available: 1948MB. Device memory is insufficient to use tactic. +[12/28/2023-04:56:40] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3572 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:56:45] [W] [TRT] Tactic Device request: 2390MB Available: 1931MB. Device memory is insufficient to use tactic. +[12/28/2023-04:56:45] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2390 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:56:46] [W] [TRT] Tactic Device request: 2390MB Available: 1931MB. Device memory is insufficient to use tactic. +[12/28/2023-04:56:46] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2390 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:56:46] [W] [TRT] Tactic Device request: 2390MB Available: 1931MB. Device memory is insufficient to use tactic. +[12/28/2023-04:56:46] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2390 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:56:49] [W] [TRT] Tactic Device request: 2387MB Available: 1932MB. Device memory is insufficient to use tactic. +[12/28/2023-04:56:49] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2387 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-04:56:50] [W] [TRT] Tactic Device request: 2387MB Available: 1932MB. Device memory is insufficient to use tactic. +[12/28/2023-04:56:50] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2387 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:56:52] [W] [TRT] Tactic Device request: 2388MB Available: 1932MB. Device memory is insufficient to use tactic. +[12/28/2023-04:56:52] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2388 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:56:52] [W] [TRT] Tactic Device request: 2388MB Available: 1933MB. Device memory is insufficient to use tactic. +[12/28/2023-04:56:52] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2388 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:56:53] [W] [TRT] Tactic Device request: 2388MB Available: 1932MB. Device memory is insufficient to use tactic. +[12/28/2023-04:56:53] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2388 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:56:56] [W] [TRT] Tactic Device request: 2386MB Available: 1932MB. Device memory is insufficient to use tactic. +[12/28/2023-04:56:56] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2386 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:56:56] [W] [TRT] Tactic Device request: 2386MB Available: 1932MB. Device memory is insufficient to use tactic. 
+[12/28/2023-04:56:56] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2386 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:56:59] [W] [TRT] Tactic Device request: 4775MB Available: 1931MB. Device memory is insufficient to use tactic. +[12/28/2023-04:56:59] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4775 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:57:00] [W] [TRT] Tactic Device request: 4775MB Available: 1931MB. Device memory is insufficient to use tactic. +[12/28/2023-04:57:00] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4775 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:57:01] [W] [TRT] Tactic Device request: 4775MB Available: 1931MB. Device memory is insufficient to use tactic. +[12/28/2023-04:57:01] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4775 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:57:06] [W] [TRT] Tactic Device request: 4772MB Available: 1931MB. Device memory is insufficient to use tactic. +[12/28/2023-04:57:06] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4772 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:57:07] [W] [TRT] Tactic Device request: 4772MB Available: 1931MB. Device memory is insufficient to use tactic. +[12/28/2023-04:57:07] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4772 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-04:57:11] [W] [TRT] Tactic Device request: 4774MB Available: 1926MB. Device memory is insufficient to use tactic. +[12/28/2023-04:57:11] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4774 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:57:12] [W] [TRT] Tactic Device request: 4774MB Available: 1926MB. Device memory is insufficient to use tactic. +[12/28/2023-04:57:12] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4774 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:57:13] [W] [TRT] Tactic Device request: 4774MB Available: 1933MB. Device memory is insufficient to use tactic. +[12/28/2023-04:57:13] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4774 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:57:18] [W] [TRT] Tactic Device request: 4771MB Available: 1930MB. Device memory is insufficient to use tactic. +[12/28/2023-04:57:18] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4771 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-04:57:19] [W] [TRT] Tactic Device request: 4771MB Available: 1931MB. Device memory is insufficient to use tactic. +[12/28/2023-04:57:19] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4771 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:03:46] [W] [TRT] Tactic Device request: 2454MB Available: 1885MB. Device memory is insufficient to use tactic. 
+[12/28/2023-05:03:46] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2454 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:03:46] [W] [TRT] Tactic Device request: 2454MB Available: 1885MB. Device memory is insufficient to use tactic. +[12/28/2023-05:03:46] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2454 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:03:46] [W] [TRT] Tactic Device request: 2454MB Available: 1885MB. Device memory is insufficient to use tactic. +[12/28/2023-05:03:46] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2454 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:03:47] [W] [TRT] Tactic Device request: 2453MB Available: 1885MB. Device memory is insufficient to use tactic. +[12/28/2023-05:03:47] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2453 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:03:47] [W] [TRT] Tactic Device request: 2453MB Available: 1886MB. Device memory is insufficient to use tactic. +[12/28/2023-05:03:47] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2453 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:03:49] [W] [TRT] Tactic Device request: 2463MB Available: 1884MB. Device memory is insufficient to use tactic. +[12/28/2023-05:03:49] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2463 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-05:03:50] [W] [TRT] Tactic Device request: 2463MB Available: 1884MB. Device memory is insufficient to use tactic. +[12/28/2023-05:03:50] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2463 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:03:51] [W] [TRT] Tactic Device request: 2463MB Available: 1882MB. Device memory is insufficient to use tactic. +[12/28/2023-05:03:51] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2463 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:03:55] [W] [TRT] Tactic Device request: 2462MB Available: 1882MB. Device memory is insufficient to use tactic. +[12/28/2023-05:03:55] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2462 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:03:55] [W] [TRT] Tactic Device request: 2462MB Available: 1883MB. Device memory is insufficient to use tactic. +[12/28/2023-05:03:55] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2462 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:03:59] [W] [TRT] Tactic Device request: 2463MB Available: 1881MB. Device memory is insufficient to use tactic. +[12/28/2023-05:03:59] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2463 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:04:00] [W] [TRT] Tactic Device request: 2463MB Available: 1881MB. Device memory is insufficient to use tactic. 
+[12/28/2023-05:04:00] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2463 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:04:00] [W] [TRT] Tactic Device request: 2463MB Available: 1881MB. Device memory is insufficient to use tactic. +[12/28/2023-05:04:00] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2463 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:04:05] [W] [TRT] Tactic Device request: 2462MB Available: 1874MB. Device memory is insufficient to use tactic. +[12/28/2023-05:04:05] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2462 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:04:05] [W] [TRT] Tactic Device request: 2462MB Available: 1872MB. Device memory is insufficient to use tactic. +[12/28/2023-05:04:05] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2462 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-05:04:46] [W] [TRT] TensorRT encountered issues when converting weights between types and that could affect accuracy. +[12/28/2023-05:04:46] [W] [TRT] If this is not the desired behavior, please modify the weights or retrain with regularization to adjust the magnitude of the weights. +[12/28/2023-05:04:46] [W] [TRT] Check verbose logs for the list of affected weights. +[12/28/2023-05:04:46] [W] [TRT] - 103 weights are affected by this issue: Detected subnormal FP16 values. +[12/28/2023-05:04:46] [W] [TRT] - 33 weights are affected by this issue: Detected values less than smallest positive FP16 subnormal value and converted them to the FP16 minimum subnormalized value. 
+[12/28/2023-05:05:03] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. +[12/28/2023-05:05:03] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/28/2023-05:05:03] [W] * GPU compute time is unstable, with coefficient of variance = 3.46355%. +[12/28/2023-05:05:03] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. diff --git a/yolo_nas_pose_m_fp32.onnx.fp16.engine.log b/yolo_nas_pose_m_fp32.onnx.fp16.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..8c06deebf1f8041d285364925ccd7588e43265a2 --- /dev/null +++ b/yolo_nas_pose_m_fp32.onnx.fp16.engine.log @@ -0,0 +1,309 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_m_fp32.onnx --fp16 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_m_fp32.onnx.fp16.engine +[12/28/2023-04:07:56] [I] === Model Options === +[12/28/2023-04:07:56] [I] Format: ONNX +[12/28/2023-04:07:56] [I] Model: yolo_nas_pose_m_fp32.onnx +[12/28/2023-04:07:56] [I] Output: +[12/28/2023-04:07:56] [I] === Build Options === +[12/28/2023-04:07:56] [I] Max batch: explicit batch +[12/28/2023-04:07:56] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-04:07:56] [I] minTiming: 1 +[12/28/2023-04:07:56] [I] avgTiming: 8 +[12/28/2023-04:07:56] [I] Precision: FP32+FP16 +[12/28/2023-04:07:56] [I] LayerPrecisions: +[12/28/2023-04:07:56] [I] Calibration: +[12/28/2023-04:07:56] [I] Refit: Disabled +[12/28/2023-04:07:56] [I] Sparsity: Disabled +[12/28/2023-04:07:56] [I] Safe mode: Disabled +[12/28/2023-04:07:56] [I] DirectIO mode: Disabled +[12/28/2023-04:07:56] [I] Restricted mode: Disabled +[12/28/2023-04:07:56] [I] Build only: Disabled +[12/28/2023-04:07:56] [I] Save engine: yolo_nas_pose_m_fp32.onnx.fp16.engine +[12/28/2023-04:07:56] [I] Load 
engine: +[12/28/2023-04:07:56] [I] Profiling verbosity: 0 +[12/28/2023-04:07:56] [I] Tactic sources: Using default tactic sources +[12/28/2023-04:07:56] [I] timingCacheMode: local +[12/28/2023-04:07:56] [I] timingCacheFile: +[12/28/2023-04:07:56] [I] Heuristic: Disabled +[12/28/2023-04:07:56] [I] Preview Features: Use default preview flags. +[12/28/2023-04:07:56] [I] Input(s)s format: fp32:CHW +[12/28/2023-04:07:56] [I] Output(s)s format: fp32:CHW +[12/28/2023-04:07:56] [I] Input build shapes: model +[12/28/2023-04:07:56] [I] Input calibration shapes: model +[12/28/2023-04:07:56] [I] === System Options === +[12/28/2023-04:07:56] [I] Device: 0 +[12/28/2023-04:07:56] [I] DLACore: +[12/28/2023-04:07:56] [I] Plugins: +[12/28/2023-04:07:56] [I] === Inference Options === +[12/28/2023-04:07:56] [I] Batch: Explicit +[12/28/2023-04:07:56] [I] Input inference shapes: model +[12/28/2023-04:07:56] [I] Iterations: 10 +[12/28/2023-04:07:56] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-04:07:56] [I] Sleep time: 0ms +[12/28/2023-04:07:56] [I] Idle time: 0ms +[12/28/2023-04:07:56] [I] Streams: 1 +[12/28/2023-04:07:56] [I] ExposeDMA: Disabled +[12/28/2023-04:07:56] [I] Data transfers: Enabled +[12/28/2023-04:07:56] [I] Spin-wait: Disabled +[12/28/2023-04:07:56] [I] Multithreading: Disabled +[12/28/2023-04:07:56] [I] CUDA Graph: Disabled +[12/28/2023-04:07:56] [I] Separate profiling: Disabled +[12/28/2023-04:07:56] [I] Time Deserialize: Disabled +[12/28/2023-04:07:56] [I] Time Refit: Disabled +[12/28/2023-04:07:56] [I] NVTX verbosity: 0 +[12/28/2023-04:07:56] [I] Persistent Cache Ratio: 0 +[12/28/2023-04:07:56] [I] Inputs: +[12/28/2023-04:07:56] [I] === Reporting Options === +[12/28/2023-04:07:56] [I] Verbose: Disabled +[12/28/2023-04:07:56] [I] Averages: 100 inferences +[12/28/2023-04:07:56] [I] Percentiles: 90,95,99 +[12/28/2023-04:07:56] [I] Dump refittable layers:Disabled +[12/28/2023-04:07:56] [I] Dump output: Disabled +[12/28/2023-04:07:56] [I] Profile: Disabled 
+[12/28/2023-04:07:56] [I] Export timing to JSON file: +[12/28/2023-04:07:56] [I] Export output to JSON file: +[12/28/2023-04:07:56] [I] Export profile to JSON file: +[12/28/2023-04:07:56] [I] +[12/28/2023-04:07:57] [I] === Device Information === +[12/28/2023-04:07:57] [I] Selected Device: Orin +[12/28/2023-04:07:57] [I] Compute Capability: 8.7 +[12/28/2023-04:07:57] [I] SMs: 8 +[12/28/2023-04:07:57] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-04:07:57] [I] Device Global Memory: 7471 MiB +[12/28/2023-04:07:57] [I] Shared Memory per SM: 164 KiB +[12/28/2023-04:07:57] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-04:07:57] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-04:07:57] [I] +[12/28/2023-04:07:57] [I] TensorRT version: 8.5.2 +[12/28/2023-04:07:57] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 2942 (MiB) +[12/28/2023-04:08:03] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +285, now: CPU 574, GPU 3248 (MiB) +[12/28/2023-04:08:03] [I] Start parsing network model +[12/28/2023-04:08:05] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-04:08:05] [I] [TRT] Input filename: yolo_nas_pose_m_fp32.onnx +[12/28/2023-04:08:05] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-04:08:05] [I] [TRT] Opset version: 17 +[12/28/2023-04:08:05] [I] [TRT] Producer name: pytorch +[12/28/2023-04:08:05] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-04:08:05] [I] [TRT] Domain: +[12/28/2023-04:08:05] [I] [TRT] Model version: 0 +[12/28/2023-04:08:05] [I] [TRT] Doc string: +[12/28/2023-04:08:05] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-04:08:05] [I] Finish parsing network model +[12/28/2023-04:08:06] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/28/2023-04:08:06] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/28/2023-04:08:06] [I] [TRT] 
[GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 407) [Constant] +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 408) [Constant] +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 409) [Constant] +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stem/conv/rbr_reparam/Conv + /model/backbone/stem/conv/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/downsample/rbr_reparam/Conv + /model/backbone/stage1/downsample/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv2/conv/Conv + /model/backbone/stage1/blocks/conv2/act/Relu || /model/backbone/stage1/blocks/conv1/conv/Conv + /model/backbone/stage1/blocks/conv1/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 15) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + 
/model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 23) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv3/conv/Conv + /model/backbone/stage1/blocks/conv3/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_skip2/conv/Conv + /model/neck/neck2/reduce_skip2/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/downsample/rbr_reparam/Conv + /model/backbone/stage2/downsample/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/downsample/conv/Conv + /model/neck/neck2/downsample/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv2/conv/Conv + /model/backbone/stage2/blocks/conv2/act/Relu || /model/backbone/stage2/blocks/conv1/conv/Conv + /model/backbone/stage2/blocks/conv1/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] 
POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 44) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 52) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 60) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/28/2023-04:08:06] [I] [TRT] 
[GpuLayer] COPY: /model/backbone/stage2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv3/conv/Conv + /model/backbone/stage2/blocks/conv3/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip2/conv/Conv + /model/neck/neck1/reduce_skip2/act/Relu || /model/neck/neck2/reduce_skip1/conv/Conv + /model/neck/neck2/reduce_skip1/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/downsample/rbr_reparam/Conv + /model/backbone/stage3/downsample/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/downsample/conv/Conv + /model/neck/neck1/downsample/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv2/conv/Conv + /model/backbone/stage3/blocks/conv2/act/Relu || /model/backbone/stage3/blocks/conv1/conv/Conv + /model/backbone/stage3/blocks/conv1/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 83) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 91) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 99) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 107) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/Conv + 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.4.alpha + (Unnamed Layer* 115) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Add) +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add_output_0 copy +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add_output_0 copy +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv3/conv/Conv + /model/backbone/stage3/blocks/conv3/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip1/conv/Conv + /model/neck/neck1/reduce_skip1/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/downsample/rbr_reparam/Conv + /model/backbone/stage4/downsample/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv2/conv/Conv + /model/backbone/stage4/blocks/conv2/act/Relu || /model/backbone/stage4/blocks/conv1/conv/Conv + /model/backbone/stage4/blocks/conv1/act/Relu +[12/28/2023-04:08:06] [I] 
[TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 134) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 142) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv3/conv/Conv + /model/backbone/stage4/blocks/conv3/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv1/conv/Conv + /model/backbone/context_module/cv1/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/28/2023-04:08:06] [I] [TRT] 
[GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/cv1/act/Relu_output_0 copy +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv2/conv/Conv + /model/backbone/context_module/cv2/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/conv/conv/Conv + /model/neck/neck1/conv/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck1/upsample/ConvTranspose +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_after_concat/conv/Conv + /model/neck/neck1/reduce_after_concat/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv2/conv/Conv + /model/neck/neck1/blocks/conv2/act/Relu || /model/neck/neck1/blocks/conv1/conv/Conv + /model/neck/neck1/blocks/conv1/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 171) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu 
+[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 179) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv3/conv/Conv + /model/neck/neck1/blocks/conv3/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/conv/conv/Conv + /model/neck/neck2/conv/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck2/upsample/ConvTranspose +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/reduce_skip1/act/Relu_output_0 copy +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_after_concat/conv/Conv + /model/neck/neck2/reduce_after_concat/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv2/conv/Conv + /model/neck/neck2/blocks/conv2/act/Relu || /model/neck/neck2/blocks/conv1/conv/Conv + /model/neck/neck2/blocks/conv1/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 200) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + 
/model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 208) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 216) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv3/conv/Conv + /model/neck/neck2/blocks/conv3/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/bbox_stem/seq/conv/Conv + /model/heads/head1/bbox_stem/seq/act/Relu || /model/heads/head1/pose_stem/seq/conv/Conv + /model/heads/head1/pose_stem/seq/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/conv/conv/Conv + /model/neck/neck3/conv/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head1/reg_convs/reg_convs.0/seq/act/Relu || 
/model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head1/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv2/conv/Conv + /model/neck/neck3/blocks/conv2/act/Relu || /model/neck/neck3/blocks/conv1/conv/Conv + /model/neck/neck3/blocks/conv1/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/cls_pred/Conv +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_pred/Conv +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_pred/Conv +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 247) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + 
/model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 270) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv3/conv/Conv + /model/neck/neck3/blocks/conv3/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_stem/seq/conv/Conv + /model/heads/head2/pose_stem/seq/act/Relu || /model/heads/head2/bbox_stem/seq/conv/Conv + /model/heads/head2/bbox_stem/seq/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/conv/conv/Conv + /model/neck/neck4/conv/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head2/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head2/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv2/conv/Conv + /model/neck/neck4/blocks/conv2/act/Relu || /model/neck/neck4/blocks/conv1/conv/Conv + /model/neck/neck4/blocks/conv1/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/cls_pred/Conv +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_pred/Conv +[12/28/2023-04:08:06] [I] [TRT] 
[GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_pred/Conv +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 301) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 324) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.2.alpha + (Unnamed Layer* 332) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv3/conv/Conv + /model/neck/neck4/blocks/conv3/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/bbox_stem/seq/conv/Conv + /model/heads/head3/bbox_stem/seq/act/Relu || /model/heads/head3/pose_stem/seq/conv/Conv + /model/heads/head3/pose_stem/seq/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head3/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head3/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/cls_pred/Conv +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_pred/Conv +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.2/seq/act/Relu +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] SOFTMAX: 
/model/heads/Softmax_2 +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_pred/Conv +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice_1.../post_process/Reshape_2]} +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] NMS: batched_nms_26 +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 411) [NMS]_1_output[DevicetoShapeHostCopy] +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/28/2023-04:08:06] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] +[12/28/2023-04:08:13] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +383, now: CPU 1279, GPU 3862 (MiB) +[12/28/2023-04:08:15] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +83, GPU +72, now: CPU 1362, GPU 3934 (MiB) +[12/28/2023-04:08:15] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored. +[12/28/2023-05:04:31] [I] [TRT] Total Activation Memory: 8017953280 +[12/28/2023-05:04:31] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/28/2023-05:04:42] [I] [TRT] Total Host Persistent Memory: 326560 +[12/28/2023-05:04:42] [I] [TRT] Total Device Persistent Memory: 79360 +[12/28/2023-05:04:42] [I] [TRT] Total Scratch Memory: 134217728 +[12/28/2023-05:04:42] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 96 MiB, GPU 2131 MiB +[12/28/2023-05:04:42] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 184 steps to complete. +[12/28/2023-05:04:42] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 85.5735ms to assign 16 blocks to 184 nodes requiring 155606016 bytes. 
+[12/28/2023-05:04:42] [I] [TRT] Total Activation Memory: 155606016 +[12/28/2023-05:04:46] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU -1, now: CPU 1783, GPU 5776 (MiB) +[12/28/2023-05:04:46] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +75, GPU +128, now: CPU 75, GPU 128 (MiB) +[12/28/2023-05:04:47] [I] Engine built in 3410.28 sec. +[12/28/2023-05:04:48] [I] [TRT] Loaded engine size: 75 MiB +[12/28/2023-05:04:48] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1316, GPU 5357 (MiB) +[12/28/2023-05:04:48] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +74, now: CPU 0, GPU 74 (MiB) +[12/28/2023-05:04:48] [I] Engine deserialized in 0.200114 sec. +[12/28/2023-05:04:48] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1317, GPU 5357 (MiB) +[12/28/2023-05:04:48] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +148, now: CPU 0, GPU 222 (MiB) +[12/28/2023-05:04:48] [I] Setting persistentCacheLimit to 0 bytes. 
+[12/28/2023-05:04:48] [I] Using random values for input onnx::Cast_0 +[12/28/2023-05:04:48] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/28/2023-05:04:48] [I] Using random values for output graph2_flat_predictions +[12/28/2023-05:04:48] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/28/2023-05:04:48] [I] Starting inference +[12/28/2023-05:05:03] [I] Warmup completed 6 queries over 200 ms +[12/28/2023-05:05:03] [I] Timing trace has 560 queries over 15.0566 s +[12/28/2023-05:05:03] [I] +[12/28/2023-05:05:03] [I] === Trace details === +[12/28/2023-05:05:03] [I] Trace averages of 100 runs: +[12/28/2023-05:05:03] [I] Average on 100 runs - GPU latency: 26.7836 ms - Host latency: 26.8887 ms (enqueue 26.8405 ms) +[12/28/2023-05:05:03] [I] Average on 100 runs - GPU latency: 26.6616 ms - Host latency: 26.7672 ms (enqueue 26.7257 ms) +[12/28/2023-05:05:03] [I] Average on 100 runs - GPU latency: 27.0566 ms - Host latency: 27.1691 ms (enqueue 27.1178 ms) +[12/28/2023-05:05:03] [I] Average on 100 runs - GPU latency: 26.8462 ms - Host latency: 26.9542 ms (enqueue 26.9064 ms) +[12/28/2023-05:05:03] [I] Average on 100 runs - GPU latency: 26.4367 ms - Host latency: 26.5397 ms (enqueue 26.5044 ms) +[12/28/2023-05:05:03] [I] +[12/28/2023-05:05:03] [I] === Performance summary === +[12/28/2023-05:05:03] [I] Throughput: 37.193 qps +[12/28/2023-05:05:03] [I] Latency: min = 25.1719 ms, max = 31.8945 ms, mean = 26.8572 ms, median = 26.8267 ms, percentile(90%) = 27.713 ms, percentile(95%) = 28.2578 ms, percentile(99%) = 31.0742 ms +[12/28/2023-05:05:03] [I] Enqueue Time: min = 25.1318 ms, max = 31.8486 ms, mean = 26.812 ms, median = 26.7715 ms, percentile(90%) = 27.5806 ms, percentile(95%) = 28.2051 ms, percentile(99%) = 31.043 ms +[12/28/2023-05:05:03] [I] H2D Latency: min = 0.0795898 ms, max = 0.112305 ms, mean = 0.0899329 ms, median = 0.0897217 ms, percentile(90%) = 0.0953979 ms, percentile(95%) = 0.0966797 ms, 
percentile(99%) = 0.0986328 ms +[12/28/2023-05:05:03] [I] GPU Compute Time: min = 25.0645 ms, max = 31.7861 ms, mean = 26.7509 ms, median = 26.7131 ms, percentile(90%) = 27.6116 ms, percentile(95%) = 28.1631 ms, percentile(99%) = 30.9727 ms +[12/28/2023-05:05:03] [I] D2H Latency: min = 0.00292969 ms, max = 0.0419922 ms, mean = 0.0164229 ms, median = 0.0146484 ms, percentile(90%) = 0.0263672 ms, percentile(95%) = 0.0273438 ms, percentile(99%) = 0.0322266 ms +[12/28/2023-05:05:03] [I] Total Host Walltime: 15.0566 s +[12/28/2023-05:05:03] [I] Total GPU Compute Time: 14.9805 s +[12/28/2023-05:05:03] [I] Explanations of the performance metrics are printed in the verbose logs. +[12/28/2023-05:05:03] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_m_fp32.onnx --fp16 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_m_fp32.onnx.fp16.engine diff --git a/yolo_nas_pose_m_fp32.onnx.int8.engine b/yolo_nas_pose_m_fp32.onnx.int8.engine new file mode 100644 index 0000000000000000000000000000000000000000..4bb827a805ca83ca2e92a8e479fef2b11d47e2cb --- /dev/null +++ b/yolo_nas_pose_m_fp32.onnx.int8.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3accfd3c7d9f3317f284d4ee19d201dcd9009f2dc8c4190cc9ac5b41175e41e5 +size 41432029 diff --git a/yolo_nas_pose_m_fp32.onnx.int8.engine.err b/yolo_nas_pose_m_fp32.onnx.int8.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..c39a64b93ead0347976567203e2e47dec05c2c48 --- /dev/null +++ b/yolo_nas_pose_m_fp32.onnx.int8.engine.err @@ -0,0 +1,232 @@ +[12/28/2023-06:57:14] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/28/2023-06:57:14] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-06:57:15] [W] [TRT] Calibrator is not being used. 
Users must provide dynamic range for all tensors that are not Int32 or Bool. +[12/28/2023-07:02:59] [W] [TRT] Tactic Device request: 6262MB Available: 2704MB. Device memory is insufficient to use tactic. +[12/28/2023-07:03:00] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6262 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:03:00] [W] [TRT] Tactic Device request: 6262MB Available: 2703MB. Device memory is insufficient to use tactic. +[12/28/2023-07:03:00] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6262 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:03:00] [W] [TRT] Tactic Device request: 6262MB Available: 2703MB. Device memory is insufficient to use tactic. +[12/28/2023-07:03:00] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6262 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:07:17] [W] [TRT] Tactic Device request: 4711MB Available: 2332MB. Device memory is insufficient to use tactic. +[12/28/2023-07:07:17] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:07:17] [W] [TRT] Tactic Device request: 4711MB Available: 2332MB. Device memory is insufficient to use tactic. +[12/28/2023-07:07:17] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4711 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:07:17] [W] [TRT] Tactic Device request: 4711MB Available: 2332MB. Device memory is insufficient to use tactic. 
+[12/28/2023-07:07:17] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:07:31] [W] [TRT] Tactic Device request: 6275MB Available: 2331MB. Device memory is insufficient to use tactic. +[12/28/2023-07:07:31] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6275 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:07:31] [W] [TRT] Tactic Device request: 6275MB Available: 2330MB. Device memory is insufficient to use tactic. +[12/28/2023-07:07:31] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6275 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:07:31] [W] [TRT] Tactic Device request: 6275MB Available: 2331MB. Device memory is insufficient to use tactic. +[12/28/2023-07:07:31] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6275 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:13:11] [W] [TRT] Tactic Device request: 7056MB Available: 2230MB. Device memory is insufficient to use tactic. +[12/28/2023-07:13:11] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:13:11] [W] [TRT] Tactic Device request: 7056MB Available: 2230MB. Device memory is insufficient to use tactic. +[12/28/2023-07:13:11] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 7056 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-07:13:11] [W] [TRT] Tactic Device request: 7056MB Available: 2230MB. Device memory is insufficient to use tactic. +[12/28/2023-07:13:11] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:13:27] [W] [TRT] Tactic Device request: 6354MB Available: 2231MB. Device memory is insufficient to use tactic. +[12/28/2023-07:13:27] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 6354 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:13:27] [W] [TRT] Tactic Device request: 6354MB Available: 2231MB. Device memory is insufficient to use tactic. +[12/28/2023-07:13:27] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 6354 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:13:27] [W] [TRT] Tactic Device request: 6354MB Available: 2231MB. Device memory is insufficient to use tactic. +[12/28/2023-07:13:27] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 6354 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:21:18] [W] [TRT] Tactic Device request: 2394MB Available: 2352MB. Device memory is insufficient to use tactic. +[12/28/2023-07:21:18] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:21:18] [W] [TRT] Tactic Device request: 2394MB Available: 2352MB. Device memory is insufficient to use tactic. 
+[12/28/2023-07:21:18] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2394 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:21:18] [W] [TRT] Tactic Device request: 2394MB Available: 2352MB. Device memory is insufficient to use tactic. +[12/28/2023-07:21:18] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:21:19] [W] [TRT] Tactic Device request: 2392MB Available: 2353MB. Device memory is insufficient to use tactic. +[12/28/2023-07:21:19] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:21:19] [W] [TRT] Tactic Device request: 2392MB Available: 2353MB. Device memory is insufficient to use tactic. +[12/28/2023-07:21:19] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2392 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:21:19] [W] [TRT] Tactic Device request: 2392MB Available: 2353MB. Device memory is insufficient to use tactic. +[12/28/2023-07:21:19] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:21:31] [W] [TRT] Tactic Device request: 4906MB Available: 2080MB. Device memory is insufficient to use tactic. +[12/28/2023-07:21:31] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4906 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-07:21:32] [W] [TRT] Tactic Device request: 4906MB Available: 2080MB. Device memory is insufficient to use tactic. +[12/28/2023-07:21:32] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4906 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:21:32] [W] [TRT] Tactic Device request: 4906MB Available: 2080MB. Device memory is insufficient to use tactic. +[12/28/2023-07:21:32] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4906 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:25:36] [W] [TRT] Tactic Device request: 4906MB Available: 2254MB. Device memory is insufficient to use tactic. +[12/28/2023-07:25:36] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4906 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:25:36] [W] [TRT] Tactic Device request: 4906MB Available: 2254MB. Device memory is insufficient to use tactic. +[12/28/2023-07:25:37] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4906 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:25:37] [W] [TRT] Tactic Device request: 4906MB Available: 2254MB. Device memory is insufficient to use tactic. +[12/28/2023-07:25:37] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4906 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:25:40] [W] [TRT] Tactic Device request: 2457MB Available: 2255MB. Device memory is insufficient to use tactic. 
+[12/28/2023-07:25:40] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:25:40] [W] [TRT] Tactic Device request: 2457MB Available: 2255MB. Device memory is insufficient to use tactic. +[12/28/2023-07:25:40] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2457 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:25:40] [W] [TRT] Tactic Device request: 2457MB Available: 2255MB. Device memory is insufficient to use tactic. +[12/28/2023-07:25:40] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:26:00] [W] [TRT] Tactic Device request: 3587MB Available: 2121MB. Device memory is insufficient to use tactic. +[12/28/2023-07:26:00] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:26:00] [W] [TRT] Tactic Device request: 3587MB Available: 2121MB. Device memory is insufficient to use tactic. +[12/28/2023-07:26:00] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3587 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:26:00] [W] [TRT] Tactic Device request: 3587MB Available: 2121MB. Device memory is insufficient to use tactic. +[12/28/2023-07:26:00] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-07:26:02] [W] [TRT] Tactic Device request: 2385MB Available: 2120MB. Device memory is insufficient to use tactic. +[12/28/2023-07:26:02] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2385 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:26:02] [W] [TRT] Tactic Device request: 2385MB Available: 2120MB. Device memory is insufficient to use tactic. +[12/28/2023-07:26:02] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2385 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:26:02] [W] [TRT] Tactic Device request: 2385MB Available: 2120MB. Device memory is insufficient to use tactic. +[12/28/2023-07:26:02] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2385 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:30:15] [W] [TRT] Tactic Device request: 3556MB Available: 1927MB. Device memory is insufficient to use tactic. +[12/28/2023-07:30:15] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:30:15] [W] [TRT] Tactic Device request: 3556MB Available: 1927MB. Device memory is insufficient to use tactic. +[12/28/2023-07:30:15] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3556 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:30:15] [W] [TRT] Tactic Device request: 3556MB Available: 1927MB. Device memory is insufficient to use tactic. 
+[12/28/2023-07:30:15] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:35:43] [W] [TRT] Tactic Device request: 2359MB Available: 1889MB. Device memory is insufficient to use tactic. +[12/28/2023-07:35:43] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2359 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:35:43] [W] [TRT] Tactic Device request: 2359MB Available: 1888MB. Device memory is insufficient to use tactic. +[12/28/2023-07:35:43] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2359 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:35:43] [W] [TRT] Tactic Device request: 2359MB Available: 1889MB. Device memory is insufficient to use tactic. +[12/28/2023-07:35:43] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2359 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:35:48] [W] [TRT] Tactic Device request: 2362MB Available: 1889MB. Device memory is insufficient to use tactic. +[12/28/2023-07:35:48] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2362 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:35:48] [W] [TRT] Tactic Device request: 2362MB Available: 1889MB. Device memory is insufficient to use tactic. +[12/28/2023-07:35:48] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2362 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-07:35:49] [W] [TRT] Tactic Device request: 2362MB Available: 1889MB. Device memory is insufficient to use tactic. +[12/28/2023-07:35:49] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2362 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:35:52] [W] [TRT] Tactic Device request: 2359MB Available: 1888MB. Device memory is insufficient to use tactic. +[12/28/2023-07:35:52] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2359 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:35:52] [W] [TRT] Tactic Device request: 2359MB Available: 1889MB. Device memory is insufficient to use tactic. +[12/28/2023-07:35:52] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2359 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:35:53] [W] [TRT] Tactic Device request: 2359MB Available: 1889MB. Device memory is insufficient to use tactic. +[12/28/2023-07:35:53] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2359 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:40:29] [W] [TRT] Tactic Device request: 3575MB Available: 1823MB. Device memory is insufficient to use tactic. +[12/28/2023-07:40:29] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3575 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:40:29] [W] [TRT] Tactic Device request: 3575MB Available: 1823MB. Device memory is insufficient to use tactic. 
+[12/28/2023-07:40:29] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3575 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:40:29] [W] [TRT] Tactic Device request: 3575MB Available: 1823MB. Device memory is insufficient to use tactic. +[12/28/2023-07:40:29] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3575 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:40:35] [W] [TRT] Tactic Device request: 2390MB Available: 1823MB. Device memory is insufficient to use tactic. +[12/28/2023-07:40:35] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2390 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:40:36] [W] [TRT] Tactic Device request: 2390MB Available: 1813MB. Device memory is insufficient to use tactic. +[12/28/2023-07:40:36] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2390 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:40:36] [W] [TRT] Tactic Device request: 2390MB Available: 1813MB. Device memory is insufficient to use tactic. +[12/28/2023-07:40:36] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2390 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:40:40] [W] [TRT] Tactic Device request: 2388MB Available: 1813MB. Device memory is insufficient to use tactic. +[12/28/2023-07:40:40] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2388 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-07:40:41] [W] [TRT] Tactic Device request: 2388MB Available: 1813MB. Device memory is insufficient to use tactic. +[12/28/2023-07:40:41] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2388 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:40:41] [W] [TRT] Tactic Device request: 2388MB Available: 1813MB. Device memory is insufficient to use tactic. +[12/28/2023-07:40:41] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2388 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:40:46] [W] [TRT] Tactic Device request: 4775MB Available: 1814MB. Device memory is insufficient to use tactic. +[12/28/2023-07:40:46] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4775 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:40:47] [W] [TRT] Tactic Device request: 4775MB Available: 1811MB. Device memory is insufficient to use tactic. +[12/28/2023-07:40:47] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4775 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:40:48] [W] [TRT] Tactic Device request: 4775MB Available: 1813MB. Device memory is insufficient to use tactic. +[12/28/2023-07:40:48] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4775 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:40:56] [W] [TRT] Tactic Device request: 4774MB Available: 1813MB. Device memory is insufficient to use tactic. 
+[12/28/2023-07:40:56] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4774 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:40:56] [W] [TRT] Tactic Device request: 4774MB Available: 1813MB. Device memory is insufficient to use tactic. +[12/28/2023-07:40:56] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4774 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:40:57] [W] [TRT] Tactic Device request: 4774MB Available: 1812MB. Device memory is insufficient to use tactic. +[12/28/2023-07:40:58] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4774 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:41:15] [W] [TRT] Tactic Device request: 4774MB Available: 1758MB. Device memory is insufficient to use tactic. +[12/28/2023-07:41:15] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4774 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:41:16] [W] [TRT] Tactic Device request: 4774MB Available: 1757MB. Device memory is insufficient to use tactic. +[12/28/2023-07:41:16] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 4774 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:41:17] [W] [TRT] Tactic Device request: 4774MB Available: 1756MB. Device memory is insufficient to use tactic. +[12/28/2023-07:41:17] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 4774 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-07:46:39] [W] [TRT] Tactic Device request: 2454MB Available: 1720MB. Device memory is insufficient to use tactic. +[12/28/2023-07:46:39] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2454 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:46:39] [W] [TRT] Tactic Device request: 2454MB Available: 1720MB. Device memory is insufficient to use tactic. +[12/28/2023-07:46:39] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2454 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:46:39] [W] [TRT] Tactic Device request: 2454MB Available: 1720MB. Device memory is insufficient to use tactic. +[12/28/2023-07:46:39] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2454 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:46:42] [W] [TRT] Tactic Device request: 2463MB Available: 1720MB. Device memory is insufficient to use tactic. +[12/28/2023-07:46:42] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2463 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:46:43] [W] [TRT] Tactic Device request: 2463MB Available: 1719MB. Device memory is insufficient to use tactic. +[12/28/2023-07:46:43] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2463 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:46:44] [W] [TRT] Tactic Device request: 2463MB Available: 1718MB. Device memory is insufficient to use tactic. 
+[12/28/2023-07:46:44] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2463 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:46:50] [W] [TRT] Tactic Device request: 2463MB Available: 1718MB. Device memory is insufficient to use tactic. +[12/28/2023-07:46:50] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2463 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:46:51] [W] [TRT] Tactic Device request: 2463MB Available: 1718MB. Device memory is insufficient to use tactic. +[12/28/2023-07:46:51] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2463 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:46:51] [W] [TRT] Tactic Device request: 2463MB Available: 1717MB. Device memory is insufficient to use tactic. +[12/28/2023-07:46:52] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2463 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-07:47:42] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. +[12/28/2023-07:47:42] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/28/2023-07:47:42] [W] * GPU compute time is unstable, with coefficient of variance = 3.2943%. +[12/28/2023-07:47:42] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. 
diff --git a/yolo_nas_pose_m_fp32.onnx.int8.engine.log b/yolo_nas_pose_m_fp32.onnx.int8.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..d33ffb0adb23b5e53e858357cce6a9784e3c8dd3 --- /dev/null +++ b/yolo_nas_pose_m_fp32.onnx.int8.engine.log @@ -0,0 +1,314 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_m_fp32.onnx --int8 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_m_fp32.onnx.int8.engine +[12/28/2023-06:57:02] [I] === Model Options === +[12/28/2023-06:57:02] [I] Format: ONNX +[12/28/2023-06:57:02] [I] Model: yolo_nas_pose_m_fp32.onnx +[12/28/2023-06:57:02] [I] Output: +[12/28/2023-06:57:02] [I] === Build Options === +[12/28/2023-06:57:02] [I] Max batch: explicit batch +[12/28/2023-06:57:02] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-06:57:02] [I] minTiming: 1 +[12/28/2023-06:57:02] [I] avgTiming: 8 +[12/28/2023-06:57:02] [I] Precision: FP32+INT8 +[12/28/2023-06:57:02] [I] LayerPrecisions: +[12/28/2023-06:57:02] [I] Calibration: Dynamic +[12/28/2023-06:57:02] [I] Refit: Disabled +[12/28/2023-06:57:02] [I] Sparsity: Disabled +[12/28/2023-06:57:02] [I] Safe mode: Disabled +[12/28/2023-06:57:02] [I] DirectIO mode: Disabled +[12/28/2023-06:57:02] [I] Restricted mode: Disabled +[12/28/2023-06:57:02] [I] Build only: Disabled +[12/28/2023-06:57:02] [I] Save engine: yolo_nas_pose_m_fp32.onnx.int8.engine +[12/28/2023-06:57:02] [I] Load engine: +[12/28/2023-06:57:02] [I] Profiling verbosity: 0 +[12/28/2023-06:57:02] [I] Tactic sources: Using default tactic sources +[12/28/2023-06:57:02] [I] timingCacheMode: local +[12/28/2023-06:57:02] [I] timingCacheFile: +[12/28/2023-06:57:02] [I] Heuristic: Disabled +[12/28/2023-06:57:02] [I] Preview Features: Use default preview flags. 
+[12/28/2023-06:57:02] [I] Input(s)s format: fp32:CHW +[12/28/2023-06:57:02] [I] Output(s)s format: fp32:CHW +[12/28/2023-06:57:02] [I] Input build shapes: model +[12/28/2023-06:57:02] [I] Input calibration shapes: model +[12/28/2023-06:57:02] [I] === System Options === +[12/28/2023-06:57:02] [I] Device: 0 +[12/28/2023-06:57:02] [I] DLACore: +[12/28/2023-06:57:02] [I] Plugins: +[12/28/2023-06:57:02] [I] === Inference Options === +[12/28/2023-06:57:02] [I] Batch: Explicit +[12/28/2023-06:57:02] [I] Input inference shapes: model +[12/28/2023-06:57:02] [I] Iterations: 10 +[12/28/2023-06:57:02] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-06:57:02] [I] Sleep time: 0ms +[12/28/2023-06:57:02] [I] Idle time: 0ms +[12/28/2023-06:57:02] [I] Streams: 1 +[12/28/2023-06:57:02] [I] ExposeDMA: Disabled +[12/28/2023-06:57:02] [I] Data transfers: Enabled +[12/28/2023-06:57:02] [I] Spin-wait: Disabled +[12/28/2023-06:57:02] [I] Multithreading: Disabled +[12/28/2023-06:57:02] [I] CUDA Graph: Disabled +[12/28/2023-06:57:02] [I] Separate profiling: Disabled +[12/28/2023-06:57:02] [I] Time Deserialize: Disabled +[12/28/2023-06:57:02] [I] Time Refit: Disabled +[12/28/2023-06:57:02] [I] NVTX verbosity: 0 +[12/28/2023-06:57:02] [I] Persistent Cache Ratio: 0 +[12/28/2023-06:57:02] [I] Inputs: +[12/28/2023-06:57:02] [I] === Reporting Options === +[12/28/2023-06:57:02] [I] Verbose: Disabled +[12/28/2023-06:57:02] [I] Averages: 100 inferences +[12/28/2023-06:57:02] [I] Percentiles: 90,95,99 +[12/28/2023-06:57:02] [I] Dump refittable layers:Disabled +[12/28/2023-06:57:02] [I] Dump output: Disabled +[12/28/2023-06:57:02] [I] Profile: Disabled +[12/28/2023-06:57:02] [I] Export timing to JSON file: +[12/28/2023-06:57:02] [I] Export output to JSON file: +[12/28/2023-06:57:02] [I] Export profile to JSON file: +[12/28/2023-06:57:02] [I] +[12/28/2023-06:57:02] [I] === Device Information === +[12/28/2023-06:57:02] [I] Selected Device: Orin +[12/28/2023-06:57:02] [I] Compute Capability: 8.7 
+[12/28/2023-06:57:02] [I] SMs: 8 +[12/28/2023-06:57:02] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-06:57:02] [I] Device Global Memory: 7471 MiB +[12/28/2023-06:57:02] [I] Shared Memory per SM: 164 KiB +[12/28/2023-06:57:02] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-06:57:02] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-06:57:02] [I] +[12/28/2023-06:57:02] [I] TensorRT version: 8.5.2 +[12/28/2023-06:57:07] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 3082 (MiB) +[12/28/2023-06:57:11] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +289, now: CPU 574, GPU 3392 (MiB) +[12/28/2023-06:57:11] [I] Start parsing network model +[12/28/2023-06:57:14] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-06:57:14] [I] [TRT] Input filename: yolo_nas_pose_m_fp32.onnx +[12/28/2023-06:57:14] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-06:57:14] [I] [TRT] Opset version: 17 +[12/28/2023-06:57:14] [I] [TRT] Producer name: pytorch +[12/28/2023-06:57:14] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-06:57:14] [I] [TRT] Domain: +[12/28/2023-06:57:14] [I] [TRT] Model version: 0 +[12/28/2023-06:57:14] [I] [TRT] Doc string: +[12/28/2023-06:57:14] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-06:57:15] [I] Finish parsing network model +[12/28/2023-06:57:15] [I] FP32 and INT8 precisions have been specified - more performance might be enabled by additionally specifying --fp16 or --best +[12/28/2023-06:57:15] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/28/2023-06:57:15] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 407) [Constant] 
+[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 408) [Constant] +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 409) [Constant] +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stem/conv/rbr_reparam/Conv + /model/backbone/stem/conv/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/downsample/rbr_reparam/Conv + /model/backbone/stage1/downsample/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv2/conv/Conv + /model/backbone/stage1/blocks/conv2/act/Relu || /model/backbone/stage1/blocks/conv1/conv/Conv + /model/backbone/stage1/blocks/conv1/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 15) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 23) [Shuffle] + 
/model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv3/conv/Conv + /model/backbone/stage1/blocks/conv3/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_skip2/conv/Conv + /model/neck/neck2/reduce_skip2/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/downsample/rbr_reparam/Conv + /model/backbone/stage2/downsample/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/downsample/conv/Conv + /model/neck/neck2/downsample/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv2/conv/Conv + /model/backbone/stage2/blocks/conv2/act/Relu || /model/backbone/stage2/blocks/conv1/conv/Conv + /model/backbone/stage2/blocks/conv1/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 44) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) 
+[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 52) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 60) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv3/conv/Conv + /model/backbone/stage2/blocks/conv3/act/Relu 
+[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip2/conv/Conv + /model/neck/neck1/reduce_skip2/act/Relu || /model/neck/neck2/reduce_skip1/conv/Conv + /model/neck/neck2/reduce_skip1/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/downsample/rbr_reparam/Conv + /model/backbone/stage3/downsample/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/downsample/conv/Conv + /model/neck/neck1/downsample/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv2/conv/Conv + /model/backbone/stage3/blocks/conv2/act/Relu || /model/backbone/stage3/blocks/conv1/conv/Conv + /model/backbone/stage3/blocks/conv1/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 83) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + 
(Unnamed Layer* 91) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 99) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 107) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] 
[GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.4.alpha + (Unnamed Layer* 115) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Add) +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv1/act/Relu_output_0 copy +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add_output_0 copy +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add_output_0 copy +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add_output_0 copy +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv3/conv/Conv + /model/backbone/stage3/blocks/conv3/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip1/conv/Conv + /model/neck/neck1/reduce_skip1/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/downsample/rbr_reparam/Conv + /model/backbone/stage4/downsample/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv2/conv/Conv + /model/backbone/stage4/blocks/conv2/act/Relu || /model/backbone/stage4/blocks/conv1/conv/Conv + /model/backbone/stage4/blocks/conv1/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + 
/model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 134) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 142) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv3/conv/Conv + /model/backbone/stage4/blocks/conv3/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv1/conv/Conv + /model/backbone/context_module/cv1/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/cv1/act/Relu_output_0 copy +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv2/conv/Conv + /model/backbone/context_module/cv2/act/Relu 
+[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/conv/conv/Conv + /model/neck/neck1/conv/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck1/upsample/ConvTranspose +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_after_concat/conv/Conv + /model/neck/neck1/reduce_after_concat/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv2/conv/Conv + /model/neck/neck1/blocks/conv2/act/Relu || /model/neck/neck1/blocks/conv1/conv/Conv + /model/neck/neck1/blocks/conv1/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 171) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 179) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] COPY: 
/model/neck/neck1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv3/conv/Conv + /model/neck/neck1/blocks/conv3/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/conv/conv/Conv + /model/neck/neck2/conv/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck2/upsample/ConvTranspose +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/reduce_skip1/act/Relu_output_0 copy +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_after_concat/conv/Conv + /model/neck/neck2/reduce_after_concat/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv2/conv/Conv + /model/neck/neck2/blocks/conv2/act/Relu || /model/neck/neck2/blocks/conv1/conv/Conv + /model/neck/neck2/blocks/conv1/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 200) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-06:57:15] [I] 
[TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 208) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 216) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv3/conv/Conv + /model/neck/neck2/blocks/conv3/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/bbox_stem/seq/conv/Conv + /model/heads/head1/bbox_stem/seq/act/Relu || /model/heads/head1/pose_stem/seq/conv/Conv + /model/heads/head1/pose_stem/seq/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/conv/conv/Conv + /model/neck/neck3/conv/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head1/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head1/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/neck/neck3/blocks/conv2/conv/Conv + /model/neck/neck3/blocks/conv2/act/Relu || /model/neck/neck3/blocks/conv1/conv/Conv + /model/neck/neck3/blocks/conv1/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/cls_pred/Conv +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_pred/Conv +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_pred/Conv +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 247) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 270) [Shuffle] + 
/model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv3/conv/Conv + /model/neck/neck3/blocks/conv3/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_stem/seq/conv/Conv + /model/heads/head2/pose_stem/seq/act/Relu || /model/heads/head2/bbox_stem/seq/conv/Conv + /model/heads/head2/bbox_stem/seq/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/conv/conv/Conv + /model/neck/neck4/conv/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head2/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head2/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv2/conv/Conv + /model/neck/neck4/blocks/conv2/act/Relu || /model/neck/neck4/blocks/conv1/conv/Conv + /model/neck/neck4/blocks/conv1/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/cls_pred/Conv +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_pred/Conv +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] SHUFFLE: 
/model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_pred/Conv +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 301) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 324) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.2.alpha + (Unnamed Layer* 332) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Add) 
+[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv3/conv/Conv + /model/neck/neck4/blocks/conv3/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/bbox_stem/seq/conv/Conv + /model/heads/head3/bbox_stem/seq/act/Relu || /model/heads/head3/pose_stem/seq/conv/Conv + /model/heads/head3/pose_stem/seq/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head3/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head3/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/cls_pred/Conv +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_pred/Conv +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.2/seq/act/Relu +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_pred/Conv +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice_1.../post_process/Reshape_2]} +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] NMS: batched_nms_26 
+[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 411) [NMS]_1_output[DevicetoShapeHostCopy] +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/28/2023-06:57:15] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] +[12/28/2023-06:57:27] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +262, now: CPU 1280, GPU 3928 (MiB) +[12/28/2023-06:57:29] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +83, GPU +77, now: CPU 1363, GPU 4005 (MiB) +[12/28/2023-06:57:29] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored. +[12/28/2023-07:47:10] [I] [TRT] Total Activation Memory: 7938088448 +[12/28/2023-07:47:10] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/28/2023-07:47:21] [I] [TRT] Total Host Persistent Memory: 294880 +[12/28/2023-07:47:21] [I] [TRT] Total Device Persistent Memory: 38912 +[12/28/2023-07:47:21] [I] [TRT] Total Scratch Memory: 134217728 +[12/28/2023-07:47:21] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 59 MiB, GPU 2131 MiB +[12/28/2023-07:47:21] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 142 steps to complete. +[12/28/2023-07:47:21] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 43.1919ms to assign 13 blocks to 142 nodes requiring 144954880 bytes. +[12/28/2023-07:47:21] [I] [TRT] Total Activation Memory: 144954880 +[12/28/2023-07:47:25] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1749, GPU 5859 (MiB) +[12/28/2023-07:47:25] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +37, GPU +64, now: CPU 37, GPU 64 (MiB) +[12/28/2023-07:47:26] [I] Engine built in 3023.29 sec. 
+[12/28/2023-07:47:26] [I] [TRT] Loaded engine size: 39 MiB +[12/28/2023-07:47:26] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1284, GPU 5473 (MiB) +[12/28/2023-07:47:26] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +37, now: CPU 0, GPU 37 (MiB) +[12/28/2023-07:47:26] [I] Engine deserialized in 0.163177 sec. +[12/28/2023-07:47:26] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1284, GPU 5473 (MiB) +[12/28/2023-07:47:26] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +138, now: CPU 0, GPU 175 (MiB) +[12/28/2023-07:47:26] [I] Setting persistentCacheLimit to 0 bytes. +[12/28/2023-07:47:26] [I] Using random values for input onnx::Cast_0 +[12/28/2023-07:47:26] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/28/2023-07:47:26] [I] Using random values for output graph2_flat_predictions +[12/28/2023-07:47:26] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/28/2023-07:47:26] [I] Starting inference +[12/28/2023-07:47:42] [I] Warmup completed 2 queries over 200 ms +[12/28/2023-07:47:42] [I] Timing trace has 902 queries over 15.0397 s +[12/28/2023-07:47:42] [I] +[12/28/2023-07:47:42] [I] === Trace details === +[12/28/2023-07:47:42] [I] Trace averages of 100 runs: +[12/28/2023-07:47:42] [I] Average on 100 runs - GPU latency: 16.3472 ms - Host latency: 16.459 ms (enqueue 16.4251 ms) +[12/28/2023-07:47:42] [I] Average on 100 runs - GPU latency: 16.15 ms - Host latency: 16.2612 ms (enqueue 16.2291 ms) +[12/28/2023-07:47:42] [I] Average on 100 runs - GPU latency: 16.1256 ms - Host latency: 16.2354 ms (enqueue 16.2036 ms) +[12/28/2023-07:47:42] [I] Average on 100 runs - GPU latency: 16.1875 ms - Host latency: 16.2984 ms (enqueue 16.2633 ms) +[12/28/2023-07:47:42] [I] Average on 100 runs - GPU latency: 16.71 ms - Host latency: 16.8252 ms (enqueue 16.7681 ms) +[12/28/2023-07:47:42] [I] 
Average on 100 runs - GPU latency: 16.8472 ms - Host latency: 16.9645 ms (enqueue 16.9091 ms) +[12/28/2023-07:47:42] [I] Average on 100 runs - GPU latency: 16.7344 ms - Host latency: 16.8529 ms (enqueue 16.795 ms) +[12/28/2023-07:47:42] [I] Average on 100 runs - GPU latency: 16.8209 ms - Host latency: 16.938 ms (enqueue 16.8812 ms) +[12/28/2023-07:47:42] [I] Average on 100 runs - GPU latency: 16.808 ms - Host latency: 16.9249 ms (enqueue 16.8674 ms) +[12/28/2023-07:47:42] [I] +[12/28/2023-07:47:42] [I] === Performance summary === +[12/28/2023-07:47:42] [I] Throughput: 59.9746 qps +[12/28/2023-07:47:42] [I] Latency: min = 15.6501 ms, max = 21.7812 ms, mean = 16.6397 ms, median = 16.6777 ms, percentile(90%) = 17.0371 ms, percentile(95%) = 17.3477 ms, percentile(99%) = 18.3193 ms +[12/28/2023-07:47:42] [I] Enqueue Time: min = 15.6228 ms, max = 21.7477 ms, mean = 16.5933 ms, median = 16.6235 ms, percentile(90%) = 16.9873 ms, percentile(95%) = 17.2817 ms, percentile(99%) = 18.3242 ms +[12/28/2023-07:47:42] [I] H2D Latency: min = 0.0800781 ms, max = 0.183487 ms, mean = 0.0964996 ms, median = 0.0966797 ms, percentile(90%) = 0.0991211 ms, percentile(95%) = 0.0993652 ms, percentile(99%) = 0.10083 ms +[12/28/2023-07:47:42] [I] GPU Compute Time: min = 15.5415 ms, max = 21.581 ms, mean = 16.5254 ms, median = 16.561 ms, percentile(90%) = 16.9219 ms, percentile(95%) = 17.2334 ms, percentile(99%) = 18.2041 ms +[12/28/2023-07:47:42] [I] D2H Latency: min = 0.00292969 ms, max = 0.046875 ms, mean = 0.0178162 ms, median = 0.0195312 ms, percentile(90%) = 0.0253906 ms, percentile(95%) = 0.0263672 ms, percentile(99%) = 0.0361328 ms +[12/28/2023-07:47:42] [I] Total Host Walltime: 15.0397 s +[12/28/2023-07:47:42] [I] Total GPU Compute Time: 14.9059 s +[12/28/2023-07:47:42] [I] Explanations of the performance metrics are printed in the verbose logs. 
+[12/28/2023-07:47:42] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_m_fp32.onnx --int8 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_m_fp32.onnx.int8.engine diff --git a/yolo_nas_pose_m_fp32.onnx.usage.txt b/yolo_nas_pose_m_fp32.onnx.usage.txt new file mode 100644 index 0000000000000000000000000000000000000000..82c86d8a86bb1f2c568a7efd37c7bceeda694bb3 --- /dev/null +++ b/yolo_nas_pose_m_fp32.onnx.usage.txt @@ -0,0 +1,58 @@ + +Model exported successfully to yolo_nas_pose_m_fp32.onnx +Model expects input image of shape [1, 3, 640, 640] +Input image dtype is torch.uint8 + +Exported model already contains preprocessing (normalization) step, so you don't need to do it manually. +Preprocessing steps to be applied to input image are: +Sequential( + (0): CastTensorTo(dtype=torch.float32) + (1): ChannelSelect(channels_indexes=tensor([2, 1, 0])) + (2): ApplyMeanStd(mean=[0.], scale=[255.]) +) + + +Exported model contains postprocessing (NMS) step with the following parameters: + num_pre_nms_predictions=1000 + max_predictions_per_image=10 + nms_threshold=0.5 + confidence_threshold=0.15 + output_predictions_format=flat + + +Exported model is in ONNX format and can be used with ONNXRuntime +To run inference with ONNXRuntime, please use the following code snippet: + + import onnxruntime + import numpy as np + session = onnxruntime.InferenceSession("yolo_nas_pose_m_fp32.onnx", providers=["CUDAExecutionProvider", "CPUExecutionProvider"]) + inputs = [o.name for o in session.get_inputs()] + outputs = [o.name for o in session.get_outputs()] + + example_input_image = np.zeros((1, 3, 640, 640)).astype(np.uint8) + predictions = session.run(outputs, {inputs[0]: example_input_image}) + +Exported model can also be used with TensorRT +To run inference with TensorRT, please see TensorRT deployment documentation +You can benchmark the model using the following code snippet: + + trtexec --onnx=yolo_nas_pose_m_fp32.onnx --fp16 
--avgRuns=100 --duration=15 + + +Exported model has predictions in flat format: + +# flat_predictions is a 2D array of [N,K] shape +# Each row represents (image_index, x_min, y_min, x_max, y_max, confidence, joints...) +# Please note all values are floats, so you have to convert them to integers if needed + +[flat_predictions] = predictions +pred_bboxes = flat_predictions[:, 1:5] +pred_scores = flat_predictions[:, 5] +pred_joints = flat_predictions[:, 6:].reshape((len(pred_bboxes), -1, 3)) +for i in range(len(pred_bboxes)): + confidence = pred_scores[i] + x_min, y_min, x_max, y_max = pred_bboxes[i] + print(f"Detected pose with confidence={confidence}, x_min={x_min}, y_min={y_min}, x_max={x_max}, y_max={y_max}") + for joint_index, (x, y, confidence) in enumerate(pred_joints[i]): + print(f"Joint {joint_index} has coordinates x={x}, y={y}, confidence={confidence}") + diff --git a/yolo_nas_pose_m_int8.onnx b/yolo_nas_pose_m_int8.onnx new file mode 100644 index 0000000000000000000000000000000000000000..4a9b0ddeb6e902be51f2981ed17eb4986c2a94a4 --- /dev/null +++ b/yolo_nas_pose_m_int8.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62ff9da802e83719d25b2e0040d2bac6c6e8bcce0904dc1e1be2a2eb397add97 +size 156781680 diff --git a/yolo_nas_pose_m_int8.onnx.best.engine b/yolo_nas_pose_m_int8.onnx.best.engine new file mode 100644 index 0000000000000000000000000000000000000000..b51d6816dae6ed10e0d8e572e3b90fef152b9214 --- /dev/null +++ b/yolo_nas_pose_m_int8.onnx.best.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:095580091b0635e20541e483c6357dbce9cf2ce19bfab25e1197c375f9ca1807 +size 41724504 diff --git a/yolo_nas_pose_m_int8.onnx.best.engine.err b/yolo_nas_pose_m_int8.onnx.best.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..9024b7907b4214505f5b71c187f522e79707eb29 --- /dev/null +++ b/yolo_nas_pose_m_int8.onnx.best.engine.err @@ -0,0 +1,7 @@ +[12/28/2023-10:38:46] [W] 
[TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/28/2023-10:38:46] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-10:38:50] [W] [TRT] Calibrator won't be used in explicit precision mode. Use quantization aware training to generate network with Quantize/Dequantize nodes. +[12/28/2023-11:17:13] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. +[12/28/2023-11:17:13] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/28/2023-11:17:13] [W] * GPU compute time is unstable, with coefficient of variance = 6.37849%. +[12/28/2023-11:17:13] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. diff --git a/yolo_nas_pose_m_int8.onnx.best.engine.log b/yolo_nas_pose_m_int8.onnx.best.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..6227b61b6c9c46ed8a93fbb1e4d18577c73cc42d --- /dev/null +++ b/yolo_nas_pose_m_int8.onnx.best.engine.log @@ -0,0 +1,333 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_m_int8.onnx --best --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_m_int8.onnx.best.engine +[12/28/2023-10:38:43] [I] === Model Options === +[12/28/2023-10:38:43] [I] Format: ONNX +[12/28/2023-10:38:43] [I] Model: yolo_nas_pose_m_int8.onnx +[12/28/2023-10:38:43] [I] Output: +[12/28/2023-10:38:43] [I] === Build Options === +[12/28/2023-10:38:43] [I] Max batch: explicit batch +[12/28/2023-10:38:43] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-10:38:43] [I] minTiming: 1 +[12/28/2023-10:38:43] [I] avgTiming: 8 +[12/28/2023-10:38:43] [I] Precision: FP32+FP16+INT8 +[12/28/2023-10:38:43] [I] 
LayerPrecisions: +[12/28/2023-10:38:43] [I] Calibration: Dynamic +[12/28/2023-10:38:43] [I] Refit: Disabled +[12/28/2023-10:38:43] [I] Sparsity: Disabled +[12/28/2023-10:38:43] [I] Safe mode: Disabled +[12/28/2023-10:38:43] [I] DirectIO mode: Disabled +[12/28/2023-10:38:43] [I] Restricted mode: Disabled +[12/28/2023-10:38:43] [I] Build only: Disabled +[12/28/2023-10:38:43] [I] Save engine: yolo_nas_pose_m_int8.onnx.best.engine +[12/28/2023-10:38:43] [I] Load engine: +[12/28/2023-10:38:43] [I] Profiling verbosity: 0 +[12/28/2023-10:38:43] [I] Tactic sources: Using default tactic sources +[12/28/2023-10:38:43] [I] timingCacheMode: local +[12/28/2023-10:38:43] [I] timingCacheFile: +[12/28/2023-10:38:43] [I] Heuristic: Disabled +[12/28/2023-10:38:43] [I] Preview Features: Use default preview flags. +[12/28/2023-10:38:43] [I] Input(s)s format: fp32:CHW +[12/28/2023-10:38:43] [I] Output(s)s format: fp32:CHW +[12/28/2023-10:38:43] [I] Input build shapes: model +[12/28/2023-10:38:43] [I] Input calibration shapes: model +[12/28/2023-10:38:43] [I] === System Options === +[12/28/2023-10:38:43] [I] Device: 0 +[12/28/2023-10:38:43] [I] DLACore: +[12/28/2023-10:38:43] [I] Plugins: +[12/28/2023-10:38:43] [I] === Inference Options === +[12/28/2023-10:38:43] [I] Batch: Explicit +[12/28/2023-10:38:43] [I] Input inference shapes: model +[12/28/2023-10:38:43] [I] Iterations: 10 +[12/28/2023-10:38:43] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-10:38:43] [I] Sleep time: 0ms +[12/28/2023-10:38:43] [I] Idle time: 0ms +[12/28/2023-10:38:43] [I] Streams: 1 +[12/28/2023-10:38:43] [I] ExposeDMA: Disabled +[12/28/2023-10:38:43] [I] Data transfers: Enabled +[12/28/2023-10:38:43] [I] Spin-wait: Disabled +[12/28/2023-10:38:43] [I] Multithreading: Disabled +[12/28/2023-10:38:43] [I] CUDA Graph: Disabled +[12/28/2023-10:38:43] [I] Separate profiling: Disabled +[12/28/2023-10:38:43] [I] Time Deserialize: Disabled +[12/28/2023-10:38:43] [I] Time Refit: Disabled +[12/28/2023-10:38:43] [I] NVTX 
verbosity: 0 +[12/28/2023-10:38:43] [I] Persistent Cache Ratio: 0 +[12/28/2023-10:38:43] [I] Inputs: +[12/28/2023-10:38:43] [I] === Reporting Options === +[12/28/2023-10:38:43] [I] Verbose: Disabled +[12/28/2023-10:38:43] [I] Averages: 100 inferences +[12/28/2023-10:38:43] [I] Percentiles: 90,95,99 +[12/28/2023-10:38:43] [I] Dump refittable layers:Disabled +[12/28/2023-10:38:43] [I] Dump output: Disabled +[12/28/2023-10:38:43] [I] Profile: Disabled +[12/28/2023-10:38:43] [I] Export timing to JSON file: +[12/28/2023-10:38:43] [I] Export output to JSON file: +[12/28/2023-10:38:43] [I] Export profile to JSON file: +[12/28/2023-10:38:43] [I] +[12/28/2023-10:38:43] [I] === Device Information === +[12/28/2023-10:38:43] [I] Selected Device: Orin +[12/28/2023-10:38:43] [I] Compute Capability: 8.7 +[12/28/2023-10:38:43] [I] SMs: 8 +[12/28/2023-10:38:43] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-10:38:43] [I] Device Global Memory: 7471 MiB +[12/28/2023-10:38:43] [I] Shared Memory per SM: 164 KiB +[12/28/2023-10:38:43] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-10:38:43] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-10:38:43] [I] +[12/28/2023-10:38:43] [I] TensorRT version: 8.5.2 +[12/28/2023-10:38:43] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 2979 (MiB) +[12/28/2023-10:38:46] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +284, now: CPU 574, GPU 3284 (MiB) +[12/28/2023-10:38:46] [I] Start parsing network model +[12/28/2023-10:38:46] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-10:38:46] [I] [TRT] Input filename: yolo_nas_pose_m_int8.onnx +[12/28/2023-10:38:46] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-10:38:46] [I] [TRT] Opset version: 17 +[12/28/2023-10:38:46] [I] [TRT] Producer name: pytorch +[12/28/2023-10:38:46] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-10:38:46] [I] [TRT] Domain: +[12/28/2023-10:38:46] [I] [TRT] Model version: 0 
+[12/28/2023-10:38:46] [I] [TRT] Doc string: +[12/28/2023-10:38:46] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-10:38:50] [I] Finish parsing network model +[12/28/2023-10:38:54] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/28/2023-10:38:54] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 1274) [Constant] +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 1275) [Constant] +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 1276) [Constant] +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/backbone/stem/conv/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stem.conv.rbr_reparam.weight + /model/backbone/stem/conv/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stem/conv/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.downsample.rbr_reparam.weight + /model/backbone/stage1/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/downsample/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.conv2.conv.weight + /model/backbone/stage1/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/conv2/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.conv1.conv.weight + /model/backbone/stage1/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/conv1/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: 
/model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 509) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 525) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, 
/model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_1 +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_0 +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.conv3.conv.weight + /model/backbone/stage1/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/conv3/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.reduce_skip2.conv.weight + /model/neck/neck2/reduce_skip2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/reduce_skip2/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.downsample.rbr_reparam.weight + /model/backbone/stage2/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/downsample/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.downsample.conv.weight + /model/neck/neck2/downsample/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/downsample/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.conv2.conv.weight + /model/backbone/stage2/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/conv2/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.conv1.conv.weight + /model/backbone/stage2/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/conv1/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.0.cv1.rbr_reparam.weight + 
/model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 572) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 588) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] 
CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.2.cv1.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.2.cv2.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 604) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_2 +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_1 +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_0 +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.conv3.conv.weight + /model/backbone/stage2/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/conv3/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.reduce_skip2.conv.weight + /model/neck/neck1/reduce_skip2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/reduce_skip2/conv/Conv || model.neck.neck2.reduce_skip1.conv.weight + /model/neck/neck2/reduce_skip1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/reduce_skip1/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.downsample.rbr_reparam.weight + 
/model/backbone/stage3/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/downsample/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.downsample.conv.weight + /model/neck/neck1/downsample/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/downsample/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.conv2.conv.weight + /model/backbone/stage3/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/conv2/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.conv1.conv.weight + /model/backbone/stage3/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/conv1/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 654) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear 
+[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 670) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.2.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.2.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 686) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.3.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.3.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 702) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.4.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.4.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.4.alpha + (Unnamed Layer* 718) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Mul, 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Add) +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_4 +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_3 +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_2 +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_1 +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_0 +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.conv3.conv.weight + /model/backbone/stage3/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/conv3/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.reduce_skip1.conv.weight + /model/neck/neck1/reduce_skip1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/reduce_skip1/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.downsample.rbr_reparam.weight + /model/backbone/stage4/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/downsample/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.conv2.conv.weight + /model/backbone/stage4/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/conv2/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.conv1.conv.weight + /model/backbone/stage4/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/conv1/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: 
/model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 759) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 775) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, 
/model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.conv3.conv.weight + /model/backbone/stage4/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/conv3/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.context_module.cv1.conv.weight + /model/backbone/context_module/cv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/context_module/cv1/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/m.2/MaxPool_output_0 copy +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.context_module.cv2.conv.weight + /model/backbone/context_module/cv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/context_module/cv2/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.conv.conv.weight + /model/neck/neck1/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/conv/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] DECONVOLUTION: model.neck.neck1.upsample.weight + /model/neck/neck1/upsample/_weight_quantizer/QuantizeLinear + /model/neck/neck1/upsample/ConvTranspose +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.reduce_after_concat.conv.weight + /model/neck/neck1/reduce_after_concat/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/reduce_after_concat/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.conv2.conv.weight + /model/neck/neck1/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/conv2/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] 
[GpuLayer] CONVOLUTION: model.neck.neck1.blocks.conv1.conv.weight + /model/neck/neck1/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/conv1/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 840) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] POINTWISE: 
PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 856) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.conv3.conv.weight + /model/neck/neck1/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/conv3/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.conv.conv.weight + /model/neck/neck2/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/conv/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] DECONVOLUTION: model.neck.neck2.upsample.weight + /model/neck/neck2/upsample/_weight_quantizer/QuantizeLinear + /model/neck/neck2/upsample/ConvTranspose +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/Concat_/model/neck/neck2/reduce_skip1/act/Relu_output_0_clone_1 copy +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.reduce_after_concat.conv.weight + /model/neck/neck2/reduce_after_concat/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/reduce_after_concat/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.conv2.conv.weight + /model/neck/neck2/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/conv2/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.conv1.conv.weight + /model/neck/neck2/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/conv1/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + 
/model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 905) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 921) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.2.cv1.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + 
/model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.2.cv2.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 937) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.conv3.conv.weight + /model/neck/neck2/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/conv3/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.bbox_stem.seq.conv.weight + /model/heads/head1/bbox_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/bbox_stem/seq/conv/Conv || model.heads.head1.pose_stem.seq.conv.weight + /model/heads/head1/pose_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_stem/seq/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.conv.conv.weight + /model/neck/neck3/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/conv/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.reg_convs.0.seq.conv.weight + /model/heads/head1/reg_convs/reg_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv || model.heads.head1.cls_convs.0.seq.conv.weight + /model/heads/head1/cls_convs/cls_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.pose_convs.0.seq.conv.weight + 
/model/heads/head1/pose_convs/pose_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.conv2.conv.weight + /model/neck/neck3/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/conv2/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.conv1.conv.weight + /model/neck/neck3/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/conv1/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.cls_pred.weight + /model/heads/head1/cls_pred/_weight_quantizer/QuantizeLinear + /model/heads/head1/cls_pred/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.reg_pred.weight + /model/heads/head1/reg_pred/_weight_quantizer/QuantizeLinear + /model/heads/head1/reg_pred/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.pose_convs.1.seq.conv.weight + /model/heads/head1/pose_convs/pose_convs.1/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.pose_pred.weight + /model/heads/head1/pose_pred/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_pred/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.0.cv1.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax 
+[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.0.cv2.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 1002) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.1.cv1.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.1.cv2.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 1043) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.conv3.conv.weight + /model/neck/neck3/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/conv3/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.pose_stem.seq.conv.weight + /model/heads/head2/pose_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_stem/seq/conv/Conv || 
model.heads.head2.bbox_stem.seq.conv.weight + /model/heads/head2/bbox_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/bbox_stem/seq/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.conv.conv.weight + /model/neck/neck4/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/conv/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.reg_convs.0.seq.conv.weight + /model/heads/head2/reg_convs/reg_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv || model.heads.head2.cls_convs.0.seq.conv.weight + /model/heads/head2/cls_convs/cls_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.pose_convs.0.seq.conv.weight + /model/heads/head2/pose_convs/pose_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.conv2.conv.weight + /model/neck/neck4/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/conv2/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.conv1.conv.weight + /model/neck/neck4/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/conv1/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.cls_pred.weight + /model/heads/head2/cls_pred/_weight_quantizer/QuantizeLinear + /model/heads/head2/cls_pred/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.reg_pred.weight + /model/heads/head2/reg_pred/_weight_quantizer/QuantizeLinear + /model/heads/head2/reg_pred/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.pose_convs.1.seq.conv.weight + 
/model/heads/head2/pose_convs/pose_convs.1/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.pose_pred.weight + /model/heads/head2/pose_pred/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_pred/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.0.cv1.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.0.cv2.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 1108) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.1.cv1.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] 
CONVOLUTION: model.neck.neck4.blocks.bottlenecks.1.cv2.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 1149) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.2.cv1.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.2.cv2.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.2.alpha + (Unnamed Layer* 1165) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.conv3.conv.weight + /model/neck/neck4/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/conv3/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.bbox_stem.seq.conv.weight + /model/heads/head3/bbox_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/bbox_stem/seq/conv/Conv || model.heads.head3.pose_stem.seq.conv.weight + /model/heads/head3/pose_stem/seq/conv/_weight_quantizer/QuantizeLinear + 
/model/heads/head3/pose_stem/seq/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.reg_convs.0.seq.conv.weight + /model/heads/head3/reg_convs/reg_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv || model.heads.head3.cls_convs.0.seq.conv.weight + /model/heads/head3/cls_convs/cls_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_convs.0.seq.conv.weight + /model/heads/head3/pose_convs/pose_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.cls_pred.weight + /model/heads/head3/cls_pred/_weight_quantizer/QuantizeLinear + /model/heads/head3/cls_pred/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.reg_pred.weight + /model/heads/head3/reg_pred/_weight_quantizer/QuantizeLinear + /model/heads/head3/reg_pred/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_convs.1.seq.conv.weight + /model/heads/head3/pose_convs/pose_convs.1/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_convs.2.seq.conv.weight + /model/heads/head3/pose_convs/pose_convs.2/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_pred.weight + /model/heads/head3/pose_pred/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_pred/Conv 
+[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice_1.../post_process/Reshape_2]} +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] NMS: batched_nms_250 +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 1278) [NMS]_1_output[DevicetoShapeHostCopy] +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/28/2023-10:38:54] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] +[12/28/2023-10:39:08] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +490, now: CPU 1413, GPU 4003 (MiB) +[12/28/2023-10:39:10] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +83, GPU +74, now: CPU 1496, GPU 4077 (MiB) +[12/28/2023-10:39:10] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored. +[12/28/2023-11:16:43] [I] [TRT] Total Activation Memory: 7962418688 +[12/28/2023-11:16:43] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/28/2023-11:16:52] [I] [TRT] Total Host Persistent Memory: 320224 +[12/28/2023-11:16:52] [I] [TRT] Total Device Persistent Memory: 38912 +[12/28/2023-11:16:52] [I] [TRT] Total Scratch Memory: 134217728 +[12/28/2023-11:16:52] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 188 MiB, GPU 304 MiB +[12/28/2023-11:16:52] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 165 steps to complete. +[12/28/2023-11:16:52] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 49.1135ms to assign 14 blocks to 165 nodes requiring 151026176 bytes. 
+[12/28/2023-11:16:52] [I] [TRT] Total Activation Memory: 151026176 +[12/28/2023-11:16:56] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +1, GPU +0, now: CPU 1870, GPU 5260 (MiB) +[12/28/2023-11:16:56] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +37, GPU +64, now: CPU 37, GPU 64 (MiB) +[12/28/2023-11:16:57] [I] Engine built in 2293.98 sec. +[12/28/2023-11:16:57] [I] [TRT] Loaded engine size: 39 MiB +[12/28/2023-11:16:57] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU -1, now: CPU 1274, GPU 5179 (MiB) +[12/28/2023-11:16:57] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +37, now: CPU 0, GPU 37 (MiB) +[12/28/2023-11:16:58] [I] Engine deserialized in 0.373087 sec. +[12/28/2023-11:16:58] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1274, GPU 5179 (MiB) +[12/28/2023-11:16:58] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +144, now: CPU 0, GPU 181 (MiB) +[12/28/2023-11:16:58] [I] Setting persistentCacheLimit to 0 bytes. 
+[12/28/2023-11:16:58] [I] Using random values for input onnx::Cast_0 +[12/28/2023-11:16:58] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/28/2023-11:16:58] [I] Using random values for output graph2_flat_predictions +[12/28/2023-11:16:58] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/28/2023-11:16:58] [I] Starting inference +[12/28/2023-11:17:13] [I] Warmup completed 7 queries over 200 ms +[12/28/2023-11:17:13] [I] Timing trace has 674 queries over 15.0431 s +[12/28/2023-11:17:13] [I] +[12/28/2023-11:17:13] [I] === Trace details === +[12/28/2023-11:17:13] [I] Trace averages of 100 runs: +[12/28/2023-11:17:13] [I] Average on 100 runs - GPU latency: 22.5286 ms - Host latency: 22.6467 ms (enqueue 22.5798 ms) +[12/28/2023-11:17:13] [I] Average on 100 runs - GPU latency: 21.9606 ms - Host latency: 22.0766 ms (enqueue 22.0285 ms) +[12/28/2023-11:17:13] [I] Average on 100 runs - GPU latency: 21.7562 ms - Host latency: 21.8686 ms (enqueue 21.8288 ms) +[12/28/2023-11:17:13] [I] Average on 100 runs - GPU latency: 22.3657 ms - Host latency: 22.4842 ms (enqueue 22.4252 ms) +[12/28/2023-11:17:13] [I] Average on 100 runs - GPU latency: 22.2386 ms - Host latency: 22.3543 ms (enqueue 22.2991 ms) +[12/28/2023-11:17:13] [I] Average on 100 runs - GPU latency: 22.0637 ms - Host latency: 22.1763 ms (enqueue 22.1317 ms) +[12/28/2023-11:17:13] [I] +[12/28/2023-11:17:13] [I] === Performance summary === +[12/28/2023-11:17:13] [I] Throughput: 44.8046 qps +[12/28/2023-11:17:13] [I] Latency: min = 20.2925 ms, max = 33.0699 ms, mean = 22.2836 ms, median = 22.1313 ms, percentile(90%) = 22.8691 ms, percentile(95%) = 23.7305 ms, percentile(99%) = 30.8477 ms +[12/28/2023-11:17:13] [I] Enqueue Time: min = 20.2573 ms, max = 32.9944 ms, mean = 22.23 ms, median = 22.0717 ms, percentile(90%) = 22.8018 ms, percentile(95%) = 23.6631 ms, percentile(99%) = 29.8916 ms +[12/28/2023-11:17:13] [I] H2D Latency: min = 0.0800781 ms, max = 0.120361 
ms, mean = 0.0955699 ms, median = 0.0957031 ms, percentile(90%) = 0.0986328 ms, percentile(95%) = 0.0991211 ms, percentile(99%) = 0.100586 ms +[12/28/2023-11:17:13] [I] GPU Compute Time: min = 20.1782 ms, max = 32.9492 ms, mean = 22.1676 ms, median = 22.0125 ms, percentile(90%) = 22.7468 ms, percentile(95%) = 23.6074 ms, percentile(99%) = 30.7334 ms +[12/28/2023-11:17:13] [I] D2H Latency: min = 0.00292969 ms, max = 0.0585938 ms, mean = 0.020397 ms, median = 0.0224609 ms, percentile(90%) = 0.0273438 ms, percentile(95%) = 0.0292969 ms, percentile(99%) = 0.0351562 ms +[12/28/2023-11:17:13] [I] Total Host Walltime: 15.0431 s +[12/28/2023-11:17:13] [I] Total GPU Compute Time: 14.941 s +[12/28/2023-11:17:13] [I] Explanations of the performance metrics are printed in the verbose logs. +[12/28/2023-11:17:13] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_m_int8.onnx --best --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_m_int8.onnx.best.engine diff --git a/yolo_nas_pose_m_int8.onnx.engine.err b/yolo_nas_pose_m_int8.onnx.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..eb815d6404a5aaa7e739d77ba74f973071be41ca --- /dev/null +++ b/yolo_nas_pose_m_int8.onnx.engine.err @@ -0,0 +1,8 @@ +[12/28/2023-10:38:30] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/28/2023-10:38:30] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-10:38:34] [E] Error[4]: [network.cpp::validate::2922] Error Code 4: Internal Error (Int8 precision has been set for a layer or layer output, but int8 is not configured in the builder) +[12/28/2023-10:38:34] [E] Error[2]: [builder.cpp::buildSerializedNetwork::751] Error Code 2: Internal Error (Assertion engine != nullptr failed. 
) +[12/28/2023-10:38:34] [E] Engine could not be created from network +[12/28/2023-10:38:34] [E] Building engine failed +[12/28/2023-10:38:34] [E] Failed to create engine from model or file. +[12/28/2023-10:38:34] [E] Engine set up failed diff --git a/yolo_nas_pose_m_int8.onnx.engine.log b/yolo_nas_pose_m_int8.onnx.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..a06fa2805612a15ef1ceb0397e884c318a2979c1 --- /dev/null +++ b/yolo_nas_pose_m_int8.onnx.engine.log @@ -0,0 +1,91 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_m_int8.onnx --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_m_int8.onnx.engine +[12/28/2023-10:38:25] [I] === Model Options === +[12/28/2023-10:38:25] [I] Format: ONNX +[12/28/2023-10:38:25] [I] Model: yolo_nas_pose_m_int8.onnx +[12/28/2023-10:38:25] [I] Output: +[12/28/2023-10:38:25] [I] === Build Options === +[12/28/2023-10:38:25] [I] Max batch: explicit batch +[12/28/2023-10:38:25] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-10:38:25] [I] minTiming: 1 +[12/28/2023-10:38:25] [I] avgTiming: 8 +[12/28/2023-10:38:25] [I] Precision: FP32 +[12/28/2023-10:38:25] [I] LayerPrecisions: +[12/28/2023-10:38:25] [I] Calibration: +[12/28/2023-10:38:25] [I] Refit: Disabled +[12/28/2023-10:38:25] [I] Sparsity: Disabled +[12/28/2023-10:38:25] [I] Safe mode: Disabled +[12/28/2023-10:38:25] [I] DirectIO mode: Disabled +[12/28/2023-10:38:25] [I] Restricted mode: Disabled +[12/28/2023-10:38:25] [I] Build only: Disabled +[12/28/2023-10:38:25] [I] Save engine: yolo_nas_pose_m_int8.onnx.engine +[12/28/2023-10:38:25] [I] Load engine: +[12/28/2023-10:38:25] [I] Profiling verbosity: 0 +[12/28/2023-10:38:25] [I] Tactic sources: Using default tactic sources +[12/28/2023-10:38:25] [I] timingCacheMode: local +[12/28/2023-10:38:25] [I] timingCacheFile: +[12/28/2023-10:38:25] [I] Heuristic: Disabled 
+[12/28/2023-10:38:25] [I] Preview Features: Use default preview flags. +[12/28/2023-10:38:25] [I] Input(s)s format: fp32:CHW +[12/28/2023-10:38:25] [I] Output(s)s format: fp32:CHW +[12/28/2023-10:38:25] [I] Input build shapes: model +[12/28/2023-10:38:25] [I] Input calibration shapes: model +[12/28/2023-10:38:25] [I] === System Options === +[12/28/2023-10:38:25] [I] Device: 0 +[12/28/2023-10:38:25] [I] DLACore: +[12/28/2023-10:38:25] [I] Plugins: +[12/28/2023-10:38:25] [I] === Inference Options === +[12/28/2023-10:38:25] [I] Batch: Explicit +[12/28/2023-10:38:25] [I] Input inference shapes: model +[12/28/2023-10:38:25] [I] Iterations: 10 +[12/28/2023-10:38:25] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-10:38:25] [I] Sleep time: 0ms +[12/28/2023-10:38:25] [I] Idle time: 0ms +[12/28/2023-10:38:25] [I] Streams: 1 +[12/28/2023-10:38:25] [I] ExposeDMA: Disabled +[12/28/2023-10:38:25] [I] Data transfers: Enabled +[12/28/2023-10:38:25] [I] Spin-wait: Disabled +[12/28/2023-10:38:25] [I] Multithreading: Disabled +[12/28/2023-10:38:25] [I] CUDA Graph: Disabled +[12/28/2023-10:38:25] [I] Separate profiling: Disabled +[12/28/2023-10:38:25] [I] Time Deserialize: Disabled +[12/28/2023-10:38:25] [I] Time Refit: Disabled +[12/28/2023-10:38:25] [I] NVTX verbosity: 0 +[12/28/2023-10:38:25] [I] Persistent Cache Ratio: 0 +[12/28/2023-10:38:25] [I] Inputs: +[12/28/2023-10:38:25] [I] === Reporting Options === +[12/28/2023-10:38:25] [I] Verbose: Disabled +[12/28/2023-10:38:25] [I] Averages: 100 inferences +[12/28/2023-10:38:25] [I] Percentiles: 90,95,99 +[12/28/2023-10:38:25] [I] Dump refittable layers:Disabled +[12/28/2023-10:38:25] [I] Dump output: Disabled +[12/28/2023-10:38:25] [I] Profile: Disabled +[12/28/2023-10:38:25] [I] Export timing to JSON file: +[12/28/2023-10:38:25] [I] Export output to JSON file: +[12/28/2023-10:38:25] [I] Export profile to JSON file: +[12/28/2023-10:38:25] [I] +[12/28/2023-10:38:25] [I] === Device Information === +[12/28/2023-10:38:25] [I] Selected 
Device: Orin +[12/28/2023-10:38:25] [I] Compute Capability: 8.7 +[12/28/2023-10:38:25] [I] SMs: 8 +[12/28/2023-10:38:25] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-10:38:25] [I] Device Global Memory: 7471 MiB +[12/28/2023-10:38:25] [I] Shared Memory per SM: 164 KiB +[12/28/2023-10:38:25] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-10:38:25] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-10:38:25] [I] +[12/28/2023-10:38:25] [I] TensorRT version: 8.5.2 +[12/28/2023-10:38:25] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 2980 (MiB) +[12/28/2023-10:38:28] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +286, now: CPU 574, GPU 3286 (MiB) +[12/28/2023-10:38:28] [I] Start parsing network model +[12/28/2023-10:38:30] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-10:38:30] [I] [TRT] Input filename: yolo_nas_pose_m_int8.onnx +[12/28/2023-10:38:30] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-10:38:30] [I] [TRT] Opset version: 17 +[12/28/2023-10:38:30] [I] [TRT] Producer name: pytorch +[12/28/2023-10:38:30] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-10:38:30] [I] [TRT] Domain: +[12/28/2023-10:38:30] [I] [TRT] Model version: 0 +[12/28/2023-10:38:30] [I] [TRT] Doc string: +[12/28/2023-10:38:30] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-10:38:34] [I] Finish parsing network model +&&&& FAILED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_m_int8.onnx --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_m_int8.onnx.engine diff --git a/yolo_nas_pose_m_int8.onnx.fp16.engine.err b/yolo_nas_pose_m_int8.onnx.fp16.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..8bca21abaa9dec234dcfa9110626f95a7818df66 --- /dev/null +++ b/yolo_nas_pose_m_int8.onnx.fp16.engine.err @@ -0,0 +1,8 @@ +[12/28/2023-10:38:39] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has 
been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/28/2023-10:38:39] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-10:38:42] [E] Error[4]: [network.cpp::validate::2922] Error Code 4: Internal Error (Int8 precision has been set for a layer or layer output, but int8 is not configured in the builder) +[12/28/2023-10:38:42] [E] Error[2]: [builder.cpp::buildSerializedNetwork::751] Error Code 2: Internal Error (Assertion engine != nullptr failed. ) +[12/28/2023-10:38:42] [E] Engine could not be created from network +[12/28/2023-10:38:42] [E] Building engine failed +[12/28/2023-10:38:42] [E] Failed to create engine from model or file. +[12/28/2023-10:38:42] [E] Engine set up failed diff --git a/yolo_nas_pose_m_int8.onnx.fp16.engine.log b/yolo_nas_pose_m_int8.onnx.fp16.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..44cfb13bf28b8f6e907719f6c83fc67212225299 --- /dev/null +++ b/yolo_nas_pose_m_int8.onnx.fp16.engine.log @@ -0,0 +1,91 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_m_int8.onnx --fp16 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_m_int8.onnx.fp16.engine +[12/28/2023-10:38:35] [I] === Model Options === +[12/28/2023-10:38:35] [I] Format: ONNX +[12/28/2023-10:38:35] [I] Model: yolo_nas_pose_m_int8.onnx +[12/28/2023-10:38:35] [I] Output: +[12/28/2023-10:38:35] [I] === Build Options === +[12/28/2023-10:38:35] [I] Max batch: explicit batch +[12/28/2023-10:38:35] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-10:38:35] [I] minTiming: 1 +[12/28/2023-10:38:35] [I] avgTiming: 8 +[12/28/2023-10:38:35] [I] Precision: FP32+FP16 +[12/28/2023-10:38:35] [I] LayerPrecisions: +[12/28/2023-10:38:35] [I] Calibration: +[12/28/2023-10:38:35] [I] Refit: Disabled +[12/28/2023-10:38:35] [I] 
Sparsity: Disabled +[12/28/2023-10:38:35] [I] Safe mode: Disabled +[12/28/2023-10:38:35] [I] DirectIO mode: Disabled +[12/28/2023-10:38:35] [I] Restricted mode: Disabled +[12/28/2023-10:38:35] [I] Build only: Disabled +[12/28/2023-10:38:35] [I] Save engine: yolo_nas_pose_m_int8.onnx.fp16.engine +[12/28/2023-10:38:35] [I] Load engine: +[12/28/2023-10:38:35] [I] Profiling verbosity: 0 +[12/28/2023-10:38:35] [I] Tactic sources: Using default tactic sources +[12/28/2023-10:38:35] [I] timingCacheMode: local +[12/28/2023-10:38:35] [I] timingCacheFile: +[12/28/2023-10:38:35] [I] Heuristic: Disabled +[12/28/2023-10:38:35] [I] Preview Features: Use default preview flags. +[12/28/2023-10:38:35] [I] Input(s)s format: fp32:CHW +[12/28/2023-10:38:35] [I] Output(s)s format: fp32:CHW +[12/28/2023-10:38:35] [I] Input build shapes: model +[12/28/2023-10:38:35] [I] Input calibration shapes: model +[12/28/2023-10:38:35] [I] === System Options === +[12/28/2023-10:38:35] [I] Device: 0 +[12/28/2023-10:38:35] [I] DLACore: +[12/28/2023-10:38:35] [I] Plugins: +[12/28/2023-10:38:35] [I] === Inference Options === +[12/28/2023-10:38:35] [I] Batch: Explicit +[12/28/2023-10:38:35] [I] Input inference shapes: model +[12/28/2023-10:38:35] [I] Iterations: 10 +[12/28/2023-10:38:35] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-10:38:35] [I] Sleep time: 0ms +[12/28/2023-10:38:35] [I] Idle time: 0ms +[12/28/2023-10:38:35] [I] Streams: 1 +[12/28/2023-10:38:35] [I] ExposeDMA: Disabled +[12/28/2023-10:38:35] [I] Data transfers: Enabled +[12/28/2023-10:38:35] [I] Spin-wait: Disabled +[12/28/2023-10:38:35] [I] Multithreading: Disabled +[12/28/2023-10:38:35] [I] CUDA Graph: Disabled +[12/28/2023-10:38:35] [I] Separate profiling: Disabled +[12/28/2023-10:38:35] [I] Time Deserialize: Disabled +[12/28/2023-10:38:35] [I] Time Refit: Disabled +[12/28/2023-10:38:35] [I] NVTX verbosity: 0 +[12/28/2023-10:38:35] [I] Persistent Cache Ratio: 0 +[12/28/2023-10:38:35] [I] Inputs: +[12/28/2023-10:38:35] [I] === 
Reporting Options === +[12/28/2023-10:38:35] [I] Verbose: Disabled +[12/28/2023-10:38:35] [I] Averages: 100 inferences +[12/28/2023-10:38:35] [I] Percentiles: 90,95,99 +[12/28/2023-10:38:35] [I] Dump refittable layers:Disabled +[12/28/2023-10:38:35] [I] Dump output: Disabled +[12/28/2023-10:38:35] [I] Profile: Disabled +[12/28/2023-10:38:35] [I] Export timing to JSON file: +[12/28/2023-10:38:35] [I] Export output to JSON file: +[12/28/2023-10:38:35] [I] Export profile to JSON file: +[12/28/2023-10:38:35] [I] +[12/28/2023-10:38:35] [I] === Device Information === +[12/28/2023-10:38:35] [I] Selected Device: Orin +[12/28/2023-10:38:35] [I] Compute Capability: 8.7 +[12/28/2023-10:38:35] [I] SMs: 8 +[12/28/2023-10:38:35] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-10:38:35] [I] Device Global Memory: 7471 MiB +[12/28/2023-10:38:35] [I] Shared Memory per SM: 164 KiB +[12/28/2023-10:38:35] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-10:38:35] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-10:38:35] [I] +[12/28/2023-10:38:35] [I] TensorRT version: 8.5.2 +[12/28/2023-10:38:35] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 2979 (MiB) +[12/28/2023-10:38:38] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +284, now: CPU 574, GPU 3283 (MiB) +[12/28/2023-10:38:38] [I] Start parsing network model +[12/28/2023-10:38:38] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-10:38:38] [I] [TRT] Input filename: yolo_nas_pose_m_int8.onnx +[12/28/2023-10:38:38] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-10:38:38] [I] [TRT] Opset version: 17 +[12/28/2023-10:38:38] [I] [TRT] Producer name: pytorch +[12/28/2023-10:38:38] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-10:38:38] [I] [TRT] Domain: +[12/28/2023-10:38:38] [I] [TRT] Model version: 0 +[12/28/2023-10:38:38] [I] [TRT] Doc string: +[12/28/2023-10:38:38] [I] [TRT] ---------------------------------------------------------------- 
+[12/28/2023-10:38:42] [I] Finish parsing network model +&&&& FAILED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_m_int8.onnx --fp16 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_m_int8.onnx.fp16.engine diff --git a/yolo_nas_pose_m_int8.onnx.int8.engine b/yolo_nas_pose_m_int8.onnx.int8.engine new file mode 100644 index 0000000000000000000000000000000000000000..df6a1d1d7ee599d6d584b90e9db09c81e69be2b1 --- /dev/null +++ b/yolo_nas_pose_m_int8.onnx.int8.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:649a3dfa56cfade0026b0d476fab2983d60458937e7431604527c0fefc3c853c +size 41664431 diff --git a/yolo_nas_pose_m_int8.onnx.int8.engine.err b/yolo_nas_pose_m_int8.onnx.int8.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..1fd63fbe685e55f0b54da7740bb7d5dde224fabe --- /dev/null +++ b/yolo_nas_pose_m_int8.onnx.int8.engine.err @@ -0,0 +1,7 @@ +[12/28/2023-11:17:20] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/28/2023-11:17:20] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-11:17:24] [W] [TRT] Calibrator won't be used in explicit precision mode. Use quantization aware training to generate network with Quantize/Dequantize nodes. +[12/28/2023-11:29:15] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. +[12/28/2023-11:29:15] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/28/2023-11:29:15] [W] * GPU compute time is unstable, with coefficient of variance = 6.95793%. +[12/28/2023-11:29:15] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. 
diff --git a/yolo_nas_pose_m_int8.onnx.int8.engine.log b/yolo_nas_pose_m_int8.onnx.int8.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..722f79bfa74f0902553794a7664aaf49e49979da --- /dev/null +++ b/yolo_nas_pose_m_int8.onnx.int8.engine.log @@ -0,0 +1,333 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_m_int8.onnx --int8 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_m_int8.onnx.int8.engine +[12/28/2023-11:17:16] [I] === Model Options === +[12/28/2023-11:17:16] [I] Format: ONNX +[12/28/2023-11:17:16] [I] Model: yolo_nas_pose_m_int8.onnx +[12/28/2023-11:17:16] [I] Output: +[12/28/2023-11:17:16] [I] === Build Options === +[12/28/2023-11:17:16] [I] Max batch: explicit batch +[12/28/2023-11:17:16] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-11:17:16] [I] minTiming: 1 +[12/28/2023-11:17:16] [I] avgTiming: 8 +[12/28/2023-11:17:16] [I] Precision: FP32+INT8 +[12/28/2023-11:17:16] [I] LayerPrecisions: +[12/28/2023-11:17:16] [I] Calibration: Dynamic +[12/28/2023-11:17:16] [I] Refit: Disabled +[12/28/2023-11:17:16] [I] Sparsity: Disabled +[12/28/2023-11:17:16] [I] Safe mode: Disabled +[12/28/2023-11:17:16] [I] DirectIO mode: Disabled +[12/28/2023-11:17:16] [I] Restricted mode: Disabled +[12/28/2023-11:17:16] [I] Build only: Disabled +[12/28/2023-11:17:16] [I] Save engine: yolo_nas_pose_m_int8.onnx.int8.engine +[12/28/2023-11:17:16] [I] Load engine: +[12/28/2023-11:17:16] [I] Profiling verbosity: 0 +[12/28/2023-11:17:16] [I] Tactic sources: Using default tactic sources +[12/28/2023-11:17:16] [I] timingCacheMode: local +[12/28/2023-11:17:16] [I] timingCacheFile: +[12/28/2023-11:17:16] [I] Heuristic: Disabled +[12/28/2023-11:17:16] [I] Preview Features: Use default preview flags. 
+[12/28/2023-11:17:16] [I] Input(s)s format: fp32:CHW +[12/28/2023-11:17:16] [I] Output(s)s format: fp32:CHW +[12/28/2023-11:17:16] [I] Input build shapes: model +[12/28/2023-11:17:16] [I] Input calibration shapes: model +[12/28/2023-11:17:16] [I] === System Options === +[12/28/2023-11:17:16] [I] Device: 0 +[12/28/2023-11:17:16] [I] DLACore: +[12/28/2023-11:17:16] [I] Plugins: +[12/28/2023-11:17:16] [I] === Inference Options === +[12/28/2023-11:17:16] [I] Batch: Explicit +[12/28/2023-11:17:16] [I] Input inference shapes: model +[12/28/2023-11:17:16] [I] Iterations: 10 +[12/28/2023-11:17:16] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-11:17:16] [I] Sleep time: 0ms +[12/28/2023-11:17:16] [I] Idle time: 0ms +[12/28/2023-11:17:16] [I] Streams: 1 +[12/28/2023-11:17:16] [I] ExposeDMA: Disabled +[12/28/2023-11:17:16] [I] Data transfers: Enabled +[12/28/2023-11:17:16] [I] Spin-wait: Disabled +[12/28/2023-11:17:16] [I] Multithreading: Disabled +[12/28/2023-11:17:16] [I] CUDA Graph: Disabled +[12/28/2023-11:17:16] [I] Separate profiling: Disabled +[12/28/2023-11:17:16] [I] Time Deserialize: Disabled +[12/28/2023-11:17:16] [I] Time Refit: Disabled +[12/28/2023-11:17:16] [I] NVTX verbosity: 0 +[12/28/2023-11:17:16] [I] Persistent Cache Ratio: 0 +[12/28/2023-11:17:16] [I] Inputs: +[12/28/2023-11:17:16] [I] === Reporting Options === +[12/28/2023-11:17:16] [I] Verbose: Disabled +[12/28/2023-11:17:16] [I] Averages: 100 inferences +[12/28/2023-11:17:16] [I] Percentiles: 90,95,99 +[12/28/2023-11:17:16] [I] Dump refittable layers:Disabled +[12/28/2023-11:17:16] [I] Dump output: Disabled +[12/28/2023-11:17:16] [I] Profile: Disabled +[12/28/2023-11:17:16] [I] Export timing to JSON file: +[12/28/2023-11:17:16] [I] Export output to JSON file: +[12/28/2023-11:17:16] [I] Export profile to JSON file: +[12/28/2023-11:17:16] [I] +[12/28/2023-11:17:16] [I] === Device Information === +[12/28/2023-11:17:16] [I] Selected Device: Orin +[12/28/2023-11:17:16] [I] Compute Capability: 8.7 
+[12/28/2023-11:17:16] [I] SMs: 8 +[12/28/2023-11:17:16] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-11:17:16] [I] Device Global Memory: 7471 MiB +[12/28/2023-11:17:16] [I] Shared Memory per SM: 164 KiB +[12/28/2023-11:17:16] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-11:17:16] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-11:17:16] [I] +[12/28/2023-11:17:16] [I] TensorRT version: 8.5.2 +[12/28/2023-11:17:17] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 3000 (MiB) +[12/28/2023-11:17:20] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +284, now: CPU 574, GPU 3306 (MiB) +[12/28/2023-11:17:20] [I] Start parsing network model +[12/28/2023-11:17:20] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-11:17:20] [I] [TRT] Input filename: yolo_nas_pose_m_int8.onnx +[12/28/2023-11:17:20] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-11:17:20] [I] [TRT] Opset version: 17 +[12/28/2023-11:17:20] [I] [TRT] Producer name: pytorch +[12/28/2023-11:17:20] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-11:17:20] [I] [TRT] Domain: +[12/28/2023-11:17:20] [I] [TRT] Model version: 0 +[12/28/2023-11:17:20] [I] [TRT] Doc string: +[12/28/2023-11:17:20] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-11:17:24] [I] Finish parsing network model +[12/28/2023-11:17:24] [I] FP32 and INT8 precisions have been specified - more performance might be enabled by additionally specifying --fp16 or --best +[12/28/2023-11:17:28] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/28/2023-11:17:28] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 1274) [Constant] 
+[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 1275) [Constant] +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 1276) [Constant] +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stem/conv/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stem.conv.rbr_reparam.weight + /model/backbone/stem/conv/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stem/conv/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.downsample.rbr_reparam.weight + /model/backbone/stage1/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/downsample/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.conv2.conv.weight + /model/backbone/stage1/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/conv2/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.conv1.conv.weight + /model/backbone/stage1/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/conv1/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv 
+[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 509) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 525) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_1 +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_0 +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.conv3.conv.weight + /model/backbone/stage1/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/conv3/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.reduce_skip2.conv.weight + /model/neck/neck2/reduce_skip2/conv/_weight_quantizer/QuantizeLinear + 
/model/neck/neck2/reduce_skip2/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.downsample.rbr_reparam.weight + /model/backbone/stage2/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/downsample/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.downsample.conv.weight + /model/neck/neck2/downsample/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/downsample/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.conv2.conv.weight + /model/backbone/stage2/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/conv2/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.conv1.conv.weight + /model/backbone/stage2/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/conv1/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 572) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) 
+[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 588) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.2.cv1.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.2.cv2.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 604) [Shuffle] + 
/model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_2 +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_1 +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_0 +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.conv3.conv.weight + /model/backbone/stage2/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/conv3/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.reduce_skip2.conv.weight + /model/neck/neck1/reduce_skip2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/reduce_skip2/conv/Conv || model.neck.neck2.reduce_skip1.conv.weight + /model/neck/neck2/reduce_skip1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/reduce_skip1/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.downsample.rbr_reparam.weight + /model/backbone/stage3/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/downsample/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.downsample.conv.weight + /model/neck/neck1/downsample/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/downsample/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.conv2.conv.weight + /model/backbone/stage3/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/conv2/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.conv1.conv.weight + /model/backbone/stage3/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + 
/model/backbone/stage3/blocks/conv1/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 654) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 670) [Shuffle] + 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.2.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.2.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 686) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.3.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.3.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] POINTWISE: 
PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 702) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.4.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.4.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.4.alpha + (Unnamed Layer* 718) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Add) +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_4 +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_3 +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_2 +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_1 +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv3/conv/_input_quantizer/QuantizeLinear_clone_0 +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: 
model.backbone.stage3.blocks.conv3.conv.weight + /model/backbone/stage3/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/conv3/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.reduce_skip1.conv.weight + /model/neck/neck1/reduce_skip1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/reduce_skip1/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.downsample.rbr_reparam.weight + /model/backbone/stage4/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/downsample/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.conv2.conv.weight + /model/backbone/stage4/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/conv2/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.conv1.conv.weight + /model/backbone/stage4/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/conv1/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 759) [Shuffle] + 
/model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 775) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.conv3.conv.weight + /model/backbone/stage4/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/conv3/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.context_module.cv1.conv.weight + /model/backbone/context_module/cv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/context_module/cv1/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: 
/model/backbone/context_module/m.2/MaxPool_output_0 copy +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.context_module.cv2.conv.weight + /model/backbone/context_module/cv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/context_module/cv2/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.conv.conv.weight + /model/neck/neck1/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/conv/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] DECONVOLUTION: model.neck.neck1.upsample.weight + /model/neck/neck1/upsample/_weight_quantizer/QuantizeLinear + /model/neck/neck1/upsample/ConvTranspose +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.reduce_after_concat.conv.weight + /model/neck/neck1/reduce_after_concat/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/reduce_after_concat/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.conv2.conv.weight + /model/neck/neck1/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/conv2/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.conv1.conv.weight + /model/neck/neck1/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/conv1/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + 
/model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 840) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 856) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.conv3.conv.weight + /model/neck/neck1/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/conv3/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.conv.conv.weight + /model/neck/neck2/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/conv/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] DECONVOLUTION: model.neck.neck2.upsample.weight + /model/neck/neck2/upsample/_weight_quantizer/QuantizeLinear + /model/neck/neck2/upsample/ConvTranspose +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: 
/model/neck/neck2/Concat_/model/neck/neck2/reduce_skip1/act/Relu_output_0_clone_1 copy +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.reduce_after_concat.conv.weight + /model/neck/neck2/reduce_after_concat/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/reduce_after_concat/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.conv2.conv.weight + /model/neck/neck2/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/conv2/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.conv1.conv.weight + /model/neck/neck2/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/conv1/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 905) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: 
model.neck.neck2.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 921) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.2.cv1.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.2.cv2.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 937) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.conv3.conv.weight + /model/neck/neck2/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/conv3/conv/Conv 
+[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.bbox_stem.seq.conv.weight + /model/heads/head1/bbox_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/bbox_stem/seq/conv/Conv || model.heads.head1.pose_stem.seq.conv.weight + /model/heads/head1/pose_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_stem/seq/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.conv.conv.weight + /model/neck/neck3/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/conv/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.reg_convs.0.seq.conv.weight + /model/heads/head1/reg_convs/reg_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv || model.heads.head1.cls_convs.0.seq.conv.weight + /model/heads/head1/cls_convs/cls_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.pose_convs.0.seq.conv.weight + /model/heads/head1/pose_convs/pose_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.conv2.conv.weight + /model/neck/neck3/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/conv2/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.conv1.conv.weight + /model/neck/neck3/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/conv1/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.cls_pred.weight + /model/heads/head1/cls_pred/_weight_quantizer/QuantizeLinear + /model/heads/head1/cls_pred/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.reg_pred.weight + 
/model/heads/head1/reg_pred/_weight_quantizer/QuantizeLinear + /model/heads/head1/reg_pred/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.pose_convs.1.seq.conv.weight + /model/heads/head1/pose_convs/pose_convs.1/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.pose_pred.weight + /model/heads/head1/pose_pred/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_pred/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.0.cv1.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.0.cv2.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 1002) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.1.cv1.conv.weight + 
/model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.1.cv2.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 1043) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.conv3.conv.weight + /model/neck/neck3/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/conv3/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.pose_stem.seq.conv.weight + /model/heads/head2/pose_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_stem/seq/conv/Conv || model.heads.head2.bbox_stem.seq.conv.weight + /model/heads/head2/bbox_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/bbox_stem/seq/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.conv.conv.weight + /model/neck/neck4/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/conv/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.reg_convs.0.seq.conv.weight + /model/heads/head2/reg_convs/reg_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv || model.heads.head2.cls_convs.0.seq.conv.weight + /model/heads/head2/cls_convs/cls_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: 
model.heads.head2.pose_convs.0.seq.conv.weight + /model/heads/head2/pose_convs/pose_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.conv2.conv.weight + /model/neck/neck4/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/conv2/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.conv1.conv.weight + /model/neck/neck4/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/conv1/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.cls_pred.weight + /model/heads/head2/cls_pred/_weight_quantizer/QuantizeLinear + /model/heads/head2/cls_pred/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.reg_pred.weight + /model/heads/head2/reg_pred/_weight_quantizer/QuantizeLinear + /model/heads/head2/reg_pred/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.pose_convs.1.seq.conv.weight + /model/heads/head2/pose_convs/pose_convs.1/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.pose_pred.weight + /model/heads/head2/pose_pred/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_pred/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.0.cv1.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] 
[GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.0.cv2.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 1108) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.1.cv1.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.1.cv2.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 1149) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.2.cv1.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv1/conv/Conv 
+[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.2.cv2.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/cv2/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.2.alpha + (Unnamed Layer* 1165) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.conv3.conv.weight + /model/neck/neck4/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/conv3/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.bbox_stem.seq.conv.weight + /model/heads/head3/bbox_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/bbox_stem/seq/conv/Conv || model.heads.head3.pose_stem.seq.conv.weight + /model/heads/head3/pose_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_stem/seq/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.reg_convs.0.seq.conv.weight + /model/heads/head3/reg_convs/reg_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv || model.heads.head3.cls_convs.0.seq.conv.weight + /model/heads/head3/cls_convs/cls_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_convs.0.seq.conv.weight + /model/heads/head3/pose_convs/pose_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.cls_pred.weight + /model/heads/head3/cls_pred/_weight_quantizer/QuantizeLinear + /model/heads/head3/cls_pred/Conv 
+[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.reg_pred.weight + /model/heads/head3/reg_pred/_weight_quantizer/QuantizeLinear + /model/heads/head3/reg_pred/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_convs.1.seq.conv.weight + /model/heads/head3/pose_convs/pose_convs.1/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_convs.2.seq.conv.weight + /model/heads/head3/pose_convs/pose_convs.2/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_pred.weight + /model/heads/head3/pose_pred/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_pred/Conv +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice_1.../post_process/Reshape_2]} +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] NMS: batched_nms_250 +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 1278) [NMS]_1_output[DevicetoShapeHostCopy] +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/28/2023-11:17:28] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] +[12/28/2023-11:17:30] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +489, now: CPU 1413, GPU 4098 (MiB) +[12/28/2023-11:17:30] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +83, GPU +78, now: CPU 1496, GPU 4176 (MiB) +[12/28/2023-11:17:30] [I] [TRT] Local timing cache in 
use. Profiling results in this builder pass will not be stored. +[12/28/2023-11:28:56] [I] [TRT] Total Activation Memory: 8023950848 +[12/28/2023-11:28:56] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/28/2023-11:28:59] [I] [TRT] Total Host Persistent Memory: 320224 +[12/28/2023-11:28:59] [I] [TRT] Total Device Persistent Memory: 38912 +[12/28/2023-11:28:59] [I] [TRT] Total Scratch Memory: 134217728 +[12/28/2023-11:28:59] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 188 MiB, GPU 132 MiB +[12/28/2023-11:28:59] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 173 steps to complete. +[12/28/2023-11:28:59] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 49.7039ms to assign 13 blocks to 173 nodes requiring 171802624 bytes. +[12/28/2023-11:28:59] [I] [TRT] Total Activation Memory: 171802624 +[12/28/2023-11:28:59] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1868, GPU 5567 (MiB) +[12/28/2023-11:29:00] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +37, GPU +64, now: CPU 37, GPU 64 (MiB) +[12/28/2023-11:29:00] [I] Engine built in 703.287 sec. +[12/28/2023-11:29:00] [I] [TRT] Loaded engine size: 39 MiB +[12/28/2023-11:29:00] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +1, now: CPU 1272, GPU 5375 (MiB) +[12/28/2023-11:29:00] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +37, now: CPU 0, GPU 37 (MiB) +[12/28/2023-11:29:00] [I] Engine deserialized in 0.100472 sec. +[12/28/2023-11:29:00] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1273, GPU 5375 (MiB) +[12/28/2023-11:29:00] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +164, now: CPU 0, GPU 201 (MiB) +[12/28/2023-11:29:00] [I] Setting persistentCacheLimit to 0 bytes. 
+[12/28/2023-11:29:00] [I] Using random values for input onnx::Cast_0 +[12/28/2023-11:29:00] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/28/2023-11:29:00] [I] Using random values for output graph2_flat_predictions +[12/28/2023-11:29:00] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/28/2023-11:29:00] [I] Starting inference +[12/28/2023-11:29:15] [I] Warmup completed 7 queries over 200 ms +[12/28/2023-11:29:15] [I] Timing trace has 582 queries over 15.0482 s +[12/28/2023-11:29:15] [I] +[12/28/2023-11:29:15] [I] === Trace details === +[12/28/2023-11:29:15] [I] Trace averages of 100 runs: +[12/28/2023-11:29:15] [I] Average on 100 runs - GPU latency: 25.7401 ms - Host latency: 25.8563 ms (enqueue 25.8089 ms) +[12/28/2023-11:29:15] [I] Average on 100 runs - GPU latency: 25.8754 ms - Host latency: 25.9928 ms (enqueue 25.9533 ms) +[12/28/2023-11:29:15] [I] Average on 100 runs - GPU latency: 25.7133 ms - Host latency: 25.8298 ms (enqueue 25.7757 ms) +[12/28/2023-11:29:15] [I] Average on 100 runs - GPU latency: 25.5464 ms - Host latency: 25.6609 ms (enqueue 25.6086 ms) +[12/28/2023-11:29:15] [I] Average on 100 runs - GPU latency: 25.5708 ms - Host latency: 25.6863 ms (enqueue 25.6337 ms) +[12/28/2023-11:29:15] [I] +[12/28/2023-11:29:15] [I] === Performance summary === +[12/28/2023-11:29:15] [I] Throughput: 38.6757 qps +[12/28/2023-11:29:15] [I] Latency: min = 23.6807 ms, max = 41.5288 ms, mean = 25.8206 ms, median = 25.4699 ms, percentile(90%) = 26.5234 ms, percentile(95%) = 28.7695 ms, percentile(99%) = 35.4653 ms +[12/28/2023-11:29:15] [I] Enqueue Time: min = 23.645 ms, max = 41.4478 ms, mean = 25.77 ms, median = 25.417 ms, percentile(90%) = 26.438 ms, percentile(95%) = 28.9404 ms, percentile(99%) = 36.1143 ms +[12/28/2023-11:29:15] [I] H2D Latency: min = 0.0810547 ms, max = 0.127686 ms, mean = 0.0964171 ms, median = 0.0966797 ms, percentile(90%) = 0.0996094 ms, percentile(95%) = 0.100098 ms, 
percentile(99%) = 0.107422 ms +[12/28/2023-11:29:15] [I] GPU Compute Time: min = 23.5708 ms, max = 41.3936 ms, mean = 25.7051 ms, median = 25.3489 ms, percentile(90%) = 26.3984 ms, percentile(95%) = 28.6699 ms, percentile(99%) = 35.3271 ms +[12/28/2023-11:29:15] [I] D2H Latency: min = 0.00292969 ms, max = 0.0625 ms, mean = 0.0190995 ms, median = 0.0175781 ms, percentile(90%) = 0.0273438 ms, percentile(95%) = 0.0292969 ms, percentile(99%) = 0.0380859 ms +[12/28/2023-11:29:15] [I] Total Host Walltime: 15.0482 s +[12/28/2023-11:29:15] [I] Total GPU Compute Time: 14.9604 s +[12/28/2023-11:29:15] [I] Explanations of the performance metrics are printed in the verbose logs. +[12/28/2023-11:29:15] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_m_int8.onnx --int8 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_m_int8.onnx.int8.engine diff --git a/yolo_nas_pose_m_int8.onnx.usage.txt b/yolo_nas_pose_m_int8.onnx.usage.txt new file mode 100644 index 0000000000000000000000000000000000000000..5af3aa92b38770246cd4f2e1a7dc60cb17542f71 --- /dev/null +++ b/yolo_nas_pose_m_int8.onnx.usage.txt @@ -0,0 +1,58 @@ + +Model exported successfully to yolo_nas_pose_m_int8.onnx +Model expects input image of shape [1, 3, 640, 640] +Input image dtype is torch.uint8 + +Exported model already contains preprocessing (normalization) step, so you don't need to do it manually. 
+Preprocessing steps to be applied to input image are: +Sequential( + (0): CastTensorTo(dtype=torch.float32) + (1): ChannelSelect(channels_indexes=tensor([2, 1, 0])) + (2): ApplyMeanStd(mean=[0.], scale=[255.]) +) + + +Exported model contains postprocessing (NMS) step with the following parameters: + num_pre_nms_predictions=1000 + max_predictions_per_image=10 + nms_threshold=0.5 + confidence_threshold=0.15 + output_predictions_format=flat + + +Exported model is in ONNX format and can be used with ONNXRuntime +To run inference with ONNXRuntime, please use the following code snippet: + + import onnxruntime + import numpy as np + session = onnxruntime.InferenceSession("yolo_nas_pose_m_int8.onnx", providers=["CUDAExecutionProvider", "CPUExecutionProvider"]) + inputs = [o.name for o in session.get_inputs()] + outputs = [o.name for o in session.get_outputs()] + + example_input_image = np.zeros((1, 3, 640, 640)).astype(np.uint8) + predictions = session.run(outputs, {inputs[0]: example_input_image}) + +Exported model can also be used with TensorRT +To run inference with TensorRT, please see TensorRT deployment documentation +You can benchmark the model using the following code snippet: + + trtexec --onnx=yolo_nas_pose_m_int8.onnx --int8 --avgRuns=100 --duration=15 + + +Exported model has predictions in flat format: + +# flat_predictions is a 2D array of [N,K] shape +# Each row represents (image_index, x_min, y_min, x_max, y_max, confidence, joints...) 
+# Please note all values are floats, so you have to convert them to integers if needed + +[flat_predictions] = predictions +pred_bboxes = flat_predictions[:, 1:5] +pred_scores = flat_predictions[:, 5] +pred_joints = flat_predictions[:, 6:].reshape((len(pred_bboxes), -1, 3)) +for i in range(len(pred_bboxes)): + confidence = pred_scores[i] + x_min, y_min, x_max, y_max = pred_bboxes[i] + print(f"Detected pose with confidence={confidence}, x_min={x_min}, y_min={y_min}, x_max={x_max}, y_max={y_max}") + for joint_index, (x, y, confidence) in enumerate(pred_joints[i]): + print(f"Joint {joint_index} has coordinates x={x}, y={y}, confidence={confidence}") + diff --git a/yolo_nas_pose_n_fp16.onnx b/yolo_nas_pose_n_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..e2458a49c27e53d7854686fd8933256743c3a64a --- /dev/null +++ b/yolo_nas_pose_n_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7780a9bdc0a3302f98f72cdec9c631f2d6d9c347c2dceefc314343235ac4a280 +size 13909326 diff --git a/yolo_nas_pose_n_fp16.onnx.best.engine b/yolo_nas_pose_n_fp16.onnx.best.engine new file mode 100644 index 0000000000000000000000000000000000000000..194212de9b23597373ab3a4b20ddfee6707d31e6 --- /dev/null +++ b/yolo_nas_pose_n_fp16.onnx.best.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bee52c1db884bf230f3eb0dfe487e3147d41e0bdf7e0dafa37a8369b4d6351b +size 10230265 diff --git a/yolo_nas_pose_n_fp16.onnx.best.engine.err b/yolo_nas_pose_n_fp16.onnx.best.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..3b189cefb9d9bab8cef57c39b7ef5ed9f71ba595 --- /dev/null +++ b/yolo_nas_pose_n_fp16.onnx.best.engine.err @@ -0,0 +1,37 @@ +[12/27/2023-19:36:15] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32.
+[12/27/2023-19:36:15] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/27/2023-19:36:16] [W] [TRT] Calibrator is not being used. Users must provide dynamic range for all tensors that are not Int32 or Bool. +[12/27/2023-20:03:19] [W] [TRT] Tactic Device request: 3144MB Available: 2247MB. Device memory is insufficient to use tactic. +[12/27/2023-20:03:20] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3144 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-20:03:20] [W] [TRT] Tactic Device request: 3144MB Available: 2248MB. Device memory is insufficient to use tactic. +[12/27/2023-20:03:20] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3144 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-20:03:20] [W] [TRT] Tactic Device request: 3144MB Available: 2248MB. Device memory is insufficient to use tactic. +[12/27/2023-20:03:20] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3144 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-20:03:21] [W] [TRT] Tactic Device request: 3140MB Available: 2248MB. Device memory is insufficient to use tactic. +[12/27/2023-20:03:21] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3140 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-20:03:21] [W] [TRT] Tactic Device request: 3140MB Available: 2248MB. Device memory is insufficient to use tactic. +[12/27/2023-20:03:21] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3140 detected for tactic 0x000000000000003c. 
+Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-20:23:39] [W] [TRT] Tactic Device request: 4364MB Available: 1778MB. Device memory is insufficient to use tactic. +[12/27/2023-20:23:39] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4364 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-20:23:39] [W] [TRT] Tactic Device request: 4364MB Available: 1778MB. Device memory is insufficient to use tactic. +[12/27/2023-20:23:39] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4364 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-20:23:39] [W] [TRT] Tactic Device request: 4364MB Available: 1777MB. Device memory is insufficient to use tactic. +[12/27/2023-20:23:39] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4364 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-20:23:41] [W] [TRT] Tactic Device request: 4363MB Available: 1778MB. Device memory is insufficient to use tactic. +[12/27/2023-20:23:41] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4363 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-20:23:41] [W] [TRT] Tactic Device request: 4363MB Available: 1777MB. Device memory is insufficient to use tactic. +[12/27/2023-20:23:41] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 4363 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-20:52:21] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. 
+[12/27/2023-20:52:21] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/27/2023-20:52:21] [W] * GPU compute time is unstable, with coefficient of variance = 4.70893%. +[12/27/2023-20:52:21] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. diff --git a/yolo_nas_pose_n_fp16.onnx.best.engine.log b/yolo_nas_pose_n_fp16.onnx.best.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..0c42c3395f125fb83a9c073d1fbef1c6f8c16d02 --- /dev/null +++ b/yolo_nas_pose_n_fp16.onnx.best.engine.log @@ -0,0 +1,302 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_fp16.onnx --best --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_fp16.onnx.best.engine +[12/27/2023-19:36:12] [I] === Model Options === +[12/27/2023-19:36:12] [I] Format: ONNX +[12/27/2023-19:36:12] [I] Model: yolo_nas_pose_n_fp16.onnx +[12/27/2023-19:36:12] [I] Output: +[12/27/2023-19:36:12] [I] === Build Options === +[12/27/2023-19:36:12] [I] Max batch: explicit batch +[12/27/2023-19:36:12] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/27/2023-19:36:12] [I] minTiming: 1 +[12/27/2023-19:36:12] [I] avgTiming: 8 +[12/27/2023-19:36:12] [I] Precision: FP32+FP16+INT8 +[12/27/2023-19:36:12] [I] LayerPrecisions: +[12/27/2023-19:36:12] [I] Calibration: Dynamic +[12/27/2023-19:36:12] [I] Refit: Disabled +[12/27/2023-19:36:12] [I] Sparsity: Disabled +[12/27/2023-19:36:12] [I] Safe mode: Disabled +[12/27/2023-19:36:12] [I] DirectIO mode: Disabled +[12/27/2023-19:36:12] [I] Restricted mode: Disabled +[12/27/2023-19:36:12] [I] Build only: Disabled +[12/27/2023-19:36:12] [I] Save engine: yolo_nas_pose_n_fp16.onnx.best.engine +[12/27/2023-19:36:12] [I] Load engine: +[12/27/2023-19:36:12] [I] Profiling verbosity: 0 +[12/27/2023-19:36:12] [I] Tactic sources: Using default 
tactic sources +[12/27/2023-19:36:12] [I] timingCacheMode: local +[12/27/2023-19:36:12] [I] timingCacheFile: +[12/27/2023-19:36:12] [I] Heuristic: Disabled +[12/27/2023-19:36:12] [I] Preview Features: Use default preview flags. +[12/27/2023-19:36:12] [I] Input(s)s format: fp32:CHW +[12/27/2023-19:36:12] [I] Output(s)s format: fp32:CHW +[12/27/2023-19:36:12] [I] Input build shapes: model +[12/27/2023-19:36:12] [I] Input calibration shapes: model +[12/27/2023-19:36:12] [I] === System Options === +[12/27/2023-19:36:12] [I] Device: 0 +[12/27/2023-19:36:12] [I] DLACore: +[12/27/2023-19:36:12] [I] Plugins: +[12/27/2023-19:36:12] [I] === Inference Options === +[12/27/2023-19:36:12] [I] Batch: Explicit +[12/27/2023-19:36:12] [I] Input inference shapes: model +[12/27/2023-19:36:12] [I] Iterations: 10 +[12/27/2023-19:36:12] [I] Duration: 15s (+ 200ms warm up) +[12/27/2023-19:36:12] [I] Sleep time: 0ms +[12/27/2023-19:36:12] [I] Idle time: 0ms +[12/27/2023-19:36:12] [I] Streams: 1 +[12/27/2023-19:36:12] [I] ExposeDMA: Disabled +[12/27/2023-19:36:12] [I] Data transfers: Enabled +[12/27/2023-19:36:12] [I] Spin-wait: Disabled +[12/27/2023-19:36:12] [I] Multithreading: Disabled +[12/27/2023-19:36:12] [I] CUDA Graph: Disabled +[12/27/2023-19:36:12] [I] Separate profiling: Disabled +[12/27/2023-19:36:12] [I] Time Deserialize: Disabled +[12/27/2023-19:36:12] [I] Time Refit: Disabled +[12/27/2023-19:36:12] [I] NVTX verbosity: 0 +[12/27/2023-19:36:12] [I] Persistent Cache Ratio: 0 +[12/27/2023-19:36:12] [I] Inputs: +[12/27/2023-19:36:12] [I] === Reporting Options === +[12/27/2023-19:36:12] [I] Verbose: Disabled +[12/27/2023-19:36:12] [I] Averages: 100 inferences +[12/27/2023-19:36:12] [I] Percentiles: 90,95,99 +[12/27/2023-19:36:12] [I] Dump refittable layers:Disabled +[12/27/2023-19:36:12] [I] Dump output: Disabled +[12/27/2023-19:36:12] [I] Profile: Disabled +[12/27/2023-19:36:12] [I] Export timing to JSON file: +[12/27/2023-19:36:12] [I] Export output to JSON file: 
+[12/27/2023-19:36:12] [I] Export profile to JSON file: +[12/27/2023-19:36:12] [I] +[12/27/2023-19:36:12] [I] === Device Information === +[12/27/2023-19:36:12] [I] Selected Device: Orin +[12/27/2023-19:36:12] [I] Compute Capability: 8.7 +[12/27/2023-19:36:12] [I] SMs: 8 +[12/27/2023-19:36:12] [I] Compute Clock Rate: 0.624 GHz +[12/27/2023-19:36:12] [I] Device Global Memory: 7471 MiB +[12/27/2023-19:36:12] [I] Shared Memory per SM: 164 KiB +[12/27/2023-19:36:12] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/27/2023-19:36:12] [I] Memory Clock Rate: 0.624 GHz +[12/27/2023-19:36:12] [I] +[12/27/2023-19:36:12] [I] TensorRT version: 8.5.2 +[12/27/2023-19:36:13] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 3116 (MiB) +[12/27/2023-19:36:15] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +433, now: CPU 574, GPU 3567 (MiB) +[12/27/2023-19:36:15] [I] Start parsing network model +[12/27/2023-19:36:15] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-19:36:15] [I] [TRT] Input filename: yolo_nas_pose_n_fp16.onnx +[12/27/2023-19:36:15] [I] [TRT] ONNX IR version: 0.0.8 +[12/27/2023-19:36:15] [I] [TRT] Opset version: 17 +[12/27/2023-19:36:15] [I] [TRT] Producer name: pytorch +[12/27/2023-19:36:15] [I] [TRT] Producer version: 2.1.2 +[12/27/2023-19:36:15] [I] [TRT] Domain: +[12/27/2023-19:36:15] [I] [TRT] Model version: 0 +[12/27/2023-19:36:15] [I] [TRT] Doc string: +[12/27/2023-19:36:15] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-19:36:16] [I] Finish parsing network model +[12/27/2023-19:36:16] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/27/2023-19:36:16] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} 
+[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 386) [Constant] +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 387) [Constant] +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 388) [Constant] +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stem/conv/rbr_reparam/Conv + /model/backbone/stem/conv/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/downsample/rbr_reparam/Conv + /model/backbone/stage1/downsample/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv2/conv/Conv + /model/backbone/stage1/blocks/conv2/act/Relu || /model/backbone/stage1/blocks/conv1/conv/Conv + /model/backbone/stage1/blocks/conv1/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 15) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] POINTWISE: 
PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 23) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv3/conv/Conv + /model/backbone/stage1/blocks/conv3/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_skip2/conv/Conv + /model/neck/neck2/reduce_skip2/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/downsample/rbr_reparam/Conv + /model/backbone/stage2/downsample/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/downsample/conv/Conv + /model/neck/neck2/downsample/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv2/conv/Conv + /model/backbone/stage2/blocks/conv2/act/Relu || /model/backbone/stage2/blocks/conv1/conv/Conv + /model/backbone/stage2/blocks/conv1/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 44) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + 
/model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 52) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 60) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add_output_0 copy +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv3/conv/Conv + /model/backbone/stage2/blocks/conv3/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip2/conv/Conv + /model/neck/neck1/reduce_skip2/act/Relu || /model/neck/neck2/reduce_skip1/conv/Conv + /model/neck/neck2/reduce_skip1/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/downsample/rbr_reparam/Conv + 
/model/backbone/stage3/downsample/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/downsample/conv/Conv + /model/neck/neck1/downsample/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv2/conv/Conv + /model/backbone/stage3/blocks/conv2/act/Relu || /model/backbone/stage3/blocks/conv1/conv/Conv + /model/backbone/stage3/blocks/conv1/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 83) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 91) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 99) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 107) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv3/conv/Conv + /model/backbone/stage3/blocks/conv3/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip1/conv/Conv + /model/neck/neck1/reduce_skip1/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/downsample/rbr_reparam/Conv + /model/backbone/stage4/downsample/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv2/conv/Conv + /model/backbone/stage4/blocks/conv2/act/Relu || 
/model/backbone/stage4/blocks/conv1/conv/Conv + /model/backbone/stage4/blocks/conv1/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 126) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 134) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv3/conv/Conv + /model/backbone/stage4/blocks/conv3/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv1/conv/Conv + /model/backbone/context_module/cv1/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool 
+[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/cv1/act/Relu_output_0 copy +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv2/conv/Conv + /model/backbone/context_module/cv2/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/conv/conv/Conv + /model/neck/neck1/conv/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck1/upsample/ConvTranspose +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_after_concat/conv/Conv + /model/neck/neck1/reduce_after_concat/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv2/conv/Conv + /model/neck/neck1/blocks/conv2/act/Relu || /model/neck/neck1/blocks/conv1/conv/Conv + /model/neck/neck1/blocks/conv1/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 163) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 171) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv3/conv/Conv + /model/neck/neck1/blocks/conv3/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/conv/conv/Conv + /model/neck/neck2/conv/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck2/upsample/ConvTranspose +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/reduce_skip1/act/Relu_output_0 copy +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_after_concat/conv/Conv + /model/neck/neck2/reduce_after_concat/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv2/conv/Conv + /model/neck/neck2/blocks/conv2/act/Relu || /model/neck/neck2/blocks/conv1/conv/Conv + /model/neck/neck2/blocks/conv1/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 
192) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 200) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv3/conv/Conv + /model/neck/neck2/blocks/conv3/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/bbox_stem/seq/conv/Conv + /model/heads/head1/bbox_stem/seq/act/Relu || /model/heads/head1/pose_stem/seq/conv/Conv + /model/heads/head1/pose_stem/seq/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/conv/conv/Conv + /model/neck/neck3/conv/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head1/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head1/cls_convs/cls_convs.0/seq/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.0/seq/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv2/conv/Conv + /model/neck/neck3/blocks/conv2/act/Relu || 
/model/neck/neck3/blocks/conv1/conv/Conv + /model/neck/neck3/blocks/conv1/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/cls_pred/Conv +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_pred/Conv +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.1/seq/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_pred/Conv +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 231) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 254) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, 
/model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv3/conv/Conv + /model/neck/neck3/blocks/conv3/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/bbox_stem/seq/conv/Conv + /model/heads/head2/bbox_stem/seq/act/Relu || /model/heads/head2/pose_stem/seq/conv/Conv + /model/heads/head2/pose_stem/seq/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/conv/conv/Conv + /model/neck/neck4/conv/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head2/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head2/cls_convs/cls_convs.0/seq/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.0/seq/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv2/conv/Conv + /model/neck/neck4/blocks/conv2/act/Relu || /model/neck/neck4/blocks/conv1/conv/Conv + /model/neck/neck4/blocks/conv1/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/cls_pred/Conv +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_pred/Conv +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.1/seq/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu 
+[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_pred/Conv +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 285) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 308) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv3/conv/Conv + /model/neck/neck4/blocks/conv3/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/bbox_stem/seq/conv/Conv + /model/heads/head3/bbox_stem/seq/act/Relu || /model/heads/head3/pose_stem/seq/conv/Conv + 
/model/heads/head3/pose_stem/seq/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head3/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head3/cls_convs/cls_convs.0/seq/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.0/seq/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/cls_pred/Conv +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_pred/Conv +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.1/seq/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.2/seq/act/Relu +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_pred/Conv +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice_1...cast_boxes_to_fp32]} +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] NMS: batched_nms_26 +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 390) [NMS]_1_output[DevicetoShapeHostCopy] +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/27/2023-19:36:16] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] +[12/27/2023-19:36:17] [I] [TRT] [MemUsageChange] Init 
cuBLAS/cuBLASLt: CPU +534, GPU +781, now: CPU 1126, GPU 4376 (MiB) +[12/27/2023-19:36:18] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +83, GPU +100, now: CPU 1209, GPU 4476 (MiB) +[12/27/2023-19:36:18] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored. +[12/27/2023-20:51:48] [I] [TRT] Total Activation Memory: 7893097472 +[12/27/2023-20:51:48] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/27/2023-20:51:58] [I] [TRT] Total Host Persistent Memory: 279584 +[12/27/2023-20:51:58] [I] [TRT] Total Device Persistent Memory: 77824 +[12/27/2023-20:51:58] [I] [TRT] Total Scratch Memory: 134217728 +[12/27/2023-20:51:58] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 15 MiB, GPU 2112 MiB +[12/27/2023-20:51:58] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 170 steps to complete. +[12/27/2023-20:51:58] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 81.1442ms to assign 16 blocks to 170 nodes requiring 138889216 bytes. +[12/27/2023-20:51:58] [I] [TRT] Total Activation Memory: 138889216 +[12/27/2023-20:52:04] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1567, GPU 5340 (MiB) +[12/27/2023-20:52:04] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +7, GPU +8, now: CPU 7, GPU 8 (MiB) +[12/27/2023-20:52:05] [I] Engine built in 4552.34 sec. +[12/27/2023-20:52:05] [I] [TRT] Loaded engine size: 9 MiB +[12/27/2023-20:52:06] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1254, GPU 5338 (MiB) +[12/27/2023-20:52:06] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +7, now: CPU 0, GPU 7 (MiB) +[12/27/2023-20:52:06] [I] Engine deserialized in 0.170457 sec. 
+[12/27/2023-20:52:06] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1255, GPU 5338 (MiB) +[12/27/2023-20:52:06] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +133, now: CPU 0, GPU 140 (MiB) +[12/27/2023-20:52:06] [I] Setting persistentCacheLimit to 0 bytes. +[12/27/2023-20:52:06] [I] Using random values for input onnx::Cast_0 +[12/27/2023-20:52:06] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/27/2023-20:52:06] [I] Using random values for output graph2_flat_predictions +[12/27/2023-20:52:06] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/27/2023-20:52:06] [I] Starting inference +[12/27/2023-20:52:21] [I] Warmup completed 3 queries over 200 ms +[12/27/2023-20:52:21] [I] Timing trace has 1379 queries over 15.0171 s +[12/27/2023-20:52:21] [I] +[12/27/2023-20:52:21] [I] === Trace details === +[12/27/2023-20:52:21] [I] Trace averages of 100 runs: +[12/27/2023-20:52:21] [I] Average on 100 runs - GPU latency: 10.9387 ms - Host latency: 11.0544 ms (enqueue 11.02 ms) +[12/27/2023-20:52:21] [I] Average on 100 runs - GPU latency: 10.9995 ms - Host latency: 11.1128 ms (enqueue 11.0677 ms) +[12/27/2023-20:52:21] [I] Average on 100 runs - GPU latency: 10.6063 ms - Host latency: 10.7177 ms (enqueue 10.6834 ms) +[12/27/2023-20:52:21] [I] Average on 100 runs - GPU latency: 10.6551 ms - Host latency: 10.7654 ms (enqueue 10.7318 ms) +[12/27/2023-20:52:21] [I] Average on 100 runs - GPU latency: 10.5462 ms - Host latency: 10.656 ms (enqueue 10.6246 ms) +[12/27/2023-20:52:21] [I] Average on 100 runs - GPU latency: 10.596 ms - Host latency: 10.706 ms (enqueue 10.674 ms) +[12/27/2023-20:52:21] [I] Average on 100 runs - GPU latency: 10.851 ms - Host latency: 10.9629 ms (enqueue 10.9238 ms) +[12/27/2023-20:52:21] [I] Average on 100 runs - GPU latency: 10.8631 ms - Host latency: 10.9749 ms (enqueue 10.9341 ms) +[12/27/2023-20:52:21] [I] Average on 100 runs - 
GPU latency: 10.5881 ms - Host latency: 10.6987 ms (enqueue 10.6662 ms) +[12/27/2023-20:52:21] [I] Average on 100 runs - GPU latency: 10.527 ms - Host latency: 10.6373 ms (enqueue 10.6047 ms) +[12/27/2023-20:52:21] [I] Average on 100 runs - GPU latency: 11.0507 ms - Host latency: 11.1651 ms (enqueue 11.1209 ms) +[12/27/2023-20:52:21] [I] Average on 100 runs - GPU latency: 10.9825 ms - Host latency: 11.0964 ms (enqueue 11.0537 ms) +[12/27/2023-20:52:21] [I] Average on 100 runs - GPU latency: 10.6221 ms - Host latency: 10.7322 ms (enqueue 10.6994 ms) +[12/27/2023-20:52:21] [I] +[12/27/2023-20:52:21] [I] === Performance summary === +[12/27/2023-20:52:21] [I] Throughput: 91.8287 qps +[12/27/2023-20:52:21] [I] Latency: min = 10.1069 ms, max = 17.8303 ms, mean = 10.8595 ms, median = 10.8018 ms, percentile(90%) = 11.2207 ms, percentile(95%) = 11.2979 ms, percentile(99%) = 12.5625 ms +[12/27/2023-20:52:21] [I] Enqueue Time: min = 9.85022 ms, max = 17.7838 ms, mean = 10.8228 ms, median = 10.7627 ms, percentile(90%) = 11.1758 ms, percentile(95%) = 11.2676 ms, percentile(99%) = 12.5146 ms +[12/27/2023-20:52:21] [I] H2D Latency: min = 0.0800781 ms, max = 0.196732 ms, mean = 0.0974317 ms, median = 0.0983887 ms, percentile(90%) = 0.100098 ms, percentile(95%) = 0.100586 ms, percentile(99%) = 0.116211 ms +[12/27/2023-20:52:21] [I] GPU Compute Time: min = 9.99805 ms, max = 17.6114 ms, mean = 10.7478 ms, median = 10.6943 ms, percentile(90%) = 11.1064 ms, percentile(95%) = 11.189 ms, percentile(99%) = 12.4521 ms +[12/27/2023-20:52:21] [I] D2H Latency: min = 0.00292969 ms, max = 0.0400391 ms, mean = 0.014299 ms, median = 0.0124512 ms, percentile(90%) = 0.0205078 ms, percentile(95%) = 0.0219727 ms, percentile(99%) = 0.0283203 ms +[12/27/2023-20:52:21] [I] Total Host Walltime: 15.0171 s +[12/27/2023-20:52:21] [I] Total GPU Compute Time: 14.8211 s +[12/27/2023-20:52:21] [I] Explanations of the performance metrics are printed in the verbose logs. 
+[12/27/2023-20:52:21] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_fp16.onnx --best --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_fp16.onnx.best.engine diff --git a/yolo_nas_pose_n_fp16.onnx.engine.err b/yolo_nas_pose_n_fp16.onnx.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..766b55f615df9cab4267250343b54841422d290e --- /dev/null +++ b/yolo_nas_pose_n_fp16.onnx.engine.err @@ -0,0 +1,8 @@ +[12/27/2023-18:53:43] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/27/2023-18:53:43] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/27/2023-18:53:44] [E] Error[4]: [network.cpp::validate::2925] Error Code 4: Internal Error (fp16 precision has been set for a layer or layer output, but fp16 is not configured in the builder) +[12/27/2023-18:53:44] [E] Error[2]: [builder.cpp::buildSerializedNetwork::751] Error Code 2: Internal Error (Assertion engine != nullptr failed. ) +[12/27/2023-18:53:44] [E] Engine could not be created from network +[12/27/2023-18:53:44] [E] Building engine failed +[12/27/2023-18:53:44] [E] Failed to create engine from model or file. 
+[12/27/2023-18:53:44] [E] Engine set up failed diff --git a/yolo_nas_pose_n_fp16.onnx.engine.log b/yolo_nas_pose_n_fp16.onnx.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..e1c008d0efc415bb0ae4fdc5505a90a4bccdf613 --- /dev/null +++ b/yolo_nas_pose_n_fp16.onnx.engine.log @@ -0,0 +1,91 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_fp16.onnx --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_fp16.onnx.engine +[12/27/2023-18:53:37] [I] === Model Options === +[12/27/2023-18:53:37] [I] Format: ONNX +[12/27/2023-18:53:37] [I] Model: yolo_nas_pose_n_fp16.onnx +[12/27/2023-18:53:37] [I] Output: +[12/27/2023-18:53:37] [I] === Build Options === +[12/27/2023-18:53:37] [I] Max batch: explicit batch +[12/27/2023-18:53:37] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/27/2023-18:53:37] [I] minTiming: 1 +[12/27/2023-18:53:37] [I] avgTiming: 8 +[12/27/2023-18:53:37] [I] Precision: FP32 +[12/27/2023-18:53:37] [I] LayerPrecisions: +[12/27/2023-18:53:37] [I] Calibration: +[12/27/2023-18:53:37] [I] Refit: Disabled +[12/27/2023-18:53:37] [I] Sparsity: Disabled +[12/27/2023-18:53:37] [I] Safe mode: Disabled +[12/27/2023-18:53:37] [I] DirectIO mode: Disabled +[12/27/2023-18:53:37] [I] Restricted mode: Disabled +[12/27/2023-18:53:37] [I] Build only: Disabled +[12/27/2023-18:53:37] [I] Save engine: yolo_nas_pose_n_fp16.onnx.engine +[12/27/2023-18:53:37] [I] Load engine: +[12/27/2023-18:53:37] [I] Profiling verbosity: 0 +[12/27/2023-18:53:37] [I] Tactic sources: Using default tactic sources +[12/27/2023-18:53:37] [I] timingCacheMode: local +[12/27/2023-18:53:37] [I] timingCacheFile: +[12/27/2023-18:53:37] [I] Heuristic: Disabled +[12/27/2023-18:53:37] [I] Preview Features: Use default preview flags. 
+[12/27/2023-18:53:37] [I] Input(s)s format: fp32:CHW +[12/27/2023-18:53:37] [I] Output(s)s format: fp32:CHW +[12/27/2023-18:53:37] [I] Input build shapes: model +[12/27/2023-18:53:37] [I] Input calibration shapes: model +[12/27/2023-18:53:37] [I] === System Options === +[12/27/2023-18:53:37] [I] Device: 0 +[12/27/2023-18:53:37] [I] DLACore: +[12/27/2023-18:53:37] [I] Plugins: +[12/27/2023-18:53:37] [I] === Inference Options === +[12/27/2023-18:53:37] [I] Batch: Explicit +[12/27/2023-18:53:37] [I] Input inference shapes: model +[12/27/2023-18:53:37] [I] Iterations: 10 +[12/27/2023-18:53:37] [I] Duration: 15s (+ 200ms warm up) +[12/27/2023-18:53:37] [I] Sleep time: 0ms +[12/27/2023-18:53:37] [I] Idle time: 0ms +[12/27/2023-18:53:37] [I] Streams: 1 +[12/27/2023-18:53:37] [I] ExposeDMA: Disabled +[12/27/2023-18:53:37] [I] Data transfers: Enabled +[12/27/2023-18:53:37] [I] Spin-wait: Disabled +[12/27/2023-18:53:37] [I] Multithreading: Disabled +[12/27/2023-18:53:37] [I] CUDA Graph: Disabled +[12/27/2023-18:53:37] [I] Separate profiling: Disabled +[12/27/2023-18:53:37] [I] Time Deserialize: Disabled +[12/27/2023-18:53:37] [I] Time Refit: Disabled +[12/27/2023-18:53:37] [I] NVTX verbosity: 0 +[12/27/2023-18:53:37] [I] Persistent Cache Ratio: 0 +[12/27/2023-18:53:37] [I] Inputs: +[12/27/2023-18:53:37] [I] === Reporting Options === +[12/27/2023-18:53:37] [I] Verbose: Disabled +[12/27/2023-18:53:37] [I] Averages: 100 inferences +[12/27/2023-18:53:37] [I] Percentiles: 90,95,99 +[12/27/2023-18:53:37] [I] Dump refittable layers:Disabled +[12/27/2023-18:53:37] [I] Dump output: Disabled +[12/27/2023-18:53:37] [I] Profile: Disabled +[12/27/2023-18:53:37] [I] Export timing to JSON file: +[12/27/2023-18:53:37] [I] Export output to JSON file: +[12/27/2023-18:53:37] [I] Export profile to JSON file: +[12/27/2023-18:53:37] [I] +[12/27/2023-18:53:37] [I] === Device Information === +[12/27/2023-18:53:37] [I] Selected Device: Orin +[12/27/2023-18:53:37] [I] Compute Capability: 8.7 
+[12/27/2023-18:53:37] [I] SMs: 8 +[12/27/2023-18:53:37] [I] Compute Clock Rate: 0.624 GHz +[12/27/2023-18:53:37] [I] Device Global Memory: 7471 MiB +[12/27/2023-18:53:37] [I] Shared Memory per SM: 164 KiB +[12/27/2023-18:53:37] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/27/2023-18:53:37] [I] Memory Clock Rate: 0.624 GHz +[12/27/2023-18:53:37] [I] +[12/27/2023-18:53:37] [I] TensorRT version: 8.5.2 +[12/27/2023-18:53:39] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 3069 (MiB) +[12/27/2023-18:53:43] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +285, now: CPU 574, GPU 3377 (MiB) +[12/27/2023-18:53:43] [I] Start parsing network model +[12/27/2023-18:53:43] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-18:53:43] [I] [TRT] Input filename: yolo_nas_pose_n_fp16.onnx +[12/27/2023-18:53:43] [I] [TRT] ONNX IR version: 0.0.8 +[12/27/2023-18:53:43] [I] [TRT] Opset version: 17 +[12/27/2023-18:53:43] [I] [TRT] Producer name: pytorch +[12/27/2023-18:53:43] [I] [TRT] Producer version: 2.1.2 +[12/27/2023-18:53:43] [I] [TRT] Domain: +[12/27/2023-18:53:43] [I] [TRT] Model version: 0 +[12/27/2023-18:53:43] [I] [TRT] Doc string: +[12/27/2023-18:53:43] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-18:53:44] [I] Finish parsing network model +&&&& FAILED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_fp16.onnx --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_fp16.onnx.engine diff --git a/yolo_nas_pose_n_fp16.onnx.fp16.engine b/yolo_nas_pose_n_fp16.onnx.fp16.engine new file mode 100644 index 0000000000000000000000000000000000000000..68c043c01ea2efc53acf8265a6a3485acbc97dd4 --- /dev/null +++ b/yolo_nas_pose_n_fp16.onnx.fp16.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cb1e2dec74e457f1ed48bf7ce0152f17ae128559ddc22b5fc51c6338d3c2a4a +size 15593125 diff --git 
a/yolo_nas_pose_n_fp16.onnx.fp16.engine.err b/yolo_nas_pose_n_fp16.onnx.fp16.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..31aade7b3773f64b3015014387a62159fa388d74 --- /dev/null +++ b/yolo_nas_pose_n_fp16.onnx.fp16.engine.err @@ -0,0 +1,36 @@ +[12/27/2023-18:53:47] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/27/2023-18:53:47] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/27/2023-19:05:47] [W] [TRT] Tactic Device request: 3144MB Available: 2364MB. Device memory is insufficient to use tactic. +[12/27/2023-19:05:47] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3144 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-19:05:47] [W] [TRT] Tactic Device request: 3144MB Available: 2364MB. Device memory is insufficient to use tactic. +[12/27/2023-19:05:47] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3144 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-19:05:47] [W] [TRT] Tactic Device request: 3144MB Available: 2364MB. Device memory is insufficient to use tactic. +[12/27/2023-19:05:47] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3144 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-19:05:48] [W] [TRT] Tactic Device request: 3140MB Available: 2364MB. Device memory is insufficient to use tactic. +[12/27/2023-19:05:48] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3140 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/27/2023-19:05:48] [W] [TRT] Tactic Device request: 3140MB Available: 2364MB. Device memory is insufficient to use tactic. +[12/27/2023-19:05:48] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3140 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-19:17:59] [W] [TRT] Tactic Device request: 4364MB Available: 2035MB. Device memory is insufficient to use tactic. +[12/27/2023-19:17:59] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4364 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-19:18:00] [W] [TRT] Tactic Device request: 4364MB Available: 2035MB. Device memory is insufficient to use tactic. +[12/27/2023-19:18:00] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4364 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-19:18:00] [W] [TRT] Tactic Device request: 4364MB Available: 2035MB. Device memory is insufficient to use tactic. +[12/27/2023-19:18:00] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4364 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-19:18:01] [W] [TRT] Tactic Device request: 4363MB Available: 2034MB. Device memory is insufficient to use tactic. +[12/27/2023-19:18:01] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4363 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-19:18:01] [W] [TRT] Tactic Device request: 4363MB Available: 2034MB. Device memory is insufficient to use tactic. 
+[12/27/2023-19:18:01] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 4363 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-19:36:11] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. +[12/27/2023-19:36:11] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/27/2023-19:36:11] [W] * GPU compute time is unstable, with coefficient of variance = 4.16463%. +[12/27/2023-19:36:11] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. diff --git a/yolo_nas_pose_n_fp16.onnx.fp16.engine.log b/yolo_nas_pose_n_fp16.onnx.fp16.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..5c4971ad6e398a992b3107b7bc8d759d953e92d8 --- /dev/null +++ b/yolo_nas_pose_n_fp16.onnx.fp16.engine.log @@ -0,0 +1,301 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_fp16.onnx --fp16 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_fp16.onnx.fp16.engine +[12/27/2023-18:53:44] [I] === Model Options === +[12/27/2023-18:53:44] [I] Format: ONNX +[12/27/2023-18:53:44] [I] Model: yolo_nas_pose_n_fp16.onnx +[12/27/2023-18:53:44] [I] Output: +[12/27/2023-18:53:44] [I] === Build Options === +[12/27/2023-18:53:44] [I] Max batch: explicit batch +[12/27/2023-18:53:44] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/27/2023-18:53:44] [I] minTiming: 1 +[12/27/2023-18:53:44] [I] avgTiming: 8 +[12/27/2023-18:53:44] [I] Precision: FP32+FP16 +[12/27/2023-18:53:44] [I] LayerPrecisions: +[12/27/2023-18:53:44] [I] Calibration: +[12/27/2023-18:53:44] [I] Refit: Disabled +[12/27/2023-18:53:44] [I] Sparsity: Disabled +[12/27/2023-18:53:44] [I] Safe mode: Disabled +[12/27/2023-18:53:44] [I] DirectIO mode: 
Disabled +[12/27/2023-18:53:44] [I] Restricted mode: Disabled +[12/27/2023-18:53:44] [I] Build only: Disabled +[12/27/2023-18:53:44] [I] Save engine: yolo_nas_pose_n_fp16.onnx.fp16.engine +[12/27/2023-18:53:44] [I] Load engine: +[12/27/2023-18:53:44] [I] Profiling verbosity: 0 +[12/27/2023-18:53:44] [I] Tactic sources: Using default tactic sources +[12/27/2023-18:53:44] [I] timingCacheMode: local +[12/27/2023-18:53:44] [I] timingCacheFile: +[12/27/2023-18:53:44] [I] Heuristic: Disabled +[12/27/2023-18:53:44] [I] Preview Features: Use default preview flags. +[12/27/2023-18:53:44] [I] Input(s)s format: fp32:CHW +[12/27/2023-18:53:44] [I] Output(s)s format: fp32:CHW +[12/27/2023-18:53:44] [I] Input build shapes: model +[12/27/2023-18:53:44] [I] Input calibration shapes: model +[12/27/2023-18:53:44] [I] === System Options === +[12/27/2023-18:53:44] [I] Device: 0 +[12/27/2023-18:53:44] [I] DLACore: +[12/27/2023-18:53:44] [I] Plugins: +[12/27/2023-18:53:44] [I] === Inference Options === +[12/27/2023-18:53:44] [I] Batch: Explicit +[12/27/2023-18:53:44] [I] Input inference shapes: model +[12/27/2023-18:53:44] [I] Iterations: 10 +[12/27/2023-18:53:44] [I] Duration: 15s (+ 200ms warm up) +[12/27/2023-18:53:44] [I] Sleep time: 0ms +[12/27/2023-18:53:44] [I] Idle time: 0ms +[12/27/2023-18:53:44] [I] Streams: 1 +[12/27/2023-18:53:44] [I] ExposeDMA: Disabled +[12/27/2023-18:53:44] [I] Data transfers: Enabled +[12/27/2023-18:53:44] [I] Spin-wait: Disabled +[12/27/2023-18:53:44] [I] Multithreading: Disabled +[12/27/2023-18:53:44] [I] CUDA Graph: Disabled +[12/27/2023-18:53:44] [I] Separate profiling: Disabled +[12/27/2023-18:53:44] [I] Time Deserialize: Disabled +[12/27/2023-18:53:44] [I] Time Refit: Disabled +[12/27/2023-18:53:44] [I] NVTX verbosity: 0 +[12/27/2023-18:53:44] [I] Persistent Cache Ratio: 0 +[12/27/2023-18:53:44] [I] Inputs: +[12/27/2023-18:53:44] [I] === Reporting Options === +[12/27/2023-18:53:44] [I] Verbose: Disabled +[12/27/2023-18:53:44] [I] Averages: 100 
inferences +[12/27/2023-18:53:44] [I] Percentiles: 90,95,99 +[12/27/2023-18:53:44] [I] Dump refittable layers:Disabled +[12/27/2023-18:53:44] [I] Dump output: Disabled +[12/27/2023-18:53:44] [I] Profile: Disabled +[12/27/2023-18:53:44] [I] Export timing to JSON file: +[12/27/2023-18:53:44] [I] Export output to JSON file: +[12/27/2023-18:53:44] [I] Export profile to JSON file: +[12/27/2023-18:53:44] [I] +[12/27/2023-18:53:44] [I] === Device Information === +[12/27/2023-18:53:44] [I] Selected Device: Orin +[12/27/2023-18:53:44] [I] Compute Capability: 8.7 +[12/27/2023-18:53:44] [I] SMs: 8 +[12/27/2023-18:53:44] [I] Compute Clock Rate: 0.624 GHz +[12/27/2023-18:53:44] [I] Device Global Memory: 7471 MiB +[12/27/2023-18:53:44] [I] Shared Memory per SM: 164 KiB +[12/27/2023-18:53:44] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/27/2023-18:53:44] [I] Memory Clock Rate: 0.624 GHz +[12/27/2023-18:53:44] [I] +[12/27/2023-18:53:44] [I] TensorRT version: 8.5.2 +[12/27/2023-18:53:45] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 3069 (MiB) +[12/27/2023-18:53:47] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +285, now: CPU 574, GPU 3373 (MiB) +[12/27/2023-18:53:47] [I] Start parsing network model +[12/27/2023-18:53:47] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-18:53:47] [I] [TRT] Input filename: yolo_nas_pose_n_fp16.onnx +[12/27/2023-18:53:47] [I] [TRT] ONNX IR version: 0.0.8 +[12/27/2023-18:53:47] [I] [TRT] Opset version: 17 +[12/27/2023-18:53:47] [I] [TRT] Producer name: pytorch +[12/27/2023-18:53:47] [I] [TRT] Producer version: 2.1.2 +[12/27/2023-18:53:47] [I] [TRT] Domain: +[12/27/2023-18:53:47] [I] [TRT] Model version: 0 +[12/27/2023-18:53:47] [I] [TRT] Doc string: +[12/27/2023-18:53:47] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-18:53:48] [I] Finish parsing network model +[12/27/2023-18:53:48] [I] [TRT] ---------- Layers 
Running on DLA ---------- +[12/27/2023-18:53:48] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 386) [Constant] +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 387) [Constant] +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 388) [Constant] +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stem/conv/rbr_reparam/Conv + /model/backbone/stem/conv/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/downsample/rbr_reparam/Conv + /model/backbone/stage1/downsample/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv2/conv/Conv + /model/backbone/stage1/blocks/conv2/act/Relu || /model/backbone/stage1/blocks/conv1/conv/Conv + /model/backbone/stage1/blocks/conv1/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 15) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + 
/model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 23) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv3/conv/Conv + /model/backbone/stage1/blocks/conv3/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_skip2/conv/Conv + /model/neck/neck2/reduce_skip2/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/downsample/rbr_reparam/Conv + /model/backbone/stage2/downsample/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/downsample/conv/Conv + /model/neck/neck2/downsample/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv2/conv/Conv + /model/backbone/stage2/blocks/conv2/act/Relu || /model/backbone/stage2/blocks/conv1/conv/Conv + /model/backbone/stage2/blocks/conv1/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] POINTWISE: 
PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 44) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 52) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 60) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add_output_0 copy +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv3/conv/Conv + /model/backbone/stage2/blocks/conv3/act/Relu +[12/27/2023-18:53:48] 
[I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip2/conv/Conv + /model/neck/neck1/reduce_skip2/act/Relu || /model/neck/neck2/reduce_skip1/conv/Conv + /model/neck/neck2/reduce_skip1/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/downsample/rbr_reparam/Conv + /model/backbone/stage3/downsample/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/downsample/conv/Conv + /model/neck/neck1/downsample/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv2/conv/Conv + /model/backbone/stage3/blocks/conv2/act/Relu || /model/backbone/stage3/blocks/conv1/conv/Conv + /model/backbone/stage3/blocks/conv1/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 83) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 91) 
[Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 99) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 107) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv3/conv/Conv + /model/backbone/stage3/blocks/conv3/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip1/conv/Conv + /model/neck/neck1/reduce_skip1/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/backbone/stage4/downsample/rbr_reparam/Conv + /model/backbone/stage4/downsample/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv2/conv/Conv + /model/backbone/stage4/blocks/conv2/act/Relu || /model/backbone/stage4/blocks/conv1/conv/Conv + /model/backbone/stage4/blocks/conv1/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 126) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 134) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv3/conv/Conv + /model/backbone/stage4/blocks/conv3/act/Relu 
+[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv1/conv/Conv + /model/backbone/context_module/cv1/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/cv1/act/Relu_output_0 copy +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv2/conv/Conv + /model/backbone/context_module/cv2/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/conv/conv/Conv + /model/neck/neck1/conv/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck1/upsample/ConvTranspose +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_after_concat/conv/Conv + /model/neck/neck1/reduce_after_concat/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv2/conv/Conv + /model/neck/neck1/blocks/conv2/act/Relu || /model/neck/neck1/blocks/conv1/conv/Conv + /model/neck/neck1/blocks/conv1/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 163) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] 
CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 171) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv3/conv/Conv + /model/neck/neck1/blocks/conv3/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/conv/conv/Conv + /model/neck/neck2/conv/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck2/upsample/ConvTranspose +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/reduce_skip1/act/Relu_output_0 copy +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_after_concat/conv/Conv + /model/neck/neck2/reduce_after_concat/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv2/conv/Conv + /model/neck/neck2/blocks/conv2/act/Relu || /model/neck/neck2/blocks/conv1/conv/Conv + /model/neck/neck2/blocks/conv1/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 192) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 200) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv3/conv/Conv + /model/neck/neck2/blocks/conv3/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/bbox_stem/seq/conv/Conv + /model/heads/head1/bbox_stem/seq/act/Relu || /model/heads/head1/pose_stem/seq/conv/Conv + /model/heads/head1/pose_stem/seq/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/conv/conv/Conv + /model/neck/neck3/conv/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head1/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head1/cls_convs/cls_convs.0/seq/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.0/seq/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv2/conv/Conv + /model/neck/neck3/blocks/conv2/act/Relu || /model/neck/neck3/blocks/conv1/conv/Conv + /model/neck/neck3/blocks/conv1/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/cls_pred/Conv +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_pred/Conv +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.1/seq/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_pred/Conv +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 231) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + 
/model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 254) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv3/conv/Conv + /model/neck/neck3/blocks/conv3/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/bbox_stem/seq/conv/Conv + /model/heads/head2/bbox_stem/seq/act/Relu || /model/heads/head2/pose_stem/seq/conv/Conv + /model/heads/head2/pose_stem/seq/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/conv/conv/Conv + /model/neck/neck4/conv/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head2/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head2/cls_convs/cls_convs.0/seq/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.0/seq/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv2/conv/Conv + /model/neck/neck4/blocks/conv2/act/Relu || /model/neck/neck4/blocks/conv1/conv/Conv + /model/neck/neck4/blocks/conv1/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/cls_pred/Conv +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_pred/Conv +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.1/seq/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_pred/Conv +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 285) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 308) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/neck/neck4/blocks/conv3/conv/Conv + /model/neck/neck4/blocks/conv3/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/bbox_stem/seq/conv/Conv + /model/heads/head3/bbox_stem/seq/act/Relu || /model/heads/head3/pose_stem/seq/conv/Conv + /model/heads/head3/pose_stem/seq/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head3/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head3/cls_convs/cls_convs.0/seq/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.0/seq/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/cls_pred/Conv +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_pred/Conv +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.1/seq/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.2/seq/act/Relu +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_pred/Conv +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice_1...cast_boxes_to_fp32]} +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] NMS: batched_nms_26 +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 390) [NMS]_1_output[DevicetoShapeHostCopy] +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] 
TRAIN_STATION: [trainStation2] +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/27/2023-18:53:48] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] +[12/27/2023-18:53:50] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +498, now: CPU 1126, GPU 3901 (MiB) +[12/27/2023-18:53:51] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +83, GPU +71, now: CPU 1209, GPU 3972 (MiB) +[12/27/2023-18:53:51] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored. +[12/27/2023-19:35:43] [I] [TRT] Total Activation Memory: 7920491520 +[12/27/2023-19:35:43] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/27/2023-19:35:51] [I] [TRT] Total Host Persistent Memory: 290432 +[12/27/2023-19:35:51] [I] [TRT] Total Device Persistent Memory: 195584 +[12/27/2023-19:35:51] [I] [TRT] Total Scratch Memory: 134217728 +[12/27/2023-19:35:51] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 15 MiB, GPU 2112 MiB +[12/27/2023-19:35:51] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 155 steps to complete. +[12/27/2023-19:35:51] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 62.8554ms to assign 16 blocks to 155 nodes requiring 142754304 bytes. +[12/27/2023-19:35:51] [I] [TRT] Total Activation Memory: 142754304 +[12/27/2023-19:35:55] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1555, GPU 5631 (MiB) +[12/27/2023-19:35:55] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +2, GPU +16, now: CPU 2, GPU 16 (MiB) +[12/27/2023-19:35:55] [I] Engine built in 2530.59 sec. 
+[12/27/2023-19:35:55] [I] [TRT] Loaded engine size: 14 MiB +[12/27/2023-19:35:55] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1256, GPU 5473 (MiB) +[12/27/2023-19:35:55] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +13, now: CPU 0, GPU 13 (MiB) +[12/27/2023-19:35:55] [I] Engine deserialized in 0.103896 sec. +[12/27/2023-19:35:55] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1256, GPU 5474 (MiB) +[12/27/2023-19:35:55] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +136, now: CPU 0, GPU 149 (MiB) +[12/27/2023-19:35:55] [I] Setting persistentCacheLimit to 0 bytes. +[12/27/2023-19:35:55] [I] Using random values for input onnx::Cast_0 +[12/27/2023-19:35:55] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/27/2023-19:35:55] [I] Using random values for output graph2_flat_predictions +[12/27/2023-19:35:55] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/27/2023-19:35:55] [I] Starting inference +[12/27/2023-19:36:11] [I] Warmup completed 15 queries over 200 ms +[12/27/2023-19:36:11] [I] Timing trace has 1283 queries over 15.0201 s +[12/27/2023-19:36:11] [I] +[12/27/2023-19:36:11] [I] === Trace details === +[12/27/2023-19:36:11] [I] Trace averages of 100 runs: +[12/27/2023-19:36:11] [I] Average on 100 runs - GPU latency: 11.8369 ms - Host latency: 11.9569 ms (enqueue 11.8961 ms) +[12/27/2023-19:36:11] [I] Average on 100 runs - GPU latency: 11.701 ms - Host latency: 11.8229 ms (enqueue 11.7623 ms) +[12/27/2023-19:36:11] [I] Average on 100 runs - GPU latency: 11.6916 ms - Host latency: 11.8117 ms (enqueue 11.7561 ms) +[12/27/2023-19:36:11] [I] Average on 100 runs - GPU latency: 11.7696 ms - Host latency: 11.8903 ms (enqueue 11.8302 ms) +[12/27/2023-19:36:11] [I] Average on 100 runs - GPU latency: 11.7069 ms - Host latency: 11.8278 ms (enqueue 11.768 ms) +[12/27/2023-19:36:11] [I] 
Average on 100 runs - GPU latency: 11.6259 ms - Host latency: 11.7442 ms (enqueue 11.6929 ms) +[12/27/2023-19:36:11] [I] Average on 100 runs - GPU latency: 11.3222 ms - Host latency: 11.4363 ms (enqueue 11.4064 ms) +[12/27/2023-19:36:11] [I] Average on 100 runs - GPU latency: 11.4092 ms - Host latency: 11.5222 ms (enqueue 11.4813 ms) +[12/27/2023-19:36:11] [I] Average on 100 runs - GPU latency: 11.3321 ms - Host latency: 11.4449 ms (enqueue 11.4097 ms) +[12/27/2023-19:36:11] [I] Average on 100 runs - GPU latency: 11.3917 ms - Host latency: 11.5057 ms (enqueue 11.468 ms) +[12/27/2023-19:36:11] [I] Average on 100 runs - GPU latency: 11.3585 ms - Host latency: 11.4722 ms (enqueue 11.4384 ms) +[12/27/2023-19:36:11] [I] Average on 100 runs - GPU latency: 11.3499 ms - Host latency: 11.4655 ms (enqueue 11.4306 ms) +[12/27/2023-19:36:11] [I] +[12/27/2023-19:36:11] [I] === Performance summary === +[12/27/2023-19:36:11] [I] Throughput: 85.4187 qps +[12/27/2023-19:36:11] [I] Latency: min = 10.8345 ms, max = 19.9363 ms, mean = 11.67 ms, median = 11.6387 ms, percentile(90%) = 11.9463 ms, percentile(95%) = 12.1201 ms, percentile(99%) = 12.8525 ms +[12/27/2023-19:36:11] [I] Enqueue Time: min = 10.8066 ms, max = 18.6077 ms, mean = 11.623 ms, median = 11.585 ms, percentile(90%) = 11.8955 ms, percentile(95%) = 12.0723 ms, percentile(99%) = 12.5557 ms +[12/27/2023-19:36:11] [I] H2D Latency: min = 0.0810547 ms, max = 0.126953 ms, mean = 0.0986419 ms, median = 0.0986328 ms, percentile(90%) = 0.101074 ms, percentile(95%) = 0.101562 ms, percentile(99%) = 0.108521 ms +[12/27/2023-19:36:11] [I] GPU Compute Time: min = 10.7319 ms, max = 19.8337 ms, mean = 11.5526 ms, median = 11.5195 ms, percentile(90%) = 11.8228 ms, percentile(95%) = 12.0022 ms, percentile(99%) = 12.75 ms +[12/27/2023-19:36:11] [I] D2H Latency: min = 0.00292969 ms, max = 0.12793 ms, mean = 0.0187801 ms, median = 0.020752 ms, percentile(90%) = 0.0253906 ms, percentile(95%) = 0.0263672 ms, percentile(99%) = 0.0419922 ms 
+[12/27/2023-19:36:11] [I] Total Host Walltime: 15.0201 s +[12/27/2023-19:36:11] [I] Total GPU Compute Time: 14.8219 s +[12/27/2023-19:36:11] [I] Explanations of the performance metrics are printed in the verbose logs. +[12/27/2023-19:36:11] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_fp16.onnx --fp16 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_fp16.onnx.fp16.engine diff --git a/yolo_nas_pose_n_fp16.onnx.int8.engine.err b/yolo_nas_pose_n_fp16.onnx.int8.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..c99cdf02f75af54159ad0c3c8eda370a111e434f --- /dev/null +++ b/yolo_nas_pose_n_fp16.onnx.int8.engine.err @@ -0,0 +1,8 @@ +[12/27/2023-20:52:32] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/27/2023-20:52:32] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/27/2023-20:52:32] [E] Error[4]: [network.cpp::validate::2925] Error Code 4: Internal Error (fp16 precision has been set for a layer or layer output, but fp16 is not configured in the builder) +[12/27/2023-20:52:32] [E] Error[2]: [builder.cpp::buildSerializedNetwork::751] Error Code 2: Internal Error (Assertion engine != nullptr failed. ) +[12/27/2023-20:52:32] [E] Engine could not be created from network +[12/27/2023-20:52:32] [E] Building engine failed +[12/27/2023-20:52:32] [E] Failed to create engine from model or file. 
+[12/27/2023-20:52:32] [E] Engine set up failed diff --git a/yolo_nas_pose_n_fp16.onnx.int8.engine.log b/yolo_nas_pose_n_fp16.onnx.int8.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..3ccf9f693f53d0bb3fefcf985284752acd2770ec --- /dev/null +++ b/yolo_nas_pose_n_fp16.onnx.int8.engine.log @@ -0,0 +1,92 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_fp16.onnx --int8 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_fp16.onnx.int8.engine +[12/27/2023-20:52:25] [I] === Model Options === +[12/27/2023-20:52:25] [I] Format: ONNX +[12/27/2023-20:52:25] [I] Model: yolo_nas_pose_n_fp16.onnx +[12/27/2023-20:52:25] [I] Output: +[12/27/2023-20:52:25] [I] === Build Options === +[12/27/2023-20:52:25] [I] Max batch: explicit batch +[12/27/2023-20:52:25] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/27/2023-20:52:25] [I] minTiming: 1 +[12/27/2023-20:52:25] [I] avgTiming: 8 +[12/27/2023-20:52:25] [I] Precision: FP32+INT8 +[12/27/2023-20:52:25] [I] LayerPrecisions: +[12/27/2023-20:52:25] [I] Calibration: Dynamic +[12/27/2023-20:52:25] [I] Refit: Disabled +[12/27/2023-20:52:25] [I] Sparsity: Disabled +[12/27/2023-20:52:25] [I] Safe mode: Disabled +[12/27/2023-20:52:25] [I] DirectIO mode: Disabled +[12/27/2023-20:52:25] [I] Restricted mode: Disabled +[12/27/2023-20:52:25] [I] Build only: Disabled +[12/27/2023-20:52:25] [I] Save engine: yolo_nas_pose_n_fp16.onnx.int8.engine +[12/27/2023-20:52:25] [I] Load engine: +[12/27/2023-20:52:25] [I] Profiling verbosity: 0 +[12/27/2023-20:52:25] [I] Tactic sources: Using default tactic sources +[12/27/2023-20:52:25] [I] timingCacheMode: local +[12/27/2023-20:52:25] [I] timingCacheFile: +[12/27/2023-20:52:25] [I] Heuristic: Disabled +[12/27/2023-20:52:25] [I] Preview Features: Use default preview flags. 
+[12/27/2023-20:52:25] [I] Input(s)s format: fp32:CHW +[12/27/2023-20:52:25] [I] Output(s)s format: fp32:CHW +[12/27/2023-20:52:25] [I] Input build shapes: model +[12/27/2023-20:52:25] [I] Input calibration shapes: model +[12/27/2023-20:52:25] [I] === System Options === +[12/27/2023-20:52:25] [I] Device: 0 +[12/27/2023-20:52:25] [I] DLACore: +[12/27/2023-20:52:25] [I] Plugins: +[12/27/2023-20:52:25] [I] === Inference Options === +[12/27/2023-20:52:25] [I] Batch: Explicit +[12/27/2023-20:52:25] [I] Input inference shapes: model +[12/27/2023-20:52:25] [I] Iterations: 10 +[12/27/2023-20:52:25] [I] Duration: 15s (+ 200ms warm up) +[12/27/2023-20:52:25] [I] Sleep time: 0ms +[12/27/2023-20:52:25] [I] Idle time: 0ms +[12/27/2023-20:52:25] [I] Streams: 1 +[12/27/2023-20:52:25] [I] ExposeDMA: Disabled +[12/27/2023-20:52:25] [I] Data transfers: Enabled +[12/27/2023-20:52:25] [I] Spin-wait: Disabled +[12/27/2023-20:52:25] [I] Multithreading: Disabled +[12/27/2023-20:52:25] [I] CUDA Graph: Disabled +[12/27/2023-20:52:25] [I] Separate profiling: Disabled +[12/27/2023-20:52:25] [I] Time Deserialize: Disabled +[12/27/2023-20:52:25] [I] Time Refit: Disabled +[12/27/2023-20:52:25] [I] NVTX verbosity: 0 +[12/27/2023-20:52:25] [I] Persistent Cache Ratio: 0 +[12/27/2023-20:52:25] [I] Inputs: +[12/27/2023-20:52:25] [I] === Reporting Options === +[12/27/2023-20:52:25] [I] Verbose: Disabled +[12/27/2023-20:52:25] [I] Averages: 100 inferences +[12/27/2023-20:52:25] [I] Percentiles: 90,95,99 +[12/27/2023-20:52:25] [I] Dump refittable layers:Disabled +[12/27/2023-20:52:25] [I] Dump output: Disabled +[12/27/2023-20:52:25] [I] Profile: Disabled +[12/27/2023-20:52:25] [I] Export timing to JSON file: +[12/27/2023-20:52:25] [I] Export output to JSON file: +[12/27/2023-20:52:25] [I] Export profile to JSON file: +[12/27/2023-20:52:25] [I] +[12/27/2023-20:52:25] [I] === Device Information === +[12/27/2023-20:52:25] [I] Selected Device: Orin +[12/27/2023-20:52:25] [I] Compute Capability: 8.7 
+[12/27/2023-20:52:25] [I] SMs: 8 +[12/27/2023-20:52:25] [I] Compute Clock Rate: 0.624 GHz +[12/27/2023-20:52:25] [I] Device Global Memory: 7471 MiB +[12/27/2023-20:52:25] [I] Shared Memory per SM: 164 KiB +[12/27/2023-20:52:25] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/27/2023-20:52:25] [I] Memory Clock Rate: 0.624 GHz +[12/27/2023-20:52:25] [I] +[12/27/2023-20:52:25] [I] TensorRT version: 8.5.2 +[12/27/2023-20:52:28] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 3040 (MiB) +[12/27/2023-20:52:32] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +284, now: CPU 574, GPU 3348 (MiB) +[12/27/2023-20:52:32] [I] Start parsing network model +[12/27/2023-20:52:32] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-20:52:32] [I] [TRT] Input filename: yolo_nas_pose_n_fp16.onnx +[12/27/2023-20:52:32] [I] [TRT] ONNX IR version: 0.0.8 +[12/27/2023-20:52:32] [I] [TRT] Opset version: 17 +[12/27/2023-20:52:32] [I] [TRT] Producer name: pytorch +[12/27/2023-20:52:32] [I] [TRT] Producer version: 2.1.2 +[12/27/2023-20:52:32] [I] [TRT] Domain: +[12/27/2023-20:52:32] [I] [TRT] Model version: 0 +[12/27/2023-20:52:32] [I] [TRT] Doc string: +[12/27/2023-20:52:32] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-20:52:32] [I] Finish parsing network model +[12/27/2023-20:52:32] [I] FP32 and INT8 precisions have been specified - more performance might be enabled by additionally specifying --fp16 or --best +&&&& FAILED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_fp16.onnx --int8 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_fp16.onnx.int8.engine diff --git a/yolo_nas_pose_n_fp16.onnx.usage.txt b/yolo_nas_pose_n_fp16.onnx.usage.txt new file mode 100644 index 0000000000000000000000000000000000000000..d37e78bbf5d67cc489b38fdd876ae58ab1380b00 --- /dev/null +++ b/yolo_nas_pose_n_fp16.onnx.usage.txt @@ -0,0 +1,58 
@@ + +Model exported successfully to yolo_nas_pose_n_fp16.onnx +Model expects input image of shape [1, 3, 640, 640] +Input image dtype is torch.uint8 + +Exported model already contains preprocessing (normalization) step, so you don't need to do it manually. +Preprocessing steps to be applied to input image are: +Sequential( + (0): CastTensorTo(dtype=torch.float16) + (1): ChannelSelect(channels_indexes=tensor([2, 1, 0], device='cuda:0')) + (2): ApplyMeanStd(mean=[0.], scale=[255.]) +) + + +Exported model contains postprocessing (NMS) step with the following parameters: + num_pre_nms_predictions=1000 + max_predictions_per_image=10 + nms_threshold=0.5 + confidence_threshold=0.15 + output_predictions_format=flat + + +Exported model is in ONNX format and can be used with ONNXRuntime +To run inference with ONNXRuntime, please use the following code snippet: + + import onnxruntime + import numpy as np + session = onnxruntime.InferenceSession("yolo_nas_pose_n_fp16.onnx", providers=["CUDAExecutionProvider", "CPUExecutionProvider"]) + inputs = [o.name for o in session.get_inputs()] + outputs = [o.name for o in session.get_outputs()] + + example_input_image = np.zeros((1, 3, 640, 640)).astype(np.uint8) + predictions = session.run(outputs, {inputs[0]: example_input_image}) + +Exported model can also be used with TensorRT +To run inference with TensorRT, please see TensorRT deployment documentation +You can benchmark the model using the following code snippet: + + trtexec --onnx=yolo_nas_pose_n_fp16.onnx --fp16 --avgRuns=100 --duration=15 + + +Exported model has predictions in flat format: + +# flat_predictions is a 2D array of [N,K] shape +# Each row represents (image_index, x_min, y_min, x_max, y_max, confidence, joints...) 
+# Please note all values are floats, so you have to convert them to integers if needed + +[flat_predictions] = predictions +pred_bboxes = flat_predictions[:, 1:5] +pred_scores = flat_predictions[:, 5] +pred_joints = flat_predictions[:, 6:].reshape((len(pred_bboxes), -1, 3)) +for i in range(len(pred_bboxes)): + confidence = pred_scores[i] + x_min, y_min, x_max, y_max = pred_bboxes[i] + print(f"Detected pose with confidence={{confidence}}, x_min={{x_min}}, y_min={{y_min}}, x_max={{x_max}}, y_max={{y_max}}") + for joint_index, (x, y, confidence) in enumerate(pred_joints[i]): + print(f"Joint {{joint_index}} has coordinates x={{x}}, y={{y}}, confidence={{confidence}}") + diff --git a/yolo_nas_pose_n_fp32.onnx b/yolo_nas_pose_n_fp32.onnx new file mode 100644 index 0000000000000000000000000000000000000000..373a887eb47cfedee4e6c309083b2e6d3d0a31e7 --- /dev/null +++ b/yolo_nas_pose_n_fp32.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7868a9dbe652ba34d13ad0502bfeb9abdf00a8e11315d98f03f7b58b6bcfb181 +size 27697216 diff --git a/yolo_nas_pose_n_fp32.onnx.best.engine b/yolo_nas_pose_n_fp32.onnx.best.engine new file mode 100644 index 0000000000000000000000000000000000000000..b9684f863f2c24c63a039edf8c36f1b8f7327bb8 --- /dev/null +++ b/yolo_nas_pose_n_fp32.onnx.best.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b68b4258ffdc32672cef7e64f27b61b44dadd651f33e6bb51d10b2aed5ed548f +size 10172104 diff --git a/yolo_nas_pose_n_fp32.onnx.best.engine.err b/yolo_nas_pose_n_fp32.onnx.best.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..9e3e9f7fdc6dfb0ded6299ba48a08e493b0e327c --- /dev/null +++ b/yolo_nas_pose_n_fp32.onnx.best.engine.err @@ -0,0 +1,42 @@ +[12/27/2023-17:01:06] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32.
+[12/27/2023-17:01:06] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/27/2023-17:01:06] [W] [TRT] Calibrator is not being used. Users must provide dynamic range for all tensors that are not Int32 or Bool. +[12/27/2023-17:28:04] [W] [TRT] Tactic Device request: 3144MB Available: 2337MB. Device memory is insufficient to use tactic. +[12/27/2023-17:28:05] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3144 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-17:28:05] [W] [TRT] Tactic Device request: 3144MB Available: 2339MB. Device memory is insufficient to use tactic. +[12/27/2023-17:28:05] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3144 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-17:28:05] [W] [TRT] Tactic Device request: 3144MB Available: 2339MB. Device memory is insufficient to use tactic. +[12/27/2023-17:28:05] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3144 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-17:28:06] [W] [TRT] Tactic Device request: 3140MB Available: 2339MB. Device memory is insufficient to use tactic. +[12/27/2023-17:28:06] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3140 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-17:28:06] [W] [TRT] Tactic Device request: 3140MB Available: 2339MB. Device memory is insufficient to use tactic. +[12/27/2023-17:28:06] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3140 detected for tactic 0x000000000000003c. 
+Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-17:48:27] [W] [TRT] Tactic Device request: 4364MB Available: 1960MB. Device memory is insufficient to use tactic. +[12/27/2023-17:48:27] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4364 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-17:48:27] [W] [TRT] Tactic Device request: 4364MB Available: 1979MB. Device memory is insufficient to use tactic. +[12/27/2023-17:48:27] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4364 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-17:48:27] [W] [TRT] Tactic Device request: 4364MB Available: 1979MB. Device memory is insufficient to use tactic. +[12/27/2023-17:48:27] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4364 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-17:48:29] [W] [TRT] Tactic Device request: 4363MB Available: 1978MB. Device memory is insufficient to use tactic. +[12/27/2023-17:48:29] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4363 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-17:48:29] [W] [TRT] Tactic Device request: 4363MB Available: 1978MB. Device memory is insufficient to use tactic. +[12/27/2023-17:48:29] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 4363 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-18:16:53] [W] [TRT] TensorRT encountered issues when converting weights between types and that could affect accuracy. 
+[12/27/2023-18:16:53] [W] [TRT] If this is not the desired behavior, please modify the weights or retrain with regularization to adjust the magnitude of the weights. +[12/27/2023-18:16:53] [W] [TRT] Check verbose logs for the list of affected weights. +[12/27/2023-18:16:53] [W] [TRT] - 89 weights are affected by this issue: Detected subnormal FP16 values. +[12/27/2023-18:16:53] [W] [TRT] - 8 weights are affected by this issue: Detected values less than smallest positive FP16 subnormal value and converted them to the FP16 minimum subnormalized value. +[12/27/2023-18:17:09] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. +[12/27/2023-18:17:09] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/27/2023-18:17:09] [W] * GPU compute time is unstable, with coefficient of variance = 4.42662%. +[12/27/2023-18:17:09] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. 
diff --git a/yolo_nas_pose_n_fp32.onnx.best.engine.log b/yolo_nas_pose_n_fp32.onnx.best.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..e91b2439851a8802dfb028917f0bede5317b37d1 --- /dev/null +++ b/yolo_nas_pose_n_fp32.onnx.best.engine.log @@ -0,0 +1,304 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_fp32.onnx --best --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_fp32.onnx.best.engine +[12/27/2023-17:00:57] [I] === Model Options === +[12/27/2023-17:00:57] [I] Format: ONNX +[12/27/2023-17:00:57] [I] Model: yolo_nas_pose_n_fp32.onnx +[12/27/2023-17:00:57] [I] Output: +[12/27/2023-17:00:57] [I] === Build Options === +[12/27/2023-17:00:57] [I] Max batch: explicit batch +[12/27/2023-17:00:57] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/27/2023-17:00:57] [I] minTiming: 1 +[12/27/2023-17:00:57] [I] avgTiming: 8 +[12/27/2023-17:00:57] [I] Precision: FP32+FP16+INT8 +[12/27/2023-17:00:57] [I] LayerPrecisions: +[12/27/2023-17:00:57] [I] Calibration: Dynamic +[12/27/2023-17:00:57] [I] Refit: Disabled +[12/27/2023-17:00:57] [I] Sparsity: Disabled +[12/27/2023-17:00:57] [I] Safe mode: Disabled +[12/27/2023-17:00:57] [I] DirectIO mode: Disabled +[12/27/2023-17:00:57] [I] Restricted mode: Disabled +[12/27/2023-17:00:57] [I] Build only: Disabled +[12/27/2023-17:00:57] [I] Save engine: yolo_nas_pose_n_fp32.onnx.best.engine +[12/27/2023-17:00:57] [I] Load engine: +[12/27/2023-17:00:57] [I] Profiling verbosity: 0 +[12/27/2023-17:00:57] [I] Tactic sources: Using default tactic sources +[12/27/2023-17:00:57] [I] timingCacheMode: local +[12/27/2023-17:00:57] [I] timingCacheFile: +[12/27/2023-17:00:57] [I] Heuristic: Disabled +[12/27/2023-17:00:57] [I] Preview Features: Use default preview flags. 
+[12/27/2023-17:00:57] [I] Input(s)s format: fp32:CHW +[12/27/2023-17:00:57] [I] Output(s)s format: fp32:CHW +[12/27/2023-17:00:57] [I] Input build shapes: model +[12/27/2023-17:00:57] [I] Input calibration shapes: model +[12/27/2023-17:00:57] [I] === System Options === +[12/27/2023-17:00:57] [I] Device: 0 +[12/27/2023-17:00:57] [I] DLACore: +[12/27/2023-17:00:57] [I] Plugins: +[12/27/2023-17:00:57] [I] === Inference Options === +[12/27/2023-17:00:57] [I] Batch: Explicit +[12/27/2023-17:00:57] [I] Input inference shapes: model +[12/27/2023-17:00:57] [I] Iterations: 10 +[12/27/2023-17:00:57] [I] Duration: 15s (+ 200ms warm up) +[12/27/2023-17:00:57] [I] Sleep time: 0ms +[12/27/2023-17:00:57] [I] Idle time: 0ms +[12/27/2023-17:00:57] [I] Streams: 1 +[12/27/2023-17:00:57] [I] ExposeDMA: Disabled +[12/27/2023-17:00:57] [I] Data transfers: Enabled +[12/27/2023-17:00:57] [I] Spin-wait: Disabled +[12/27/2023-17:00:57] [I] Multithreading: Disabled +[12/27/2023-17:00:57] [I] CUDA Graph: Disabled +[12/27/2023-17:00:57] [I] Separate profiling: Disabled +[12/27/2023-17:00:57] [I] Time Deserialize: Disabled +[12/27/2023-17:00:57] [I] Time Refit: Disabled +[12/27/2023-17:00:57] [I] NVTX verbosity: 0 +[12/27/2023-17:00:57] [I] Persistent Cache Ratio: 0 +[12/27/2023-17:00:57] [I] Inputs: +[12/27/2023-17:00:57] [I] === Reporting Options === +[12/27/2023-17:00:57] [I] Verbose: Disabled +[12/27/2023-17:00:57] [I] Averages: 100 inferences +[12/27/2023-17:00:57] [I] Percentiles: 90,95,99 +[12/27/2023-17:00:57] [I] Dump refittable layers:Disabled +[12/27/2023-17:00:57] [I] Dump output: Disabled +[12/27/2023-17:00:57] [I] Profile: Disabled +[12/27/2023-17:00:57] [I] Export timing to JSON file: +[12/27/2023-17:00:57] [I] Export output to JSON file: +[12/27/2023-17:00:57] [I] Export profile to JSON file: +[12/27/2023-17:00:57] [I] +[12/27/2023-17:00:57] [I] === Device Information === +[12/27/2023-17:00:57] [I] Selected Device: Orin +[12/27/2023-17:00:57] [I] Compute Capability: 8.7 
+[12/27/2023-17:00:57] [I] SMs: 8 +[12/27/2023-17:00:57] [I] Compute Clock Rate: 0.624 GHz +[12/27/2023-17:00:57] [I] Device Global Memory: 7471 MiB +[12/27/2023-17:00:57] [I] Shared Memory per SM: 164 KiB +[12/27/2023-17:00:57] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/27/2023-17:00:57] [I] Memory Clock Rate: 0.624 GHz +[12/27/2023-17:00:57] [I] +[12/27/2023-17:00:57] [I] TensorRT version: 8.5.2 +[12/27/2023-17:01:01] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 3069 (MiB) +[12/27/2023-17:01:05] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +285, now: CPU 574, GPU 3374 (MiB) +[12/27/2023-17:01:05] [I] Start parsing network model +[12/27/2023-17:01:06] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-17:01:06] [I] [TRT] Input filename: yolo_nas_pose_n_fp32.onnx +[12/27/2023-17:01:06] [I] [TRT] ONNX IR version: 0.0.8 +[12/27/2023-17:01:06] [I] [TRT] Opset version: 17 +[12/27/2023-17:01:06] [I] [TRT] Producer name: pytorch +[12/27/2023-17:01:06] [I] [TRT] Producer version: 2.1.2 +[12/27/2023-17:01:06] [I] [TRT] Domain: +[12/27/2023-17:01:06] [I] [TRT] Model version: 0 +[12/27/2023-17:01:06] [I] [TRT] Doc string: +[12/27/2023-17:01:06] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-17:01:06] [I] Finish parsing network model +[12/27/2023-17:01:06] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/27/2023-17:01:06] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 383) [Constant] +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 384) [Constant] +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 385) 
[Constant] +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stem/conv/rbr_reparam/Conv + /model/backbone/stem/conv/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/downsample/rbr_reparam/Conv + /model/backbone/stage1/downsample/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv2/conv/Conv + /model/backbone/stage1/blocks/conv2/act/Relu || /model/backbone/stage1/blocks/conv1/conv/Conv + /model/backbone/stage1/blocks/conv1/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 15) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 23) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] COPY: 
/model/backbone/stage1/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv3/conv/Conv + /model/backbone/stage1/blocks/conv3/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_skip2/conv/Conv + /model/neck/neck2/reduce_skip2/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/downsample/rbr_reparam/Conv + /model/backbone/stage2/downsample/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/downsample/conv/Conv + /model/neck/neck2/downsample/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv2/conv/Conv + /model/backbone/stage2/blocks/conv2/act/Relu || /model/backbone/stage2/blocks/conv1/conv/Conv + /model/backbone/stage2/blocks/conv1/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 44) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + 
/model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 52) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 60) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add_output_0 copy +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv3/conv/Conv + /model/backbone/stage2/blocks/conv3/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip2/conv/Conv + /model/neck/neck1/reduce_skip2/act/Relu || /model/neck/neck2/reduce_skip1/conv/Conv + /model/neck/neck2/reduce_skip1/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/downsample/rbr_reparam/Conv + /model/backbone/stage3/downsample/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/downsample/conv/Conv + /model/neck/neck1/downsample/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/backbone/stage3/blocks/conv2/conv/Conv + /model/backbone/stage3/blocks/conv2/act/Relu || /model/backbone/stage3/blocks/conv1/conv/Conv + /model/backbone/stage3/blocks/conv1/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 83) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 91) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 99) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 107) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv3/conv/Conv + /model/backbone/stage3/blocks/conv3/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip1/conv/Conv + /model/neck/neck1/reduce_skip1/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/downsample/rbr_reparam/Conv + /model/backbone/stage4/downsample/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv2/conv/Conv + /model/backbone/stage4/blocks/conv2/act/Relu || /model/backbone/stage4/blocks/conv1/conv/Conv + /model/backbone/stage4/blocks/conv1/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + 
/model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 126) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 134) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv3/conv/Conv + /model/backbone/stage4/blocks/conv3/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv1/conv/Conv + /model/backbone/context_module/cv1/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] COPY: 
/model/backbone/context_module/cv1/act/Relu_output_0 copy +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv2/conv/Conv + /model/backbone/context_module/cv2/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/conv/conv/Conv + /model/neck/neck1/conv/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck1/upsample/ConvTranspose +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_after_concat/conv/Conv + /model/neck/neck1/reduce_after_concat/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv2/conv/Conv + /model/neck/neck1/blocks/conv2/act/Relu || /model/neck/neck1/blocks/conv1/conv/Conv + /model/neck/neck1/blocks/conv1/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 163) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 
171) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv3/conv/Conv + /model/neck/neck1/blocks/conv3/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/conv/conv/Conv + /model/neck/neck2/conv/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck2/upsample/ConvTranspose +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/reduce_skip1/act/Relu_output_0 copy +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_after_concat/conv/Conv + /model/neck/neck2/reduce_after_concat/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv2/conv/Conv + /model/neck/neck2/blocks/conv2/act/Relu || /model/neck/neck2/blocks/conv1/conv/Conv + /model/neck/neck2/blocks/conv1/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 192) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + 
/model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 200) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv3/conv/Conv + /model/neck/neck2/blocks/conv3/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/bbox_stem/seq/conv/Conv + /model/heads/head1/bbox_stem/seq/act/Relu || /model/heads/head1/pose_stem/seq/conv/Conv + /model/heads/head1/pose_stem/seq/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/conv/conv/Conv + /model/neck/neck3/conv/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head1/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head1/cls_convs/cls_convs.0/seq/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.0/seq/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv2/conv/Conv + /model/neck/neck3/blocks/conv2/act/Relu || /model/neck/neck3/blocks/conv1/conv/Conv + /model/neck/neck3/blocks/conv1/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/cls_pred/Conv +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_pred/Conv 
+[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.1/seq/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_pred/Conv +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 231) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 254) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/conv2/act/Relu_output_0 copy 
+[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv3/conv/Conv + /model/neck/neck3/blocks/conv3/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/bbox_stem/seq/conv/Conv + /model/heads/head2/bbox_stem/seq/act/Relu || /model/heads/head2/pose_stem/seq/conv/Conv + /model/heads/head2/pose_stem/seq/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/conv/conv/Conv + /model/neck/neck4/conv/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head2/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head2/cls_convs/cls_convs.0/seq/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.0/seq/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv2/conv/Conv + /model/neck/neck4/blocks/conv2/act/Relu || /model/neck/neck4/blocks/conv1/conv/Conv + /model/neck/neck4/blocks/conv1/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/cls_pred/Conv +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_pred/Conv +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.1/seq/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_pred/Conv +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 285) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 308) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv3/conv/Conv + /model/neck/neck4/blocks/conv3/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/bbox_stem/seq/conv/Conv + /model/heads/head3/bbox_stem/seq/act/Relu || /model/heads/head3/pose_stem/seq/conv/Conv + /model/heads/head3/pose_stem/seq/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head3/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv + 
/model/heads/head3/cls_convs/cls_convs.0/seq/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.0/seq/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/cls_pred/Conv +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_pred/Conv +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.1/seq/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.2/seq/act/Relu +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_pred/Conv +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice_1.../post_process/Reshape_2]} +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] NMS: batched_nms_26 +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 387) [NMS]_1_output[DevicetoShapeHostCopy] +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/27/2023-17:01:06] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] +[12/27/2023-17:01:19] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +392, now: CPU 1142, GPU 3820 (MiB) +[12/27/2023-17:01:21] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +82, GPU +73, now: CPU 1224, GPU 3893 (MiB) +[12/27/2023-17:01:21] [I] [TRT] Local timing cache in use. 
Profiling results in this builder pass will not be stored. +[12/27/2023-18:16:36] [I] [TRT] Total Activation Memory: 7893208064 +[12/27/2023-18:16:36] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/27/2023-18:16:46] [I] [TRT] Total Host Persistent Memory: 287328 +[12/27/2023-18:16:46] [I] [TRT] Total Device Persistent Memory: 77824 +[12/27/2023-18:16:46] [I] [TRT] Total Scratch Memory: 134217728 +[12/27/2023-18:16:46] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 13 MiB, GPU 2112 MiB +[12/27/2023-18:16:46] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 165 steps to complete. +[12/27/2023-18:16:47] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 90.0648ms to assign 13 blocks to 165 nodes requiring 138902528 bytes. +[12/27/2023-18:16:47] [I] [TRT] Total Activation Memory: 138902528 +[12/27/2023-18:16:53] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU -23, now: CPU 1582, GPU 5506 (MiB) +[12/27/2023-18:16:53] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +7, GPU +8, now: CPU 7, GPU 8 (MiB) +[12/27/2023-18:16:53] [I] Engine built in 4556.17 sec. +[12/27/2023-18:16:54] [I] [TRT] Loaded engine size: 9 MiB +[12/27/2023-18:16:54] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1254, GPU 5390 (MiB) +[12/27/2023-18:16:54] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +7, now: CPU 0, GPU 7 (MiB) +[12/27/2023-18:16:54] [I] Engine deserialized in 0.249492 sec. +[12/27/2023-18:16:54] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1255, GPU 5390 (MiB) +[12/27/2023-18:16:54] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +133, now: CPU 0, GPU 140 (MiB) +[12/27/2023-18:16:54] [I] Setting persistentCacheLimit to 0 bytes. 
+[12/27/2023-18:16:54] [I] Using random values for input onnx::Cast_0 +[12/27/2023-18:16:54] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/27/2023-18:16:54] [I] Using random values for output graph2_flat_predictions +[12/27/2023-18:16:54] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/27/2023-18:16:54] [I] Starting inference +[12/27/2023-18:17:09] [I] Warmup completed 3 queries over 200 ms +[12/27/2023-18:17:09] [I] Timing trace has 1585 queries over 15.021 s +[12/27/2023-18:17:09] [I] +[12/27/2023-18:17:09] [I] === Trace details === +[12/27/2023-18:17:09] [I] Trace averages of 100 runs: +[12/27/2023-18:17:09] [I] Average on 100 runs - GPU latency: 9.45483 ms - Host latency: 9.5689 ms (enqueue 9.53183 ms) +[12/27/2023-18:17:09] [I] Average on 100 runs - GPU latency: 9.56754 ms - Host latency: 9.68717 ms (enqueue 9.63745 ms) +[12/27/2023-18:17:09] [I] Average on 100 runs - GPU latency: 9.37603 ms - Host latency: 9.49177 ms (enqueue 9.44953 ms) +[12/27/2023-18:17:09] [I] Average on 100 runs - GPU latency: 9.42432 ms - Host latency: 9.54236 ms (enqueue 9.4956 ms) +[12/27/2023-18:17:09] [I] Average on 100 runs - GPU latency: 9.48685 ms - Host latency: 9.60414 ms (enqueue 9.55452 ms) +[12/27/2023-18:17:09] [I] Average on 100 runs - GPU latency: 9.29727 ms - Host latency: 9.41133 ms (enqueue 9.37589 ms) +[12/27/2023-18:17:09] [I] Average on 100 runs - GPU latency: 9.31099 ms - Host latency: 9.42416 ms (enqueue 9.38862 ms) +[12/27/2023-18:17:09] [I] Average on 100 runs - GPU latency: 9.20675 ms - Host latency: 9.32071 ms (enqueue 9.28723 ms) +[12/27/2023-18:17:09] [I] Average on 100 runs - GPU latency: 9.32158 ms - Host latency: 9.43526 ms (enqueue 9.40085 ms) +[12/27/2023-18:17:09] [I] Average on 100 runs - GPU latency: 9.18112 ms - Host latency: 9.29475 ms (enqueue 9.26196 ms) +[12/27/2023-18:17:09] [I] Average on 100 runs - GPU latency: 9.19898 ms - Host latency: 9.31291 ms (enqueue 9.28161 ms) 
+[12/27/2023-18:17:09] [I] Average on 100 runs - GPU latency: 9.21823 ms - Host latency: 9.33146 ms (enqueue 9.29895 ms) +[12/27/2023-18:17:09] [I] Average on 100 runs - GPU latency: 9.17338 ms - Host latency: 9.28631 ms (enqueue 9.25456 ms) +[12/27/2023-18:17:09] [I] Average on 100 runs - GPU latency: 9.33526 ms - Host latency: 9.44828 ms (enqueue 9.40888 ms) +[12/27/2023-18:17:09] [I] Average on 100 runs - GPU latency: 9.49695 ms - Host latency: 9.61355 ms (enqueue 9.56515 ms) +[12/27/2023-18:17:09] [I] +[12/27/2023-18:17:09] [I] === Performance summary === +[12/27/2023-18:17:09] [I] Throughput: 105.519 qps +[12/27/2023-18:17:09] [I] Latency: min = 8.85352 ms, max = 16.1188 ms, mean = 9.44638 ms, median = 9.31592 ms, percentile(90%) = 9.66943 ms, percentile(95%) = 9.98145 ms, percentile(99%) = 11.1636 ms +[12/27/2023-18:17:09] [I] Enqueue Time: min = 8.8125 ms, max = 17.5256 ms, mean = 9.40808 ms, median = 9.28223 ms, percentile(90%) = 9.6239 ms, percentile(95%) = 9.93411 ms, percentile(99%) = 11.1191 ms +[12/27/2023-18:17:09] [I] H2D Latency: min = 0.0814514 ms, max = 0.197891 ms, mean = 0.100154 ms, median = 0.100586 ms, percentile(90%) = 0.102051 ms, percentile(95%) = 0.102539 ms, percentile(99%) = 0.113281 ms +[12/27/2023-18:17:09] [I] GPU Compute Time: min = 8.73633 ms, max = 16.0115 ms, mean = 9.33164 ms, median = 9.2002 ms, percentile(90%) = 9.55396 ms, percentile(95%) = 9.86475 ms, percentile(99%) = 11.0469 ms +[12/27/2023-18:17:09] [I] D2H Latency: min = 0.00292969 ms, max = 0.0561523 ms, mean = 0.0145941 ms, median = 0.0126953 ms, percentile(90%) = 0.0212402 ms, percentile(95%) = 0.0227051 ms, percentile(99%) = 0.0273438 ms +[12/27/2023-18:17:09] [I] Total Host Walltime: 15.021 s +[12/27/2023-18:17:09] [I] Total GPU Compute Time: 14.7907 s +[12/27/2023-18:17:09] [I] Explanations of the performance metrics are printed in the verbose logs. 
+[12/27/2023-18:17:09] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_fp32.onnx --best --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_fp32.onnx.best.engine diff --git a/yolo_nas_pose_n_fp32.onnx.engine b/yolo_nas_pose_n_fp32.onnx.engine new file mode 100644 index 0000000000000000000000000000000000000000..d9a9cf89e1c9703d12fbb2c1436680afb0e87c63 --- /dev/null +++ b/yolo_nas_pose_n_fp32.onnx.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e19d462642a6a172850d6b620289741a3bdd2757195c48b4c930a2c44906e83 +size 29694950 diff --git a/yolo_nas_pose_n_fp32.onnx.engine.err b/yolo_nas_pose_n_fp32.onnx.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..96fa889034b0aedfcbed616d011629c9c7028a3e --- /dev/null +++ b/yolo_nas_pose_n_fp32.onnx.engine.err @@ -0,0 +1,24 @@ +[12/27/2023-16:02:15] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/27/2023-16:02:15] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/27/2023-16:06:49] [W] [TRT] Tactic Device request: 3144MB Available: 2201MB. Device memory is insufficient to use tactic. +[12/27/2023-16:06:49] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3144 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-16:06:49] [W] [TRT] Tactic Device request: 3144MB Available: 2201MB. Device memory is insufficient to use tactic. +[12/27/2023-16:06:49] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3144 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-16:06:49] [W] [TRT] Tactic Device request: 3144MB Available: 2201MB. 
Device memory is insufficient to use tactic. +[12/27/2023-16:06:49] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3144 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-16:11:00] [W] [TRT] Tactic Device request: 4364MB Available: 2009MB. Device memory is insufficient to use tactic. +[12/27/2023-16:11:00] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4364 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-16:11:00] [W] [TRT] Tactic Device request: 4364MB Available: 2009MB. Device memory is insufficient to use tactic. +[12/27/2023-16:11:00] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4364 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-16:11:00] [W] [TRT] Tactic Device request: 4364MB Available: 2008MB. Device memory is insufficient to use tactic. +[12/27/2023-16:11:00] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4364 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-16:17:38] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. +[12/27/2023-16:17:38] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/27/2023-16:17:38] [W] * GPU compute time is unstable, with coefficient of variance = 6.71979%. +[12/27/2023-16:17:38] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. 
diff --git a/yolo_nas_pose_n_fp32.onnx.engine.log b/yolo_nas_pose_n_fp32.onnx.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..445fad04da3f51b1bbf8a658ffb8deaca6920335 --- /dev/null +++ b/yolo_nas_pose_n_fp32.onnx.engine.log @@ -0,0 +1,296 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_fp32.onnx --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_fp32.onnx.engine +[12/27/2023-16:02:06] [I] === Model Options === +[12/27/2023-16:02:06] [I] Format: ONNX +[12/27/2023-16:02:06] [I] Model: yolo_nas_pose_n_fp32.onnx +[12/27/2023-16:02:06] [I] Output: +[12/27/2023-16:02:06] [I] === Build Options === +[12/27/2023-16:02:06] [I] Max batch: explicit batch +[12/27/2023-16:02:06] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/27/2023-16:02:06] [I] minTiming: 1 +[12/27/2023-16:02:06] [I] avgTiming: 8 +[12/27/2023-16:02:06] [I] Precision: FP32 +[12/27/2023-16:02:06] [I] LayerPrecisions: +[12/27/2023-16:02:06] [I] Calibration: +[12/27/2023-16:02:06] [I] Refit: Disabled +[12/27/2023-16:02:06] [I] Sparsity: Disabled +[12/27/2023-16:02:06] [I] Safe mode: Disabled +[12/27/2023-16:02:06] [I] DirectIO mode: Disabled +[12/27/2023-16:02:06] [I] Restricted mode: Disabled +[12/27/2023-16:02:06] [I] Build only: Disabled +[12/27/2023-16:02:06] [I] Save engine: yolo_nas_pose_n_fp32.onnx.engine +[12/27/2023-16:02:06] [I] Load engine: +[12/27/2023-16:02:06] [I] Profiling verbosity: 0 +[12/27/2023-16:02:06] [I] Tactic sources: Using default tactic sources +[12/27/2023-16:02:06] [I] timingCacheMode: local +[12/27/2023-16:02:06] [I] timingCacheFile: +[12/27/2023-16:02:06] [I] Heuristic: Disabled +[12/27/2023-16:02:06] [I] Preview Features: Use default preview flags. 
+[12/27/2023-16:02:06] [I] Input(s)s format: fp32:CHW +[12/27/2023-16:02:06] [I] Output(s)s format: fp32:CHW +[12/27/2023-16:02:06] [I] Input build shapes: model +[12/27/2023-16:02:06] [I] Input calibration shapes: model +[12/27/2023-16:02:06] [I] === System Options === +[12/27/2023-16:02:06] [I] Device: 0 +[12/27/2023-16:02:06] [I] DLACore: +[12/27/2023-16:02:06] [I] Plugins: +[12/27/2023-16:02:06] [I] === Inference Options === +[12/27/2023-16:02:06] [I] Batch: Explicit +[12/27/2023-16:02:06] [I] Input inference shapes: model +[12/27/2023-16:02:06] [I] Iterations: 10 +[12/27/2023-16:02:06] [I] Duration: 15s (+ 200ms warm up) +[12/27/2023-16:02:06] [I] Sleep time: 0ms +[12/27/2023-16:02:06] [I] Idle time: 0ms +[12/27/2023-16:02:06] [I] Streams: 1 +[12/27/2023-16:02:06] [I] ExposeDMA: Disabled +[12/27/2023-16:02:06] [I] Data transfers: Enabled +[12/27/2023-16:02:06] [I] Spin-wait: Disabled +[12/27/2023-16:02:06] [I] Multithreading: Disabled +[12/27/2023-16:02:06] [I] CUDA Graph: Disabled +[12/27/2023-16:02:06] [I] Separate profiling: Disabled +[12/27/2023-16:02:06] [I] Time Deserialize: Disabled +[12/27/2023-16:02:06] [I] Time Refit: Disabled +[12/27/2023-16:02:06] [I] NVTX verbosity: 0 +[12/27/2023-16:02:06] [I] Persistent Cache Ratio: 0 +[12/27/2023-16:02:06] [I] Inputs: +[12/27/2023-16:02:06] [I] === Reporting Options === +[12/27/2023-16:02:06] [I] Verbose: Disabled +[12/27/2023-16:02:06] [I] Averages: 100 inferences +[12/27/2023-16:02:06] [I] Percentiles: 90,95,99 +[12/27/2023-16:02:06] [I] Dump refittable layers:Disabled +[12/27/2023-16:02:06] [I] Dump output: Disabled +[12/27/2023-16:02:06] [I] Profile: Disabled +[12/27/2023-16:02:06] [I] Export timing to JSON file: +[12/27/2023-16:02:06] [I] Export output to JSON file: +[12/27/2023-16:02:06] [I] Export profile to JSON file: +[12/27/2023-16:02:06] [I] +[12/27/2023-16:02:07] [I] === Device Information === +[12/27/2023-16:02:07] [I] Selected Device: Orin +[12/27/2023-16:02:07] [I] Compute Capability: 8.7 
+[12/27/2023-16:02:07] [I] SMs: 8 +[12/27/2023-16:02:07] [I] Compute Clock Rate: 0.624 GHz +[12/27/2023-16:02:07] [I] Device Global Memory: 7471 MiB +[12/27/2023-16:02:07] [I] Shared Memory per SM: 164 KiB +[12/27/2023-16:02:07] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/27/2023-16:02:07] [I] Memory Clock Rate: 0.624 GHz +[12/27/2023-16:02:07] [I] +[12/27/2023-16:02:07] [I] TensorRT version: 8.5.2 +[12/27/2023-16:02:11] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 2750 (MiB) +[12/27/2023-16:02:15] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +505, now: CPU 574, GPU 3278 (MiB) +[12/27/2023-16:02:15] [I] Start parsing network model +[12/27/2023-16:02:15] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-16:02:15] [I] [TRT] Input filename: yolo_nas_pose_n_fp32.onnx +[12/27/2023-16:02:15] [I] [TRT] ONNX IR version: 0.0.8 +[12/27/2023-16:02:15] [I] [TRT] Opset version: 17 +[12/27/2023-16:02:15] [I] [TRT] Producer name: pytorch +[12/27/2023-16:02:15] [I] [TRT] Producer version: 2.1.2 +[12/27/2023-16:02:15] [I] [TRT] Domain: +[12/27/2023-16:02:15] [I] [TRT] Model version: 0 +[12/27/2023-16:02:15] [I] [TRT] Doc string: +[12/27/2023-16:02:15] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-16:02:15] [I] Finish parsing network model +[12/27/2023-16:02:16] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/27/2023-16:02:16] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 383) [Constant] +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 384) [Constant] +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 385) 
[Constant] +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stem/conv/rbr_reparam/Conv + /model/backbone/stem/conv/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/downsample/rbr_reparam/Conv + /model/backbone/stage1/downsample/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv2/conv/Conv + /model/backbone/stage1/blocks/conv2/act/Relu || /model/backbone/stage1/blocks/conv1/conv/Conv + /model/backbone/stage1/blocks/conv1/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 15) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 23) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] COPY: 
/model/backbone/stage1/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv3/conv/Conv + /model/backbone/stage1/blocks/conv3/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_skip2/conv/Conv + /model/neck/neck2/reduce_skip2/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/downsample/rbr_reparam/Conv + /model/backbone/stage2/downsample/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/downsample/conv/Conv + /model/neck/neck2/downsample/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv2/conv/Conv + /model/backbone/stage2/blocks/conv2/act/Relu || /model/backbone/stage2/blocks/conv1/conv/Conv + /model/backbone/stage2/blocks/conv1/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 44) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + 
/model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 52) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 60) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add_output_0 copy +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv3/conv/Conv + /model/backbone/stage2/blocks/conv3/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip2/conv/Conv + /model/neck/neck1/reduce_skip2/act/Relu || /model/neck/neck2/reduce_skip1/conv/Conv + /model/neck/neck2/reduce_skip1/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/downsample/rbr_reparam/Conv + /model/backbone/stage3/downsample/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/downsample/conv/Conv + /model/neck/neck1/downsample/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/backbone/stage3/blocks/conv2/conv/Conv + /model/backbone/stage3/blocks/conv2/act/Relu || /model/backbone/stage3/blocks/conv1/conv/Conv + /model/backbone/stage3/blocks/conv1/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 83) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 91) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 99) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 107) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv3/conv/Conv + /model/backbone/stage3/blocks/conv3/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip1/conv/Conv + /model/neck/neck1/reduce_skip1/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/downsample/rbr_reparam/Conv + /model/backbone/stage4/downsample/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv2/conv/Conv + /model/backbone/stage4/blocks/conv2/act/Relu || /model/backbone/stage4/blocks/conv1/conv/Conv + /model/backbone/stage4/blocks/conv1/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + 
/model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 126) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 134) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv3/conv/Conv + /model/backbone/stage4/blocks/conv3/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv1/conv/Conv + /model/backbone/context_module/cv1/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] COPY: 
/model/backbone/context_module/cv1/act/Relu_output_0 copy +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv2/conv/Conv + /model/backbone/context_module/cv2/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/conv/conv/Conv + /model/neck/neck1/conv/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck1/upsample/ConvTranspose +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_after_concat/conv/Conv + /model/neck/neck1/reduce_after_concat/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv2/conv/Conv + /model/neck/neck1/blocks/conv2/act/Relu || /model/neck/neck1/blocks/conv1/conv/Conv + /model/neck/neck1/blocks/conv1/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 163) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 
171) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv3/conv/Conv + /model/neck/neck1/blocks/conv3/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/conv/conv/Conv + /model/neck/neck2/conv/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck2/upsample/ConvTranspose +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/reduce_skip1/act/Relu_output_0 copy +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_after_concat/conv/Conv + /model/neck/neck2/reduce_after_concat/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv2/conv/Conv + /model/neck/neck2/blocks/conv2/act/Relu || /model/neck/neck2/blocks/conv1/conv/Conv + /model/neck/neck2/blocks/conv1/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 192) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + 
/model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 200) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv3/conv/Conv + /model/neck/neck2/blocks/conv3/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/bbox_stem/seq/conv/Conv + /model/heads/head1/bbox_stem/seq/act/Relu || /model/heads/head1/pose_stem/seq/conv/Conv + /model/heads/head1/pose_stem/seq/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/conv/conv/Conv + /model/neck/neck3/conv/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head1/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head1/cls_convs/cls_convs.0/seq/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.0/seq/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv2/conv/Conv + /model/neck/neck3/blocks/conv2/act/Relu || /model/neck/neck3/blocks/conv1/conv/Conv + /model/neck/neck3/blocks/conv1/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/cls_pred/Conv +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_pred/Conv 
+[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.1/seq/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_pred/Conv +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 231) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 254) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/conv2/act/Relu_output_0 copy 
+[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv3/conv/Conv + /model/neck/neck3/blocks/conv3/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/bbox_stem/seq/conv/Conv + /model/heads/head2/bbox_stem/seq/act/Relu || /model/heads/head2/pose_stem/seq/conv/Conv + /model/heads/head2/pose_stem/seq/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/conv/conv/Conv + /model/neck/neck4/conv/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head2/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head2/cls_convs/cls_convs.0/seq/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.0/seq/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv2/conv/Conv + /model/neck/neck4/blocks/conv2/act/Relu || /model/neck/neck4/blocks/conv1/conv/Conv + /model/neck/neck4/blocks/conv1/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/cls_pred/Conv +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_pred/Conv +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.1/seq/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_pred/Conv +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 285) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 308) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv3/conv/Conv + /model/neck/neck4/blocks/conv3/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/bbox_stem/seq/conv/Conv + /model/heads/head3/bbox_stem/seq/act/Relu || /model/heads/head3/pose_stem/seq/conv/Conv + /model/heads/head3/pose_stem/seq/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head3/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv + 
/model/heads/head3/cls_convs/cls_convs.0/seq/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.0/seq/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/cls_pred/Conv +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_pred/Conv +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.1/seq/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.2/seq/act/Relu +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_pred/Conv +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice_1.../post_process/Reshape_2]} +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] NMS: batched_nms_26 +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 387) [NMS]_1_output[DevicetoShapeHostCopy] +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/27/2023-16:02:16] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] +[12/27/2023-16:02:28] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +726, now: CPU 1141, GPU 4084 (MiB) +[12/27/2023-16:02:30] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +83, GPU +161, now: CPU 1224, GPU 4245 (MiB) +[12/27/2023-16:02:30] [I] [TRT] Local timing cache in use. 
Profiling results in this builder pass will not be stored. +[12/27/2023-16:17:17] [I] [TRT] Total Activation Memory: 8005846016 +[12/27/2023-16:17:17] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/27/2023-16:17:20] [I] [TRT] Total Host Persistent Memory: 313840 +[12/27/2023-16:17:20] [I] [TRT] Total Device Persistent Memory: 44032 +[12/27/2023-16:17:20] [I] [TRT] Total Scratch Memory: 134217728 +[12/27/2023-16:17:20] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 8 MiB, GPU 2112 MiB +[12/27/2023-16:17:20] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 172 steps to complete. +[12/27/2023-16:17:20] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 80.4801ms to assign 14 blocks to 172 nodes requiring 150895104 bytes. +[12/27/2023-16:17:20] [I] [TRT] Total Activation Memory: 150895104 +[12/27/2023-16:17:21] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1568, GPU 5615 (MiB) +[12/27/2023-16:17:22] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +4, GPU +32, now: CPU 4, GPU 32 (MiB) +[12/27/2023-16:17:22] [I] Engine built in 915.377 sec. +[12/27/2023-16:17:22] [I] [TRT] Loaded engine size: 28 MiB +[12/27/2023-16:17:22] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1270, GPU 5462 (MiB) +[12/27/2023-16:17:22] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +26, now: CPU 0, GPU 26 (MiB) +[12/27/2023-16:17:22] [I] Engine deserialized in 0.103652 sec. +[12/27/2023-16:17:22] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1270, GPU 5462 (MiB) +[12/27/2023-16:17:22] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +144, now: CPU 0, GPU 170 (MiB) +[12/27/2023-16:17:22] [I] Setting persistentCacheLimit to 0 bytes. 
+[12/27/2023-16:17:22] [I] Using random values for input onnx::Cast_0 +[12/27/2023-16:17:22] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/27/2023-16:17:22] [I] Using random values for output graph2_flat_predictions +[12/27/2023-16:17:22] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/27/2023-16:17:22] [I] Starting inference +[12/27/2023-16:17:38] [I] Warmup completed 9 queries over 200 ms +[12/27/2023-16:17:38] [I] Timing trace has 719 queries over 15.0335 s +[12/27/2023-16:17:38] [I] +[12/27/2023-16:17:38] [I] === Trace details === +[12/27/2023-16:17:38] [I] Trace averages of 100 runs: +[12/27/2023-16:17:38] [I] Average on 100 runs - GPU latency: 20.9204 ms - Host latency: 21.0349 ms (enqueue 20.9735 ms) +[12/27/2023-16:17:38] [I] Average on 100 runs - GPU latency: 20.766 ms - Host latency: 20.8801 ms (enqueue 20.8248 ms) +[12/27/2023-16:17:38] [I] Average on 100 runs - GPU latency: 20.881 ms - Host latency: 21.0008 ms (enqueue 20.9431 ms) +[12/27/2023-16:17:38] [I] Average on 100 runs - GPU latency: 21.0656 ms - Host latency: 21.183 ms (enqueue 21.1181 ms) +[12/27/2023-16:17:38] [I] Average on 100 runs - GPU latency: 20.6834 ms - Host latency: 20.7941 ms (enqueue 20.7411 ms) +[12/27/2023-16:17:38] [I] Average on 100 runs - GPU latency: 20.5291 ms - Host latency: 20.6405 ms (enqueue 20.6045 ms) +[12/27/2023-16:17:38] [I] Average on 100 runs - GPU latency: 20.4844 ms - Host latency: 20.5956 ms (enqueue 20.5528 ms) +[12/27/2023-16:17:38] [I] +[12/27/2023-16:17:38] [I] === Performance summary === +[12/27/2023-16:17:38] [I] Throughput: 47.8265 qps +[12/27/2023-16:17:38] [I] Latency: min = 19.3135 ms, max = 33.7374 ms, mean = 20.8784 ms, median = 20.6987 ms, percentile(90%) = 21.5381 ms, percentile(95%) = 22.21 ms, percentile(99%) = 28.9785 ms +[12/27/2023-16:17:38] [I] Enqueue Time: min = 19.2793 ms, max = 33.667 ms, mean = 20.8258 ms, median = 20.6621 ms, percentile(90%) = 21.5679 ms, percentile(95%) = 
22.1077 ms, percentile(99%) = 28.8643 ms +[12/27/2023-16:17:38] [I] H2D Latency: min = 0.0800781 ms, max = 0.128296 ms, mean = 0.0956941 ms, median = 0.0966797 ms, percentile(90%) = 0.0986328 ms, percentile(95%) = 0.0996094 ms, percentile(99%) = 0.112305 ms +[12/27/2023-16:17:38] [I] GPU Compute Time: min = 19.2031 ms, max = 33.6203 ms, mean = 20.7643 ms, median = 20.5898 ms, percentile(90%) = 21.4326 ms, percentile(95%) = 22.1172 ms, percentile(99%) = 28.8555 ms +[12/27/2023-16:17:38] [I] D2H Latency: min = 0.00292969 ms, max = 0.0698242 ms, mean = 0.0184137 ms, median = 0.0175781 ms, percentile(90%) = 0.0263672 ms, percentile(95%) = 0.0292969 ms, percentile(99%) = 0.0410156 ms +[12/27/2023-16:17:38] [I] Total Host Walltime: 15.0335 s +[12/27/2023-16:17:38] [I] Total GPU Compute Time: 14.9295 s +[12/27/2023-16:17:38] [I] Explanations of the performance metrics are printed in the verbose logs. +[12/27/2023-16:17:38] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_fp32.onnx --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_fp32.onnx.engine diff --git a/yolo_nas_pose_n_fp32.onnx.fp16.engine b/yolo_nas_pose_n_fp32.onnx.fp16.engine new file mode 100644 index 0000000000000000000000000000000000000000..87c88a0bd82e5a651061d436546e2f6b87014ac1 --- /dev/null +++ b/yolo_nas_pose_n_fp32.onnx.fp16.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40380769be3600440551b85d6793b25d11a3ba5b28e398d27afb35875b23b8c3 +size 15631856 diff --git a/yolo_nas_pose_n_fp32.onnx.fp16.engine.err b/yolo_nas_pose_n_fp32.onnx.fp16.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..98449615e885e6e3a40c6b744b4638388856fd88 --- /dev/null +++ b/yolo_nas_pose_n_fp32.onnx.fp16.engine.err @@ -0,0 +1,41 @@ +[12/27/2023-16:17:49] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. 
Attempting to cast down to INT32. +[12/27/2023-16:17:49] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/27/2023-16:30:09] [W] [TRT] Tactic Device request: 3144MB Available: 2339MB. Device memory is insufficient to use tactic. +[12/27/2023-16:30:09] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3144 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-16:30:09] [W] [TRT] Tactic Device request: 3144MB Available: 2339MB. Device memory is insufficient to use tactic. +[12/27/2023-16:30:09] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3144 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-16:30:09] [W] [TRT] Tactic Device request: 3144MB Available: 2339MB. Device memory is insufficient to use tactic. +[12/27/2023-16:30:09] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3144 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-16:30:10] [W] [TRT] Tactic Device request: 3140MB Available: 2340MB. Device memory is insufficient to use tactic. +[12/27/2023-16:30:10] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3140 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-16:30:10] [W] [TRT] Tactic Device request: 3140MB Available: 2340MB. Device memory is insufficient to use tactic. +[12/27/2023-16:30:10] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3140 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-16:42:30] [W] [TRT] Tactic Device request: 4364MB Available: 1969MB. 
Device memory is insufficient to use tactic. +[12/27/2023-16:42:30] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4364 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-16:42:31] [W] [TRT] Tactic Device request: 4364MB Available: 1969MB. Device memory is insufficient to use tactic. +[12/27/2023-16:42:31] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4364 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-16:42:31] [W] [TRT] Tactic Device request: 4364MB Available: 1969MB. Device memory is insufficient to use tactic. +[12/27/2023-16:42:31] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4364 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-16:42:32] [W] [TRT] Tactic Device request: 4363MB Available: 1968MB. Device memory is insufficient to use tactic. +[12/27/2023-16:42:32] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4363 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-16:42:32] [W] [TRT] Tactic Device request: 4363MB Available: 1968MB. Device memory is insufficient to use tactic. +[12/27/2023-16:42:32] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 4363 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-17:00:37] [W] [TRT] TensorRT encountered issues when converting weights between types and that could affect accuracy. +[12/27/2023-17:00:37] [W] [TRT] If this is not the desired behavior, please modify the weights or retrain with regularization to adjust the magnitude of the weights. 
+[12/27/2023-17:00:37] [W] [TRT] Check verbose logs for the list of affected weights. +[12/27/2023-17:00:37] [W] [TRT] - 89 weights are affected by this issue: Detected subnormal FP16 values. +[12/27/2023-17:00:37] [W] [TRT] - 8 weights are affected by this issue: Detected values less than smallest positive FP16 subnormal value and converted them to the FP16 minimum subnormalized value. +[12/27/2023-17:00:54] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. +[12/27/2023-17:00:54] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/27/2023-17:00:54] [W] * GPU compute time is unstable, with coefficient of variance = 9.81179%. +[12/27/2023-17:00:54] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. diff --git a/yolo_nas_pose_n_fp32.onnx.fp16.engine.log b/yolo_nas_pose_n_fp32.onnx.fp16.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..29416f06e70d1688d3229acb64f06c23dbbf48ef --- /dev/null +++ b/yolo_nas_pose_n_fp32.onnx.fp16.engine.log @@ -0,0 +1,301 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_fp32.onnx --fp16 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_fp32.onnx.fp16.engine +[12/27/2023-16:17:40] [I] === Model Options === +[12/27/2023-16:17:40] [I] Format: ONNX +[12/27/2023-16:17:40] [I] Model: yolo_nas_pose_n_fp32.onnx +[12/27/2023-16:17:40] [I] Output: +[12/27/2023-16:17:40] [I] === Build Options === +[12/27/2023-16:17:40] [I] Max batch: explicit batch +[12/27/2023-16:17:40] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/27/2023-16:17:40] [I] minTiming: 1 +[12/27/2023-16:17:40] [I] avgTiming: 8 +[12/27/2023-16:17:40] [I] Precision: FP32+FP16 +[12/27/2023-16:17:40] [I] LayerPrecisions: +[12/27/2023-16:17:40] [I] Calibration: 
+[12/27/2023-16:17:40] [I] Refit: Disabled +[12/27/2023-16:17:40] [I] Sparsity: Disabled +[12/27/2023-16:17:40] [I] Safe mode: Disabled +[12/27/2023-16:17:40] [I] DirectIO mode: Disabled +[12/27/2023-16:17:40] [I] Restricted mode: Disabled +[12/27/2023-16:17:40] [I] Build only: Disabled +[12/27/2023-16:17:40] [I] Save engine: yolo_nas_pose_n_fp32.onnx.fp16.engine +[12/27/2023-16:17:40] [I] Load engine: +[12/27/2023-16:17:40] [I] Profiling verbosity: 0 +[12/27/2023-16:17:40] [I] Tactic sources: Using default tactic sources +[12/27/2023-16:17:40] [I] timingCacheMode: local +[12/27/2023-16:17:40] [I] timingCacheFile: +[12/27/2023-16:17:40] [I] Heuristic: Disabled +[12/27/2023-16:17:40] [I] Preview Features: Use default preview flags. +[12/27/2023-16:17:40] [I] Input(s)s format: fp32:CHW +[12/27/2023-16:17:40] [I] Output(s)s format: fp32:CHW +[12/27/2023-16:17:40] [I] Input build shapes: model +[12/27/2023-16:17:40] [I] Input calibration shapes: model +[12/27/2023-16:17:40] [I] === System Options === +[12/27/2023-16:17:40] [I] Device: 0 +[12/27/2023-16:17:40] [I] DLACore: +[12/27/2023-16:17:40] [I] Plugins: +[12/27/2023-16:17:40] [I] === Inference Options === +[12/27/2023-16:17:40] [I] Batch: Explicit +[12/27/2023-16:17:40] [I] Input inference shapes: model +[12/27/2023-16:17:40] [I] Iterations: 10 +[12/27/2023-16:17:40] [I] Duration: 15s (+ 200ms warm up) +[12/27/2023-16:17:40] [I] Sleep time: 0ms +[12/27/2023-16:17:40] [I] Idle time: 0ms +[12/27/2023-16:17:40] [I] Streams: 1 +[12/27/2023-16:17:40] [I] ExposeDMA: Disabled +[12/27/2023-16:17:40] [I] Data transfers: Enabled +[12/27/2023-16:17:40] [I] Spin-wait: Disabled +[12/27/2023-16:17:40] [I] Multithreading: Disabled +[12/27/2023-16:17:40] [I] CUDA Graph: Disabled +[12/27/2023-16:17:40] [I] Separate profiling: Disabled +[12/27/2023-16:17:40] [I] Time Deserialize: Disabled +[12/27/2023-16:17:40] [I] Time Refit: Disabled +[12/27/2023-16:17:40] [I] NVTX verbosity: 0 +[12/27/2023-16:17:40] [I] Persistent Cache Ratio: 0 
+[12/27/2023-16:17:40] [I] Inputs: +[12/27/2023-16:17:40] [I] === Reporting Options === +[12/27/2023-16:17:40] [I] Verbose: Disabled +[12/27/2023-16:17:40] [I] Averages: 100 inferences +[12/27/2023-16:17:40] [I] Percentiles: 90,95,99 +[12/27/2023-16:17:40] [I] Dump refittable layers:Disabled +[12/27/2023-16:17:40] [I] Dump output: Disabled +[12/27/2023-16:17:40] [I] Profile: Disabled +[12/27/2023-16:17:40] [I] Export timing to JSON file: +[12/27/2023-16:17:40] [I] Export output to JSON file: +[12/27/2023-16:17:40] [I] Export profile to JSON file: +[12/27/2023-16:17:40] [I] +[12/27/2023-16:17:40] [I] === Device Information === +[12/27/2023-16:17:40] [I] Selected Device: Orin +[12/27/2023-16:17:40] [I] Compute Capability: 8.7 +[12/27/2023-16:17:40] [I] SMs: 8 +[12/27/2023-16:17:40] [I] Compute Clock Rate: 0.624 GHz +[12/27/2023-16:17:40] [I] Device Global Memory: 7471 MiB +[12/27/2023-16:17:40] [I] Shared Memory per SM: 164 KiB +[12/27/2023-16:17:40] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/27/2023-16:17:40] [I] Memory Clock Rate: 0.624 GHz +[12/27/2023-16:17:40] [I] +[12/27/2023-16:17:40] [I] TensorRT version: 8.5.2 +[12/27/2023-16:17:45] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 3322 (MiB) +[12/27/2023-16:17:48] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +377, now: CPU 574, GPU 3717 (MiB) +[12/27/2023-16:17:48] [I] Start parsing network model +[12/27/2023-16:17:49] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-16:17:49] [I] [TRT] Input filename: yolo_nas_pose_n_fp32.onnx +[12/27/2023-16:17:49] [I] [TRT] ONNX IR version: 0.0.8 +[12/27/2023-16:17:49] [I] [TRT] Opset version: 17 +[12/27/2023-16:17:49] [I] [TRT] Producer name: pytorch +[12/27/2023-16:17:49] [I] [TRT] Producer version: 2.1.2 +[12/27/2023-16:17:49] [I] [TRT] Domain: +[12/27/2023-16:17:49] [I] [TRT] Model version: 0 +[12/27/2023-16:17:49] [I] [TRT] Doc string: +[12/27/2023-16:17:49] [I] [TRT] 
---------------------------------------------------------------- +[12/27/2023-16:17:49] [I] Finish parsing network model +[12/27/2023-16:17:49] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/27/2023-16:17:49] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 383) [Constant] +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 384) [Constant] +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 385) [Constant] +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stem/conv/rbr_reparam/Conv + /model/backbone/stem/conv/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/downsample/rbr_reparam/Conv + /model/backbone/stage1/downsample/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv2/conv/Conv + /model/backbone/stage1/blocks/conv2/act/Relu || /model/backbone/stage1/blocks/conv1/conv/Conv + /model/backbone/stage1/blocks/conv1/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 15) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) 
+[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 23) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv3/conv/Conv + /model/backbone/stage1/blocks/conv3/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_skip2/conv/Conv + /model/neck/neck2/reduce_skip2/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/downsample/rbr_reparam/Conv + /model/backbone/stage2/downsample/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/downsample/conv/Conv + /model/neck/neck2/downsample/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv2/conv/Conv + /model/backbone/stage2/blocks/conv2/act/Relu || /model/backbone/stage2/blocks/conv1/conv/Conv + /model/backbone/stage2/blocks/conv1/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + 
/model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 44) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 52) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 60) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add_output_0 copy +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-16:17:49] [I] [TRT] 
[GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv3/conv/Conv + /model/backbone/stage2/blocks/conv3/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip2/conv/Conv + /model/neck/neck1/reduce_skip2/act/Relu || /model/neck/neck2/reduce_skip1/conv/Conv + /model/neck/neck2/reduce_skip1/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/downsample/rbr_reparam/Conv + /model/backbone/stage3/downsample/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/downsample/conv/Conv + /model/neck/neck1/downsample/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv2/conv/Conv + /model/backbone/stage3/blocks/conv2/act/Relu || /model/backbone/stage3/blocks/conv1/conv/Conv + /model/backbone/stage3/blocks/conv1/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 83) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 91) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 99) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 107) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv3/conv/Conv + /model/backbone/stage3/blocks/conv3/act/Relu 
+[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip1/conv/Conv + /model/neck/neck1/reduce_skip1/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/downsample/rbr_reparam/Conv + /model/backbone/stage4/downsample/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv2/conv/Conv + /model/backbone/stage4/blocks/conv2/act/Relu || /model/backbone/stage4/blocks/conv1/conv/Conv + /model/backbone/stage4/blocks/conv1/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 126) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 134) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] COPY: 
/model/backbone/stage4/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv3/conv/Conv + /model/backbone/stage4/blocks/conv3/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv1/conv/Conv + /model/backbone/context_module/cv1/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/cv1/act/Relu_output_0 copy +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv2/conv/Conv + /model/backbone/context_module/cv2/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/conv/conv/Conv + /model/neck/neck1/conv/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck1/upsample/ConvTranspose +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_after_concat/conv/Conv + /model/neck/neck1/reduce_after_concat/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv2/conv/Conv + /model/neck/neck1/blocks/conv2/act/Relu || /model/neck/neck1/blocks/conv1/conv/Conv + /model/neck/neck1/blocks/conv1/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] POINTWISE: 
PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 163) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 171) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv3/conv/Conv + /model/neck/neck1/blocks/conv3/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/conv/conv/Conv + /model/neck/neck2/conv/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck2/upsample/ConvTranspose +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/reduce_skip1/act/Relu_output_0 copy +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_after_concat/conv/Conv + /model/neck/neck2/reduce_after_concat/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv2/conv/Conv + /model/neck/neck2/blocks/conv2/act/Relu || /model/neck/neck2/blocks/conv1/conv/Conv + /model/neck/neck2/blocks/conv1/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 192) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 200) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv3/conv/Conv + /model/neck/neck2/blocks/conv3/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/bbox_stem/seq/conv/Conv + /model/heads/head1/bbox_stem/seq/act/Relu || /model/heads/head1/pose_stem/seq/conv/Conv + /model/heads/head1/pose_stem/seq/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/conv/conv/Conv + /model/neck/neck3/conv/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv + 
/model/heads/head1/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head1/cls_convs/cls_convs.0/seq/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.0/seq/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv2/conv/Conv + /model/neck/neck3/blocks/conv2/act/Relu || /model/neck/neck3/blocks/conv1/conv/Conv + /model/neck/neck3/blocks/conv1/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/cls_pred/Conv +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_pred/Conv +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.1/seq/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_pred/Conv +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 231) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 254) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv3/conv/Conv + /model/neck/neck3/blocks/conv3/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/bbox_stem/seq/conv/Conv + /model/heads/head2/bbox_stem/seq/act/Relu || /model/heads/head2/pose_stem/seq/conv/Conv + /model/heads/head2/pose_stem/seq/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/conv/conv/Conv + /model/neck/neck4/conv/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head2/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head2/cls_convs/cls_convs.0/seq/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.0/seq/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv2/conv/Conv + /model/neck/neck4/blocks/conv2/act/Relu || /model/neck/neck4/blocks/conv1/conv/Conv + /model/neck/neck4/blocks/conv1/act/Relu +[12/27/2023-16:17:49] [I] 
[TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/cls_pred/Conv +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_pred/Conv +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.1/seq/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_pred/Conv +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 285) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 308) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] COPY: 
/model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv3/conv/Conv + /model/neck/neck4/blocks/conv3/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/bbox_stem/seq/conv/Conv + /model/heads/head3/bbox_stem/seq/act/Relu || /model/heads/head3/pose_stem/seq/conv/Conv + /model/heads/head3/pose_stem/seq/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head3/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head3/cls_convs/cls_convs.0/seq/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.0/seq/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/cls_pred/Conv +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_pred/Conv +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.1/seq/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.2/seq/act/Relu +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_pred/Conv +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice_1.../post_process/Reshape_2]} 
+[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] NMS: batched_nms_26 +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 387) [NMS]_1_output[DevicetoShapeHostCopy] +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/27/2023-16:17:49] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] +[12/27/2023-16:18:03] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +699, now: CPU 1141, GPU 4471 (MiB) +[12/27/2023-16:18:04] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +83, GPU +123, now: CPU 1224, GPU 4594 (MiB) +[12/27/2023-16:18:04] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored. +[12/27/2023-17:00:25] [I] [TRT] Total Activation Memory: 7921581056 +[12/27/2023-17:00:25] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/27/2023-17:00:33] [I] [TRT] Total Host Persistent Memory: 300672 +[12/27/2023-17:00:33] [I] [TRT] Total Device Persistent Memory: 88576 +[12/27/2023-17:00:33] [I] [TRT] Total Scratch Memory: 134217728 +[12/27/2023-17:00:33] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 18 MiB, GPU 2112 MiB +[12/27/2023-17:00:33] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 163 steps to complete. +[12/27/2023-17:00:33] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 70.7792ms to assign 16 blocks to 163 nodes requiring 142793216 bytes. +[12/27/2023-17:00:33] [I] [TRT] Total Activation Memory: 142793216 +[12/27/2023-17:00:37] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +1, GPU -3, now: CPU 1584, GPU 5531 (MiB) +[12/27/2023-17:00:37] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +13, GPU +16, now: CPU 13, GPU 16 (MiB) +[12/27/2023-17:00:38] [I] Engine built in 2577.06 sec. 
+[12/27/2023-17:00:39] [I] [TRT] Loaded engine size: 14 MiB +[12/27/2023-17:00:39] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1256, GPU 5299 (MiB) +[12/27/2023-17:00:39] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +13, now: CPU 0, GPU 13 (MiB) +[12/27/2023-17:00:39] [I] Engine deserialized in 0.0945461 sec. +[12/27/2023-17:00:39] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1256, GPU 5299 (MiB) +[12/27/2023-17:00:39] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +136, now: CPU 0, GPU 149 (MiB) +[12/27/2023-17:00:39] [I] Setting persistentCacheLimit to 0 bytes. +[12/27/2023-17:00:39] [I] Using random values for input onnx::Cast_0 +[12/27/2023-17:00:39] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/27/2023-17:00:39] [I] Using random values for output graph2_flat_predictions +[12/27/2023-17:00:39] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/27/2023-17:00:39] [I] Starting inference +[12/27/2023-17:00:54] [I] Warmup completed 1 queries over 200 ms +[12/27/2023-17:00:54] [I] Timing trace has 1237 queries over 15.0152 s +[12/27/2023-17:00:54] [I] +[12/27/2023-17:00:54] [I] === Trace details === +[12/27/2023-17:00:54] [I] Trace averages of 100 runs: +[12/27/2023-17:00:54] [I] Average on 100 runs - GPU latency: 12.6573 ms - Host latency: 12.7781 ms (enqueue 12.7178 ms) +[12/27/2023-17:00:54] [I] Average on 100 runs - GPU latency: 11.6142 ms - Host latency: 11.728 ms (enqueue 11.6905 ms) +[12/27/2023-17:00:54] [I] Average on 100 runs - GPU latency: 11.6262 ms - Host latency: 11.7402 ms (enqueue 11.7016 ms) +[12/27/2023-17:00:54] [I] Average on 100 runs - GPU latency: 11.6586 ms - Host latency: 11.7742 ms (enqueue 11.7329 ms) +[12/27/2023-17:00:54] [I] Average on 100 runs - GPU latency: 11.622 ms - Host latency: 11.7355 ms (enqueue 11.6981 ms) +[12/27/2023-17:00:54] [I] 
Average on 100 runs - GPU latency: 11.8267 ms - Host latency: 11.941 ms (enqueue 11.8978 ms) +[12/27/2023-17:00:54] [I] Average on 100 runs - GPU latency: 12.1193 ms - Host latency: 12.2384 ms (enqueue 12.1803 ms) +[12/27/2023-17:00:54] [I] Average on 100 runs - GPU latency: 12.1242 ms - Host latency: 12.2438 ms (enqueue 12.1885 ms) +[12/27/2023-17:00:54] [I] Average on 100 runs - GPU latency: 12.1149 ms - Host latency: 12.2342 ms (enqueue 12.174 ms) +[12/27/2023-17:00:54] [I] Average on 100 runs - GPU latency: 12.1567 ms - Host latency: 12.2762 ms (enqueue 12.2142 ms) +[12/27/2023-17:00:54] [I] Average on 100 runs - GPU latency: 12.1019 ms - Host latency: 12.2216 ms (enqueue 12.1666 ms) +[12/27/2023-17:00:54] [I] Average on 100 runs - GPU latency: 12.1605 ms - Host latency: 12.2789 ms (enqueue 12.2169 ms) +[12/27/2023-17:00:54] [I] +[12/27/2023-17:00:54] [I] === Performance summary === +[12/27/2023-17:00:54] [I] Throughput: 82.3834 qps +[12/27/2023-17:00:54] [I] Latency: min = 11.2305 ms, max = 43.9559 ms, mean = 12.1005 ms, median = 12.1875 ms, percentile(90%) = 12.3506 ms, percentile(95%) = 12.5639 ms, percentile(99%) = 13.2209 ms +[12/27/2023-17:00:54] [I] Enqueue Time: min = 11.1899 ms, max = 43.4125 ms, mean = 12.0493 ms, median = 12.1338 ms, percentile(90%) = 12.29 ms, percentile(95%) = 12.5039 ms, percentile(99%) = 13.1118 ms +[12/27/2023-17:00:54] [I] H2D Latency: min = 0.0812988 ms, max = 0.196533 ms, mean = 0.0972906 ms, median = 0.097168 ms, percentile(90%) = 0.100098 ms, percentile(95%) = 0.100708 ms, percentile(99%) = 0.108887 ms +[12/27/2023-17:00:54] [I] GPU Compute Time: min = 11.1187 ms, max = 43.7558 ms, mean = 11.9831 ms, median = 12.0708 ms, percentile(90%) = 12.2266 ms, percentile(95%) = 12.4424 ms, percentile(99%) = 13.1182 ms +[12/27/2023-17:00:54] [I] D2H Latency: min = 0.00292969 ms, max = 0.0546875 ms, mean = 0.0201313 ms, median = 0.0214844 ms, percentile(90%) = 0.0258789 ms, percentile(95%) = 0.0263672 ms, percentile(99%) = 0.0419922 ms 
+[12/27/2023-17:00:54] [I] Total Host Walltime: 15.0152 s +[12/27/2023-17:00:54] [I] Total GPU Compute Time: 14.8231 s +[12/27/2023-17:00:54] [I] Explanations of the performance metrics are printed in the verbose logs. +[12/27/2023-17:00:54] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_fp32.onnx --fp16 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_fp32.onnx.fp16.engine diff --git a/yolo_nas_pose_n_fp32.onnx.int8.engine b/yolo_nas_pose_n_fp32.onnx.int8.engine new file mode 100644 index 0000000000000000000000000000000000000000..9b503ff2928aa8f902682726a980a083b73883cb --- /dev/null +++ b/yolo_nas_pose_n_fp32.onnx.int8.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfa0f33192936200fb30a01b36b1203e4cb7dd10a7f23ff648a0dd788aa95ba4 +size 10046762 diff --git a/yolo_nas_pose_n_fp32.onnx.int8.engine.err b/yolo_nas_pose_n_fp32.onnx.int8.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..45cddfcabda42285e1bd06115e095ea9ccc27dd3 --- /dev/null +++ b/yolo_nas_pose_n_fp32.onnx.int8.engine.err @@ -0,0 +1,25 @@ +[12/27/2023-18:17:24] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/27/2023-18:17:24] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/27/2023-18:17:24] [W] [TRT] Calibrator is not being used. Users must provide dynamic range for all tensors that are not Int32 or Bool. +[12/27/2023-18:28:23] [W] [TRT] Tactic Device request: 3144MB Available: 2372MB. Device memory is insufficient to use tactic. +[12/27/2023-18:28:23] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3144 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/27/2023-18:28:23] [W] [TRT] Tactic Device request: 3144MB Available: 2371MB. Device memory is insufficient to use tactic. +[12/27/2023-18:28:23] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3144 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-18:28:23] [W] [TRT] Tactic Device request: 3144MB Available: 2372MB. Device memory is insufficient to use tactic. +[12/27/2023-18:28:23] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3144 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-18:38:51] [W] [TRT] Tactic Device request: 4364MB Available: 2170MB. Device memory is insufficient to use tactic. +[12/27/2023-18:38:51] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4364 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-18:38:51] [W] [TRT] Tactic Device request: 4364MB Available: 2170MB. Device memory is insufficient to use tactic. +[12/27/2023-18:38:51] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4364 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-18:38:51] [W] [TRT] Tactic Device request: 4364MB Available: 2170MB. Device memory is insufficient to use tactic. +[12/27/2023-18:38:51] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4364 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-18:53:35] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. 
+[12/27/2023-18:53:35] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/27/2023-18:53:35] [W] * GPU compute time is unstable, with coefficient of variance = 5.02314%. +[12/27/2023-18:53:35] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. diff --git a/yolo_nas_pose_n_fp32.onnx.int8.engine.log b/yolo_nas_pose_n_fp32.onnx.int8.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..347572e0b05693e748d432fb49c2387f202ad4b3 --- /dev/null +++ b/yolo_nas_pose_n_fp32.onnx.int8.engine.log @@ -0,0 +1,303 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_fp32.onnx --int8 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_fp32.onnx.int8.engine +[12/27/2023-18:17:13] [I] === Model Options === +[12/27/2023-18:17:13] [I] Format: ONNX +[12/27/2023-18:17:13] [I] Model: yolo_nas_pose_n_fp32.onnx +[12/27/2023-18:17:13] [I] Output: +[12/27/2023-18:17:13] [I] === Build Options === +[12/27/2023-18:17:13] [I] Max batch: explicit batch +[12/27/2023-18:17:13] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/27/2023-18:17:13] [I] minTiming: 1 +[12/27/2023-18:17:13] [I] avgTiming: 8 +[12/27/2023-18:17:13] [I] Precision: FP32+INT8 +[12/27/2023-18:17:13] [I] LayerPrecisions: +[12/27/2023-18:17:13] [I] Calibration: Dynamic +[12/27/2023-18:17:13] [I] Refit: Disabled +[12/27/2023-18:17:13] [I] Sparsity: Disabled +[12/27/2023-18:17:13] [I] Safe mode: Disabled +[12/27/2023-18:17:13] [I] DirectIO mode: Disabled +[12/27/2023-18:17:13] [I] Restricted mode: Disabled +[12/27/2023-18:17:13] [I] Build only: Disabled +[12/27/2023-18:17:13] [I] Save engine: yolo_nas_pose_n_fp32.onnx.int8.engine +[12/27/2023-18:17:13] [I] Load engine: +[12/27/2023-18:17:13] [I] Profiling verbosity: 0 +[12/27/2023-18:17:13] [I] Tactic sources: Using default tactic 
sources +[12/27/2023-18:17:13] [I] timingCacheMode: local +[12/27/2023-18:17:13] [I] timingCacheFile: +[12/27/2023-18:17:13] [I] Heuristic: Disabled +[12/27/2023-18:17:13] [I] Preview Features: Use default preview flags. +[12/27/2023-18:17:13] [I] Input(s)s format: fp32:CHW +[12/27/2023-18:17:13] [I] Output(s)s format: fp32:CHW +[12/27/2023-18:17:13] [I] Input build shapes: model +[12/27/2023-18:17:13] [I] Input calibration shapes: model +[12/27/2023-18:17:13] [I] === System Options === +[12/27/2023-18:17:13] [I] Device: 0 +[12/27/2023-18:17:13] [I] DLACore: +[12/27/2023-18:17:13] [I] Plugins: +[12/27/2023-18:17:13] [I] === Inference Options === +[12/27/2023-18:17:13] [I] Batch: Explicit +[12/27/2023-18:17:13] [I] Input inference shapes: model +[12/27/2023-18:17:13] [I] Iterations: 10 +[12/27/2023-18:17:13] [I] Duration: 15s (+ 200ms warm up) +[12/27/2023-18:17:13] [I] Sleep time: 0ms +[12/27/2023-18:17:13] [I] Idle time: 0ms +[12/27/2023-18:17:13] [I] Streams: 1 +[12/27/2023-18:17:13] [I] ExposeDMA: Disabled +[12/27/2023-18:17:13] [I] Data transfers: Enabled +[12/27/2023-18:17:13] [I] Spin-wait: Disabled +[12/27/2023-18:17:13] [I] Multithreading: Disabled +[12/27/2023-18:17:13] [I] CUDA Graph: Disabled +[12/27/2023-18:17:13] [I] Separate profiling: Disabled +[12/27/2023-18:17:13] [I] Time Deserialize: Disabled +[12/27/2023-18:17:13] [I] Time Refit: Disabled +[12/27/2023-18:17:13] [I] NVTX verbosity: 0 +[12/27/2023-18:17:13] [I] Persistent Cache Ratio: 0 +[12/27/2023-18:17:13] [I] Inputs: +[12/27/2023-18:17:13] [I] === Reporting Options === +[12/27/2023-18:17:13] [I] Verbose: Disabled +[12/27/2023-18:17:13] [I] Averages: 100 inferences +[12/27/2023-18:17:13] [I] Percentiles: 90,95,99 +[12/27/2023-18:17:13] [I] Dump refittable layers:Disabled +[12/27/2023-18:17:13] [I] Dump output: Disabled +[12/27/2023-18:17:13] [I] Profile: Disabled +[12/27/2023-18:17:13] [I] Export timing to JSON file: +[12/27/2023-18:17:13] [I] Export output to JSON file: +[12/27/2023-18:17:13] 
[I] Export profile to JSON file: +[12/27/2023-18:17:13] [I] +[12/27/2023-18:17:14] [I] === Device Information === +[12/27/2023-18:17:14] [I] Selected Device: Orin +[12/27/2023-18:17:14] [I] Compute Capability: 8.7 +[12/27/2023-18:17:14] [I] SMs: 8 +[12/27/2023-18:17:14] [I] Compute Clock Rate: 0.624 GHz +[12/27/2023-18:17:14] [I] Device Global Memory: 7471 MiB +[12/27/2023-18:17:14] [I] Shared Memory per SM: 164 KiB +[12/27/2023-18:17:14] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/27/2023-18:17:14] [I] Memory Clock Rate: 0.624 GHz +[12/27/2023-18:17:14] [I] +[12/27/2023-18:17:14] [I] TensorRT version: 8.5.2 +[12/27/2023-18:17:19] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 3111 (MiB) +[12/27/2023-18:17:23] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +285, now: CPU 574, GPU 3417 (MiB) +[12/27/2023-18:17:23] [I] Start parsing network model +[12/27/2023-18:17:24] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-18:17:24] [I] [TRT] Input filename: yolo_nas_pose_n_fp32.onnx +[12/27/2023-18:17:24] [I] [TRT] ONNX IR version: 0.0.8 +[12/27/2023-18:17:24] [I] [TRT] Opset version: 17 +[12/27/2023-18:17:24] [I] [TRT] Producer name: pytorch +[12/27/2023-18:17:24] [I] [TRT] Producer version: 2.1.2 +[12/27/2023-18:17:24] [I] [TRT] Domain: +[12/27/2023-18:17:24] [I] [TRT] Model version: 0 +[12/27/2023-18:17:24] [I] [TRT] Doc string: +[12/27/2023-18:17:24] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-18:17:24] [I] Finish parsing network model +[12/27/2023-18:17:24] [I] FP32 and INT8 precisions have been specified - more performance might be enabled by additionally specifying --fp16 or --best +[12/27/2023-18:17:24] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/27/2023-18:17:24] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] 
+[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 383) [Constant] +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 384) [Constant] +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 385) [Constant] +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stem/conv/rbr_reparam/Conv + /model/backbone/stem/conv/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/downsample/rbr_reparam/Conv + /model/backbone/stage1/downsample/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv2/conv/Conv + /model/backbone/stage1/blocks/conv2/act/Relu || /model/backbone/stage1/blocks/conv1/conv/Conv + /model/backbone/stage1/blocks/conv1/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 15) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + 
/model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 23) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv3/conv/Conv + /model/backbone/stage1/blocks/conv3/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_skip2/conv/Conv + /model/neck/neck2/reduce_skip2/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/downsample/rbr_reparam/Conv + /model/backbone/stage2/downsample/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/downsample/conv/Conv + /model/neck/neck2/downsample/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv2/conv/Conv + /model/backbone/stage2/blocks/conv2/act/Relu || /model/backbone/stage2/blocks/conv1/conv/Conv + /model/backbone/stage2/blocks/conv1/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 44) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-18:17:24] 
[I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 52) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 60) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add_output_0 copy +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv3/conv/Conv + /model/backbone/stage2/blocks/conv3/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip2/conv/Conv + /model/neck/neck1/reduce_skip2/act/Relu || /model/neck/neck2/reduce_skip1/conv/Conv + /model/neck/neck2/reduce_skip1/act/Relu +[12/27/2023-18:17:24] [I] 
[TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/downsample/rbr_reparam/Conv + /model/backbone/stage3/downsample/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/downsample/conv/Conv + /model/neck/neck1/downsample/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv2/conv/Conv + /model/backbone/stage3/blocks/conv2/act/Relu || /model/backbone/stage3/blocks/conv1/conv/Conv + /model/backbone/stage3/blocks/conv1/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 83) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 91) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 99) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 107) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv3/conv/Conv + /model/backbone/stage3/blocks/conv3/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip1/conv/Conv + /model/neck/neck1/reduce_skip1/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/downsample/rbr_reparam/Conv + /model/backbone/stage4/downsample/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv2/conv/Conv + 
/model/backbone/stage4/blocks/conv2/act/Relu || /model/backbone/stage4/blocks/conv1/conv/Conv + /model/backbone/stage4/blocks/conv1/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 126) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 134) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv3/conv/Conv + /model/backbone/stage4/blocks/conv3/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv1/conv/Conv + /model/backbone/context_module/cv1/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] POOLING: 
/model/backbone/context_module/m.2/MaxPool +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/cv1/act/Relu_output_0 copy +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv2/conv/Conv + /model/backbone/context_module/cv2/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/conv/conv/Conv + /model/neck/neck1/conv/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck1/upsample/ConvTranspose +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_after_concat/conv/Conv + /model/neck/neck1/reduce_after_concat/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv2/conv/Conv + /model/neck/neck1/blocks/conv2/act/Relu || /model/neck/neck1/blocks/conv1/conv/Conv + /model/neck/neck1/blocks/conv1/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 163) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] 
[GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 171) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv3/conv/Conv + /model/neck/neck1/blocks/conv3/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/conv/conv/Conv + /model/neck/neck2/conv/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck2/upsample/ConvTranspose +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/reduce_skip1/act/Relu_output_0 copy +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_after_concat/conv/Conv + /model/neck/neck2/reduce_after_concat/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv2/conv/Conv + /model/neck/neck2/blocks/conv2/act/Relu || /model/neck/neck2/blocks/conv1/conv/Conv + /model/neck/neck2/blocks/conv1/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] POINTWISE: 
PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 192) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 200) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv3/conv/Conv + /model/neck/neck2/blocks/conv3/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/bbox_stem/seq/conv/Conv + /model/heads/head1/bbox_stem/seq/act/Relu || /model/heads/head1/pose_stem/seq/conv/Conv + /model/heads/head1/pose_stem/seq/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/conv/conv/Conv + /model/neck/neck3/conv/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head1/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head1/cls_convs/cls_convs.0/seq/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.0/seq/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/neck/neck3/blocks/conv2/conv/Conv + /model/neck/neck3/blocks/conv2/act/Relu || /model/neck/neck3/blocks/conv1/conv/Conv + /model/neck/neck3/blocks/conv1/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/cls_pred/Conv +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_pred/Conv +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.1/seq/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_pred/Conv +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 231) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 254) [Shuffle] + 
/model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv3/conv/Conv + /model/neck/neck3/blocks/conv3/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/bbox_stem/seq/conv/Conv + /model/heads/head2/bbox_stem/seq/act/Relu || /model/heads/head2/pose_stem/seq/conv/Conv + /model/heads/head2/pose_stem/seq/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/conv/conv/Conv + /model/neck/neck4/conv/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head2/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head2/cls_convs/cls_convs.0/seq/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.0/seq/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv2/conv/Conv + /model/neck/neck4/blocks/conv2/act/Relu || /model/neck/neck4/blocks/conv1/conv/Conv + /model/neck/neck4/blocks/conv1/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/cls_pred/Conv +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_pred/Conv +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.1/seq/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + 
/model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_pred/Conv +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 285) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 308) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv3/conv/Conv + /model/neck/neck4/blocks/conv3/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/bbox_stem/seq/conv/Conv + /model/heads/head3/bbox_stem/seq/act/Relu || 
/model/heads/head3/pose_stem/seq/conv/Conv + /model/heads/head3/pose_stem/seq/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head3/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head3/cls_convs/cls_convs.0/seq/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.0/seq/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/cls_pred/Conv +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_pred/Conv +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.1/seq/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.2/seq/act/Relu +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_pred/Conv +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice_1.../post_process/Reshape_2]} +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] NMS: batched_nms_26 +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 387) [NMS]_1_output[DevicetoShapeHostCopy] +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/27/2023-18:17:24] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] 
+[12/27/2023-18:17:38] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +389, now: CPU 1142, GPU 3859 (MiB) +[12/27/2023-18:17:40] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +82, GPU +21, now: CPU 1224, GPU 3880 (MiB) +[12/27/2023-18:17:40] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored. +[12/27/2023-18:53:07] [I] [TRT] Total Activation Memory: 7893371904 +[12/27/2023-18:53:07] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/27/2023-18:53:15] [I] [TRT] Total Host Persistent Memory: 277536 +[12/27/2023-18:53:15] [I] [TRT] Total Device Persistent Memory: 175104 +[12/27/2023-18:53:15] [I] [TRT] Total Scratch Memory: 134217728 +[12/27/2023-18:53:15] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 11 MiB, GPU 2112 MiB +[12/27/2023-18:53:15] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 157 steps to complete. +[12/27/2023-18:53:15] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 47.7476ms to assign 13 blocks to 157 nodes requiring 139861504 bytes. +[12/27/2023-18:53:15] [I] [TRT] Total Activation Memory: 139861504 +[12/27/2023-18:53:18] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +1, GPU +0, now: CPU 1580, GPU 5361 (MiB) +[12/27/2023-18:53:18] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +7, GPU +8, now: CPU 7, GPU 8 (MiB) +[12/27/2023-18:53:18] [I] Engine built in 2164.58 sec. +[12/27/2023-18:53:19] [I] [TRT] Loaded engine size: 9 MiB +[12/27/2023-18:53:19] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1254, GPU 5365 (MiB) +[12/27/2023-18:53:19] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +7, now: CPU 0, GPU 7 (MiB) +[12/27/2023-18:53:19] [I] Engine deserialized in 0.247014 sec. 
+[12/27/2023-18:53:19] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1255, GPU 5365 (MiB) +[12/27/2023-18:53:19] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +134, now: CPU 0, GPU 141 (MiB) +[12/27/2023-18:53:19] [I] Setting persistentCacheLimit to 0 bytes. +[12/27/2023-18:53:19] [I] Using random values for input onnx::Cast_0 +[12/27/2023-18:53:19] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/27/2023-18:53:19] [I] Using random values for output graph2_flat_predictions +[12/27/2023-18:53:19] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/27/2023-18:53:19] [I] Starting inference +[12/27/2023-18:53:35] [I] Warmup completed 13 queries over 200 ms +[12/27/2023-18:53:35] [I] Timing trace has 1323 queries over 15.0218 s +[12/27/2023-18:53:35] [I] +[12/27/2023-18:53:35] [I] === Trace details === +[12/27/2023-18:53:35] [I] Trace averages of 100 runs: +[12/27/2023-18:53:35] [I] Average on 100 runs - GPU latency: 11.525 ms - Host latency: 11.6442 ms (enqueue 11.5938 ms) +[12/27/2023-18:53:35] [I] Average on 100 runs - GPU latency: 11.3455 ms - Host latency: 11.463 ms (enqueue 11.4145 ms) +[12/27/2023-18:53:35] [I] Average on 100 runs - GPU latency: 10.8129 ms - Host latency: 10.9255 ms (enqueue 10.8888 ms) +[12/27/2023-18:53:35] [I] Average on 100 runs - GPU latency: 10.7824 ms - Host latency: 10.8952 ms (enqueue 10.858 ms) +[12/27/2023-18:53:35] [I] Average on 100 runs - GPU latency: 10.7982 ms - Host latency: 10.912 ms (enqueue 10.8761 ms) +[12/27/2023-18:53:35] [I] Average on 100 runs - GPU latency: 10.8503 ms - Host latency: 10.9632 ms (enqueue 10.9246 ms) +[12/27/2023-18:53:35] [I] Average on 100 runs - GPU latency: 11.3109 ms - Host latency: 11.4263 ms (enqueue 11.3781 ms) +[12/27/2023-18:53:35] [I] Average on 100 runs - GPU latency: 10.9917 ms - Host latency: 11.106 ms (enqueue 11.066 ms) +[12/27/2023-18:53:35] [I] Average on 100 runs 
- GPU latency: 11.2826 ms - Host latency: 11.3998 ms (enqueue 11.3579 ms) +[12/27/2023-18:53:35] [I] Average on 100 runs - GPU latency: 11.4708 ms - Host latency: 11.5897 ms (enqueue 11.5385 ms) +[12/27/2023-18:53:35] [I] Average on 100 runs - GPU latency: 11.4387 ms - Host latency: 11.5564 ms (enqueue 11.5123 ms) +[12/27/2023-18:53:35] [I] Average on 100 runs - GPU latency: 11.5236 ms - Host latency: 11.6419 ms (enqueue 11.5877 ms) +[12/27/2023-18:53:35] [I] Average on 100 runs - GPU latency: 11.4453 ms - Host latency: 11.5643 ms (enqueue 11.5125 ms) +[12/27/2023-18:53:35] [I] +[12/27/2023-18:53:35] [I] === Performance summary === +[12/27/2023-18:53:35] [I] Throughput: 88.0719 qps +[12/27/2023-18:53:35] [I] Latency: min = 9.19824 ms, max = 16.3008 ms, mean = 11.3194 ms, median = 11.4092 ms, percentile(90%) = 11.8027 ms, percentile(95%) = 11.8662 ms, percentile(99%) = 13.1797 ms +[12/27/2023-18:53:35] [I] Enqueue Time: min = 9.16504 ms, max = 16.2461 ms, mean = 11.2747 ms, median = 11.3594 ms, percentile(90%) = 11.75 ms, percentile(95%) = 11.8208 ms, percentile(99%) = 13.4873 ms +[12/27/2023-18:53:35] [I] H2D Latency: min = 0.0808105 ms, max = 0.118973 ms, mean = 0.0980852 ms, median = 0.0981445 ms, percentile(90%) = 0.100098 ms, percentile(95%) = 0.100586 ms, percentile(99%) = 0.112061 ms +[12/27/2023-18:53:35] [I] GPU Compute Time: min = 9.09082 ms, max = 16.1807 ms, mean = 11.2032 ms, median = 11.2918 ms, percentile(90%) = 11.6843 ms, percentile(95%) = 11.751 ms, percentile(99%) = 13.0771 ms +[12/27/2023-18:53:35] [I] D2H Latency: min = 0.00341797 ms, max = 0.0477295 ms, mean = 0.0180909 ms, median = 0.0188293 ms, percentile(90%) = 0.0234375 ms, percentile(95%) = 0.0244141 ms, percentile(99%) = 0.0371094 ms +[12/27/2023-18:53:35] [I] Total Host Walltime: 15.0218 s +[12/27/2023-18:53:35] [I] Total GPU Compute Time: 14.8218 s +[12/27/2023-18:53:35] [I] Explanations of the performance metrics are printed in the verbose logs. 
+[12/27/2023-18:53:35] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_fp32.onnx --int8 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_fp32.onnx.int8.engine diff --git a/yolo_nas_pose_n_fp32.onnx.usage.txt b/yolo_nas_pose_n_fp32.onnx.usage.txt new file mode 100644 index 0000000000000000000000000000000000000000..41e88de7ddca2253409c5f14371afc3d77714488 --- /dev/null +++ b/yolo_nas_pose_n_fp32.onnx.usage.txt @@ -0,0 +1,58 @@ + +Model exported successfully to yolo_nas_pose_n_fp32.onnx +Model expects input image of shape [1, 3, 640, 640] +Input image dtype is torch.uint8 + +Exported model already contains preprocessing (normalization) step, so you don't need to do it manually. +Preprocessing steps to be applied to input image are: +Sequential( + (0): CastTensorTo(dtype=torch.float32) + (1): ChannelSelect(channels_indexes=tensor([2, 1, 0])) + (2): ApplyMeanStd(mean=[0.], scale=[255.]) +) + + +Exported model contains postprocessing (NMS) step with the following parameters: + num_pre_nms_predictions=1000 + max_predictions_per_image=10 + nms_threshold=0.5 + confidence_threshold=0.15 + output_predictions_format=flat + + +Exported model is in ONNX format and can be used with ONNXRuntime +To run inference with ONNXRuntime, please use the following code snippet: + + import onnxruntime + import numpy as np + session = onnxruntime.InferenceSession("yolo_nas_pose_n_fp32.onnx", providers=["CUDAExecutionProvider", "CPUExecutionProvider"]) + inputs = [o.name for o in session.get_inputs()] + outputs = [o.name for o in session.get_outputs()] + + example_input_image = np.zeros((1, 3, 640, 640)).astype(np.uint8) + predictions = session.run(outputs, {inputs[0]: example_input_image}) + +Exported model can also be used with TensorRT +To run inference with TensorRT, please see TensorRT deployment documentation +You can benchmark the model using the following code snippet: + + trtexec --onnx=yolo_nas_pose_n_fp32.onnx --fp16 
--avgRuns=100 --duration=15 + + +Exported model has predictions in flat format: + +# flat_predictions is a 2D array of [N,K] shape +# Each row represents (image_index, x_min, y_min, x_max, y_max, confidence, joints...) +# Please note all values are floats, so you have to convert them to integers if needed + +[flat_predictions] = predictions +pred_bboxes = flat_predictions[:, 1:5] +pred_scores = flat_predictions[:, 5] +pred_joints = flat_predictions[:, 6:].reshape((len(pred_bboxes), -1, 3)) +for i in range(len(pred_bboxes)): + confidence = pred_scores[i] + x_min, y_min, x_max, y_max = pred_bboxes[i] + print(f"Detected pose with confidence={{confidence}}, x_min={{x_min}}, y_min={{y_min}}, x_max={{x_max}}, y_max={{y_max}}") + for joint_index, (x, y, confidence) in enumerate(pred_joints[i]): + print(f"Joint {{joint_index}} has coordinates x={{x}}, y={{y}}, confidence={{confidence}}") + diff --git a/yolo_nas_pose_n_int8.onnx b/yolo_nas_pose_n_int8.onnx new file mode 100644 index 0000000000000000000000000000000000000000..9fde0541927dcfee452a91f01a84318f2814942d --- /dev/null +++ b/yolo_nas_pose_n_int8.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1240ae85ecb3abae0ff8e32aa24da73da1d91bd386113fe8ce70b9ffe9f9b9c6 +size 28218615 diff --git a/yolo_nas_pose_n_int8.onnx.best.engine b/yolo_nas_pose_n_int8.onnx.best.engine new file mode 100644 index 0000000000000000000000000000000000000000..9b6d63bd34fc9a90a7ac977d68edfcef93c9cbc3 --- /dev/null +++ b/yolo_nas_pose_n_int8.onnx.best.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:022d94ab6ef2f6c6deba8f1cb3e09149d10b3df3fe17f24559f62f7277c75177 +size 10228093 diff --git a/yolo_nas_pose_n_int8.onnx.best.engine.err b/yolo_nas_pose_n_int8.onnx.best.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..a300069ddf5e77830c07cadeba4c3e9964fef0a5 --- /dev/null +++ b/yolo_nas_pose_n_int8.onnx.best.engine.err @@ -0,0 +1,7 @@ +[12/27/2023-20:52:51] [W] 
[TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/27/2023-20:52:51] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/27/2023-20:52:54] [W] [TRT] Calibrator won't be used in explicit precision mode. Use quantization aware training to generate network with Quantize/Dequantize nodes. +[12/27/2023-21:21:24] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. +[12/27/2023-21:21:24] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/27/2023-21:21:24] [W] * GPU compute time is unstable, with coefficient of variance = 3.55467%. +[12/27/2023-21:21:24] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. diff --git a/yolo_nas_pose_n_int8.onnx.best.engine.log b/yolo_nas_pose_n_int8.onnx.best.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..b53ac5e2562bef1496969752d98f5c6923491933 --- /dev/null +++ b/yolo_nas_pose_n_int8.onnx.best.engine.log @@ -0,0 +1,323 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_int8.onnx --best --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_int8.onnx.best.engine +[12/27/2023-20:52:47] [I] === Model Options === +[12/27/2023-20:52:47] [I] Format: ONNX +[12/27/2023-20:52:47] [I] Model: yolo_nas_pose_n_int8.onnx +[12/27/2023-20:52:47] [I] Output: +[12/27/2023-20:52:47] [I] === Build Options === +[12/27/2023-20:52:47] [I] Max batch: explicit batch +[12/27/2023-20:52:47] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/27/2023-20:52:47] [I] minTiming: 1 +[12/27/2023-20:52:47] [I] avgTiming: 8 +[12/27/2023-20:52:47] [I] Precision: FP32+FP16+INT8 +[12/27/2023-20:52:47] [I] 
LayerPrecisions: +[12/27/2023-20:52:47] [I] Calibration: Dynamic +[12/27/2023-20:52:47] [I] Refit: Disabled +[12/27/2023-20:52:47] [I] Sparsity: Disabled +[12/27/2023-20:52:47] [I] Safe mode: Disabled +[12/27/2023-20:52:47] [I] DirectIO mode: Disabled +[12/27/2023-20:52:47] [I] Restricted mode: Disabled +[12/27/2023-20:52:47] [I] Build only: Disabled +[12/27/2023-20:52:47] [I] Save engine: yolo_nas_pose_n_int8.onnx.best.engine +[12/27/2023-20:52:47] [I] Load engine: +[12/27/2023-20:52:47] [I] Profiling verbosity: 0 +[12/27/2023-20:52:47] [I] Tactic sources: Using default tactic sources +[12/27/2023-20:52:47] [I] timingCacheMode: local +[12/27/2023-20:52:47] [I] timingCacheFile: +[12/27/2023-20:52:47] [I] Heuristic: Disabled +[12/27/2023-20:52:47] [I] Preview Features: Use default preview flags. +[12/27/2023-20:52:47] [I] Input(s)s format: fp32:CHW +[12/27/2023-20:52:47] [I] Output(s)s format: fp32:CHW +[12/27/2023-20:52:47] [I] Input build shapes: model +[12/27/2023-20:52:47] [I] Input calibration shapes: model +[12/27/2023-20:52:47] [I] === System Options === +[12/27/2023-20:52:47] [I] Device: 0 +[12/27/2023-20:52:47] [I] DLACore: +[12/27/2023-20:52:47] [I] Plugins: +[12/27/2023-20:52:47] [I] === Inference Options === +[12/27/2023-20:52:47] [I] Batch: Explicit +[12/27/2023-20:52:47] [I] Input inference shapes: model +[12/27/2023-20:52:47] [I] Iterations: 10 +[12/27/2023-20:52:47] [I] Duration: 15s (+ 200ms warm up) +[12/27/2023-20:52:47] [I] Sleep time: 0ms +[12/27/2023-20:52:47] [I] Idle time: 0ms +[12/27/2023-20:52:47] [I] Streams: 1 +[12/27/2023-20:52:47] [I] ExposeDMA: Disabled +[12/27/2023-20:52:47] [I] Data transfers: Enabled +[12/27/2023-20:52:47] [I] Spin-wait: Disabled +[12/27/2023-20:52:47] [I] Multithreading: Disabled +[12/27/2023-20:52:47] [I] CUDA Graph: Disabled +[12/27/2023-20:52:47] [I] Separate profiling: Disabled +[12/27/2023-20:52:47] [I] Time Deserialize: Disabled +[12/27/2023-20:52:47] [I] Time Refit: Disabled +[12/27/2023-20:52:47] [I] NVTX 
verbosity: 0 +[12/27/2023-20:52:47] [I] Persistent Cache Ratio: 0 +[12/27/2023-20:52:47] [I] Inputs: +[12/27/2023-20:52:47] [I] === Reporting Options === +[12/27/2023-20:52:47] [I] Verbose: Disabled +[12/27/2023-20:52:47] [I] Averages: 100 inferences +[12/27/2023-20:52:47] [I] Percentiles: 90,95,99 +[12/27/2023-20:52:47] [I] Dump refittable layers:Disabled +[12/27/2023-20:52:47] [I] Dump output: Disabled +[12/27/2023-20:52:47] [I] Profile: Disabled +[12/27/2023-20:52:47] [I] Export timing to JSON file: +[12/27/2023-20:52:47] [I] Export output to JSON file: +[12/27/2023-20:52:47] [I] Export profile to JSON file: +[12/27/2023-20:52:47] [I] +[12/27/2023-20:52:47] [I] === Device Information === +[12/27/2023-20:52:47] [I] Selected Device: Orin +[12/27/2023-20:52:47] [I] Compute Capability: 8.7 +[12/27/2023-20:52:47] [I] SMs: 8 +[12/27/2023-20:52:47] [I] Compute Clock Rate: 0.624 GHz +[12/27/2023-20:52:47] [I] Device Global Memory: 7471 MiB +[12/27/2023-20:52:47] [I] Shared Memory per SM: 164 KiB +[12/27/2023-20:52:47] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/27/2023-20:52:47] [I] Memory Clock Rate: 0.624 GHz +[12/27/2023-20:52:47] [I] +[12/27/2023-20:52:47] [I] TensorRT version: 8.5.2 +[12/27/2023-20:52:48] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 3041 (MiB) +[12/27/2023-20:52:51] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +283, now: CPU 574, GPU 3346 (MiB) +[12/27/2023-20:52:51] [I] Start parsing network model +[12/27/2023-20:52:51] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-20:52:51] [I] [TRT] Input filename: yolo_nas_pose_n_int8.onnx +[12/27/2023-20:52:51] [I] [TRT] ONNX IR version: 0.0.8 +[12/27/2023-20:52:51] [I] [TRT] Opset version: 17 +[12/27/2023-20:52:51] [I] [TRT] Producer name: pytorch +[12/27/2023-20:52:51] [I] [TRT] Producer version: 2.1.2 +[12/27/2023-20:52:51] [I] [TRT] Domain: +[12/27/2023-20:52:51] [I] [TRT] Model version: 0 
+[12/27/2023-20:52:51] [I] [TRT] Doc string: +[12/27/2023-20:52:51] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-20:52:54] [I] Finish parsing network model +[12/27/2023-20:52:58] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/27/2023-20:52:58] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 1203) [Constant] +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 1204) [Constant] +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 1205) [Constant] +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] COPY: /model/backbone/stem/conv/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stem.conv.rbr_reparam.weight + /model/backbone/stem/conv/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stem/conv/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.downsample.rbr_reparam.weight + /model/backbone/stage1/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/downsample/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.conv2.conv.weight + /model/backbone/stage1/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/conv2/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.conv1.conv.weight + /model/backbone/stage1/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/conv1/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] COPY: 
/model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 485) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 501) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, 
/model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.conv3.conv.weight + /model/backbone/stage1/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/conv3/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.reduce_skip2.conv.weight + /model/neck/neck2/reduce_skip2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/reduce_skip2/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.downsample.rbr_reparam.weight + /model/backbone/stage2/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/downsample/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.downsample.conv.weight + /model/neck/neck2/downsample/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/downsample/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.conv2.conv.weight + /model/backbone/stage2/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/conv2/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.conv1.conv.weight + /model/backbone/stage2/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/conv1/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.0.cv2.rbr_reparam.weight + 
/model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 548) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 564) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.2.cv1.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] 
CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.2.cv2.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 580) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/Concat_/model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add_output_0_clone_0 copy +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.conv3.conv.weight + /model/backbone/stage2/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/conv3/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.reduce_skip2.conv.weight + /model/neck/neck1/reduce_skip2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/reduce_skip2/conv/Conv || model.neck.neck2.reduce_skip1.conv.weight + /model/neck/neck2/reduce_skip1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/reduce_skip1/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.downsample.rbr_reparam.weight + /model/backbone/stage3/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/downsample/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.downsample.conv.weight + /model/neck/neck1/downsample/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/downsample/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.conv2.conv.weight + /model/backbone/stage3/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/conv2/conv/Conv +[12/27/2023-20:52:58] 
[I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.conv1.conv.weight + /model/backbone/stage3/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/conv1/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 630) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 646) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.2.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.2.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 662) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.3.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.3.cv2.rbr_reparam.weight + 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 678) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.conv3.conv.weight + /model/backbone/stage3/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/conv3/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.reduce_skip1.conv.weight + /model/neck/neck1/reduce_skip1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/reduce_skip1/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.downsample.rbr_reparam.weight + /model/backbone/stage4/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/downsample/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.conv2.conv.weight + /model/backbone/stage4/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/conv2/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.conv1.conv.weight + /model/backbone/stage4/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/conv1/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + 
/model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 719) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 735) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.conv3.conv.weight + /model/backbone/stage4/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/conv3/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: 
model.backbone.context_module.cv1.conv.weight + /model/backbone/context_module/cv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/context_module/cv1/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/m.2/MaxPool_output_0 copy +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.context_module.cv2.conv.weight + /model/backbone/context_module/cv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/context_module/cv2/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.conv.conv.weight + /model/neck/neck1/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/conv/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] DECONVOLUTION: model.neck.neck1.upsample.weight + /model/neck/neck1/upsample/_weight_quantizer/QuantizeLinear + /model/neck/neck1/upsample/ConvTranspose +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.reduce_after_concat.conv.weight + /model/neck/neck1/reduce_after_concat/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/reduce_after_concat/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.conv2.conv.weight + /model/neck/neck1/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/conv2/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.conv1.conv.weight + /model/neck/neck1/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/conv1/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear 
+[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 800) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 816) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/Concat_/model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add_output_0_clone_0 copy 
+[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.conv3.conv.weight + /model/neck/neck1/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/conv3/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.conv.conv.weight + /model/neck/neck2/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/conv/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/upsample/_input_quantizer/QuantizeLinear +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] DECONVOLUTION: model.neck.neck2.upsample.weight + /model/neck/neck2/upsample/_weight_quantizer/QuantizeLinear + /model/neck/neck2/upsample/ConvTranspose +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/Concat_/model/neck/neck2/reduce_skip1/act/Relu_output_0_clone_1 copy +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.reduce_after_concat.conv.weight + /model/neck/neck2/reduce_after_concat/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/reduce_after_concat/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.conv2.conv.weight + /model/neck/neck2/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/conv2/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.conv1.conv.weight + /model/neck/neck2/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/conv1/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] 
CONVOLUTION: model.neck.neck2.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 865) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 881) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.conv3.conv.weight + /model/neck/neck2/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/conv3/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.bbox_stem.seq.conv.weight + /model/heads/head1/bbox_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/bbox_stem/seq/conv/Conv || model.heads.head1.pose_stem.seq.conv.weight + 
/model/heads/head1/pose_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_stem/seq/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.conv.conv.weight + /model/neck/neck3/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/conv/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.reg_convs.0.seq.conv.weight + /model/heads/head1/reg_convs/reg_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv || model.heads.head1.cls_convs.0.seq.conv.weight + /model/heads/head1/cls_convs/cls_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.pose_convs.0.seq.conv.weight + /model/heads/head1/pose_convs/pose_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/conv1/conv/_input_quantizer/QuantizeLinear_clone_1 +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.cls_pred.weight + /model/heads/head1/cls_pred/_weight_quantizer/QuantizeLinear + /model/heads/head1/cls_pred/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.reg_pred.weight + /model/heads/head1/reg_pred/_weight_quantizer/QuantizeLinear + /model/heads/head1/reg_pred/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.pose_convs.1.seq.conv.weight + /model/heads/head1/pose_convs/pose_convs.1/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.conv2.conv.weight + /model/neck/neck3/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/conv2/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] 
[GpuLayer] CONVOLUTION: model.neck.neck3.blocks.conv1.conv.weight + /model/neck/neck3/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/conv1/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.pose_pred.weight + /model/heads/head1/pose_pred/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_pred/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/_input_quantizer/QuantizeLinear +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.0.cv1.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.0.cv2.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 947) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/_input_quantizer/QuantizeLinear +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.1.cv1.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv +[12/27/2023-20:52:58] [I] 
[TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.1.cv2.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 988) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/Concat_/model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add_output_0_clone_0 copy +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.conv3.conv.weight + /model/neck/neck3/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/conv3/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.bbox_stem.seq.conv.weight + /model/heads/head2/bbox_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/bbox_stem/seq/conv/Conv || model.heads.head2.pose_stem.seq.conv.weight + /model/heads/head2/pose_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_stem/seq/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.conv.conv.weight + /model/neck/neck4/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/conv/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.reg_convs.0.seq.conv.weight + /model/heads/head2/reg_convs/reg_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv || model.heads.head2.cls_convs.0.seq.conv.weight + /model/heads/head2/cls_convs/cls_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.pose_convs.0.seq.conv.weight + 
/model/heads/head2/pose_convs/pose_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.conv2.conv.weight + /model/neck/neck4/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/conv2/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.conv1.conv.weight + /model/neck/neck4/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/conv1/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.cls_pred.weight + /model/heads/head2/cls_pred/_weight_quantizer/QuantizeLinear + /model/heads/head2/cls_pred/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.reg_pred.weight + /model/heads/head2/reg_pred/_weight_quantizer/QuantizeLinear + /model/heads/head2/reg_pred/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.pose_convs.1.seq.conv.weight + /model/heads/head2/pose_convs/pose_convs.1/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/_input_quantizer/QuantizeLinear +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.pose_pred.weight + /model/heads/head2/pose_pred/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_pred/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.0.cv1.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 
+[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.0.cv2.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 1053) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/_input_quantizer/QuantizeLinear +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.1.cv1.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.1.cv2.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 1094) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/Concat_/model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add_output_0_clone_0 copy +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.conv3.conv.weight + /model/neck/neck4/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/conv3/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: 
model.heads.head3.bbox_stem.seq.conv.weight + /model/heads/head3/bbox_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/bbox_stem/seq/conv/Conv || model.heads.head3.pose_stem.seq.conv.weight + /model/heads/head3/pose_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_stem/seq/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.reg_convs.0.seq.conv.weight + /model/heads/head3/reg_convs/reg_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv || model.heads.head3.cls_convs.0.seq.conv.weight + /model/heads/head3/cls_convs/cls_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_convs.0.seq.conv.weight + /model/heads/head3/pose_convs/pose_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.cls_pred.weight + /model/heads/head3/cls_pred/_weight_quantizer/QuantizeLinear + /model/heads/head3/cls_pred/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.reg_pred.weight + /model/heads/head3/reg_pred/_weight_quantizer/QuantizeLinear + /model/heads/head3/reg_pred/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_convs.1.seq.conv.weight + /model/heads/head3/pose_convs/pose_convs.1/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_convs.2.seq.conv.weight + /model/heads/head3/pose_convs/pose_convs.2/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv 
+[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_pred.weight + /model/heads/head3/pose_pred/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_pred/Conv +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice_1.../post_process/Reshape_2]} +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] NMS: batched_nms_238 +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 1207) [NMS]_1_output[DevicetoShapeHostCopy] +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/27/2023-20:52:58] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] +[12/27/2023-20:53:08] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +373, now: CPU 1168, GPU 3783 (MiB) +[12/27/2023-20:53:10] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +82, GPU +73, now: CPU 1250, GPU 3856 (MiB) +[12/27/2023-20:53:10] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored. +[12/27/2023-21:20:58] [I] [TRT] Total Activation Memory: 7904134656 +[12/27/2023-21:20:58] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/27/2023-21:21:05] [I] [TRT] Total Host Persistent Memory: 298176 +[12/27/2023-21:21:05] [I] [TRT] Total Device Persistent Memory: 77824 +[12/27/2023-21:21:05] [I] [TRT] Total Scratch Memory: 134217728 +[12/27/2023-21:21:05] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 33 MiB, GPU 154 MiB +[12/27/2023-21:21:05] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 179 steps to complete. 
+[12/27/2023-21:21:05] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 201.747ms to assign 13 blocks to 179 nodes requiring 140788224 bytes. +[12/27/2023-21:21:05] [I] [TRT] Total Activation Memory: 140788224 +[12/27/2023-21:21:08] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1593, GPU 4908 (MiB) +[12/27/2023-21:21:08] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +6, GPU +8, now: CPU 6, GPU 8 (MiB) +[12/27/2023-21:21:08] [I] Engine built in 1700.74 sec. +[12/27/2023-21:21:09] [I] [TRT] Loaded engine size: 9 MiB +[12/27/2023-21:21:09] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU -1, now: CPU 1244, GPU 4897 (MiB) +[12/27/2023-21:21:09] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +7, now: CPU 0, GPU 7 (MiB) +[12/27/2023-21:21:09] [I] Engine deserialized in 0.189318 sec. +[12/27/2023-21:21:09] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1245, GPU 4898 (MiB) +[12/27/2023-21:21:09] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +135, now: CPU 0, GPU 142 (MiB) +[12/27/2023-21:21:09] [I] Setting persistentCacheLimit to 0 bytes. 
+[12/27/2023-21:21:09] [I] Using random values for input onnx::Cast_0 +[12/27/2023-21:21:09] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/27/2023-21:21:09] [I] Using random values for output graph2_flat_predictions +[12/27/2023-21:21:09] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/27/2023-21:21:09] [I] Starting inference +[12/27/2023-21:21:24] [I] Warmup completed 13 queries over 200 ms +[12/27/2023-21:21:24] [I] Timing trace has 1215 queries over 15.0321 s +[12/27/2023-21:21:24] [I] +[12/27/2023-21:21:24] [I] === Trace details === +[12/27/2023-21:21:24] [I] Trace averages of 100 runs: +[12/27/2023-21:21:24] [I] Average on 100 runs - GPU latency: 12.6305 ms - Host latency: 12.7431 ms (enqueue 12.7033 ms) +[12/27/2023-21:21:24] [I] Average on 100 runs - GPU latency: 12.2402 ms - Host latency: 12.3526 ms (enqueue 12.3197 ms) +[12/27/2023-21:21:24] [I] Average on 100 runs - GPU latency: 12.256 ms - Host latency: 12.3679 ms (enqueue 12.3361 ms) +[12/27/2023-21:21:24] [I] Average on 100 runs - GPU latency: 12.1815 ms - Host latency: 12.2935 ms (enqueue 12.2622 ms) +[12/27/2023-21:21:24] [I] Average on 100 runs - GPU latency: 12.1642 ms - Host latency: 12.276 ms (enqueue 12.2445 ms) +[12/27/2023-21:21:24] [I] Average on 100 runs - GPU latency: 12.2964 ms - Host latency: 12.4083 ms (enqueue 12.3727 ms) +[12/27/2023-21:21:24] [I] Average on 100 runs - GPU latency: 12.1804 ms - Host latency: 12.2922 ms (enqueue 12.2598 ms) +[12/27/2023-21:21:24] [I] Average on 100 runs - GPU latency: 12.163 ms - Host latency: 12.2745 ms (enqueue 12.2423 ms) +[12/27/2023-21:21:24] [I] Average on 100 runs - GPU latency: 12.173 ms - Host latency: 12.2853 ms (enqueue 12.2536 ms) +[12/27/2023-21:21:24] [I] Average on 100 runs - GPU latency: 12.1462 ms - Host latency: 12.2579 ms (enqueue 12.2245 ms) +[12/27/2023-21:21:24] [I] Average on 100 runs - GPU latency: 12.1697 ms - Host latency: 12.281 ms (enqueue 12.2499 ms) 
+[12/27/2023-21:21:24] [I] Average on 100 runs - GPU latency: 12.1833 ms - Host latency: 12.2942 ms (enqueue 12.2621 ms) +[12/27/2023-21:21:24] [I] +[12/27/2023-21:21:24] [I] === Performance summary === +[12/27/2023-21:21:24] [I] Throughput: 80.8271 qps +[12/27/2023-21:21:24] [I] Latency: min = 11.5059 ms, max = 19.759 ms, mean = 12.343 ms, median = 12.2217 ms, percentile(90%) = 12.6807 ms, percentile(95%) = 12.8347 ms, percentile(99%) = 14.0815 ms +[12/27/2023-21:21:24] [I] Enqueue Time: min = 11.4648 ms, max = 21.5256 ms, mean = 12.31 ms, median = 12.1904 ms, percentile(90%) = 12.6436 ms, percentile(95%) = 12.7954 ms, percentile(99%) = 13.7207 ms +[12/27/2023-21:21:24] [I] H2D Latency: min = 0.0810547 ms, max = 0.117188 ms, mean = 0.0989079 ms, median = 0.0996094 ms, percentile(90%) = 0.100586 ms, percentile(95%) = 0.100586 ms, percentile(99%) = 0.101807 ms +[12/27/2023-21:21:24] [I] GPU Compute Time: min = 11.4023 ms, max = 19.6559 ms, mean = 12.2311 ms, median = 12.1094 ms, percentile(90%) = 12.564 ms, percentile(95%) = 12.7192 ms, percentile(99%) = 13.9766 ms +[12/27/2023-21:21:24] [I] D2H Latency: min = 0.00292969 ms, max = 0.0380859 ms, mean = 0.0129526 ms, median = 0.0117188 ms, percentile(90%) = 0.0170898 ms, percentile(95%) = 0.019043 ms, percentile(99%) = 0.027832 ms +[12/27/2023-21:21:24] [I] Total Host Walltime: 15.0321 s +[12/27/2023-21:21:24] [I] Total GPU Compute Time: 14.8608 s +[12/27/2023-21:21:24] [I] Explanations of the performance metrics are printed in the verbose logs. 
+[12/27/2023-21:21:24] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_int8.onnx --best --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_int8.onnx.best.engine diff --git a/yolo_nas_pose_n_int8.onnx.engine.err b/yolo_nas_pose_n_int8.onnx.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..a5f37e8b900376461f9d63eb544b8d919fba7c5e --- /dev/null +++ b/yolo_nas_pose_n_int8.onnx.engine.err @@ -0,0 +1,8 @@ +[12/27/2023-20:52:37] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/27/2023-20:52:37] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/27/2023-20:52:39] [E] Error[4]: [network.cpp::validate::2922] Error Code 4: Internal Error (Int8 precision has been set for a layer or layer output, but int8 is not configured in the builder) +[12/27/2023-20:52:39] [E] Error[2]: [builder.cpp::buildSerializedNetwork::751] Error Code 2: Internal Error (Assertion engine != nullptr failed. ) +[12/27/2023-20:52:39] [E] Engine could not be created from network +[12/27/2023-20:52:39] [E] Building engine failed +[12/27/2023-20:52:39] [E] Failed to create engine from model or file. 
+[12/27/2023-20:52:39] [E] Engine set up failed diff --git a/yolo_nas_pose_n_int8.onnx.engine.log b/yolo_nas_pose_n_int8.onnx.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..366d267fa7bd4350a198c513176d5fbbb258806c --- /dev/null +++ b/yolo_nas_pose_n_int8.onnx.engine.log @@ -0,0 +1,91 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_int8.onnx --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_int8.onnx.engine +[12/27/2023-20:52:33] [I] === Model Options === +[12/27/2023-20:52:33] [I] Format: ONNX +[12/27/2023-20:52:33] [I] Model: yolo_nas_pose_n_int8.onnx +[12/27/2023-20:52:33] [I] Output: +[12/27/2023-20:52:33] [I] === Build Options === +[12/27/2023-20:52:33] [I] Max batch: explicit batch +[12/27/2023-20:52:33] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/27/2023-20:52:33] [I] minTiming: 1 +[12/27/2023-20:52:33] [I] avgTiming: 8 +[12/27/2023-20:52:33] [I] Precision: FP32 +[12/27/2023-20:52:33] [I] LayerPrecisions: +[12/27/2023-20:52:33] [I] Calibration: +[12/27/2023-20:52:33] [I] Refit: Disabled +[12/27/2023-20:52:33] [I] Sparsity: Disabled +[12/27/2023-20:52:33] [I] Safe mode: Disabled +[12/27/2023-20:52:33] [I] DirectIO mode: Disabled +[12/27/2023-20:52:33] [I] Restricted mode: Disabled +[12/27/2023-20:52:33] [I] Build only: Disabled +[12/27/2023-20:52:33] [I] Save engine: yolo_nas_pose_n_int8.onnx.engine +[12/27/2023-20:52:33] [I] Load engine: +[12/27/2023-20:52:33] [I] Profiling verbosity: 0 +[12/27/2023-20:52:33] [I] Tactic sources: Using default tactic sources +[12/27/2023-20:52:33] [I] timingCacheMode: local +[12/27/2023-20:52:33] [I] timingCacheFile: +[12/27/2023-20:52:33] [I] Heuristic: Disabled +[12/27/2023-20:52:33] [I] Preview Features: Use default preview flags. 
+[12/27/2023-20:52:33] [I] Input(s)s format: fp32:CHW +[12/27/2023-20:52:33] [I] Output(s)s format: fp32:CHW +[12/27/2023-20:52:33] [I] Input build shapes: model +[12/27/2023-20:52:33] [I] Input calibration shapes: model +[12/27/2023-20:52:33] [I] === System Options === +[12/27/2023-20:52:33] [I] Device: 0 +[12/27/2023-20:52:33] [I] DLACore: +[12/27/2023-20:52:33] [I] Plugins: +[12/27/2023-20:52:33] [I] === Inference Options === +[12/27/2023-20:52:33] [I] Batch: Explicit +[12/27/2023-20:52:33] [I] Input inference shapes: model +[12/27/2023-20:52:33] [I] Iterations: 10 +[12/27/2023-20:52:33] [I] Duration: 15s (+ 200ms warm up) +[12/27/2023-20:52:33] [I] Sleep time: 0ms +[12/27/2023-20:52:33] [I] Idle time: 0ms +[12/27/2023-20:52:33] [I] Streams: 1 +[12/27/2023-20:52:33] [I] ExposeDMA: Disabled +[12/27/2023-20:52:33] [I] Data transfers: Enabled +[12/27/2023-20:52:33] [I] Spin-wait: Disabled +[12/27/2023-20:52:33] [I] Multithreading: Disabled +[12/27/2023-20:52:33] [I] CUDA Graph: Disabled +[12/27/2023-20:52:33] [I] Separate profiling: Disabled +[12/27/2023-20:52:33] [I] Time Deserialize: Disabled +[12/27/2023-20:52:33] [I] Time Refit: Disabled +[12/27/2023-20:52:33] [I] NVTX verbosity: 0 +[12/27/2023-20:52:33] [I] Persistent Cache Ratio: 0 +[12/27/2023-20:52:33] [I] Inputs: +[12/27/2023-20:52:33] [I] === Reporting Options === +[12/27/2023-20:52:33] [I] Verbose: Disabled +[12/27/2023-20:52:33] [I] Averages: 100 inferences +[12/27/2023-20:52:33] [I] Percentiles: 90,95,99 +[12/27/2023-20:52:33] [I] Dump refittable layers:Disabled +[12/27/2023-20:52:33] [I] Dump output: Disabled +[12/27/2023-20:52:33] [I] Profile: Disabled +[12/27/2023-20:52:33] [I] Export timing to JSON file: +[12/27/2023-20:52:33] [I] Export output to JSON file: +[12/27/2023-20:52:33] [I] Export profile to JSON file: +[12/27/2023-20:52:33] [I] +[12/27/2023-20:52:33] [I] === Device Information === +[12/27/2023-20:52:33] [I] Selected Device: Orin +[12/27/2023-20:52:33] [I] Compute Capability: 8.7 
+[12/27/2023-20:52:33] [I] SMs: 8 +[12/27/2023-20:52:33] [I] Compute Clock Rate: 0.624 GHz +[12/27/2023-20:52:33] [I] Device Global Memory: 7471 MiB +[12/27/2023-20:52:33] [I] Shared Memory per SM: 164 KiB +[12/27/2023-20:52:33] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/27/2023-20:52:33] [I] Memory Clock Rate: 0.624 GHz +[12/27/2023-20:52:33] [I] +[12/27/2023-20:52:33] [I] TensorRT version: 8.5.2 +[12/27/2023-20:52:33] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 3046 (MiB) +[12/27/2023-20:52:36] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +284, now: CPU 574, GPU 3349 (MiB) +[12/27/2023-20:52:36] [I] Start parsing network model +[12/27/2023-20:52:36] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-20:52:36] [I] [TRT] Input filename: yolo_nas_pose_n_int8.onnx +[12/27/2023-20:52:36] [I] [TRT] ONNX IR version: 0.0.8 +[12/27/2023-20:52:36] [I] [TRT] Opset version: 17 +[12/27/2023-20:52:36] [I] [TRT] Producer name: pytorch +[12/27/2023-20:52:36] [I] [TRT] Producer version: 2.1.2 +[12/27/2023-20:52:36] [I] [TRT] Domain: +[12/27/2023-20:52:36] [I] [TRT] Model version: 0 +[12/27/2023-20:52:36] [I] [TRT] Doc string: +[12/27/2023-20:52:36] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-20:52:39] [I] Finish parsing network model +&&&& FAILED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_int8.onnx --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_int8.onnx.engine diff --git a/yolo_nas_pose_n_int8.onnx.fp16.engine.err b/yolo_nas_pose_n_int8.onnx.fp16.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..710b66f7a81a372ea382985ae24402bc87a2b3f4 --- /dev/null +++ b/yolo_nas_pose_n_int8.onnx.fp16.engine.err @@ -0,0 +1,8 @@ +[12/27/2023-20:52:44] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not 
natively support INT64. Attempting to cast down to INT32. +[12/27/2023-20:52:44] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/27/2023-20:52:47] [E] Error[4]: [network.cpp::validate::2922] Error Code 4: Internal Error (Int8 precision has been set for a layer or layer output, but int8 is not configured in the builder) +[12/27/2023-20:52:47] [E] Error[2]: [builder.cpp::buildSerializedNetwork::751] Error Code 2: Internal Error (Assertion engine != nullptr failed. ) +[12/27/2023-20:52:47] [E] Engine could not be created from network +[12/27/2023-20:52:47] [E] Building engine failed +[12/27/2023-20:52:47] [E] Failed to create engine from model or file. +[12/27/2023-20:52:47] [E] Engine set up failed diff --git a/yolo_nas_pose_n_int8.onnx.fp16.engine.log b/yolo_nas_pose_n_int8.onnx.fp16.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..ef053b40cbe76f8a88583c3f1e9bed030c74491f --- /dev/null +++ b/yolo_nas_pose_n_int8.onnx.fp16.engine.log @@ -0,0 +1,91 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_int8.onnx --fp16 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_int8.onnx.fp16.engine +[12/27/2023-20:52:40] [I] === Model Options === +[12/27/2023-20:52:40] [I] Format: ONNX +[12/27/2023-20:52:40] [I] Model: yolo_nas_pose_n_int8.onnx +[12/27/2023-20:52:40] [I] Output: +[12/27/2023-20:52:40] [I] === Build Options === +[12/27/2023-20:52:40] [I] Max batch: explicit batch +[12/27/2023-20:52:40] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/27/2023-20:52:40] [I] minTiming: 1 +[12/27/2023-20:52:40] [I] avgTiming: 8 +[12/27/2023-20:52:40] [I] Precision: FP32+FP16 +[12/27/2023-20:52:40] [I] LayerPrecisions: +[12/27/2023-20:52:40] [I] Calibration: +[12/27/2023-20:52:40] [I] Refit: Disabled +[12/27/2023-20:52:40] [I] Sparsity: Disabled +[12/27/2023-20:52:40] [I] Safe mode: 
Disabled +[12/27/2023-20:52:40] [I] DirectIO mode: Disabled +[12/27/2023-20:52:40] [I] Restricted mode: Disabled +[12/27/2023-20:52:40] [I] Build only: Disabled +[12/27/2023-20:52:40] [I] Save engine: yolo_nas_pose_n_int8.onnx.fp16.engine +[12/27/2023-20:52:40] [I] Load engine: +[12/27/2023-20:52:40] [I] Profiling verbosity: 0 +[12/27/2023-20:52:40] [I] Tactic sources: Using default tactic sources +[12/27/2023-20:52:40] [I] timingCacheMode: local +[12/27/2023-20:52:40] [I] timingCacheFile: +[12/27/2023-20:52:40] [I] Heuristic: Disabled +[12/27/2023-20:52:40] [I] Preview Features: Use default preview flags. +[12/27/2023-20:52:40] [I] Input(s)s format: fp32:CHW +[12/27/2023-20:52:40] [I] Output(s)s format: fp32:CHW +[12/27/2023-20:52:40] [I] Input build shapes: model +[12/27/2023-20:52:40] [I] Input calibration shapes: model +[12/27/2023-20:52:40] [I] === System Options === +[12/27/2023-20:52:40] [I] Device: 0 +[12/27/2023-20:52:40] [I] DLACore: +[12/27/2023-20:52:40] [I] Plugins: +[12/27/2023-20:52:40] [I] === Inference Options === +[12/27/2023-20:52:40] [I] Batch: Explicit +[12/27/2023-20:52:40] [I] Input inference shapes: model +[12/27/2023-20:52:40] [I] Iterations: 10 +[12/27/2023-20:52:40] [I] Duration: 15s (+ 200ms warm up) +[12/27/2023-20:52:40] [I] Sleep time: 0ms +[12/27/2023-20:52:40] [I] Idle time: 0ms +[12/27/2023-20:52:40] [I] Streams: 1 +[12/27/2023-20:52:40] [I] ExposeDMA: Disabled +[12/27/2023-20:52:40] [I] Data transfers: Enabled +[12/27/2023-20:52:40] [I] Spin-wait: Disabled +[12/27/2023-20:52:40] [I] Multithreading: Disabled +[12/27/2023-20:52:40] [I] CUDA Graph: Disabled +[12/27/2023-20:52:40] [I] Separate profiling: Disabled +[12/27/2023-20:52:40] [I] Time Deserialize: Disabled +[12/27/2023-20:52:40] [I] Time Refit: Disabled +[12/27/2023-20:52:40] [I] NVTX verbosity: 0 +[12/27/2023-20:52:40] [I] Persistent Cache Ratio: 0 +[12/27/2023-20:52:40] [I] Inputs: +[12/27/2023-20:52:40] [I] === Reporting Options === +[12/27/2023-20:52:40] [I] Verbose: 
Disabled +[12/27/2023-20:52:40] [I] Averages: 100 inferences +[12/27/2023-20:52:40] [I] Percentiles: 90,95,99 +[12/27/2023-20:52:40] [I] Dump refittable layers:Disabled +[12/27/2023-20:52:40] [I] Dump output: Disabled +[12/27/2023-20:52:40] [I] Profile: Disabled +[12/27/2023-20:52:40] [I] Export timing to JSON file: +[12/27/2023-20:52:40] [I] Export output to JSON file: +[12/27/2023-20:52:40] [I] Export profile to JSON file: +[12/27/2023-20:52:40] [I] +[12/27/2023-20:52:40] [I] === Device Information === +[12/27/2023-20:52:40] [I] Selected Device: Orin +[12/27/2023-20:52:40] [I] Compute Capability: 8.7 +[12/27/2023-20:52:40] [I] SMs: 8 +[12/27/2023-20:52:40] [I] Compute Clock Rate: 0.624 GHz +[12/27/2023-20:52:40] [I] Device Global Memory: 7471 MiB +[12/27/2023-20:52:40] [I] Shared Memory per SM: 164 KiB +[12/27/2023-20:52:40] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/27/2023-20:52:40] [I] Memory Clock Rate: 0.624 GHz +[12/27/2023-20:52:40] [I] +[12/27/2023-20:52:40] [I] TensorRT version: 8.5.2 +[12/27/2023-20:52:41] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 3041 (MiB) +[12/27/2023-20:52:44] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +284, now: CPU 574, GPU 3347 (MiB) +[12/27/2023-20:52:44] [I] Start parsing network model +[12/27/2023-20:52:44] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-20:52:44] [I] [TRT] Input filename: yolo_nas_pose_n_int8.onnx +[12/27/2023-20:52:44] [I] [TRT] ONNX IR version: 0.0.8 +[12/27/2023-20:52:44] [I] [TRT] Opset version: 17 +[12/27/2023-20:52:44] [I] [TRT] Producer name: pytorch +[12/27/2023-20:52:44] [I] [TRT] Producer version: 2.1.2 +[12/27/2023-20:52:44] [I] [TRT] Domain: +[12/27/2023-20:52:44] [I] [TRT] Model version: 0 +[12/27/2023-20:52:44] [I] [TRT] Doc string: +[12/27/2023-20:52:44] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-20:52:47] [I] Finish parsing network model +&&&& 
FAILED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_int8.onnx --fp16 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_int8.onnx.fp16.engine diff --git a/yolo_nas_pose_n_int8.onnx.int8.engine b/yolo_nas_pose_n_int8.onnx.int8.engine new file mode 100644 index 0000000000000000000000000000000000000000..2758dbde2b9cffdddf7726a27df983f5685e117c --- /dev/null +++ b/yolo_nas_pose_n_int8.onnx.int8.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1235190611d52c7c4bb3e4edb7652062a0e7c08bf3d721d882d72cef0b6eb7cc +size 10088004 diff --git a/yolo_nas_pose_n_int8.onnx.int8.engine.err b/yolo_nas_pose_n_int8.onnx.int8.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..fa815a244f15a1be8698ae9fdd1e9a6ae9113897 --- /dev/null +++ b/yolo_nas_pose_n_int8.onnx.int8.engine.err @@ -0,0 +1,7 @@ +[12/27/2023-21:21:30] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/27/2023-21:21:30] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/27/2023-21:21:33] [W] [TRT] Calibrator won't be used in explicit precision mode. Use quantization aware training to generate network with Quantize/Dequantize nodes. +[12/27/2023-21:30:30] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. +[12/27/2023-21:30:30] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/27/2023-21:30:30] [W] * GPU compute time is unstable, with coefficient of variance = 5.79476%. +[12/27/2023-21:30:30] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. 
diff --git a/yolo_nas_pose_n_int8.onnx.int8.engine.log b/yolo_nas_pose_n_int8.onnx.int8.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..2f0d2c8a8582b32217eab205b62bd5b499bf1976 --- /dev/null +++ b/yolo_nas_pose_n_int8.onnx.int8.engine.log @@ -0,0 +1,323 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_int8.onnx --int8 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_int8.onnx.int8.engine +[12/27/2023-21:21:26] [I] === Model Options === +[12/27/2023-21:21:26] [I] Format: ONNX +[12/27/2023-21:21:26] [I] Model: yolo_nas_pose_n_int8.onnx +[12/27/2023-21:21:26] [I] Output: +[12/27/2023-21:21:26] [I] === Build Options === +[12/27/2023-21:21:26] [I] Max batch: explicit batch +[12/27/2023-21:21:26] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/27/2023-21:21:26] [I] minTiming: 1 +[12/27/2023-21:21:26] [I] avgTiming: 8 +[12/27/2023-21:21:26] [I] Precision: FP32+INT8 +[12/27/2023-21:21:26] [I] LayerPrecisions: +[12/27/2023-21:21:26] [I] Calibration: Dynamic +[12/27/2023-21:21:26] [I] Refit: Disabled +[12/27/2023-21:21:26] [I] Sparsity: Disabled +[12/27/2023-21:21:26] [I] Safe mode: Disabled +[12/27/2023-21:21:26] [I] DirectIO mode: Disabled +[12/27/2023-21:21:26] [I] Restricted mode: Disabled +[12/27/2023-21:21:26] [I] Build only: Disabled +[12/27/2023-21:21:26] [I] Save engine: yolo_nas_pose_n_int8.onnx.int8.engine +[12/27/2023-21:21:26] [I] Load engine: +[12/27/2023-21:21:26] [I] Profiling verbosity: 0 +[12/27/2023-21:21:26] [I] Tactic sources: Using default tactic sources +[12/27/2023-21:21:26] [I] timingCacheMode: local +[12/27/2023-21:21:26] [I] timingCacheFile: +[12/27/2023-21:21:26] [I] Heuristic: Disabled +[12/27/2023-21:21:26] [I] Preview Features: Use default preview flags. 
+[12/27/2023-21:21:26] [I] Input(s)s format: fp32:CHW +[12/27/2023-21:21:26] [I] Output(s)s format: fp32:CHW +[12/27/2023-21:21:26] [I] Input build shapes: model +[12/27/2023-21:21:26] [I] Input calibration shapes: model +[12/27/2023-21:21:26] [I] === System Options === +[12/27/2023-21:21:26] [I] Device: 0 +[12/27/2023-21:21:26] [I] DLACore: +[12/27/2023-21:21:26] [I] Plugins: +[12/27/2023-21:21:26] [I] === Inference Options === +[12/27/2023-21:21:26] [I] Batch: Explicit +[12/27/2023-21:21:26] [I] Input inference shapes: model +[12/27/2023-21:21:26] [I] Iterations: 10 +[12/27/2023-21:21:26] [I] Duration: 15s (+ 200ms warm up) +[12/27/2023-21:21:26] [I] Sleep time: 0ms +[12/27/2023-21:21:26] [I] Idle time: 0ms +[12/27/2023-21:21:26] [I] Streams: 1 +[12/27/2023-21:21:26] [I] ExposeDMA: Disabled +[12/27/2023-21:21:26] [I] Data transfers: Enabled +[12/27/2023-21:21:26] [I] Spin-wait: Disabled +[12/27/2023-21:21:26] [I] Multithreading: Disabled +[12/27/2023-21:21:26] [I] CUDA Graph: Disabled +[12/27/2023-21:21:26] [I] Separate profiling: Disabled +[12/27/2023-21:21:26] [I] Time Deserialize: Disabled +[12/27/2023-21:21:26] [I] Time Refit: Disabled +[12/27/2023-21:21:26] [I] NVTX verbosity: 0 +[12/27/2023-21:21:26] [I] Persistent Cache Ratio: 0 +[12/27/2023-21:21:26] [I] Inputs: +[12/27/2023-21:21:26] [I] === Reporting Options === +[12/27/2023-21:21:26] [I] Verbose: Disabled +[12/27/2023-21:21:26] [I] Averages: 100 inferences +[12/27/2023-21:21:26] [I] Percentiles: 90,95,99 +[12/27/2023-21:21:26] [I] Dump refittable layers:Disabled +[12/27/2023-21:21:26] [I] Dump output: Disabled +[12/27/2023-21:21:26] [I] Profile: Disabled +[12/27/2023-21:21:26] [I] Export timing to JSON file: +[12/27/2023-21:21:26] [I] Export output to JSON file: +[12/27/2023-21:21:26] [I] Export profile to JSON file: +[12/27/2023-21:21:26] [I] +[12/27/2023-21:21:26] [I] === Device Information === +[12/27/2023-21:21:26] [I] Selected Device: Orin +[12/27/2023-21:21:26] [I] Compute Capability: 8.7 
+[12/27/2023-21:21:26] [I] SMs: 8 +[12/27/2023-21:21:26] [I] Compute Clock Rate: 0.624 GHz +[12/27/2023-21:21:26] [I] Device Global Memory: 7471 MiB +[12/27/2023-21:21:26] [I] Shared Memory per SM: 164 KiB +[12/27/2023-21:21:26] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/27/2023-21:21:26] [I] Memory Clock Rate: 0.624 GHz +[12/27/2023-21:21:26] [I] +[12/27/2023-21:21:26] [I] TensorRT version: 8.5.2 +[12/27/2023-21:21:27] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 3026 (MiB) +[12/27/2023-21:21:30] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +281, now: CPU 574, GPU 3329 (MiB) +[12/27/2023-21:21:30] [I] Start parsing network model +[12/27/2023-21:21:30] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-21:21:30] [I] [TRT] Input filename: yolo_nas_pose_n_int8.onnx +[12/27/2023-21:21:30] [I] [TRT] ONNX IR version: 0.0.8 +[12/27/2023-21:21:30] [I] [TRT] Opset version: 17 +[12/27/2023-21:21:30] [I] [TRT] Producer name: pytorch +[12/27/2023-21:21:30] [I] [TRT] Producer version: 2.1.2 +[12/27/2023-21:21:30] [I] [TRT] Domain: +[12/27/2023-21:21:30] [I] [TRT] Model version: 0 +[12/27/2023-21:21:30] [I] [TRT] Doc string: +[12/27/2023-21:21:30] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-21:21:32] [I] Finish parsing network model +[12/27/2023-21:21:32] [I] FP32 and INT8 precisions have been specified - more performance might be enabled by additionally specifying --fp16 or --best +[12/27/2023-21:21:36] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/27/2023-21:21:36] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 1203) [Constant] 
+[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 1204) [Constant] +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 1205) [Constant] +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] COPY: /model/backbone/stem/conv/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stem.conv.rbr_reparam.weight + /model/backbone/stem/conv/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stem/conv/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.downsample.rbr_reparam.weight + /model/backbone/stage1/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/downsample/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.conv2.conv.weight + /model/backbone/stage1/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/conv2/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.conv1.conv.weight + /model/backbone/stage1/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/conv1/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv 
+[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 485) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 501) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.conv3.conv.weight + /model/backbone/stage1/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/conv3/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.reduce_skip2.conv.weight + /model/neck/neck2/reduce_skip2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/reduce_skip2/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.downsample.rbr_reparam.weight + /model/backbone/stage2/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/downsample/rbr_reparam/Conv 
+[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.downsample.conv.weight + /model/neck/neck2/downsample/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/downsample/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.conv2.conv.weight + /model/backbone/stage2/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/conv2/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.conv1.conv.weight + /model/backbone/stage2/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/conv1/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 548) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.1.cv1.rbr_reparam.weight + 
/model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 564) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.2.cv1.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.2.cv2.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 580) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/Concat_/model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add_output_0_clone_0 copy +[12/27/2023-21:21:36] [I] [TRT] 
[GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.conv3.conv.weight + /model/backbone/stage2/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/conv3/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.reduce_skip2.conv.weight + /model/neck/neck1/reduce_skip2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/reduce_skip2/conv/Conv || model.neck.neck2.reduce_skip1.conv.weight + /model/neck/neck2/reduce_skip1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/reduce_skip1/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.downsample.rbr_reparam.weight + /model/backbone/stage3/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/downsample/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.downsample.conv.weight + /model/neck/neck1/downsample/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/downsample/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.conv2.conv.weight + /model/backbone/stage3/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/conv2/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.conv1.conv.weight + /model/backbone/stage3/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/conv1/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: 
model.backbone.stage3.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 630) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 646) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.2.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.2.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 662) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.3.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.3.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 678) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.conv3.conv.weight + /model/backbone/stage3/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/conv3/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: 
model.neck.neck1.reduce_skip1.conv.weight + /model/neck/neck1/reduce_skip1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/reduce_skip1/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.downsample.rbr_reparam.weight + /model/backbone/stage4/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/downsample/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.conv2.conv.weight + /model/backbone/stage4/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/conv2/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.conv1.conv.weight + /model/backbone/stage4/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/conv1/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 719) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] COPY: 
/model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 735) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.conv3.conv.weight + /model/backbone/stage4/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/conv3/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.context_module.cv1.conv.weight + /model/backbone/context_module/cv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/context_module/cv1/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/m.2/MaxPool_output_0 copy +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.context_module.cv2.conv.weight + 
/model/backbone/context_module/cv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/context_module/cv2/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.conv.conv.weight + /model/neck/neck1/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/conv/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] DECONVOLUTION: model.neck.neck1.upsample.weight + /model/neck/neck1/upsample/_weight_quantizer/QuantizeLinear + /model/neck/neck1/upsample/ConvTranspose +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.reduce_after_concat.conv.weight + /model/neck/neck1/reduce_after_concat/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/reduce_after_concat/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.conv2.conv.weight + /model/neck/neck1/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/conv2/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.conv1.conv.weight + /model/neck/neck1/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/conv1/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] POINTWISE: 
PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 800) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 816) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/Concat_/model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add_output_0_clone_0 copy +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.conv3.conv.weight + /model/neck/neck1/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/conv3/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.conv.conv.weight + /model/neck/neck2/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/conv/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/upsample/_input_quantizer/QuantizeLinear +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] DECONVOLUTION: model.neck.neck2.upsample.weight + 
/model/neck/neck2/upsample/_weight_quantizer/QuantizeLinear + /model/neck/neck2/upsample/ConvTranspose +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/Concat_/model/neck/neck2/reduce_skip1/act/Relu_output_0_clone_1 copy +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.reduce_after_concat.conv.weight + /model/neck/neck2/reduce_after_concat/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/reduce_after_concat/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.conv2.conv.weight + /model/neck/neck2/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/conv2/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.conv1.conv.weight + /model/neck/neck2/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/conv1/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 865) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] COPY: 
/model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 881) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.conv3.conv.weight + /model/neck/neck2/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/conv3/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.bbox_stem.seq.conv.weight + /model/heads/head1/bbox_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/bbox_stem/seq/conv/Conv || model.heads.head1.pose_stem.seq.conv.weight + /model/heads/head1/pose_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_stem/seq/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.conv.conv.weight + /model/neck/neck3/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/conv/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.reg_convs.0.seq.conv.weight + /model/heads/head1/reg_convs/reg_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv || 
model.heads.head1.cls_convs.0.seq.conv.weight + /model/heads/head1/cls_convs/cls_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.pose_convs.0.seq.conv.weight + /model/heads/head1/pose_convs/pose_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/conv1/conv/_input_quantizer/QuantizeLinear_clone_1 +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.cls_pred.weight + /model/heads/head1/cls_pred/_weight_quantizer/QuantizeLinear + /model/heads/head1/cls_pred/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.reg_pred.weight + /model/heads/head1/reg_pred/_weight_quantizer/QuantizeLinear + /model/heads/head1/reg_pred/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.pose_convs.1.seq.conv.weight + /model/heads/head1/pose_convs/pose_convs.1/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.conv2.conv.weight + /model/neck/neck3/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/conv2/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.conv1.conv.weight + /model/neck/neck3/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/conv1/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.pose_pred.weight + /model/heads/head1/pose_pred/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_pred/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] COPY: 
/model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/_input_quantizer/QuantizeLinear +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.0.cv1.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.0.cv2.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 947) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/_input_quantizer/QuantizeLinear +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.1.cv1.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.1.cv2.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 988) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-21:21:36] 
[I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/Concat_/model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add_output_0_clone_0 copy +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.conv3.conv.weight + /model/neck/neck3/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/conv3/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.bbox_stem.seq.conv.weight + /model/heads/head2/bbox_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/bbox_stem/seq/conv/Conv || model.heads.head2.pose_stem.seq.conv.weight + /model/heads/head2/pose_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_stem/seq/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.conv.conv.weight + /model/neck/neck4/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/conv/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.reg_convs.0.seq.conv.weight + /model/heads/head2/reg_convs/reg_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv || model.heads.head2.cls_convs.0.seq.conv.weight + /model/heads/head2/cls_convs/cls_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.pose_convs.0.seq.conv.weight + /model/heads/head2/pose_convs/pose_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.conv2.conv.weight + /model/neck/neck4/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/conv2/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.conv1.conv.weight + /model/neck/neck4/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + 
/model/neck/neck4/blocks/conv1/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.cls_pred.weight + /model/heads/head2/cls_pred/_weight_quantizer/QuantizeLinear + /model/heads/head2/cls_pred/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.reg_pred.weight + /model/heads/head2/reg_pred/_weight_quantizer/QuantizeLinear + /model/heads/head2/reg_pred/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.pose_convs.1.seq.conv.weight + /model/heads/head2/pose_convs/pose_convs.1/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/_input_quantizer/QuantizeLinear +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.pose_pred.weight + /model/heads/head2/pose_pred/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_pred/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.0.cv1.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.0.cv2.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 1053) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, 
/model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/_input_quantizer/QuantizeLinear +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.1.cv1.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.1.cv2.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 1094) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/Concat_/model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add_output_0_clone_0 copy +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.conv3.conv.weight + /model/neck/neck4/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/conv3/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.bbox_stem.seq.conv.weight + /model/heads/head3/bbox_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/bbox_stem/seq/conv/Conv || model.heads.head3.pose_stem.seq.conv.weight + /model/heads/head3/pose_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_stem/seq/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.reg_convs.0.seq.conv.weight + /model/heads/head3/reg_convs/reg_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv || 
model.heads.head3.cls_convs.0.seq.conv.weight + /model/heads/head3/cls_convs/cls_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_convs.0.seq.conv.weight + /model/heads/head3/pose_convs/pose_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.cls_pred.weight + /model/heads/head3/cls_pred/_weight_quantizer/QuantizeLinear + /model/heads/head3/cls_pred/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.reg_pred.weight + /model/heads/head3/reg_pred/_weight_quantizer/QuantizeLinear + /model/heads/head3/reg_pred/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_convs.1.seq.conv.weight + /model/heads/head3/pose_convs/pose_convs.1/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_convs.2.seq.conv.weight + /model/heads/head3/pose_convs/pose_convs.2/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_pred.weight + /model/heads/head3/pose_pred/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_pred/Conv +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice_1.../post_process/Reshape_2]} +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] NMS: batched_nms_238 +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] 
DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 1207) [NMS]_1_output[DevicetoShapeHostCopy] +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/27/2023-21:21:36] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] +[12/27/2023-21:21:37] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +498, now: CPU 1168, GPU 3892 (MiB) +[12/27/2023-21:21:38] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +82, GPU +61, now: CPU 1250, GPU 3953 (MiB) +[12/27/2023-21:21:38] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored. +[12/27/2023-21:30:13] [I] [TRT] Total Activation Memory: 7918594560 +[12/27/2023-21:30:13] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/27/2023-21:30:14] [I] [TRT] Total Host Persistent Memory: 299584 +[12/27/2023-21:30:14] [I] [TRT] Total Device Persistent Memory: 116736 +[12/27/2023-21:30:14] [I] [TRT] Total Scratch Memory: 134217728 +[12/27/2023-21:30:14] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 33 MiB, GPU 132 MiB +[12/27/2023-21:30:14] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 180 steps to complete. +[12/27/2023-21:30:14] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 39.0902ms to assign 13 blocks to 180 nodes requiring 144065024 bytes. +[12/27/2023-21:30:14] [I] [TRT] Total Activation Memory: 144065024 +[12/27/2023-21:30:14] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1590, GPU 4967 (MiB) +[12/27/2023-21:30:14] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +6, GPU +8, now: CPU 6, GPU 8 (MiB) +[12/27/2023-21:30:15] [I] Engine built in 528.25 sec. 
+[12/27/2023-21:30:15] [I] [TRT] Loaded engine size: 9 MiB +[12/27/2023-21:30:15] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1240, GPU 4970 (MiB) +[12/27/2023-21:30:15] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +7, now: CPU 0, GPU 7 (MiB) +[12/27/2023-21:30:15] [I] Engine deserialized in 0.110555 sec. +[12/27/2023-21:30:15] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1241, GPU 4970 (MiB) +[12/27/2023-21:30:15] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +138, now: CPU 0, GPU 145 (MiB) +[12/27/2023-21:30:15] [I] Setting persistentCacheLimit to 0 bytes. +[12/27/2023-21:30:15] [I] Using random values for input onnx::Cast_0 +[12/27/2023-21:30:15] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/27/2023-21:30:15] [I] Using random values for output graph2_flat_predictions +[12/27/2023-21:30:15] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/27/2023-21:30:15] [I] Starting inference +[12/27/2023-21:30:30] [I] Warmup completed 11 queries over 200 ms +[12/27/2023-21:30:30] [I] Timing trace has 1116 queries over 15.0271 s +[12/27/2023-21:30:30] [I] +[12/27/2023-21:30:30] [I] === Trace details === +[12/27/2023-21:30:30] [I] Trace averages of 100 runs: +[12/27/2023-21:30:30] [I] Average on 100 runs - GPU latency: 13.303 ms - Host latency: 13.4154 ms (enqueue 13.3824 ms) +[12/27/2023-21:30:30] [I] Average on 100 runs - GPU latency: 13.1087 ms - Host latency: 13.2205 ms (enqueue 13.189 ms) +[12/27/2023-21:30:30] [I] Average on 100 runs - GPU latency: 13.1353 ms - Host latency: 13.2476 ms (enqueue 13.2151 ms) +[12/27/2023-21:30:30] [I] Average on 100 runs - GPU latency: 13.1584 ms - Host latency: 13.27 ms (enqueue 13.2379 ms) +[12/27/2023-21:30:30] [I] Average on 100 runs - GPU latency: 13.3766 ms - Host latency: 13.4898 ms (enqueue 13.4566 ms) +[12/27/2023-21:30:30] [I] 
Average on 100 runs - GPU latency: 13.1268 ms - Host latency: 13.2384 ms (enqueue 13.2038 ms) +[12/27/2023-21:30:30] [I] Average on 100 runs - GPU latency: 13.4482 ms - Host latency: 13.5607 ms (enqueue 13.5189 ms) +[12/27/2023-21:30:30] [I] Average on 100 runs - GPU latency: 13.5223 ms - Host latency: 13.637 ms (enqueue 13.5947 ms) +[12/27/2023-21:30:30] [I] Average on 100 runs - GPU latency: 13.7048 ms - Host latency: 13.8213 ms (enqueue 13.7771 ms) +[12/27/2023-21:30:30] [I] Average on 100 runs - GPU latency: 13.5633 ms - Host latency: 13.6782 ms (enqueue 13.6358 ms) +[12/27/2023-21:30:30] [I] Average on 100 runs - GPU latency: 13.1192 ms - Host latency: 13.2312 ms (enqueue 13.1992 ms) +[12/27/2023-21:30:30] [I] +[12/27/2023-21:30:30] [I] === Performance summary === +[12/27/2023-21:30:30] [I] Throughput: 74.2658 qps +[12/27/2023-21:30:30] [I] Latency: min = 12.3477 ms, max = 28.8486 ms, mean = 13.4348 ms, median = 13.165 ms, percentile(90%) = 13.8916 ms, percentile(95%) = 13.9805 ms, percentile(99%) = 16.4219 ms +[12/27/2023-21:30:30] [I] Enqueue Time: min = 12.3198 ms, max = 28.8143 ms, mean = 13.3985 ms, median = 13.1329 ms, percentile(90%) = 13.8477 ms, percentile(95%) = 13.9395 ms, percentile(99%) = 16.3662 ms +[12/27/2023-21:30:30] [I] H2D Latency: min = 0.0810547 ms, max = 0.185547 ms, mean = 0.0987979 ms, median = 0.0986328 ms, percentile(90%) = 0.100586 ms, percentile(95%) = 0.100586 ms, percentile(99%) = 0.111328 ms +[12/27/2023-21:30:30] [I] GPU Compute Time: min = 12.2375 ms, max = 28.7328 ms, mean = 13.3217 ms, median = 13.0537 ms, percentile(90%) = 13.7734 ms, percentile(95%) = 13.8643 ms, percentile(99%) = 16.2988 ms +[12/27/2023-21:30:30] [I] D2H Latency: min = 0.00292969 ms, max = 0.0380859 ms, mean = 0.0142309 ms, median = 0.0126953 ms, percentile(90%) = 0.0195312 ms, percentile(95%) = 0.0214844 ms, percentile(99%) = 0.0302734 ms +[12/27/2023-21:30:30] [I] Total Host Walltime: 15.0271 s +[12/27/2023-21:30:30] [I] Total GPU Compute Time: 14.8671 
s +[12/27/2023-21:30:30] [I] Explanations of the performance metrics are printed in the verbose logs. +[12/27/2023-21:30:30] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_n_int8.onnx --int8 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_n_int8.onnx.int8.engine diff --git a/yolo_nas_pose_n_int8.onnx.usage.txt b/yolo_nas_pose_n_int8.onnx.usage.txt new file mode 100644 index 0000000000000000000000000000000000000000..dd5e28b1fa8f98dc8aa3bbf44c14814facc2d542 --- /dev/null +++ b/yolo_nas_pose_n_int8.onnx.usage.txt @@ -0,0 +1,58 @@ + +Model exported successfully to yolo_nas_pose_n_int8.onnx +Model expects input image of shape [1, 3, 640, 640] +Input image dtype is torch.uint8 + +Exported model already contains preprocessing (normalization) step, so you don't need to do it manually. +Preprocessing steps to be applied to input image are: +Sequential( + (0): CastTensorTo(dtype=torch.float32) + (1): ChannelSelect(channels_indexes=tensor([2, 1, 0])) + (2): ApplyMeanStd(mean=[0.], scale=[255.]) +) + + +Exported model contains postprocessing (NMS) step with the following parameters: + num_pre_nms_predictions=1000 + max_predictions_per_image=10 + nms_threshold=0.5 + confidence_threshold=0.15 + output_predictions_format=flat + + +Exported model is in ONNX format and can be used with ONNXRuntime +To run inference with ONNXRuntime, please use the following code snippet: + + import onnxruntime + import numpy as np + session = onnxruntime.InferenceSession("yolo_nas_pose_n_int8.onnx", providers=["CUDAExecutionProvider", "CPUExecutionProvider"]) + inputs = [o.name for o in session.get_inputs()] + outputs = [o.name for o in session.get_outputs()] + + example_input_image = np.zeros((1, 3, 640, 640)).astype(np.uint8) + predictions = session.run(outputs, {inputs[0]: example_input_image}) + +Exported model can also be used with TensorRT +To run inference with TensorRT, please see TensorRT deployment documentation +You can 
benchmark the model using the following code snippet: + + trtexec --onnx=yolo_nas_pose_n_int8.onnx --int8 --avgRuns=100 --duration=15 + + +Exported model has predictions in flat format: + +# flat_predictions is a 2D array of [N,K] shape +# Each row represents (image_index, x_min, y_min, x_max, y_max, confidence, joints...) +# Please note all values are floats, so you have to convert them to integers if needed + +[flat_predictions] = predictions +pred_bboxes = flat_predictions[:, 1:5] +pred_scores = flat_predictions[:, 5] +pred_joints = flat_predictions[:, 6:].reshape((len(pred_bboxes), -1, 3)) +for i in range(len(pred_bboxes)): + confidence = pred_scores[i] + x_min, y_min, x_max, y_max = pred_bboxes[i] + print(f"Detected pose with confidence={{confidence}}, x_min={{x_min}}, y_min={{y_min}}, x_max={{x_max}}, y_max={{y_max}}") + for joint_index, (x, y, confidence) in enumerate(pred_joints[i]):") + print(f"Joint {{joint_index}} has coordinates x={{x}}, y={{y}}, confidence={{confidence}}") + diff --git a/yolo_nas_pose_s_fp16.onnx b/yolo_nas_pose_s_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..927591ef19784c8cdd889aca34c810ee11258418 --- /dev/null +++ b/yolo_nas_pose_s_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe40712b6dbff75170c0dbc9a5c9789532f4a9c29b592e361ff602dfa28b2f7e +size 30900352 diff --git a/yolo_nas_pose_s_fp16.onnx.best.engine b/yolo_nas_pose_s_fp16.onnx.best.engine new file mode 100644 index 0000000000000000000000000000000000000000..d0c8d66d169761a722ad90d753fe727169bde40b --- /dev/null +++ b/yolo_nas_pose_s_fp16.onnx.best.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3056c7ce3acb7b64780a758921924cafc5f32de43d54fe43c9a33e503bddd3cb +size 17828458 diff --git a/yolo_nas_pose_s_fp16.onnx.best.engine.err b/yolo_nas_pose_s_fp16.onnx.best.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..4a46e55ab7934ee08d9b86c879bb00c67a715173 
--- /dev/null +++ b/yolo_nas_pose_s_fp16.onnx.best.engine.err @@ -0,0 +1,265 @@ +[12/28/2023-01:35:38] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/28/2023-01:35:38] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-01:35:38] [W] [TRT] Calibrator is not being used. Users must provide dynamic range for all tensors that are not Int32 or Bool. +[12/28/2023-01:48:24] [W] [TRT] Tactic Device request: 3160MB Available: 2805MB. Device memory is insufficient to use tactic. +[12/28/2023-01:48:24] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3160 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-01:48:24] [W] [TRT] Tactic Device request: 3160MB Available: 2805MB. Device memory is insufficient to use tactic. +[12/28/2023-01:48:24] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3160 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-01:48:26] [W] [TRT] Tactic Device request: 3152MB Available: 2792MB. Device memory is insufficient to use tactic. +[12/28/2023-01:48:26] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3152 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-01:48:27] [W] [TRT] Tactic Device request: 3152MB Available: 2792MB. Device memory is insufficient to use tactic. +[12/28/2023-01:48:27] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3152 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-01:55:59] [W] [TRT] Tactic Device request: 3143MB Available: 2622MB. Device memory is insufficient to use tactic. +[12/28/2023-01:55:59] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3143 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-01:55:59] [W] [TRT] Tactic Device request: 3143MB Available: 2622MB. Device memory is insufficient to use tactic. +[12/28/2023-01:55:59] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3143 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-01:55:59] [W] [TRT] Tactic Device request: 3143MB Available: 2622MB. Device memory is insufficient to use tactic. +[12/28/2023-01:55:59] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3143 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-01:56:01] [W] [TRT] Tactic Device request: 3136MB Available: 2620MB. Device memory is insufficient to use tactic. +[12/28/2023-01:56:01] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3136 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-01:56:01] [W] [TRT] Tactic Device request: 3136MB Available: 2620MB. Device memory is insufficient to use tactic. +[12/28/2023-01:56:01] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3136 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-01:56:03] [W] [TRT] Tactic Device request: 4711MB Available: 2620MB. Device memory is insufficient to use tactic. 
+[12/28/2023-01:56:03] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-01:56:03] [W] [TRT] Tactic Device request: 4711MB Available: 2620MB. Device memory is insufficient to use tactic. +[12/28/2023-01:56:03] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4711 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-01:56:03] [W] [TRT] Tactic Device request: 4711MB Available: 2620MB. Device memory is insufficient to use tactic. +[12/28/2023-01:56:03] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-01:56:05] [W] [TRT] Tactic Device request: 4701MB Available: 2620MB. Device memory is insufficient to use tactic. +[12/28/2023-01:56:05] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4701 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-01:56:05] [W] [TRT] Tactic Device request: 4701MB Available: 2620MB. Device memory is insufficient to use tactic. +[12/28/2023-01:56:05] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 4701 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-01:56:26] [W] [TRT] Tactic Device request: 3152MB Available: 2620MB. Device memory is insufficient to use tactic. +[12/28/2023-01:56:26] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3152 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-01:56:26] [W] [TRT] Tactic Device request: 3152MB Available: 2620MB. Device memory is insufficient to use tactic. +[12/28/2023-01:56:26] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3152 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-01:56:26] [W] [TRT] Tactic Device request: 3152MB Available: 2620MB. Device memory is insufficient to use tactic. +[12/28/2023-01:56:26] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3152 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-01:56:27] [W] [TRT] Tactic Device request: 3148MB Available: 2619MB. Device memory is insufficient to use tactic. +[12/28/2023-01:56:27] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3148 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-01:56:27] [W] [TRT] Tactic Device request: 3148MB Available: 2619MB. Device memory is insufficient to use tactic. +[12/28/2023-01:56:27] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3148 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:06:08] [W] [TRT] Tactic Device request: 3144MB Available: 2398MB. Device memory is insufficient to use tactic. +[12/28/2023-02:06:08] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3144 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:06:08] [W] [TRT] Tactic Device request: 3144MB Available: 2398MB. Device memory is insufficient to use tactic. 
+[12/28/2023-02:06:08] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3144 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:06:08] [W] [TRT] Tactic Device request: 3144MB Available: 2397MB. Device memory is insufficient to use tactic. +[12/28/2023-02:06:08] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3144 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:06:09] [W] [TRT] Tactic Device request: 3140MB Available: 2399MB. Device memory is insufficient to use tactic. +[12/28/2023-02:06:09] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3140 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:06:10] [W] [TRT] Tactic Device request: 3140MB Available: 2399MB. Device memory is insufficient to use tactic. +[12/28/2023-02:06:10] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3140 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:06:11] [W] [TRT] Tactic Device request: 7056MB Available: 2397MB. Device memory is insufficient to use tactic. +[12/28/2023-02:06:11] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:06:11] [W] [TRT] Tactic Device request: 7056MB Available: 2397MB. Device memory is insufficient to use tactic. +[12/28/2023-02:06:11] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 7056 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-02:06:12] [W] [TRT] Tactic Device request: 7056MB Available: 2398MB. Device memory is insufficient to use tactic. +[12/28/2023-02:06:12] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:06:14] [W] [TRT] Tactic Device request: 7050MB Available: 2399MB. Device memory is insufficient to use tactic. +[12/28/2023-02:06:14] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7050 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:06:14] [W] [TRT] Tactic Device request: 7050MB Available: 2399MB. Device memory is insufficient to use tactic. +[12/28/2023-02:06:14] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 7050 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:20:52] [W] [TRT] Tactic Device request: 2385MB Available: 2274MB. Device memory is insufficient to use tactic. +[12/28/2023-02:20:52] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2385 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:20:52] [W] [TRT] Tactic Device request: 2385MB Available: 2274MB. Device memory is insufficient to use tactic. +[12/28/2023-02:20:52] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2385 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:20:53] [W] [TRT] Tactic Device request: 2384MB Available: 2277MB. Device memory is insufficient to use tactic. 
+[12/28/2023-02:20:53] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2384 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:20:53] [W] [TRT] Tactic Device request: 2384MB Available: 2277MB. Device memory is insufficient to use tactic. +[12/28/2023-02:20:53] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2384 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:20:55] [W] [TRT] Tactic Device request: 2394MB Available: 2274MB. Device memory is insufficient to use tactic. +[12/28/2023-02:20:55] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:20:55] [W] [TRT] Tactic Device request: 2394MB Available: 2274MB. Device memory is insufficient to use tactic. +[12/28/2023-02:20:55] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2394 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:20:55] [W] [TRT] Tactic Device request: 2394MB Available: 2274MB. Device memory is insufficient to use tactic. +[12/28/2023-02:20:55] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:20:56] [W] [TRT] Tactic Device request: 2392MB Available: 2275MB. Device memory is insufficient to use tactic. +[12/28/2023-02:20:56] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-02:20:56] [W] [TRT] Tactic Device request: 2392MB Available: 2275MB. Device memory is insufficient to use tactic. +[12/28/2023-02:20:56] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2392 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:20:56] [W] [TRT] Tactic Device request: 2392MB Available: 2275MB. Device memory is insufficient to use tactic. +[12/28/2023-02:20:56] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:20:58] [W] [TRT] Tactic Device request: 2391MB Available: 2274MB. Device memory is insufficient to use tactic. +[12/28/2023-02:20:58] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2391 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:20:58] [W] [TRT] Tactic Device request: 2391MB Available: 2274MB. Device memory is insufficient to use tactic. +[12/28/2023-02:20:58] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2391 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:20:58] [W] [TRT] Tactic Device request: 2390MB Available: 2274MB. Device memory is insufficient to use tactic. +[12/28/2023-02:20:58] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2390 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:20:58] [W] [TRT] Tactic Device request: 2390MB Available: 2274MB. Device memory is insufficient to use tactic. 
+[12/28/2023-02:20:58] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2390 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:21:15] [W] [TRT] Tactic Device request: 2457MB Available: 2252MB. Device memory is insufficient to use tactic. +[12/28/2023-02:21:15] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:21:15] [W] [TRT] Tactic Device request: 2457MB Available: 2253MB. Device memory is insufficient to use tactic. +[12/28/2023-02:21:15] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2457 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:21:15] [W] [TRT] Tactic Device request: 2457MB Available: 2253MB. Device memory is insufficient to use tactic. +[12/28/2023-02:21:15] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:21:16] [W] [TRT] Tactic Device request: 2456MB Available: 2252MB. Device memory is insufficient to use tactic. +[12/28/2023-02:21:16] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2456 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:21:16] [W] [TRT] Tactic Device request: 2456MB Available: 2252MB. Device memory is insufficient to use tactic. +[12/28/2023-02:21:16] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2456 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-02:28:57] [W] [TRT] Tactic Device request: 2454MB Available: 2123MB. Device memory is insufficient to use tactic. +[12/28/2023-02:28:57] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2454 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:28:57] [W] [TRT] Tactic Device request: 2454MB Available: 2124MB. Device memory is insufficient to use tactic. +[12/28/2023-02:28:57] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2454 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:28:58] [W] [TRT] Tactic Device request: 2453MB Available: 2124MB. Device memory is insufficient to use tactic. +[12/28/2023-02:28:59] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2453 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:28:59] [W] [TRT] Tactic Device request: 2453MB Available: 2124MB. Device memory is insufficient to use tactic. +[12/28/2023-02:28:59] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2453 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:29:01] [W] [TRT] Tactic Device request: 2457MB Available: 2124MB. Device memory is insufficient to use tactic. +[12/28/2023-02:29:01] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:29:01] [W] [TRT] Tactic Device request: 2457MB Available: 2124MB. Device memory is insufficient to use tactic. 
+[12/28/2023-02:29:01] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2457 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:29:01] [W] [TRT] Tactic Device request: 2457MB Available: 2124MB. Device memory is insufficient to use tactic. +[12/28/2023-02:29:01] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:29:02] [W] [TRT] Tactic Device request: 2456MB Available: 2124MB. Device memory is insufficient to use tactic. +[12/28/2023-02:29:02] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2456 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:29:02] [W] [TRT] Tactic Device request: 2456MB Available: 2124MB. Device memory is insufficient to use tactic. +[12/28/2023-02:29:02] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2456 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:29:29] [W] [TRT] Tactic Device request: 3587MB Available: 2386MB. Device memory is insufficient to use tactic. +[12/28/2023-02:29:29] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:29:29] [W] [TRT] Tactic Device request: 3587MB Available: 2386MB. Device memory is insufficient to use tactic. +[12/28/2023-02:29:29] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3587 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-02:29:29] [W] [TRT] Tactic Device request: 3587MB Available: 2386MB. Device memory is insufficient to use tactic. +[12/28/2023-02:29:29] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:29:30] [W] [TRT] Tactic Device request: 3585MB Available: 2411MB. Device memory is insufficient to use tactic. +[12/28/2023-02:29:30] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3585 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:29:30] [W] [TRT] Tactic Device request: 3585MB Available: 2411MB. Device memory is insufficient to use tactic. +[12/28/2023-02:29:30] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3585 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:37:32] [W] [TRT] Tactic Device request: 3556MB Available: 1971MB. Device memory is insufficient to use tactic. +[12/28/2023-02:37:32] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:37:32] [W] [TRT] Tactic Device request: 3556MB Available: 1971MB. Device memory is insufficient to use tactic. +[12/28/2023-02:37:32] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3556 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:37:32] [W] [TRT] Tactic Device request: 3556MB Available: 1971MB. Device memory is insufficient to use tactic. 
+[12/28/2023-02:37:32] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:37:33] [W] [TRT] Tactic Device request: 3551MB Available: 1971MB. Device memory is insufficient to use tactic. +[12/28/2023-02:37:33] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3551 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:37:33] [W] [TRT] Tactic Device request: 3551MB Available: 1970MB. Device memory is insufficient to use tactic. +[12/28/2023-02:37:33] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3551 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:52:31] [W] [TRT] Tactic Device request: 2385MB Available: 1861MB. Device memory is insufficient to use tactic. +[12/28/2023-02:52:31] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2385 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:52:31] [W] [TRT] Tactic Device request: 2385MB Available: 1861MB. Device memory is insufficient to use tactic. +[12/28/2023-02:52:32] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2385 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:52:32] [W] [TRT] Tactic Device request: 2384MB Available: 1861MB. Device memory is insufficient to use tactic. +[12/28/2023-02:52:32] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2384 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-02:52:32] [W] [TRT] Tactic Device request: 2384MB Available: 1861MB. Device memory is insufficient to use tactic. +[12/28/2023-02:52:32] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2384 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:52:56] [W] [TRT] Tactic Device request: 2126MB Available: 1857MB. Device memory is insufficient to use tactic. +[12/28/2023-02:52:56] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2126 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:52:57] [W] [TRT] Tactic Device request: 2126MB Available: 1856MB. Device memory is insufficient to use tactic. +[12/28/2023-02:52:57] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2126 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:52:57] [W] [TRT] Tactic Device request: 2126MB Available: 1855MB. Device memory is insufficient to use tactic. +[12/28/2023-02:52:57] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2126 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:53:00] [W] [TRT] Tactic Device request: 2124MB Available: 1857MB. Device memory is insufficient to use tactic. +[12/28/2023-02:53:00] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2124 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:53:00] [W] [TRT] Tactic Device request: 2124MB Available: 1857MB. Device memory is insufficient to use tactic. 
+[12/28/2023-02:53:00] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2124 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:53:03] [W] [TRT] Tactic Device request: 2125MB Available: 1857MB. Device memory is insufficient to use tactic. +[12/28/2023-02:53:03] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2125 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:53:03] [W] [TRT] Tactic Device request: 2125MB Available: 1856MB. Device memory is insufficient to use tactic. +[12/28/2023-02:53:03] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2125 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:53:04] [W] [TRT] Tactic Device request: 2125MB Available: 1856MB. Device memory is insufficient to use tactic. +[12/28/2023-02:53:04] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2125 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:53:06] [W] [TRT] Tactic Device request: 2124MB Available: 1857MB. Device memory is insufficient to use tactic. +[12/28/2023-02:53:06] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2124 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:53:07] [W] [TRT] Tactic Device request: 2124MB Available: 1857MB. Device memory is insufficient to use tactic. +[12/28/2023-02:53:07] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2124 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-02:53:21] [W] [TRT] Tactic Device request: 2125MB Available: 1856MB. Device memory is insufficient to use tactic. +[12/28/2023-02:53:21] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2125 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:53:21] [W] [TRT] Tactic Device request: 2125MB Available: 1855MB. Device memory is insufficient to use tactic. +[12/28/2023-02:53:21] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2125 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:53:22] [W] [TRT] Tactic Device request: 2125MB Available: 1856MB. Device memory is insufficient to use tactic. +[12/28/2023-02:53:22] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2125 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:53:24] [W] [TRT] Tactic Device request: 2124MB Available: 1856MB. Device memory is insufficient to use tactic. +[12/28/2023-02:53:24] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2124 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-02:53:24] [W] [TRT] Tactic Device request: 2124MB Available: 1855MB. Device memory is insufficient to use tactic. +[12/28/2023-02:53:25] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2124 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-03:02:55] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. 
+[12/28/2023-03:02:56] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/28/2023-03:02:56] [W] * GPU compute time is unstable, with coefficient of variance = 6.46901%. +[12/28/2023-03:02:56] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. diff --git a/yolo_nas_pose_s_fp16.onnx.best.engine.log b/yolo_nas_pose_s_fp16.onnx.best.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..62ed61f4eafde959b858b71aa23a6f783596362a --- /dev/null +++ b/yolo_nas_pose_s_fp16.onnx.best.engine.log @@ -0,0 +1,301 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_fp16.onnx --best --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_fp16.onnx.best.engine +[12/28/2023-01:35:27] [I] === Model Options === +[12/28/2023-01:35:27] [I] Format: ONNX +[12/28/2023-01:35:27] [I] Model: yolo_nas_pose_s_fp16.onnx +[12/28/2023-01:35:27] [I] Output: +[12/28/2023-01:35:27] [I] === Build Options === +[12/28/2023-01:35:27] [I] Max batch: explicit batch +[12/28/2023-01:35:27] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-01:35:27] [I] minTiming: 1 +[12/28/2023-01:35:27] [I] avgTiming: 8 +[12/28/2023-01:35:27] [I] Precision: FP32+FP16+INT8 +[12/28/2023-01:35:27] [I] LayerPrecisions: +[12/28/2023-01:35:27] [I] Calibration: Dynamic +[12/28/2023-01:35:27] [I] Refit: Disabled +[12/28/2023-01:35:27] [I] Sparsity: Disabled +[12/28/2023-01:35:27] [I] Safe mode: Disabled +[12/28/2023-01:35:27] [I] DirectIO mode: Disabled +[12/28/2023-01:35:27] [I] Restricted mode: Disabled +[12/28/2023-01:35:27] [I] Build only: Disabled +[12/28/2023-01:35:27] [I] Save engine: yolo_nas_pose_s_fp16.onnx.best.engine +[12/28/2023-01:35:27] [I] Load engine: +[12/28/2023-01:35:27] [I] Profiling verbosity: 0 +[12/28/2023-01:35:27] [I] Tactic sources: Using default 
tactic sources +[12/28/2023-01:35:27] [I] timingCacheMode: local +[12/28/2023-01:35:27] [I] timingCacheFile: +[12/28/2023-01:35:27] [I] Heuristic: Disabled +[12/28/2023-01:35:27] [I] Preview Features: Use default preview flags. +[12/28/2023-01:35:27] [I] Input(s)s format: fp32:CHW +[12/28/2023-01:35:27] [I] Output(s)s format: fp32:CHW +[12/28/2023-01:35:27] [I] Input build shapes: model +[12/28/2023-01:35:27] [I] Input calibration shapes: model +[12/28/2023-01:35:27] [I] === System Options === +[12/28/2023-01:35:27] [I] Device: 0 +[12/28/2023-01:35:27] [I] DLACore: +[12/28/2023-01:35:27] [I] Plugins: +[12/28/2023-01:35:27] [I] === Inference Options === +[12/28/2023-01:35:27] [I] Batch: Explicit +[12/28/2023-01:35:27] [I] Input inference shapes: model +[12/28/2023-01:35:27] [I] Iterations: 10 +[12/28/2023-01:35:27] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-01:35:27] [I] Sleep time: 0ms +[12/28/2023-01:35:27] [I] Idle time: 0ms +[12/28/2023-01:35:27] [I] Streams: 1 +[12/28/2023-01:35:27] [I] ExposeDMA: Disabled +[12/28/2023-01:35:27] [I] Data transfers: Enabled +[12/28/2023-01:35:27] [I] Spin-wait: Disabled +[12/28/2023-01:35:27] [I] Multithreading: Disabled +[12/28/2023-01:35:27] [I] CUDA Graph: Disabled +[12/28/2023-01:35:27] [I] Separate profiling: Disabled +[12/28/2023-01:35:27] [I] Time Deserialize: Disabled +[12/28/2023-01:35:27] [I] Time Refit: Disabled +[12/28/2023-01:35:27] [I] NVTX verbosity: 0 +[12/28/2023-01:35:27] [I] Persistent Cache Ratio: 0 +[12/28/2023-01:35:27] [I] Inputs: +[12/28/2023-01:35:27] [I] === Reporting Options === +[12/28/2023-01:35:27] [I] Verbose: Disabled +[12/28/2023-01:35:27] [I] Averages: 100 inferences +[12/28/2023-01:35:27] [I] Percentiles: 90,95,99 +[12/28/2023-01:35:27] [I] Dump refittable layers:Disabled +[12/28/2023-01:35:27] [I] Dump output: Disabled +[12/28/2023-01:35:27] [I] Profile: Disabled +[12/28/2023-01:35:27] [I] Export timing to JSON file: +[12/28/2023-01:35:27] [I] Export output to JSON file: 
+[12/28/2023-01:35:27] [I] Export profile to JSON file: +[12/28/2023-01:35:27] [I] +[12/28/2023-01:35:27] [I] === Device Information === +[12/28/2023-01:35:27] [I] Selected Device: Orin +[12/28/2023-01:35:27] [I] Compute Capability: 8.7 +[12/28/2023-01:35:27] [I] SMs: 8 +[12/28/2023-01:35:27] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-01:35:27] [I] Device Global Memory: 7471 MiB +[12/28/2023-01:35:27] [I] Shared Memory per SM: 164 KiB +[12/28/2023-01:35:27] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-01:35:27] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-01:35:27] [I] +[12/28/2023-01:35:27] [I] TensorRT version: 8.5.2 +[12/28/2023-01:35:32] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 2988 (MiB) +[12/28/2023-01:35:37] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +284, now: CPU 574, GPU 3292 (MiB) +[12/28/2023-01:35:37] [I] Start parsing network model +[12/28/2023-01:35:37] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-01:35:37] [I] [TRT] Input filename: yolo_nas_pose_s_fp16.onnx +[12/28/2023-01:35:37] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-01:35:37] [I] [TRT] Opset version: 17 +[12/28/2023-01:35:37] [I] [TRT] Producer name: pytorch +[12/28/2023-01:35:37] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-01:35:37] [I] [TRT] Domain: +[12/28/2023-01:35:37] [I] [TRT] Model version: 0 +[12/28/2023-01:35:37] [I] [TRT] Doc string: +[12/28/2023-01:35:37] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-01:35:38] [I] Finish parsing network model +[12/28/2023-01:35:38] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/28/2023-01:35:38] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} 
+[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 394) [Constant] +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 395) [Constant] +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 396) [Constant] +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stem/conv/rbr_reparam/Conv + /model/backbone/stem/conv/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/downsample/rbr_reparam/Conv + /model/backbone/stage1/downsample/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv2/conv/Conv + /model/backbone/stage1/blocks/conv2/act/Relu || /model/backbone/stage1/blocks/conv1/conv/Conv + /model/backbone/stage1/blocks/conv1/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 15) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] POINTWISE: 
PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 23) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv3/conv/Conv + /model/backbone/stage1/blocks/conv3/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_skip2/conv/Conv + /model/neck/neck2/reduce_skip2/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/downsample/rbr_reparam/Conv + /model/backbone/stage2/downsample/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/downsample/conv/Conv + /model/neck/neck2/downsample/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv2/conv/Conv + /model/backbone/stage2/blocks/conv2/act/Relu || /model/backbone/stage2/blocks/conv1/conv/Conv + /model/backbone/stage2/blocks/conv1/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 44) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + 
/model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 52) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 60) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv3/conv/Conv + /model/backbone/stage2/blocks/conv3/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip2/conv/Conv + /model/neck/neck1/reduce_skip2/act/Relu || /model/neck/neck2/reduce_skip1/conv/Conv + /model/neck/neck2/reduce_skip1/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/downsample/rbr_reparam/Conv + /model/backbone/stage3/downsample/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/downsample/conv/Conv + 
/model/neck/neck1/downsample/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv2/conv/Conv + /model/backbone/stage3/blocks/conv2/act/Relu || /model/backbone/stage3/blocks/conv1/conv/Conv + /model/backbone/stage3/blocks/conv1/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 83) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 91) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 99) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 107) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.4.alpha + (Unnamed Layer* 115) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Add) +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/backbone/stage3/blocks/conv3/conv/Conv + /model/backbone/stage3/blocks/conv3/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip1/conv/Conv + /model/neck/neck1/reduce_skip1/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/downsample/rbr_reparam/Conv + /model/backbone/stage4/downsample/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv2/conv/Conv + /model/backbone/stage4/blocks/conv2/act/Relu || /model/backbone/stage4/blocks/conv1/conv/Conv + /model/backbone/stage4/blocks/conv1/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 134) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 142) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, 
/model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv3/conv/Conv + /model/backbone/stage4/blocks/conv3/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv1/conv/Conv + /model/backbone/context_module/cv1/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/cv1/act/Relu_output_0 copy +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv2/conv/Conv + /model/backbone/context_module/cv2/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/conv/conv/Conv + /model/neck/neck1/conv/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck1/upsample/ConvTranspose +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_after_concat/conv/Conv + /model/neck/neck1/reduce_after_concat/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv2/conv/Conv + /model/neck/neck1/blocks/conv2/act/Relu || /model/neck/neck1/blocks/conv1/conv/Conv + /model/neck/neck1/blocks/conv1/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + 
/model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 171) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 179) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv3/conv/Conv + /model/neck/neck1/blocks/conv3/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/conv/conv/Conv + /model/neck/neck2/conv/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck2/upsample/ConvTranspose +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/reduce_skip1/act/Relu_output_0 copy +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_after_concat/conv/Conv + /model/neck/neck2/reduce_after_concat/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv2/conv/Conv + /model/neck/neck2/blocks/conv2/act/Relu || /model/neck/neck2/blocks/conv1/conv/Conv + /model/neck/neck2/blocks/conv1/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 200) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 208) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv3/conv/Conv + /model/neck/neck2/blocks/conv3/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/bbox_stem/seq/conv/Conv + /model/heads/head1/bbox_stem/seq/act/Relu || /model/heads/head1/pose_stem/seq/conv/Conv + /model/heads/head1/pose_stem/seq/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/conv/conv/Conv + /model/neck/neck3/conv/act/Relu 
+[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head1/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head1/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv2/conv/Conv + /model/neck/neck3/blocks/conv2/act/Relu || /model/neck/neck3/blocks/conv1/conv/Conv + /model/neck/neck3/blocks/conv1/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/cls_pred/Conv +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_pred/Conv +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_pred/Conv +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 239) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv 
+[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 262) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv3/conv/Conv + /model/neck/neck3/blocks/conv3/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_stem/seq/conv/Conv + /model/heads/head2/pose_stem/seq/act/Relu || /model/heads/head2/bbox_stem/seq/conv/Conv + /model/heads/head2/bbox_stem/seq/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/conv/conv/Conv + /model/neck/neck4/conv/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head2/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head2/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv2/conv/Conv + /model/neck/neck4/blocks/conv2/act/Relu || /model/neck/neck4/blocks/conv1/conv/Conv + /model/neck/neck4/blocks/conv1/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/cls_pred/Conv 
+[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_pred/Conv +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_pred/Conv +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 293) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 316) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-01:35:38] [I] [TRT] 
[GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv3/conv/Conv + /model/neck/neck4/blocks/conv3/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/bbox_stem/seq/conv/Conv + /model/heads/head3/bbox_stem/seq/act/Relu || /model/heads/head3/pose_stem/seq/conv/Conv + /model/heads/head3/pose_stem/seq/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head3/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head3/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/cls_pred/Conv +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_pred/Conv +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.2/seq/act/Relu +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_pred/Conv +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice_1...cast_boxes_to_fp32]} +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] NMS: batched_nms_26 +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 398) [NMS]_1_output[DevicetoShapeHostCopy] +[12/28/2023-01:35:38] [I] 
[TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/28/2023-01:35:38] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] +[12/28/2023-01:35:51] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +395, now: CPU 1145, GPU 3709 (MiB) +[12/28/2023-01:35:54] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +83, GPU +79, now: CPU 1228, GPU 3788 (MiB) +[12/28/2023-01:35:54] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored. +[12/28/2023-03:02:18] [I] [TRT] Total Activation Memory: 7908156416 +[12/28/2023-03:02:19] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/28/2023-03:02:30] [I] [TRT] Total Host Persistent Memory: 284224 +[12/28/2023-03:02:30] [I] [TRT] Total Device Persistent Memory: 77824 +[12/28/2023-03:02:30] [I] [TRT] Total Scratch Memory: 134217728 +[12/28/2023-03:02:30] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 35 MiB, GPU 3164 MiB +[12/28/2023-03:02:30] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 154 steps to complete. +[12/28/2023-03:02:31] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 68.0481ms to assign 14 blocks to 154 nodes requiring 140175360 bytes. +[12/28/2023-03:02:31] [I] [TRT] Total Activation Memory: 140175360 +[12/28/2023-03:02:38] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1594, GPU 5110 (MiB) +[12/28/2023-03:02:38] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +15, GPU +16, now: CPU 15, GPU 16 (MiB) +[12/28/2023-03:02:38] [I] Engine built in 5231.02 sec. 
+[12/28/2023-03:02:40] [I] [TRT] Loaded engine size: 17 MiB +[12/28/2023-03:02:40] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1261, GPU 4970 (MiB) +[12/28/2023-03:02:40] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +15, now: CPU 0, GPU 15 (MiB) +[12/28/2023-03:02:40] [I] Engine deserialized in 0.274266 sec. +[12/28/2023-03:02:40] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1262, GPU 4970 (MiB) +[12/28/2023-03:02:40] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +133, now: CPU 0, GPU 148 (MiB) +[12/28/2023-03:02:40] [I] Setting persistentCacheLimit to 0 bytes. +[12/28/2023-03:02:40] [I] Using random values for input onnx::Cast_0 +[12/28/2023-03:02:40] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/28/2023-03:02:40] [I] Using random values for output graph2_flat_predictions +[12/28/2023-03:02:40] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/28/2023-03:02:40] [I] Starting inference +[12/28/2023-03:02:55] [I] Warmup completed 2 queries over 200 ms +[12/28/2023-03:02:55] [I] Timing trace has 1271 queries over 15.0046 s +[12/28/2023-03:02:55] [I] +[12/28/2023-03:02:55] [I] === Trace details === +[12/28/2023-03:02:55] [I] Trace averages of 100 runs: +[12/28/2023-03:02:55] [I] Average on 100 runs - GPU latency: 11.6586 ms - Host latency: 11.7724 ms (enqueue 11.7397 ms) +[12/28/2023-03:02:55] [I] Average on 100 runs - GPU latency: 11.2151 ms - Host latency: 11.3271 ms (enqueue 11.2903 ms) +[12/28/2023-03:02:55] [I] Average on 100 runs - GPU latency: 11.3022 ms - Host latency: 11.4144 ms (enqueue 11.3786 ms) +[12/28/2023-03:02:55] [I] Average on 100 runs - GPU latency: 11.3059 ms - Host latency: 11.4191 ms (enqueue 11.3806 ms) +[12/28/2023-03:02:55] [I] Average on 100 runs - GPU latency: 11.2014 ms - Host latency: 11.3126 ms (enqueue 11.2798 ms) +[12/28/2023-03:02:55] 
[I] Average on 100 runs - GPU latency: 11.9142 ms - Host latency: 12.0286 ms (enqueue 11.9783 ms) +[12/28/2023-03:02:55] [I] Average on 100 runs - GPU latency: 12.0826 ms - Host latency: 12.1981 ms (enqueue 12.1441 ms) +[12/28/2023-03:02:55] [I] Average on 100 runs - GPU latency: 11.2947 ms - Host latency: 11.4059 ms (enqueue 11.3673 ms) +[12/28/2023-03:02:55] [I] Average on 100 runs - GPU latency: 11.5638 ms - Host latency: 11.6774 ms (enqueue 11.6403 ms) +[12/28/2023-03:02:55] [I] Average on 100 runs - GPU latency: 11.6864 ms - Host latency: 11.8001 ms (enqueue 11.7618 ms) +[12/28/2023-03:02:55] [I] Average on 100 runs - GPU latency: 12.0147 ms - Host latency: 12.1311 ms (enqueue 12.075 ms) +[12/28/2023-03:02:55] [I] Average on 100 runs - GPU latency: 12.1921 ms - Host latency: 12.3067 ms (enqueue 12.2635 ms) +[12/28/2023-03:02:55] [I] +[12/28/2023-03:02:55] [I] === Performance summary === +[12/28/2023-03:02:55] [I] Throughput: 84.7072 qps +[12/28/2023-03:02:55] [I] Latency: min = 10.8282 ms, max = 20.5541 ms, mean = 11.7696 ms, median = 11.7119 ms, percentile(90%) = 12.6196 ms, percentile(95%) = 12.8711 ms, percentile(99%) = 14.5762 ms +[12/28/2023-03:02:55] [I] Enqueue Time: min = 10.8013 ms, max = 20.772 ms, mean = 11.728 ms, median = 11.6685 ms, percentile(90%) = 12.5703 ms, percentile(95%) = 12.8066 ms, percentile(99%) = 14.187 ms +[12/28/2023-03:02:55] [I] H2D Latency: min = 0.0800781 ms, max = 0.193756 ms, mean = 0.0974861 ms, median = 0.0976562 ms, percentile(90%) = 0.0997314 ms, percentile(95%) = 0.100586 ms, percentile(99%) = 0.112061 ms +[12/28/2023-03:02:55] [I] GPU Compute Time: min = 10.7179 ms, max = 20.3231 ms, mean = 11.656 ms, median = 11.6025 ms, percentile(90%) = 12.5049 ms, percentile(95%) = 12.7568 ms, percentile(99%) = 14.4551 ms +[12/28/2023-03:02:55] [I] D2H Latency: min = 0.00292969 ms, max = 0.0439453 ms, mean = 0.0161257 ms, median = 0.0170898 ms, percentile(90%) = 0.0234375 ms, percentile(95%) = 0.0244141 ms, percentile(99%) = 
0.0273438 ms +[12/28/2023-03:02:55] [I] Total Host Walltime: 15.0046 s +[12/28/2023-03:02:55] [I] Total GPU Compute Time: 14.8148 s +[12/28/2023-03:02:56] [I] Explanations of the performance metrics are printed in the verbose logs. +[12/28/2023-03:02:56] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_fp16.onnx --best --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_fp16.onnx.best.engine diff --git a/yolo_nas_pose_s_fp16.onnx.engine.err b/yolo_nas_pose_s_fp16.onnx.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..51df1b61ca5c5866ca44aa924d6b23901a55795d --- /dev/null +++ b/yolo_nas_pose_s_fp16.onnx.engine.err @@ -0,0 +1,8 @@ +[12/28/2023-00:45:37] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/28/2023-00:45:37] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-00:45:37] [E] Error[4]: [network.cpp::validate::2925] Error Code 4: Internal Error (fp16 precision has been set for a layer or layer output, but fp16 is not configured in the builder) +[12/28/2023-00:45:37] [E] Error[2]: [builder.cpp::buildSerializedNetwork::751] Error Code 2: Internal Error (Assertion engine != nullptr failed. ) +[12/28/2023-00:45:37] [E] Engine could not be created from network +[12/28/2023-00:45:37] [E] Building engine failed +[12/28/2023-00:45:37] [E] Failed to create engine from model or file. 
+[12/28/2023-00:45:37] [E] Engine set up failed diff --git a/yolo_nas_pose_s_fp16.onnx.engine.log b/yolo_nas_pose_s_fp16.onnx.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..6b069eb53e2e9010aae79f224c7af1831dcea35c --- /dev/null +++ b/yolo_nas_pose_s_fp16.onnx.engine.log @@ -0,0 +1,91 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_fp16.onnx --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_fp16.onnx.engine +[12/28/2023-00:45:28] [I] === Model Options === +[12/28/2023-00:45:28] [I] Format: ONNX +[12/28/2023-00:45:28] [I] Model: yolo_nas_pose_s_fp16.onnx +[12/28/2023-00:45:28] [I] Output: +[12/28/2023-00:45:28] [I] === Build Options === +[12/28/2023-00:45:28] [I] Max batch: explicit batch +[12/28/2023-00:45:28] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-00:45:28] [I] minTiming: 1 +[12/28/2023-00:45:28] [I] avgTiming: 8 +[12/28/2023-00:45:28] [I] Precision: FP32 +[12/28/2023-00:45:28] [I] LayerPrecisions: +[12/28/2023-00:45:28] [I] Calibration: +[12/28/2023-00:45:28] [I] Refit: Disabled +[12/28/2023-00:45:28] [I] Sparsity: Disabled +[12/28/2023-00:45:28] [I] Safe mode: Disabled +[12/28/2023-00:45:28] [I] DirectIO mode: Disabled +[12/28/2023-00:45:28] [I] Restricted mode: Disabled +[12/28/2023-00:45:28] [I] Build only: Disabled +[12/28/2023-00:45:28] [I] Save engine: yolo_nas_pose_s_fp16.onnx.engine +[12/28/2023-00:45:28] [I] Load engine: +[12/28/2023-00:45:28] [I] Profiling verbosity: 0 +[12/28/2023-00:45:28] [I] Tactic sources: Using default tactic sources +[12/28/2023-00:45:28] [I] timingCacheMode: local +[12/28/2023-00:45:28] [I] timingCacheFile: +[12/28/2023-00:45:28] [I] Heuristic: Disabled +[12/28/2023-00:45:28] [I] Preview Features: Use default preview flags. 
+[12/28/2023-00:45:28] [I] Input(s)s format: fp32:CHW +[12/28/2023-00:45:28] [I] Output(s)s format: fp32:CHW +[12/28/2023-00:45:28] [I] Input build shapes: model +[12/28/2023-00:45:28] [I] Input calibration shapes: model +[12/28/2023-00:45:28] [I] === System Options === +[12/28/2023-00:45:28] [I] Device: 0 +[12/28/2023-00:45:28] [I] DLACore: +[12/28/2023-00:45:28] [I] Plugins: +[12/28/2023-00:45:28] [I] === Inference Options === +[12/28/2023-00:45:28] [I] Batch: Explicit +[12/28/2023-00:45:28] [I] Input inference shapes: model +[12/28/2023-00:45:28] [I] Iterations: 10 +[12/28/2023-00:45:28] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-00:45:28] [I] Sleep time: 0ms +[12/28/2023-00:45:28] [I] Idle time: 0ms +[12/28/2023-00:45:28] [I] Streams: 1 +[12/28/2023-00:45:28] [I] ExposeDMA: Disabled +[12/28/2023-00:45:28] [I] Data transfers: Enabled +[12/28/2023-00:45:28] [I] Spin-wait: Disabled +[12/28/2023-00:45:28] [I] Multithreading: Disabled +[12/28/2023-00:45:28] [I] CUDA Graph: Disabled +[12/28/2023-00:45:28] [I] Separate profiling: Disabled +[12/28/2023-00:45:28] [I] Time Deserialize: Disabled +[12/28/2023-00:45:28] [I] Time Refit: Disabled +[12/28/2023-00:45:28] [I] NVTX verbosity: 0 +[12/28/2023-00:45:28] [I] Persistent Cache Ratio: 0 +[12/28/2023-00:45:28] [I] Inputs: +[12/28/2023-00:45:28] [I] === Reporting Options === +[12/28/2023-00:45:28] [I] Verbose: Disabled +[12/28/2023-00:45:28] [I] Averages: 100 inferences +[12/28/2023-00:45:28] [I] Percentiles: 90,95,99 +[12/28/2023-00:45:28] [I] Dump refittable layers:Disabled +[12/28/2023-00:45:28] [I] Dump output: Disabled +[12/28/2023-00:45:28] [I] Profile: Disabled +[12/28/2023-00:45:28] [I] Export timing to JSON file: +[12/28/2023-00:45:28] [I] Export output to JSON file: +[12/28/2023-00:45:28] [I] Export profile to JSON file: +[12/28/2023-00:45:28] [I] +[12/28/2023-00:45:28] [I] === Device Information === +[12/28/2023-00:45:28] [I] Selected Device: Orin +[12/28/2023-00:45:28] [I] Compute Capability: 8.7 
+[12/28/2023-00:45:28] [I] SMs: 8 +[12/28/2023-00:45:28] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-00:45:28] [I] Device Global Memory: 7471 MiB +[12/28/2023-00:45:28] [I] Shared Memory per SM: 164 KiB +[12/28/2023-00:45:28] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-00:45:28] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-00:45:28] [I] +[12/28/2023-00:45:28] [I] TensorRT version: 8.5.2 +[12/28/2023-00:45:32] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 2990 (MiB) +[12/28/2023-00:45:36] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +285, now: CPU 574, GPU 3296 (MiB) +[12/28/2023-00:45:36] [I] Start parsing network model +[12/28/2023-00:45:37] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-00:45:37] [I] [TRT] Input filename: yolo_nas_pose_s_fp16.onnx +[12/28/2023-00:45:37] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-00:45:37] [I] [TRT] Opset version: 17 +[12/28/2023-00:45:37] [I] [TRT] Producer name: pytorch +[12/28/2023-00:45:37] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-00:45:37] [I] [TRT] Domain: +[12/28/2023-00:45:37] [I] [TRT] Model version: 0 +[12/28/2023-00:45:37] [I] [TRT] Doc string: +[12/28/2023-00:45:37] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-00:45:37] [I] Finish parsing network model +&&&& FAILED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_fp16.onnx --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_fp16.onnx.engine diff --git a/yolo_nas_pose_s_fp16.onnx.fp16.engine b/yolo_nas_pose_s_fp16.onnx.fp16.engine new file mode 100644 index 0000000000000000000000000000000000000000..ef929e4d8934b2886b1b1a7428a48f4909da93eb --- /dev/null +++ b/yolo_nas_pose_s_fp16.onnx.fp16.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c531e1cabb57488b1b1b2d5add041401984c6f1659e66aa33394333a141b8aa +size 32503685 diff --git 
a/yolo_nas_pose_s_fp16.onnx.fp16.engine.err b/yolo_nas_pose_s_fp16.onnx.fp16.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..6cf8be184baaaf8e91a8c3ef816d61917b6a10a6 --- /dev/null +++ b/yolo_nas_pose_s_fp16.onnx.fp16.engine.err @@ -0,0 +1,123 @@ +[12/28/2023-00:45:41] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/28/2023-00:45:41] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-00:48:10] [W] [TRT] Tactic Device request: 3160MB Available: 3019MB. Device memory is insufficient to use tactic. +[12/28/2023-00:48:10] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3160 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:48:11] [W] [TRT] Tactic Device request: 3160MB Available: 3019MB. Device memory is insufficient to use tactic. +[12/28/2023-00:48:11] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3160 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:48:11] [W] [TRT] Tactic Device request: 3160MB Available: 3019MB. Device memory is insufficient to use tactic. +[12/28/2023-00:48:11] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3160 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:48:12] [W] [TRT] Tactic Device request: 3152MB Available: 3015MB. Device memory is insufficient to use tactic. +[12/28/2023-00:48:12] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3152 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-00:48:12] [W] [TRT] Tactic Device request: 3152MB Available: 3013MB. Device memory is insufficient to use tactic. +[12/28/2023-00:48:12] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3152 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:52:57] [W] [TRT] Tactic Device request: 3143MB Available: 2814MB. Device memory is insufficient to use tactic. +[12/28/2023-00:52:57] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3143 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:52:57] [W] [TRT] Tactic Device request: 3143MB Available: 2814MB. Device memory is insufficient to use tactic. +[12/28/2023-00:52:57] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3143 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:53:00] [W] [TRT] Tactic Device request: 3136MB Available: 2800MB. Device memory is insufficient to use tactic. +[12/28/2023-00:53:00] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3136 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:53:00] [W] [TRT] Tactic Device request: 3136MB Available: 2800MB. Device memory is insufficient to use tactic. +[12/28/2023-00:53:00] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3136 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:53:01] [W] [TRT] Tactic Device request: 4711MB Available: 2799MB. Device memory is insufficient to use tactic. 
+[12/28/2023-00:53:01] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:53:01] [W] [TRT] Tactic Device request: 4711MB Available: 2799MB. Device memory is insufficient to use tactic. +[12/28/2023-00:53:01] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4711 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:53:01] [W] [TRT] Tactic Device request: 4711MB Available: 2799MB. Device memory is insufficient to use tactic. +[12/28/2023-00:53:01] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:53:03] [W] [TRT] Tactic Device request: 4701MB Available: 2800MB. Device memory is insufficient to use tactic. +[12/28/2023-00:53:03] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4701 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:53:03] [W] [TRT] Tactic Device request: 4701MB Available: 2800MB. Device memory is insufficient to use tactic. +[12/28/2023-00:53:03] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 4701 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:53:20] [W] [TRT] Tactic Device request: 3152MB Available: 2797MB. Device memory is insufficient to use tactic. +[12/28/2023-00:53:20] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3152 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-00:53:20] [W] [TRT] Tactic Device request: 3152MB Available: 2797MB. Device memory is insufficient to use tactic. +[12/28/2023-00:53:20] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3152 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:53:20] [W] [TRT] Tactic Device request: 3152MB Available: 2797MB. Device memory is insufficient to use tactic. +[12/28/2023-00:53:20] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3152 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:53:21] [W] [TRT] Tactic Device request: 3148MB Available: 2798MB. Device memory is insufficient to use tactic. +[12/28/2023-00:53:21] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3148 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:53:21] [W] [TRT] Tactic Device request: 3148MB Available: 2798MB. Device memory is insufficient to use tactic. +[12/28/2023-00:53:21] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3148 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:59:12] [W] [TRT] Tactic Device request: 3144MB Available: 2679MB. Device memory is insufficient to use tactic. +[12/28/2023-00:59:12] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3144 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:59:12] [W] [TRT] Tactic Device request: 3144MB Available: 2679MB. Device memory is insufficient to use tactic. 
+[12/28/2023-00:59:12] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3144 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:59:13] [W] [TRT] Tactic Device request: 3144MB Available: 2679MB. Device memory is insufficient to use tactic. +[12/28/2023-00:59:13] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3144 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:59:14] [W] [TRT] Tactic Device request: 3140MB Available: 2678MB. Device memory is insufficient to use tactic. +[12/28/2023-00:59:14] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3140 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:59:14] [W] [TRT] Tactic Device request: 3140MB Available: 2678MB. Device memory is insufficient to use tactic. +[12/28/2023-00:59:14] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3140 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:59:15] [W] [TRT] Tactic Device request: 7056MB Available: 2679MB. Device memory is insufficient to use tactic. +[12/28/2023-00:59:15] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:59:15] [W] [TRT] Tactic Device request: 7056MB Available: 2679MB. Device memory is insufficient to use tactic. +[12/28/2023-00:59:15] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 7056 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-00:59:15] [W] [TRT] Tactic Device request: 7056MB Available: 2679MB. Device memory is insufficient to use tactic. +[12/28/2023-00:59:15] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:59:17] [W] [TRT] Tactic Device request: 7050MB Available: 2679MB. Device memory is insufficient to use tactic. +[12/28/2023-00:59:17] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7050 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:59:17] [W] [TRT] Tactic Device request: 7050MB Available: 2679MB. Device memory is insufficient to use tactic. +[12/28/2023-00:59:17] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 7050 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-01:13:45] [W] [TRT] Tactic Device request: 3587MB Available: 2526MB. Device memory is insufficient to use tactic. +[12/28/2023-01:13:45] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-01:13:45] [W] [TRT] Tactic Device request: 3587MB Available: 2526MB. Device memory is insufficient to use tactic. +[12/28/2023-01:13:45] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3587 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-01:13:45] [W] [TRT] Tactic Device request: 3587MB Available: 2526MB. Device memory is insufficient to use tactic. 
+[12/28/2023-01:13:45] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-01:13:47] [W] [TRT] Tactic Device request: 3585MB Available: 2526MB. Device memory is insufficient to use tactic. +[12/28/2023-01:13:47] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3585 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-01:13:47] [W] [TRT] Tactic Device request: 3585MB Available: 2526MB. Device memory is insufficient to use tactic. +[12/28/2023-01:13:47] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3585 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-01:18:46] [W] [TRT] Tactic Device request: 3556MB Available: 2429MB. Device memory is insufficient to use tactic. +[12/28/2023-01:18:46] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-01:18:46] [W] [TRT] Tactic Device request: 3556MB Available: 2429MB. Device memory is insufficient to use tactic. +[12/28/2023-01:18:46] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3556 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-01:18:46] [W] [TRT] Tactic Device request: 3556MB Available: 2428MB. Device memory is insufficient to use tactic. +[12/28/2023-01:18:46] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-01:18:47] [W] [TRT] Tactic Device request: 3551MB Available: 2429MB. Device memory is insufficient to use tactic. +[12/28/2023-01:18:47] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3551 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-01:18:47] [W] [TRT] Tactic Device request: 3551MB Available: 2429MB. Device memory is insufficient to use tactic. +[12/28/2023-01:18:47] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3551 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-01:35:23] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. +[12/28/2023-01:35:23] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/28/2023-01:35:23] [W] * GPU compute time is unstable, with coefficient of variance = 3.12559%. +[12/28/2023-01:35:23] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. 
diff --git a/yolo_nas_pose_s_fp16.onnx.fp16.engine.log b/yolo_nas_pose_s_fp16.onnx.fp16.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..a92c136cd121628649db2590cb3ea24ee705aee9 --- /dev/null +++ b/yolo_nas_pose_s_fp16.onnx.fp16.engine.log @@ -0,0 +1,298 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_fp16.onnx --fp16 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_fp16.onnx.fp16.engine +[12/28/2023-00:45:37] [I] === Model Options === +[12/28/2023-00:45:37] [I] Format: ONNX +[12/28/2023-00:45:37] [I] Model: yolo_nas_pose_s_fp16.onnx +[12/28/2023-00:45:37] [I] Output: +[12/28/2023-00:45:37] [I] === Build Options === +[12/28/2023-00:45:37] [I] Max batch: explicit batch +[12/28/2023-00:45:37] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-00:45:37] [I] minTiming: 1 +[12/28/2023-00:45:37] [I] avgTiming: 8 +[12/28/2023-00:45:37] [I] Precision: FP32+FP16 +[12/28/2023-00:45:37] [I] LayerPrecisions: +[12/28/2023-00:45:37] [I] Calibration: +[12/28/2023-00:45:37] [I] Refit: Disabled +[12/28/2023-00:45:37] [I] Sparsity: Disabled +[12/28/2023-00:45:37] [I] Safe mode: Disabled +[12/28/2023-00:45:37] [I] DirectIO mode: Disabled +[12/28/2023-00:45:37] [I] Restricted mode: Disabled +[12/28/2023-00:45:37] [I] Build only: Disabled +[12/28/2023-00:45:37] [I] Save engine: yolo_nas_pose_s_fp16.onnx.fp16.engine +[12/28/2023-00:45:37] [I] Load engine: +[12/28/2023-00:45:37] [I] Profiling verbosity: 0 +[12/28/2023-00:45:37] [I] Tactic sources: Using default tactic sources +[12/28/2023-00:45:37] [I] timingCacheMode: local +[12/28/2023-00:45:37] [I] timingCacheFile: +[12/28/2023-00:45:37] [I] Heuristic: Disabled +[12/28/2023-00:45:37] [I] Preview Features: Use default preview flags. 
+[12/28/2023-00:45:37] [I] Input(s)s format: fp32:CHW +[12/28/2023-00:45:37] [I] Output(s)s format: fp32:CHW +[12/28/2023-00:45:37] [I] Input build shapes: model +[12/28/2023-00:45:37] [I] Input calibration shapes: model +[12/28/2023-00:45:37] [I] === System Options === +[12/28/2023-00:45:37] [I] Device: 0 +[12/28/2023-00:45:37] [I] DLACore: +[12/28/2023-00:45:37] [I] Plugins: +[12/28/2023-00:45:37] [I] === Inference Options === +[12/28/2023-00:45:37] [I] Batch: Explicit +[12/28/2023-00:45:37] [I] Input inference shapes: model +[12/28/2023-00:45:37] [I] Iterations: 10 +[12/28/2023-00:45:37] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-00:45:37] [I] Sleep time: 0ms +[12/28/2023-00:45:37] [I] Idle time: 0ms +[12/28/2023-00:45:37] [I] Streams: 1 +[12/28/2023-00:45:37] [I] ExposeDMA: Disabled +[12/28/2023-00:45:37] [I] Data transfers: Enabled +[12/28/2023-00:45:37] [I] Spin-wait: Disabled +[12/28/2023-00:45:37] [I] Multithreading: Disabled +[12/28/2023-00:45:37] [I] CUDA Graph: Disabled +[12/28/2023-00:45:37] [I] Separate profiling: Disabled +[12/28/2023-00:45:37] [I] Time Deserialize: Disabled +[12/28/2023-00:45:37] [I] Time Refit: Disabled +[12/28/2023-00:45:37] [I] NVTX verbosity: 0 +[12/28/2023-00:45:37] [I] Persistent Cache Ratio: 0 +[12/28/2023-00:45:37] [I] Inputs: +[12/28/2023-00:45:37] [I] === Reporting Options === +[12/28/2023-00:45:37] [I] Verbose: Disabled +[12/28/2023-00:45:37] [I] Averages: 100 inferences +[12/28/2023-00:45:37] [I] Percentiles: 90,95,99 +[12/28/2023-00:45:37] [I] Dump refittable layers:Disabled +[12/28/2023-00:45:37] [I] Dump output: Disabled +[12/28/2023-00:45:37] [I] Profile: Disabled +[12/28/2023-00:45:37] [I] Export timing to JSON file: +[12/28/2023-00:45:37] [I] Export output to JSON file: +[12/28/2023-00:45:37] [I] Export profile to JSON file: +[12/28/2023-00:45:37] [I] +[12/28/2023-00:45:38] [I] === Device Information === +[12/28/2023-00:45:38] [I] Selected Device: Orin +[12/28/2023-00:45:38] [I] Compute Capability: 8.7 
+[12/28/2023-00:45:38] [I] SMs: 8 +[12/28/2023-00:45:38] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-00:45:38] [I] Device Global Memory: 7471 MiB +[12/28/2023-00:45:38] [I] Shared Memory per SM: 164 KiB +[12/28/2023-00:45:38] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-00:45:38] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-00:45:38] [I] +[12/28/2023-00:45:38] [I] TensorRT version: 8.5.2 +[12/28/2023-00:45:38] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 2991 (MiB) +[12/28/2023-00:45:41] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +284, now: CPU 574, GPU 3297 (MiB) +[12/28/2023-00:45:41] [I] Start parsing network model +[12/28/2023-00:45:41] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-00:45:41] [I] [TRT] Input filename: yolo_nas_pose_s_fp16.onnx +[12/28/2023-00:45:41] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-00:45:41] [I] [TRT] Opset version: 17 +[12/28/2023-00:45:41] [I] [TRT] Producer name: pytorch +[12/28/2023-00:45:41] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-00:45:41] [I] [TRT] Domain: +[12/28/2023-00:45:41] [I] [TRT] Model version: 0 +[12/28/2023-00:45:41] [I] [TRT] Doc string: +[12/28/2023-00:45:41] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-00:45:41] [I] Finish parsing network model +[12/28/2023-00:45:41] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/28/2023-00:45:41] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 394) [Constant] +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 395) [Constant] +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 396) 
[Constant] +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stem/conv/rbr_reparam/Conv + /model/backbone/stem/conv/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/downsample/rbr_reparam/Conv + /model/backbone/stage1/downsample/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv2/conv/Conv + /model/backbone/stage1/blocks/conv2/act/Relu || /model/backbone/stage1/blocks/conv1/conv/Conv + /model/backbone/stage1/blocks/conv1/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 15) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 23) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] COPY: 
/model/backbone/stage1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv3/conv/Conv + /model/backbone/stage1/blocks/conv3/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_skip2/conv/Conv + /model/neck/neck2/reduce_skip2/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/downsample/rbr_reparam/Conv + /model/backbone/stage2/downsample/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/downsample/conv/Conv + /model/neck/neck2/downsample/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv2/conv/Conv + /model/backbone/stage2/blocks/conv2/act/Relu || /model/backbone/stage2/blocks/conv1/conv/Conv + /model/backbone/stage2/blocks/conv1/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 44) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + 
/model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 52) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 60) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv3/conv/Conv + /model/backbone/stage2/blocks/conv3/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip2/conv/Conv + /model/neck/neck1/reduce_skip2/act/Relu || /model/neck/neck2/reduce_skip1/conv/Conv + /model/neck/neck2/reduce_skip1/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/downsample/rbr_reparam/Conv + /model/backbone/stage3/downsample/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/downsample/conv/Conv + /model/neck/neck1/downsample/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv2/conv/Conv + /model/backbone/stage3/blocks/conv2/act/Relu || 
/model/backbone/stage3/blocks/conv1/conv/Conv + /model/backbone/stage3/blocks/conv1/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 83) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 91) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] POINTWISE: 
PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 99) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 107) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.4.alpha + (Unnamed Layer* 115) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Add) +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv3/conv/Conv + /model/backbone/stage3/blocks/conv3/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip1/conv/Conv + /model/neck/neck1/reduce_skip1/act/Relu 
+[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/downsample/rbr_reparam/Conv + /model/backbone/stage4/downsample/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv2/conv/Conv + /model/backbone/stage4/blocks/conv2/act/Relu || /model/backbone/stage4/blocks/conv1/conv/Conv + /model/backbone/stage4/blocks/conv1/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 134) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 142) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/backbone/stage4/blocks/conv3/conv/Conv + /model/backbone/stage4/blocks/conv3/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv1/conv/Conv + /model/backbone/context_module/cv1/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/cv1/act/Relu_output_0 copy +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv2/conv/Conv + /model/backbone/context_module/cv2/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/conv/conv/Conv + /model/neck/neck1/conv/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck1/upsample/ConvTranspose +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_after_concat/conv/Conv + /model/neck/neck1/reduce_after_concat/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv2/conv/Conv + /model/neck/neck1/blocks/conv2/act/Relu || /model/neck/neck1/blocks/conv1/conv/Conv + /model/neck/neck1/blocks/conv1/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 171) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, 
/model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 179) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv3/conv/Conv + /model/neck/neck1/blocks/conv3/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/conv/conv/Conv + /model/neck/neck2/conv/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck2/upsample/ConvTranspose +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/reduce_skip1/act/Relu_output_0 copy +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_after_concat/conv/Conv + /model/neck/neck2/reduce_after_concat/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv2/conv/Conv + /model/neck/neck2/blocks/conv2/act/Relu || /model/neck/neck2/blocks/conv1/conv/Conv + /model/neck/neck2/blocks/conv1/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv 
+ /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 200) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 208) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv3/conv/Conv + /model/neck/neck2/blocks/conv3/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/bbox_stem/seq/conv/Conv + /model/heads/head1/bbox_stem/seq/act/Relu || /model/heads/head1/pose_stem/seq/conv/Conv + /model/heads/head1/pose_stem/seq/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/conv/conv/Conv + /model/neck/neck3/conv/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head1/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head1/cls_convs/cls_convs.0/seq/act/Relu 
+[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv2/conv/Conv + /model/neck/neck3/blocks/conv2/act/Relu || /model/neck/neck3/blocks/conv1/conv/Conv + /model/neck/neck3/blocks/conv1/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/cls_pred/Conv +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_pred/Conv +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_pred/Conv +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 239) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 262) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv3/conv/Conv + /model/neck/neck3/blocks/conv3/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_stem/seq/conv/Conv + /model/heads/head2/pose_stem/seq/act/Relu || /model/heads/head2/bbox_stem/seq/conv/Conv + /model/heads/head2/bbox_stem/seq/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/conv/conv/Conv + /model/neck/neck4/conv/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head2/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head2/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv2/conv/Conv + /model/neck/neck4/blocks/conv2/act/Relu || /model/neck/neck4/blocks/conv1/conv/Conv + /model/neck/neck4/blocks/conv1/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/cls_pred/Conv +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_pred/Conv +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv + 
/model/heads/head2/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_pred/Conv +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 293) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 316) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv3/conv/Conv + /model/neck/neck4/blocks/conv3/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/bbox_stem/seq/conv/Conv 
+ /model/heads/head3/bbox_stem/seq/act/Relu || /model/heads/head3/pose_stem/seq/conv/Conv + /model/heads/head3/pose_stem/seq/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head3/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head3/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/cls_pred/Conv +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_pred/Conv +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.2/seq/act/Relu +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_pred/Conv +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice_1...cast_boxes_to_fp32]} +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] NMS: batched_nms_26 +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 398) [NMS]_1_output[DevicetoShapeHostCopy] +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/28/2023-00:45:41] [I] [TRT] [GpuLayer] 
TRAIN_STATION: [trainStation3] +[12/28/2023-00:45:50] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +457, now: CPU 1145, GPU 3815 (MiB) +[12/28/2023-00:45:51] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +82, GPU +128, now: CPU 1227, GPU 3943 (MiB) +[12/28/2023-00:45:51] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored. +[12/28/2023-01:34:52] [I] [TRT] Total Activation Memory: 7949772800 +[12/28/2023-01:34:52] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/28/2023-01:35:01] [I] [TRT] Total Host Persistent Memory: 318624 +[12/28/2023-01:35:01] [I] [TRT] Total Device Persistent Memory: 178176 +[12/28/2023-01:35:01] [I] [TRT] Total Scratch Memory: 134217728 +[12/28/2023-01:35:01] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 35 MiB, GPU 3147 MiB +[12/28/2023-01:35:01] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 157 steps to complete. +[12/28/2023-01:35:01] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 58.2404ms to assign 13 blocks to 157 nodes requiring 145584128 bytes. +[12/28/2023-01:35:01] [I] [TRT] Total Activation Memory: 145584128 +[12/28/2023-01:35:06] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU -3, now: CPU 1574, GPU 4868 (MiB) +[12/28/2023-01:35:06] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +5, GPU +32, now: CPU 5, GPU 32 (MiB) +[12/28/2023-01:35:06] [I] Engine built in 2968.44 sec. +[12/28/2023-01:35:07] [I] [TRT] Loaded engine size: 30 MiB +[12/28/2023-01:35:07] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1272, GPU 4600 (MiB) +[12/28/2023-01:35:07] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +29, now: CPU 0, GPU 29 (MiB) +[12/28/2023-01:35:07] [I] Engine deserialized in 0.118536 sec. 
+[12/28/2023-01:35:07] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1272, GPU 4600 (MiB) +[12/28/2023-01:35:07] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +139, now: CPU 0, GPU 168 (MiB) +[12/28/2023-01:35:07] [I] Setting persistentCacheLimit to 0 bytes. +[12/28/2023-01:35:07] [I] Using random values for input onnx::Cast_0 +[12/28/2023-01:35:08] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/28/2023-01:35:08] [I] Using random values for output graph2_flat_predictions +[12/28/2023-01:35:08] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/28/2023-01:35:08] [I] Starting inference +[12/28/2023-01:35:23] [I] Warmup completed 12 queries over 200 ms +[12/28/2023-01:35:23] [I] Timing trace has 992 queries over 15.0269 s +[12/28/2023-01:35:23] [I] +[12/28/2023-01:35:23] [I] === Trace details === +[12/28/2023-01:35:23] [I] Trace averages of 100 runs: +[12/28/2023-01:35:23] [I] Average on 100 runs - GPU latency: 15.0474 ms - Host latency: 15.1595 ms (enqueue 15.1156 ms) +[12/28/2023-01:35:23] [I] Average on 100 runs - GPU latency: 14.8888 ms - Host latency: 15.0077 ms (enqueue 14.9605 ms) +[12/28/2023-01:35:23] [I] Average on 100 runs - GPU latency: 14.8946 ms - Host latency: 15.0065 ms (enqueue 14.97 ms) +[12/28/2023-01:35:23] [I] Average on 100 runs - GPU latency: 15.1381 ms - Host latency: 15.2551 ms (enqueue 15.1941 ms) +[12/28/2023-01:35:23] [I] Average on 100 runs - GPU latency: 14.9868 ms - Host latency: 15.0965 ms (enqueue 15.0594 ms) +[12/28/2023-01:35:23] [I] Average on 100 runs - GPU latency: 14.998 ms - Host latency: 15.1081 ms (enqueue 15.071 ms) +[12/28/2023-01:35:23] [I] Average on 100 runs - GPU latency: 15.0962 ms - Host latency: 15.2121 ms (enqueue 15.1554 ms) +[12/28/2023-01:35:23] [I] Average on 100 runs - GPU latency: 15.0497 ms - Host latency: 15.1646 ms (enqueue 15.116 ms) +[12/28/2023-01:35:23] [I] Average on 100 runs 
- GPU latency: 14.938 ms - Host latency: 15.0476 ms (enqueue 15.0057 ms) +[12/28/2023-01:35:23] [I] +[12/28/2023-01:35:23] [I] === Performance summary === +[12/28/2023-01:35:23] [I] Throughput: 66.0151 qps +[12/28/2023-01:35:23] [I] Latency: min = 14.3174 ms, max = 19.3745 ms, mean = 15.1176 ms, median = 14.9447 ms, percentile(90%) = 15.6328 ms, percentile(95%) = 15.7109 ms, percentile(99%) = 16.7114 ms +[12/28/2023-01:35:23] [I] Enqueue Time: min = 14.2823 ms, max = 19.3198 ms, mean = 15.0733 ms, median = 14.915 ms, percentile(90%) = 15.595 ms, percentile(95%) = 15.7031 ms, percentile(99%) = 16.5127 ms +[12/28/2023-01:35:23] [I] H2D Latency: min = 0.079834 ms, max = 0.15332 ms, mean = 0.0969547 ms, median = 0.0979004 ms, percentile(90%) = 0.100586 ms, percentile(95%) = 0.100586 ms, percentile(99%) = 0.112305 ms +[12/28/2023-01:35:23] [I] GPU Compute Time: min = 14.2117 ms, max = 19.25 ms, mean = 15.0044 ms, median = 14.8321 ms, percentile(90%) = 15.5146 ms, percentile(95%) = 15.5957 ms, percentile(99%) = 16.6123 ms +[12/28/2023-01:35:23] [I] D2H Latency: min = 0.00292969 ms, max = 0.088623 ms, mean = 0.0162415 ms, median = 0.0126953 ms, percentile(90%) = 0.0253906 ms, percentile(95%) = 0.0263672 ms, percentile(99%) = 0.0429688 ms +[12/28/2023-01:35:23] [I] Total Host Walltime: 15.0269 s +[12/28/2023-01:35:23] [I] Total GPU Compute Time: 14.8843 s +[12/28/2023-01:35:23] [I] Explanations of the performance metrics are printed in the verbose logs. 
+[12/28/2023-01:35:23] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_fp16.onnx --fp16 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_fp16.onnx.fp16.engine diff --git a/yolo_nas_pose_s_fp16.onnx.int8.engine.err b/yolo_nas_pose_s_fp16.onnx.int8.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..33c6f49ff8f30e7ca71e1466714f69b31c898a1e --- /dev/null +++ b/yolo_nas_pose_s_fp16.onnx.int8.engine.err @@ -0,0 +1,8 @@ +[12/28/2023-03:03:11] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/28/2023-03:03:11] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-03:03:11] [E] Error[4]: [network.cpp::validate::2925] Error Code 4: Internal Error (fp16 precision has been set for a layer or layer output, but fp16 is not configured in the builder) +[12/28/2023-03:03:11] [E] Error[2]: [builder.cpp::buildSerializedNetwork::751] Error Code 2: Internal Error (Assertion engine != nullptr failed. ) +[12/28/2023-03:03:11] [E] Engine could not be created from network +[12/28/2023-03:03:11] [E] Building engine failed +[12/28/2023-03:03:11] [E] Failed to create engine from model or file. 
+[12/28/2023-03:03:11] [E] Engine set up failed diff --git a/yolo_nas_pose_s_fp16.onnx.int8.engine.log b/yolo_nas_pose_s_fp16.onnx.int8.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..bc709d1842b4aa7b2807c6b76f2b98428d46dd23 --- /dev/null +++ b/yolo_nas_pose_s_fp16.onnx.int8.engine.log @@ -0,0 +1,92 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_fp16.onnx --int8 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_fp16.onnx.int8.engine +[12/28/2023-03:03:01] [I] === Model Options === +[12/28/2023-03:03:01] [I] Format: ONNX +[12/28/2023-03:03:01] [I] Model: yolo_nas_pose_s_fp16.onnx +[12/28/2023-03:03:01] [I] Output: +[12/28/2023-03:03:01] [I] === Build Options === +[12/28/2023-03:03:01] [I] Max batch: explicit batch +[12/28/2023-03:03:01] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-03:03:01] [I] minTiming: 1 +[12/28/2023-03:03:01] [I] avgTiming: 8 +[12/28/2023-03:03:01] [I] Precision: FP32+INT8 +[12/28/2023-03:03:01] [I] LayerPrecisions: +[12/28/2023-03:03:01] [I] Calibration: Dynamic +[12/28/2023-03:03:01] [I] Refit: Disabled +[12/28/2023-03:03:01] [I] Sparsity: Disabled +[12/28/2023-03:03:01] [I] Safe mode: Disabled +[12/28/2023-03:03:01] [I] DirectIO mode: Disabled +[12/28/2023-03:03:01] [I] Restricted mode: Disabled +[12/28/2023-03:03:01] [I] Build only: Disabled +[12/28/2023-03:03:01] [I] Save engine: yolo_nas_pose_s_fp16.onnx.int8.engine +[12/28/2023-03:03:01] [I] Load engine: +[12/28/2023-03:03:01] [I] Profiling verbosity: 0 +[12/28/2023-03:03:01] [I] Tactic sources: Using default tactic sources +[12/28/2023-03:03:01] [I] timingCacheMode: local +[12/28/2023-03:03:01] [I] timingCacheFile: +[12/28/2023-03:03:01] [I] Heuristic: Disabled +[12/28/2023-03:03:01] [I] Preview Features: Use default preview flags. 
+[12/28/2023-03:03:01] [I] Input(s)s format: fp32:CHW +[12/28/2023-03:03:01] [I] Output(s)s format: fp32:CHW +[12/28/2023-03:03:01] [I] Input build shapes: model +[12/28/2023-03:03:01] [I] Input calibration shapes: model +[12/28/2023-03:03:01] [I] === System Options === +[12/28/2023-03:03:01] [I] Device: 0 +[12/28/2023-03:03:01] [I] DLACore: +[12/28/2023-03:03:01] [I] Plugins: +[12/28/2023-03:03:01] [I] === Inference Options === +[12/28/2023-03:03:01] [I] Batch: Explicit +[12/28/2023-03:03:01] [I] Input inference shapes: model +[12/28/2023-03:03:01] [I] Iterations: 10 +[12/28/2023-03:03:01] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-03:03:01] [I] Sleep time: 0ms +[12/28/2023-03:03:01] [I] Idle time: 0ms +[12/28/2023-03:03:01] [I] Streams: 1 +[12/28/2023-03:03:01] [I] ExposeDMA: Disabled +[12/28/2023-03:03:01] [I] Data transfers: Enabled +[12/28/2023-03:03:01] [I] Spin-wait: Disabled +[12/28/2023-03:03:01] [I] Multithreading: Disabled +[12/28/2023-03:03:01] [I] CUDA Graph: Disabled +[12/28/2023-03:03:01] [I] Separate profiling: Disabled +[12/28/2023-03:03:01] [I] Time Deserialize: Disabled +[12/28/2023-03:03:01] [I] Time Refit: Disabled +[12/28/2023-03:03:01] [I] NVTX verbosity: 0 +[12/28/2023-03:03:01] [I] Persistent Cache Ratio: 0 +[12/28/2023-03:03:01] [I] Inputs: +[12/28/2023-03:03:01] [I] === Reporting Options === +[12/28/2023-03:03:01] [I] Verbose: Disabled +[12/28/2023-03:03:01] [I] Averages: 100 inferences +[12/28/2023-03:03:01] [I] Percentiles: 90,95,99 +[12/28/2023-03:03:01] [I] Dump refittable layers:Disabled +[12/28/2023-03:03:01] [I] Dump output: Disabled +[12/28/2023-03:03:01] [I] Profile: Disabled +[12/28/2023-03:03:01] [I] Export timing to JSON file: +[12/28/2023-03:03:01] [I] Export output to JSON file: +[12/28/2023-03:03:01] [I] Export profile to JSON file: +[12/28/2023-03:03:01] [I] +[12/28/2023-03:03:01] [I] === Device Information === +[12/28/2023-03:03:01] [I] Selected Device: Orin +[12/28/2023-03:03:01] [I] Compute Capability: 8.7 
+[12/28/2023-03:03:01] [I] SMs: 8 +[12/28/2023-03:03:01] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-03:03:01] [I] Device Global Memory: 7471 MiB +[12/28/2023-03:03:01] [I] Shared Memory per SM: 164 KiB +[12/28/2023-03:03:01] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-03:03:01] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-03:03:01] [I] +[12/28/2023-03:03:01] [I] TensorRT version: 8.5.2 +[12/28/2023-03:03:06] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 3026 (MiB) +[12/28/2023-03:03:10] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +285, now: CPU 574, GPU 3328 (MiB) +[12/28/2023-03:03:10] [I] Start parsing network model +[12/28/2023-03:03:11] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-03:03:11] [I] [TRT] Input filename: yolo_nas_pose_s_fp16.onnx +[12/28/2023-03:03:11] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-03:03:11] [I] [TRT] Opset version: 17 +[12/28/2023-03:03:11] [I] [TRT] Producer name: pytorch +[12/28/2023-03:03:11] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-03:03:11] [I] [TRT] Domain: +[12/28/2023-03:03:11] [I] [TRT] Model version: 0 +[12/28/2023-03:03:11] [I] [TRT] Doc string: +[12/28/2023-03:03:11] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-03:03:11] [I] Finish parsing network model +[12/28/2023-03:03:11] [I] FP32 and INT8 precisions have been specified - more performance might be enabled by additionally specifying --fp16 or --best +&&&& FAILED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_fp16.onnx --int8 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_fp16.onnx.int8.engine diff --git a/yolo_nas_pose_s_fp16.onnx.usage.txt b/yolo_nas_pose_s_fp16.onnx.usage.txt new file mode 100644 index 0000000000000000000000000000000000000000..5543aa709d5b92cc6e776796bea6866e2e67fbfd --- /dev/null +++ b/yolo_nas_pose_s_fp16.onnx.usage.txt @@ -0,0 +1,58 
@@ + +Model exported successfully to yolo_nas_pose_s_fp16.onnx +Model expects input image of shape [1, 3, 640, 640] +Input image dtype is torch.uint8 + +Exported model already contains preprocessing (normalization) step, so you don't need to do it manually. +Preprocessing steps to be applied to input image are: +Sequential( + (0): CastTensorTo(dtype=torch.float16) + (1): ChannelSelect(channels_indexes=tensor([2, 1, 0], device='cuda:0')) + (2): ApplyMeanStd(mean=[0.], scale=[255.]) +) + + +Exported model contains postprocessing (NMS) step with the following parameters: + num_pre_nms_predictions=1000 + max_predictions_per_image=10 + nms_threshold=0.5 + confidence_threshold=0.15 + output_predictions_format=flat + + +Exported model is in ONNX format and can be used with ONNXRuntime +To run inference with ONNXRuntime, please use the following code snippet: + + import onnxruntime + import numpy as np + session = onnxruntime.InferenceSession("yolo_nas_pose_s_fp16.onnx", providers=["CUDAExecutionProvider", "CPUExecutionProvider"]) + inputs = [o.name for o in session.get_inputs()] + outputs = [o.name for o in session.get_outputs()] + + example_input_image = np.zeros((1, 3, 640, 640)).astype(np.uint8) + predictions = session.run(outputs, {inputs[0]: example_input_image}) + +Exported model can also be used with TensorRT +To run inference with TensorRT, please see TensorRT deployment documentation +You can benchmark the model using the following code snippet: + + trtexec --onnx=yolo_nas_pose_s_fp16.onnx --fp16 --avgRuns=100 --duration=15 + + +Exported model has predictions in flat format: + +# flat_predictions is a 2D array of [N,K] shape +# Each row represents (image_index, x_min, y_min, x_max, y_max, confidence, joints...) 
+# Please note all values are floats, so you have to convert them to integers if needed + +[flat_predictions] = predictions +pred_bboxes = flat_predictions[:, 1:5] +pred_scores = flat_predictions[:, 5] +pred_joints = flat_predictions[:, 6:].reshape((len(pred_bboxes), -1, 3)) +for i in range(len(pred_bboxes)): + confidence = pred_scores[i] + x_min, y_min, x_max, y_max = pred_bboxes[i] + print(f"Detected pose with confidence={{confidence}}, x_min={{x_min}}, y_min={{y_min}}, x_max={{x_max}}, y_max={{y_max}}") + for joint_index, (x, y, confidence) in enumerate(pred_joints[i]):") + print(f"Joint {{joint_index}} has coordinates x={{x}}, y={{y}}, confidence={{confidence}}") + diff --git a/yolo_nas_pose_s_fp32.onnx b/yolo_nas_pose_s_fp32.onnx new file mode 100644 index 0000000000000000000000000000000000000000..c64fb135948166ef94813c9a1d4485ddb8a27bc0 --- /dev/null +++ b/yolo_nas_pose_s_fp32.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c940f729387bebd4f4ffc1f34df78f495ca7f0b4e8099e575c88ca5185e0f6c +size 61676424 diff --git a/yolo_nas_pose_s_fp32.onnx.best.engine b/yolo_nas_pose_s_fp32.onnx.best.engine new file mode 100644 index 0000000000000000000000000000000000000000..e51fc508f2c203622ad457c474cc2a13f9cda2e1 --- /dev/null +++ b/yolo_nas_pose_s_fp32.onnx.best.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba2cdb513e36fa01685b210650d3954ae3c22f2472fe2e9fdfac9193cb3a5a26 +size 17781881 diff --git a/yolo_nas_pose_s_fp32.onnx.best.engine.err b/yolo_nas_pose_s_fp32.onnx.best.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..2b0e5df88432311ef2253bfa5009638244aa2289 --- /dev/null +++ b/yolo_nas_pose_s_fp32.onnx.best.engine.err @@ -0,0 +1,309 @@ +[12/27/2023-22:36:28] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. 
+[12/27/2023-22:36:28] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/27/2023-22:36:29] [W] [TRT] Calibrator is not being used. Users must provide dynamic range for all tensors that are not Int32 or Bool. +[12/27/2023-22:49:00] [W] [TRT] Tactic Device request: 3160MB Available: 3105MB. Device memory is insufficient to use tactic. +[12/27/2023-22:49:00] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3160 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:49:00] [W] [TRT] Tactic Device request: 3160MB Available: 3105MB. Device memory is insufficient to use tactic. +[12/27/2023-22:49:00] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3160 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:49:00] [W] [TRT] Tactic Device request: 3160MB Available: 3105MB. Device memory is insufficient to use tactic. +[12/27/2023-22:49:00] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3160 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:49:01] [W] [TRT] Tactic Device request: 3152MB Available: 3104MB. Device memory is insufficient to use tactic. +[12/27/2023-22:49:02] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3152 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:49:02] [W] [TRT] Tactic Device request: 3152MB Available: 3104MB. Device memory is insufficient to use tactic. +[12/27/2023-22:49:02] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3152 detected for tactic 0x000000000000003c. 
+Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:56:33] [W] [TRT] Tactic Device request: 3143MB Available: 2914MB. Device memory is insufficient to use tactic. +[12/27/2023-22:56:33] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3143 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:56:33] [W] [TRT] Tactic Device request: 3143MB Available: 2914MB. Device memory is insufficient to use tactic. +[12/27/2023-22:56:33] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3143 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:56:33] [W] [TRT] Tactic Device request: 3143MB Available: 2914MB. Device memory is insufficient to use tactic. +[12/27/2023-22:56:33] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3143 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:56:35] [W] [TRT] Tactic Device request: 3136MB Available: 2914MB. Device memory is insufficient to use tactic. +[12/27/2023-22:56:35] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3136 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:56:35] [W] [TRT] Tactic Device request: 3136MB Available: 2914MB. Device memory is insufficient to use tactic. +[12/27/2023-22:56:35] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3136 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:56:37] [W] [TRT] Tactic Device request: 4711MB Available: 2914MB. Device memory is insufficient to use tactic. 
+[12/27/2023-22:56:37] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:56:37] [W] [TRT] Tactic Device request: 4711MB Available: 2914MB. Device memory is insufficient to use tactic. +[12/27/2023-22:56:37] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4711 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:56:37] [W] [TRT] Tactic Device request: 4711MB Available: 2914MB. Device memory is insufficient to use tactic. +[12/27/2023-22:56:37] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:56:39] [W] [TRT] Tactic Device request: 4701MB Available: 2915MB. Device memory is insufficient to use tactic. +[12/27/2023-22:56:39] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4701 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:56:39] [W] [TRT] Tactic Device request: 4701MB Available: 2915MB. Device memory is insufficient to use tactic. +[12/27/2023-22:56:39] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 4701 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:57:00] [W] [TRT] Tactic Device request: 3152MB Available: 2916MB. Device memory is insufficient to use tactic. +[12/27/2023-22:57:00] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3152 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/27/2023-22:57:00] [W] [TRT] Tactic Device request: 3152MB Available: 2916MB. Device memory is insufficient to use tactic. +[12/27/2023-22:57:00] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3152 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:57:00] [W] [TRT] Tactic Device request: 3152MB Available: 2916MB. Device memory is insufficient to use tactic. +[12/27/2023-22:57:00] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3152 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:57:01] [W] [TRT] Tactic Device request: 3148MB Available: 2916MB. Device memory is insufficient to use tactic. +[12/27/2023-22:57:01] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3148 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:57:01] [W] [TRT] Tactic Device request: 3148MB Available: 2916MB. Device memory is insufficient to use tactic. +[12/27/2023-22:57:01] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3148 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:06:43] [W] [TRT] Tactic Device request: 3144MB Available: 2894MB. Device memory is insufficient to use tactic. +[12/27/2023-23:06:43] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3144 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:06:43] [W] [TRT] Tactic Device request: 3144MB Available: 2894MB. Device memory is insufficient to use tactic. 
+[12/27/2023-23:06:43] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3144 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:06:43] [W] [TRT] Tactic Device request: 3144MB Available: 2894MB. Device memory is insufficient to use tactic. +[12/27/2023-23:06:43] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3144 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:06:44] [W] [TRT] Tactic Device request: 3140MB Available: 2893MB. Device memory is insufficient to use tactic. +[12/27/2023-23:06:44] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3140 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:06:44] [W] [TRT] Tactic Device request: 3140MB Available: 2893MB. Device memory is insufficient to use tactic. +[12/27/2023-23:06:44] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3140 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:06:46] [W] [TRT] Tactic Device request: 7056MB Available: 2892MB. Device memory is insufficient to use tactic. +[12/27/2023-23:06:46] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:06:46] [W] [TRT] Tactic Device request: 7056MB Available: 2892MB. Device memory is insufficient to use tactic. +[12/27/2023-23:06:46] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 7056 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/27/2023-23:06:46] [W] [TRT] Tactic Device request: 7056MB Available: 2892MB. Device memory is insufficient to use tactic. +[12/27/2023-23:06:46] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:06:48] [W] [TRT] Tactic Device request: 7050MB Available: 2899MB. Device memory is insufficient to use tactic. +[12/27/2023-23:06:48] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7050 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:06:48] [W] [TRT] Tactic Device request: 7050MB Available: 2899MB. Device memory is insufficient to use tactic. +[12/27/2023-23:06:48] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 7050 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:07:11] [W] [TRT] Tactic Device request: 2392MB Available: 2278MB. Device memory is insufficient to use tactic. +[12/27/2023-23:07:11] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2392 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:07:11] [W] [TRT] Tactic Device request: 2392MB Available: 2278MB. Device memory is insufficient to use tactic. +[12/27/2023-23:07:11] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:07:12] [W] [TRT] Tactic Device request: 2390MB Available: 2276MB. Device memory is insufficient to use tactic. 
+[12/27/2023-23:07:12] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2390 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:07:12] [W] [TRT] Tactic Device request: 2390MB Available: 2276MB. Device memory is insufficient to use tactic. +[12/27/2023-23:07:12] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2390 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:21:22] [W] [TRT] Tactic Device request: 2385MB Available: 2013MB. Device memory is insufficient to use tactic. +[12/27/2023-23:21:22] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2385 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:21:22] [W] [TRT] Tactic Device request: 2385MB Available: 2013MB. Device memory is insufficient to use tactic. +[12/27/2023-23:21:22] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2385 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:21:22] [W] [TRT] Tactic Device request: 2385MB Available: 2014MB. Device memory is insufficient to use tactic. +[12/27/2023-23:21:22] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2385 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:21:23] [W] [TRT] Tactic Device request: 2384MB Available: 2013MB. Device memory is insufficient to use tactic. +[12/27/2023-23:21:23] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2384 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/27/2023-23:21:23] [W] [TRT] Tactic Device request: 2384MB Available: 2013MB. Device memory is insufficient to use tactic. +[12/27/2023-23:21:23] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2384 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:21:25] [W] [TRT] Tactic Device request: 2394MB Available: 2022MB. Device memory is insufficient to use tactic. +[12/27/2023-23:21:25] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:21:25] [W] [TRT] Tactic Device request: 2394MB Available: 2022MB. Device memory is insufficient to use tactic. +[12/27/2023-23:21:25] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2394 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:21:25] [W] [TRT] Tactic Device request: 2394MB Available: 2022MB. Device memory is insufficient to use tactic. +[12/27/2023-23:21:25] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:21:26] [W] [TRT] Tactic Device request: 2392MB Available: 2022MB. Device memory is insufficient to use tactic. +[12/27/2023-23:21:26] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:21:26] [W] [TRT] Tactic Device request: 2392MB Available: 2022MB. Device memory is insufficient to use tactic. 
+[12/27/2023-23:21:26] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2392 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:21:26] [W] [TRT] Tactic Device request: 2392MB Available: 2022MB. Device memory is insufficient to use tactic. +[12/27/2023-23:21:26] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:21:27] [W] [TRT] Tactic Device request: 2391MB Available: 2022MB. Device memory is insufficient to use tactic. +[12/27/2023-23:21:27] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2391 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:21:27] [W] [TRT] Tactic Device request: 2391MB Available: 2022MB. Device memory is insufficient to use tactic. +[12/27/2023-23:21:28] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2391 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:21:28] [W] [TRT] Tactic Device request: 2390MB Available: 2022MB. Device memory is insufficient to use tactic. +[12/27/2023-23:21:28] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2390 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:21:28] [W] [TRT] Tactic Device request: 2390MB Available: 2022MB. Device memory is insufficient to use tactic. +[12/27/2023-23:21:28] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2390 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/27/2023-23:21:45] [W] [TRT] Tactic Device request: 2457MB Available: 1983MB. Device memory is insufficient to use tactic. +[12/27/2023-23:21:45] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:21:45] [W] [TRT] Tactic Device request: 2457MB Available: 1983MB. Device memory is insufficient to use tactic. +[12/27/2023-23:21:45] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2457 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:21:45] [W] [TRT] Tactic Device request: 2457MB Available: 1984MB. Device memory is insufficient to use tactic. +[12/27/2023-23:21:45] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:21:46] [W] [TRT] Tactic Device request: 2456MB Available: 1984MB. Device memory is insufficient to use tactic. +[12/27/2023-23:21:46] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2456 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:21:46] [W] [TRT] Tactic Device request: 2456MB Available: 1984MB. Device memory is insufficient to use tactic. +[12/27/2023-23:21:46] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2456 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:29:29] [W] [TRT] Tactic Device request: 2454MB Available: 1853MB. Device memory is insufficient to use tactic. 
+[12/27/2023-23:29:29] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2454 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:29:29] [W] [TRT] Tactic Device request: 2454MB Available: 1853MB. Device memory is insufficient to use tactic. +[12/27/2023-23:29:29] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2454 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:29:29] [W] [TRT] Tactic Device request: 2454MB Available: 1853MB. Device memory is insufficient to use tactic. +[12/27/2023-23:29:29] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2454 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:29:31] [W] [TRT] Tactic Device request: 2453MB Available: 1853MB. Device memory is insufficient to use tactic. +[12/27/2023-23:29:31] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2453 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:29:31] [W] [TRT] Tactic Device request: 2453MB Available: 1853MB. Device memory is insufficient to use tactic. +[12/27/2023-23:29:31] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2453 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:29:33] [W] [TRT] Tactic Device request: 2457MB Available: 1852MB. Device memory is insufficient to use tactic. +[12/27/2023-23:29:33] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/27/2023-23:29:33] [W] [TRT] Tactic Device request: 2457MB Available: 1852MB. Device memory is insufficient to use tactic. +[12/27/2023-23:29:33] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2457 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:29:33] [W] [TRT] Tactic Device request: 2457MB Available: 1852MB. Device memory is insufficient to use tactic. +[12/27/2023-23:29:33] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:29:34] [W] [TRT] Tactic Device request: 2456MB Available: 1853MB. Device memory is insufficient to use tactic. +[12/27/2023-23:29:34] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2456 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:29:34] [W] [TRT] Tactic Device request: 2456MB Available: 1853MB. Device memory is insufficient to use tactic. +[12/27/2023-23:29:34] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2456 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:30:02] [W] [TRT] Tactic Device request: 3587MB Available: 1885MB. Device memory is insufficient to use tactic. +[12/27/2023-23:30:02] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:30:02] [W] [TRT] Tactic Device request: 3587MB Available: 1886MB. Device memory is insufficient to use tactic. 
+[12/27/2023-23:30:02] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3587 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:30:02] [W] [TRT] Tactic Device request: 3587MB Available: 1886MB. Device memory is insufficient to use tactic. +[12/27/2023-23:30:02] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:30:03] [W] [TRT] Tactic Device request: 3585MB Available: 1890MB. Device memory is insufficient to use tactic. +[12/27/2023-23:30:03] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3585 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:30:03] [W] [TRT] Tactic Device request: 3585MB Available: 1890MB. Device memory is insufficient to use tactic. +[12/27/2023-23:30:03] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3585 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:38:09] [W] [TRT] Tactic Device request: 3556MB Available: 1733MB. Device memory is insufficient to use tactic. +[12/27/2023-23:38:09] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:38:09] [W] [TRT] Tactic Device request: 3556MB Available: 1733MB. Device memory is insufficient to use tactic. +[12/27/2023-23:38:09] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3556 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/27/2023-23:38:09] [W] [TRT] Tactic Device request: 3556MB Available: 1733MB. Device memory is insufficient to use tactic. +[12/27/2023-23:38:09] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:38:11] [W] [TRT] Tactic Device request: 3551MB Available: 1734MB. Device memory is insufficient to use tactic. +[12/27/2023-23:38:11] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3551 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:38:11] [W] [TRT] Tactic Device request: 3551MB Available: 1734MB. Device memory is insufficient to use tactic. +[12/27/2023-23:38:11] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3551 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:53:09] [W] [TRT] Tactic Device request: 2385MB Available: 1679MB. Device memory is insufficient to use tactic. +[12/27/2023-23:53:09] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2385 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:53:09] [W] [TRT] Tactic Device request: 2385MB Available: 1679MB. Device memory is insufficient to use tactic. +[12/27/2023-23:53:09] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2385 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:53:09] [W] [TRT] Tactic Device request: 2385MB Available: 1679MB. Device memory is insufficient to use tactic. 
+[12/27/2023-23:53:09] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2385 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:53:10] [W] [TRT] Tactic Device request: 2384MB Available: 1679MB. Device memory is insufficient to use tactic. +[12/27/2023-23:53:10] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2384 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:53:10] [W] [TRT] Tactic Device request: 2384MB Available: 1679MB. Device memory is insufficient to use tactic. +[12/27/2023-23:53:10] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2384 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:53:34] [W] [TRT] Tactic Device request: 2126MB Available: 1652MB. Device memory is insufficient to use tactic. +[12/27/2023-23:53:34] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2126 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:53:35] [W] [TRT] Tactic Device request: 2126MB Available: 1652MB. Device memory is insufficient to use tactic. +[12/27/2023-23:53:35] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2126 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:53:35] [W] [TRT] Tactic Device request: 2126MB Available: 1653MB. Device memory is insufficient to use tactic. +[12/27/2023-23:53:35] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2126 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/27/2023-23:53:38] [W] [TRT] Tactic Device request: 2124MB Available: 1653MB. Device memory is insufficient to use tactic. +[12/27/2023-23:53:38] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2124 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:53:38] [W] [TRT] Tactic Device request: 2124MB Available: 1652MB. Device memory is insufficient to use tactic. +[12/27/2023-23:53:38] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2124 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:53:41] [W] [TRT] Tactic Device request: 2125MB Available: 1652MB. Device memory is insufficient to use tactic. +[12/27/2023-23:53:41] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2125 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:53:42] [W] [TRT] Tactic Device request: 2125MB Available: 1653MB. Device memory is insufficient to use tactic. +[12/27/2023-23:53:42] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2125 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:53:42] [W] [TRT] Tactic Device request: 2125MB Available: 1653MB. Device memory is insufficient to use tactic. +[12/27/2023-23:53:42] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2125 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:53:45] [W] [TRT] Tactic Device request: 2124MB Available: 1653MB. Device memory is insufficient to use tactic. 
+[12/27/2023-23:53:45] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2124 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:53:45] [W] [TRT] Tactic Device request: 2124MB Available: 1652MB. Device memory is insufficient to use tactic. +[12/27/2023-23:53:45] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2124 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:53:59] [W] [TRT] Tactic Device request: 2125MB Available: 1653MB. Device memory is insufficient to use tactic. +[12/27/2023-23:53:59] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2125 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:54:00] [W] [TRT] Tactic Device request: 2125MB Available: 1652MB. Device memory is insufficient to use tactic. +[12/27/2023-23:54:00] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2125 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:54:00] [W] [TRT] Tactic Device request: 2125MB Available: 1654MB. Device memory is insufficient to use tactic. +[12/27/2023-23:54:00] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2125 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-23:54:03] [W] [TRT] Tactic Device request: 2124MB Available: 1653MB. Device memory is insufficient to use tactic. +[12/27/2023-23:54:03] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2124 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/27/2023-23:54:03] [W] [TRT] Tactic Device request: 2124MB Available: 1653MB. Device memory is insufficient to use tactic. +[12/27/2023-23:54:03] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2124 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:01:52] [W] [TRT] Tactic Device request: 1637MB Available: 1510MB. Device memory is insufficient to use tactic. +[12/28/2023-00:01:52] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 1637 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:01:52] [W] [TRT] Tactic Device request: 1637MB Available: 1510MB. Device memory is insufficient to use tactic. +[12/28/2023-00:01:52] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 1637 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:01:52] [W] [TRT] Tactic Device request: 1637MB Available: 1510MB. Device memory is insufficient to use tactic. +[12/28/2023-00:01:52] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 1637 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:01:53] [W] [TRT] Tactic Device request: 1636MB Available: 1511MB. Device memory is insufficient to use tactic. +[12/28/2023-00:01:53] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 1636 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:01:53] [W] [TRT] Tactic Device request: 1636MB Available: 1510MB. Device memory is insufficient to use tactic. 
+[12/28/2023-00:01:53] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 1636 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:03:10] [W] [TRT] TensorRT encountered issues when converting weights between types and that could affect accuracy. +[12/28/2023-00:03:10] [W] [TRT] If this is not the desired behavior, please modify the weights or retrain with regularization to adjust the magnitude of the weights. +[12/28/2023-00:03:10] [W] [TRT] Check verbose logs for the list of affected weights. +[12/28/2023-00:03:10] [W] [TRT] - 96 weights are affected by this issue: Detected subnormal FP16 values. +[12/28/2023-00:03:10] [W] [TRT] - 17 weights are affected by this issue: Detected values less than smallest positive FP16 subnormal value and converted them to the FP16 minimum subnormalized value. +[12/28/2023-00:03:27] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. +[12/28/2023-00:03:27] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/28/2023-00:03:27] [W] * GPU compute time is unstable, with coefficient of variance = 5.52789%. +[12/28/2023-00:03:27] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. 
diff --git a/yolo_nas_pose_s_fp32.onnx.best.engine.log b/yolo_nas_pose_s_fp32.onnx.best.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..ca15d554ac2a5027e0d32c66d516ca89325737b1 --- /dev/null +++ b/yolo_nas_pose_s_fp32.onnx.best.engine.log @@ -0,0 +1,301 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_fp32.onnx --best --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_fp32.onnx.best.engine +[12/27/2023-22:36:19] [I] === Model Options === +[12/27/2023-22:36:19] [I] Format: ONNX +[12/27/2023-22:36:19] [I] Model: yolo_nas_pose_s_fp32.onnx +[12/27/2023-22:36:19] [I] Output: +[12/27/2023-22:36:19] [I] === Build Options === +[12/27/2023-22:36:19] [I] Max batch: explicit batch +[12/27/2023-22:36:19] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/27/2023-22:36:19] [I] minTiming: 1 +[12/27/2023-22:36:19] [I] avgTiming: 8 +[12/27/2023-22:36:19] [I] Precision: FP32+FP16+INT8 +[12/27/2023-22:36:19] [I] LayerPrecisions: +[12/27/2023-22:36:19] [I] Calibration: Dynamic +[12/27/2023-22:36:19] [I] Refit: Disabled +[12/27/2023-22:36:19] [I] Sparsity: Disabled +[12/27/2023-22:36:19] [I] Safe mode: Disabled +[12/27/2023-22:36:19] [I] DirectIO mode: Disabled +[12/27/2023-22:36:19] [I] Restricted mode: Disabled +[12/27/2023-22:36:19] [I] Build only: Disabled +[12/27/2023-22:36:19] [I] Save engine: yolo_nas_pose_s_fp32.onnx.best.engine +[12/27/2023-22:36:19] [I] Load engine: +[12/27/2023-22:36:19] [I] Profiling verbosity: 0 +[12/27/2023-22:36:19] [I] Tactic sources: Using default tactic sources +[12/27/2023-22:36:19] [I] timingCacheMode: local +[12/27/2023-22:36:19] [I] timingCacheFile: +[12/27/2023-22:36:19] [I] Heuristic: Disabled +[12/27/2023-22:36:19] [I] Preview Features: Use default preview flags. 
+[12/27/2023-22:36:19] [I] Input(s)s format: fp32:CHW +[12/27/2023-22:36:19] [I] Output(s)s format: fp32:CHW +[12/27/2023-22:36:19] [I] Input build shapes: model +[12/27/2023-22:36:19] [I] Input calibration shapes: model +[12/27/2023-22:36:19] [I] === System Options === +[12/27/2023-22:36:19] [I] Device: 0 +[12/27/2023-22:36:19] [I] DLACore: +[12/27/2023-22:36:19] [I] Plugins: +[12/27/2023-22:36:19] [I] === Inference Options === +[12/27/2023-22:36:19] [I] Batch: Explicit +[12/27/2023-22:36:19] [I] Input inference shapes: model +[12/27/2023-22:36:19] [I] Iterations: 10 +[12/27/2023-22:36:19] [I] Duration: 15s (+ 200ms warm up) +[12/27/2023-22:36:19] [I] Sleep time: 0ms +[12/27/2023-22:36:19] [I] Idle time: 0ms +[12/27/2023-22:36:19] [I] Streams: 1 +[12/27/2023-22:36:19] [I] ExposeDMA: Disabled +[12/27/2023-22:36:19] [I] Data transfers: Enabled +[12/27/2023-22:36:19] [I] Spin-wait: Disabled +[12/27/2023-22:36:19] [I] Multithreading: Disabled +[12/27/2023-22:36:19] [I] CUDA Graph: Disabled +[12/27/2023-22:36:19] [I] Separate profiling: Disabled +[12/27/2023-22:36:19] [I] Time Deserialize: Disabled +[12/27/2023-22:36:19] [I] Time Refit: Disabled +[12/27/2023-22:36:19] [I] NVTX verbosity: 0 +[12/27/2023-22:36:19] [I] Persistent Cache Ratio: 0 +[12/27/2023-22:36:19] [I] Inputs: +[12/27/2023-22:36:19] [I] === Reporting Options === +[12/27/2023-22:36:19] [I] Verbose: Disabled +[12/27/2023-22:36:19] [I] Averages: 100 inferences +[12/27/2023-22:36:19] [I] Percentiles: 90,95,99 +[12/27/2023-22:36:19] [I] Dump refittable layers:Disabled +[12/27/2023-22:36:19] [I] Dump output: Disabled +[12/27/2023-22:36:19] [I] Profile: Disabled +[12/27/2023-22:36:19] [I] Export timing to JSON file: +[12/27/2023-22:36:19] [I] Export output to JSON file: +[12/27/2023-22:36:19] [I] Export profile to JSON file: +[12/27/2023-22:36:19] [I] +[12/27/2023-22:36:20] [I] === Device Information === +[12/27/2023-22:36:20] [I] Selected Device: Orin +[12/27/2023-22:36:20] [I] Compute Capability: 8.7 
+[12/27/2023-22:36:20] [I] SMs: 8 +[12/27/2023-22:36:20] [I] Compute Clock Rate: 0.624 GHz +[12/27/2023-22:36:20] [I] Device Global Memory: 7471 MiB +[12/27/2023-22:36:20] [I] Shared Memory per SM: 164 KiB +[12/27/2023-22:36:20] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/27/2023-22:36:20] [I] Memory Clock Rate: 0.624 GHz +[12/27/2023-22:36:20] [I] +[12/27/2023-22:36:20] [I] TensorRT version: 8.5.2 +[12/27/2023-22:36:23] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 3010 (MiB) +[12/27/2023-22:36:27] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +286, now: CPU 574, GPU 3319 (MiB) +[12/27/2023-22:36:27] [I] Start parsing network model +[12/27/2023-22:36:28] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-22:36:28] [I] [TRT] Input filename: yolo_nas_pose_s_fp32.onnx +[12/27/2023-22:36:28] [I] [TRT] ONNX IR version: 0.0.8 +[12/27/2023-22:36:28] [I] [TRT] Opset version: 17 +[12/27/2023-22:36:28] [I] [TRT] Producer name: pytorch +[12/27/2023-22:36:28] [I] [TRT] Producer version: 2.1.2 +[12/27/2023-22:36:28] [I] [TRT] Domain: +[12/27/2023-22:36:28] [I] [TRT] Model version: 0 +[12/27/2023-22:36:28] [I] [TRT] Doc string: +[12/27/2023-22:36:28] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-22:36:29] [I] Finish parsing network model +[12/27/2023-22:36:29] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/27/2023-22:36:29] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 391) [Constant] +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 392) [Constant] +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 393) 
[Constant] +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stem/conv/rbr_reparam/Conv + /model/backbone/stem/conv/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/downsample/rbr_reparam/Conv + /model/backbone/stage1/downsample/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv2/conv/Conv + /model/backbone/stage1/blocks/conv2/act/Relu || /model/backbone/stage1/blocks/conv1/conv/Conv + /model/backbone/stage1/blocks/conv1/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 15) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 23) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] COPY: 
/model/backbone/stage1/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv3/conv/Conv + /model/backbone/stage1/blocks/conv3/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_skip2/conv/Conv + /model/neck/neck2/reduce_skip2/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/downsample/rbr_reparam/Conv + /model/backbone/stage2/downsample/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/downsample/conv/Conv + /model/neck/neck2/downsample/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv2/conv/Conv + /model/backbone/stage2/blocks/conv2/act/Relu || /model/backbone/stage2/blocks/conv1/conv/Conv + /model/backbone/stage2/blocks/conv1/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 44) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + 
/model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 52) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 60) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv3/conv/Conv + /model/backbone/stage2/blocks/conv3/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip2/conv/Conv + /model/neck/neck1/reduce_skip2/act/Relu || /model/neck/neck2/reduce_skip1/conv/Conv + /model/neck/neck2/reduce_skip1/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/downsample/rbr_reparam/Conv + /model/backbone/stage3/downsample/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/downsample/conv/Conv + /model/neck/neck1/downsample/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv2/conv/Conv + /model/backbone/stage3/blocks/conv2/act/Relu || 
/model/backbone/stage3/blocks/conv1/conv/Conv + /model/backbone/stage3/blocks/conv1/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 83) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 91) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] POINTWISE: 
PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 99) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 107) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.4.alpha + (Unnamed Layer* 115) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Add) +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv3/conv/Conv + /model/backbone/stage3/blocks/conv3/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip1/conv/Conv + /model/neck/neck1/reduce_skip1/act/Relu 
+[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/downsample/rbr_reparam/Conv + /model/backbone/stage4/downsample/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv2/conv/Conv + /model/backbone/stage4/blocks/conv2/act/Relu || /model/backbone/stage4/blocks/conv1/conv/Conv + /model/backbone/stage4/blocks/conv1/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 134) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 142) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/backbone/stage4/blocks/conv3/conv/Conv + /model/backbone/stage4/blocks/conv3/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv1/conv/Conv + /model/backbone/context_module/cv1/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/cv1/act/Relu_output_0 copy +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv2/conv/Conv + /model/backbone/context_module/cv2/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/conv/conv/Conv + /model/neck/neck1/conv/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck1/upsample/ConvTranspose +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_after_concat/conv/Conv + /model/neck/neck1/reduce_after_concat/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv2/conv/Conv + /model/neck/neck1/blocks/conv2/act/Relu || /model/neck/neck1/blocks/conv1/conv/Conv + /model/neck/neck1/blocks/conv1/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 171) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, 
/model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 179) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv3/conv/Conv + /model/neck/neck1/blocks/conv3/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/conv/conv/Conv + /model/neck/neck2/conv/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck2/upsample/ConvTranspose +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/reduce_skip1/act/Relu_output_0 copy +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_after_concat/conv/Conv + /model/neck/neck2/reduce_after_concat/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv2/conv/Conv + /model/neck/neck2/blocks/conv2/act/Relu || /model/neck/neck2/blocks/conv1/conv/Conv + /model/neck/neck2/blocks/conv1/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv 
+ /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 200) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 208) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv3/conv/Conv + /model/neck/neck2/blocks/conv3/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/bbox_stem/seq/conv/Conv + /model/heads/head1/bbox_stem/seq/act/Relu || /model/heads/head1/pose_stem/seq/conv/Conv + /model/heads/head1/pose_stem/seq/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/conv/conv/Conv + /model/neck/neck3/conv/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head1/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head1/cls_convs/cls_convs.0/seq/act/Relu 
+[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.0/seq/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv2/conv/Conv + /model/neck/neck3/blocks/conv2/act/Relu || /model/neck/neck3/blocks/conv1/conv/Conv + /model/neck/neck3/blocks/conv1/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/cls_pred/Conv +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_pred/Conv +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.1/seq/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_pred/Conv +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 239) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 262) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv3/conv/Conv + /model/neck/neck3/blocks/conv3/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_stem/seq/conv/Conv + /model/heads/head2/pose_stem/seq/act/Relu || /model/heads/head2/bbox_stem/seq/conv/Conv + /model/heads/head2/bbox_stem/seq/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/conv/conv/Conv + /model/neck/neck4/conv/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head2/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head2/cls_convs/cls_convs.0/seq/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.0/seq/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv2/conv/Conv + /model/neck/neck4/blocks/conv2/act/Relu || /model/neck/neck4/blocks/conv1/conv/Conv + /model/neck/neck4/blocks/conv1/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/cls_pred/Conv +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_pred/Conv +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv + 
/model/heads/head2/pose_convs/pose_convs.1/seq/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_pred/Conv +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 293) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 316) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv3/conv/Conv + /model/neck/neck4/blocks/conv3/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/bbox_stem/seq/conv/Conv 
+ /model/heads/head3/bbox_stem/seq/act/Relu || /model/heads/head3/pose_stem/seq/conv/Conv + /model/heads/head3/pose_stem/seq/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head3/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head3/cls_convs/cls_convs.0/seq/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.0/seq/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/cls_pred/Conv +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_pred/Conv +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.1/seq/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.2/seq/act/Relu +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_pred/Conv +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice_1.../post_process/Reshape_2]} +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] NMS: batched_nms_26 +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 395) [NMS]_1_output[DevicetoShapeHostCopy] +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/27/2023-22:36:29] [I] [TRT] [GpuLayer] 
TRAIN_STATION: [trainStation3] +[12/27/2023-22:36:40] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +374, now: CPU 1179, GPU 3798 (MiB) +[12/27/2023-22:36:41] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +83, GPU +74, now: CPU 1262, GPU 3872 (MiB) +[12/27/2023-22:36:41] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored. +[12/28/2023-00:02:52] [I] [TRT] Total Activation Memory: 7907887616 +[12/28/2023-00:02:52] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/28/2023-00:03:04] [I] [TRT] Total Host Persistent Memory: 286784 +[12/28/2023-00:03:04] [I] [TRT] Total Device Persistent Memory: 80384 +[12/28/2023-00:03:04] [I] [TRT] Total Scratch Memory: 134217728 +[12/28/2023-00:03:04] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 25 MiB, GPU 2396 MiB +[12/28/2023-00:03:04] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 151 steps to complete. +[12/28/2023-00:03:04] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 63.9939ms to assign 13 blocks to 151 nodes requiring 140295680 bytes. +[12/28/2023-00:03:04] [I] [TRT] Total Activation Memory: 140295680 +[12/28/2023-00:03:10] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU -1, now: CPU 1628, GPU 5686 (MiB) +[12/28/2023-00:03:10] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +15, GPU +16, now: CPU 15, GPU 16 (MiB) +[12/28/2023-00:03:11] [I] Engine built in 5210.96 sec. +[12/28/2023-00:03:11] [I] [TRT] Loaded engine size: 16 MiB +[12/28/2023-00:03:11] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +1, now: CPU 1261, GPU 5549 (MiB) +[12/28/2023-00:03:11] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +15, now: CPU 0, GPU 15 (MiB) +[12/28/2023-00:03:11] [I] Engine deserialized in 0.110723 sec. 
+[12/28/2023-00:03:11] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1261, GPU 5549 (MiB) +[12/28/2023-00:03:11] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +133, now: CPU 0, GPU 148 (MiB) +[12/28/2023-00:03:11] [I] Setting persistentCacheLimit to 0 bytes. +[12/28/2023-00:03:11] [I] Using random values for input onnx::Cast_0 +[12/28/2023-00:03:12] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/28/2023-00:03:12] [I] Using random values for output graph2_flat_predictions +[12/28/2023-00:03:12] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/28/2023-00:03:12] [I] Starting inference +[12/28/2023-00:03:27] [I] Warmup completed 12 queries over 200 ms +[12/28/2023-00:03:27] [I] Timing trace has 1286 queries over 15.0283 s +[12/28/2023-00:03:27] [I] +[12/28/2023-00:03:27] [I] === Trace details === +[12/28/2023-00:03:27] [I] Trace averages of 100 runs: +[12/28/2023-00:03:27] [I] Average on 100 runs - GPU latency: 12.1475 ms - Host latency: 12.2659 ms (enqueue 12.2148 ms) +[12/28/2023-00:03:27] [I] Average on 100 runs - GPU latency: 11.5941 ms - Host latency: 11.7095 ms (enqueue 11.6642 ms) +[12/28/2023-00:03:27] [I] Average on 100 runs - GPU latency: 11.0822 ms - Host latency: 11.1934 ms (enqueue 11.1738 ms) +[12/28/2023-00:03:27] [I] Average on 100 runs - GPU latency: 11.9621 ms - Host latency: 12.0778 ms (enqueue 12.0214 ms) +[12/28/2023-00:03:27] [I] Average on 100 runs - GPU latency: 12.1016 ms - Host latency: 12.2183 ms (enqueue 12.1713 ms) +[12/28/2023-00:03:27] [I] Average on 100 runs - GPU latency: 12.1557 ms - Host latency: 12.273 ms (enqueue 12.2234 ms) +[12/28/2023-00:03:27] [I] Average on 100 runs - GPU latency: 11.5276 ms - Host latency: 11.642 ms (enqueue 11.5974 ms) +[12/28/2023-00:03:27] [I] Average on 100 runs - GPU latency: 11.0702 ms - Host latency: 11.1808 ms (enqueue 11.151 ms) +[12/28/2023-00:03:27] [I] Average on 100 
runs - GPU latency: 11.3391 ms - Host latency: 11.4503 ms (enqueue 11.409 ms) +[12/28/2023-00:03:27] [I] Average on 100 runs - GPU latency: 11.2812 ms - Host latency: 11.3941 ms (enqueue 11.3561 ms) +[12/28/2023-00:03:27] [I] Average on 100 runs - GPU latency: 11.069 ms - Host latency: 11.1804 ms (enqueue 11.1488 ms) +[12/28/2023-00:03:27] [I] Average on 100 runs - GPU latency: 11.2753 ms - Host latency: 11.3883 ms (enqueue 11.3511 ms) +[12/28/2023-00:03:27] [I] +[12/28/2023-00:03:27] [I] === Performance summary === +[12/28/2023-00:03:27] [I] Throughput: 85.5718 qps +[12/28/2023-00:03:27] [I] Latency: min = 10.7617 ms, max = 16.5669 ms, mean = 11.6499 ms, median = 11.4467 ms, percentile(90%) = 12.4453 ms, percentile(95%) = 12.7886 ms, percentile(99%) = 13.4463 ms +[12/28/2023-00:03:27] [I] Enqueue Time: min = 10.7305 ms, max = 16.8987 ms, mean = 11.6093 ms, median = 11.4165 ms, percentile(90%) = 12.3955 ms, percentile(95%) = 12.7394 ms, percentile(99%) = 13.4116 ms +[12/28/2023-00:03:27] [I] H2D Latency: min = 0.0810547 ms, max = 0.12793 ms, mean = 0.0981499 ms, median = 0.0986328 ms, percentile(90%) = 0.100586 ms, percentile(95%) = 0.100586 ms, percentile(99%) = 0.108398 ms +[12/28/2023-00:03:27] [I] GPU Compute Time: min = 10.6494 ms, max = 16.4612 ms, mean = 11.5359 ms, median = 11.3354 ms, percentile(90%) = 12.3306 ms, percentile(95%) = 12.6711 ms, percentile(99%) = 13.3516 ms +[12/28/2023-00:03:27] [I] D2H Latency: min = 0.00292969 ms, max = 0.0517578 ms, mean = 0.0158704 ms, median = 0.0152588 ms, percentile(90%) = 0.0239258 ms, percentile(95%) = 0.0248718 ms, percentile(99%) = 0.0292969 ms +[12/28/2023-00:03:27] [I] Total Host Walltime: 15.0283 s +[12/28/2023-00:03:27] [I] Total GPU Compute Time: 14.8351 s +[12/28/2023-00:03:27] [I] Explanations of the performance metrics are printed in the verbose logs. 
+[12/28/2023-00:03:27] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_fp32.onnx --best --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_fp32.onnx.best.engine diff --git a/yolo_nas_pose_s_fp32.onnx.engine b/yolo_nas_pose_s_fp32.onnx.engine new file mode 100644 index 0000000000000000000000000000000000000000..58796add1b3734e1b3899199bf46ce0dd506d190 --- /dev/null +++ b/yolo_nas_pose_s_fp32.onnx.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d66d0a2b372c143d2865e948e0ee5fb991acb217e8cd382594da6ee1df9c9365 +size 63422602 diff --git a/yolo_nas_pose_s_fp32.onnx.engine.err b/yolo_nas_pose_s_fp32.onnx.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..d47e52fe6f960e5fddcb84f44c2529a5ae8daa2f --- /dev/null +++ b/yolo_nas_pose_s_fp32.onnx.engine.err @@ -0,0 +1,111 @@ +[12/27/2023-21:30:35] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/27/2023-21:30:35] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/27/2023-21:31:11] [W] [TRT] Tactic Device request: 3160MB Available: 2999MB. Device memory is insufficient to use tactic. +[12/27/2023-21:31:11] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3160 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:31:11] [W] [TRT] Tactic Device request: 3160MB Available: 2999MB. Device memory is insufficient to use tactic. +[12/27/2023-21:31:11] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3160 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:31:11] [W] [TRT] Tactic Device request: 3160MB Available: 2999MB. 
Device memory is insufficient to use tactic. +[12/27/2023-21:31:11] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3160 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:32:44] [W] [TRT] Tactic Device request: 3143MB Available: 2508MB. Device memory is insufficient to use tactic. +[12/27/2023-21:32:44] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3143 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:32:44] [W] [TRT] Tactic Device request: 3143MB Available: 2508MB. Device memory is insufficient to use tactic. +[12/27/2023-21:32:44] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3143 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:32:44] [W] [TRT] Tactic Device request: 3143MB Available: 2508MB. Device memory is insufficient to use tactic. +[12/27/2023-21:32:44] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3143 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:32:46] [W] [TRT] Tactic Device request: 4711MB Available: 2508MB. Device memory is insufficient to use tactic. +[12/27/2023-21:32:46] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:32:46] [W] [TRT] Tactic Device request: 4711MB Available: 2508MB. Device memory is insufficient to use tactic. +[12/27/2023-21:32:46] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4711 detected for tactic 0x000000000000003c. 
+Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:32:46] [W] [TRT] Tactic Device request: 4711MB Available: 2508MB. Device memory is insufficient to use tactic. +[12/27/2023-21:32:46] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:32:57] [W] [TRT] Tactic Device request: 3152MB Available: 2509MB. Device memory is insufficient to use tactic. +[12/27/2023-21:32:57] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3152 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:32:57] [W] [TRT] Tactic Device request: 3152MB Available: 2509MB. Device memory is insufficient to use tactic. +[12/27/2023-21:32:57] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3152 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:32:57] [W] [TRT] Tactic Device request: 3152MB Available: 2509MB. Device memory is insufficient to use tactic. +[12/27/2023-21:32:57] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3152 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:34:52] [W] [TRT] Tactic Device request: 3144MB Available: 2467MB. Device memory is insufficient to use tactic. +[12/27/2023-21:34:52] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3144 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:34:52] [W] [TRT] Tactic Device request: 3144MB Available: 2467MB. Device memory is insufficient to use tactic. 
+[12/27/2023-21:34:52] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3144 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:34:53] [W] [TRT] Tactic Device request: 3144MB Available: 2465MB. Device memory is insufficient to use tactic. +[12/27/2023-21:34:53] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3144 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:34:54] [W] [TRT] Tactic Device request: 7056MB Available: 2467MB. Device memory is insufficient to use tactic. +[12/27/2023-21:34:54] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:34:54] [W] [TRT] Tactic Device request: 7056MB Available: 2467MB. Device memory is insufficient to use tactic. +[12/27/2023-21:34:54] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 7056 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:34:54] [W] [TRT] Tactic Device request: 7056MB Available: 2467MB. Device memory is insufficient to use tactic. +[12/27/2023-21:34:54] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:37:55] [W] [TRT] Tactic Device request: 2385MB Available: 2340MB. Device memory is insufficient to use tactic. +[12/27/2023-21:37:55] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2385 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/27/2023-21:37:55] [W] [TRT] Tactic Device request: 2385MB Available: 2337MB. Device memory is insufficient to use tactic. +[12/27/2023-21:37:55] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2385 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:37:56] [W] [TRT] Tactic Device request: 2394MB Available: 2338MB. Device memory is insufficient to use tactic. +[12/27/2023-21:37:56] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:37:56] [W] [TRT] Tactic Device request: 2394MB Available: 2338MB. Device memory is insufficient to use tactic. +[12/27/2023-21:37:56] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2394 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:37:56] [W] [TRT] Tactic Device request: 2394MB Available: 2338MB. Device memory is insufficient to use tactic. +[12/27/2023-21:37:56] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:38:05] [W] [TRT] Tactic Device request: 2457MB Available: 2335MB. Device memory is insufficient to use tactic. +[12/27/2023-21:38:05] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:38:05] [W] [TRT] Tactic Device request: 2457MB Available: 2335MB. Device memory is insufficient to use tactic. 
+[12/27/2023-21:38:05] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2457 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:38:05] [W] [TRT] Tactic Device request: 2457MB Available: 2335MB. Device memory is insufficient to use tactic. +[12/27/2023-21:38:05] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:39:38] [W] [TRT] Tactic Device request: 2454MB Available: 2307MB. Device memory is insufficient to use tactic. +[12/27/2023-21:39:38] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2454 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:39:38] [W] [TRT] Tactic Device request: 2454MB Available: 2307MB. Device memory is insufficient to use tactic. +[12/27/2023-21:39:38] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2454 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:39:38] [W] [TRT] Tactic Device request: 2454MB Available: 2306MB. Device memory is insufficient to use tactic. +[12/27/2023-21:39:38] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2454 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:39:51] [W] [TRT] Tactic Device request: 3587MB Available: 2375MB. Device memory is insufficient to use tactic. +[12/27/2023-21:39:51] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/27/2023-21:39:52] [W] [TRT] Tactic Device request: 3587MB Available: 2375MB. Device memory is insufficient to use tactic. +[12/27/2023-21:39:52] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3587 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:39:52] [W] [TRT] Tactic Device request: 3587MB Available: 2375MB. Device memory is insufficient to use tactic. +[12/27/2023-21:39:52] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:41:34] [W] [TRT] Tactic Device request: 3556MB Available: 2337MB. Device memory is insufficient to use tactic. +[12/27/2023-21:41:34] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:41:34] [W] [TRT] Tactic Device request: 3556MB Available: 2337MB. Device memory is insufficient to use tactic. +[12/27/2023-21:41:34] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3556 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:41:34] [W] [TRT] Tactic Device request: 3556MB Available: 2337MB. Device memory is insufficient to use tactic. +[12/27/2023-21:41:34] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:47:51] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. 
+[12/27/2023-21:47:51] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/27/2023-21:47:51] [W] * GPU compute time is unstable, with coefficient of variance = 2.98691%. +[12/27/2023-21:47:51] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. diff --git a/yolo_nas_pose_s_fp32.onnx.engine.log b/yolo_nas_pose_s_fp32.onnx.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..455266ad4df340ad05eb3e8c15a04590db71900b --- /dev/null +++ b/yolo_nas_pose_s_fp32.onnx.engine.log @@ -0,0 +1,294 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_fp32.onnx --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_fp32.onnx.engine +[12/27/2023-21:30:31] [I] === Model Options === +[12/27/2023-21:30:31] [I] Format: ONNX +[12/27/2023-21:30:31] [I] Model: yolo_nas_pose_s_fp32.onnx +[12/27/2023-21:30:31] [I] Output: +[12/27/2023-21:30:31] [I] === Build Options === +[12/27/2023-21:30:31] [I] Max batch: explicit batch +[12/27/2023-21:30:31] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/27/2023-21:30:31] [I] minTiming: 1 +[12/27/2023-21:30:31] [I] avgTiming: 8 +[12/27/2023-21:30:31] [I] Precision: FP32 +[12/27/2023-21:30:31] [I] LayerPrecisions: +[12/27/2023-21:30:31] [I] Calibration: +[12/27/2023-21:30:31] [I] Refit: Disabled +[12/27/2023-21:30:31] [I] Sparsity: Disabled +[12/27/2023-21:30:31] [I] Safe mode: Disabled +[12/27/2023-21:30:31] [I] DirectIO mode: Disabled +[12/27/2023-21:30:31] [I] Restricted mode: Disabled +[12/27/2023-21:30:31] [I] Build only: Disabled +[12/27/2023-21:30:31] [I] Save engine: yolo_nas_pose_s_fp32.onnx.engine +[12/27/2023-21:30:31] [I] Load engine: +[12/27/2023-21:30:31] [I] Profiling verbosity: 0 +[12/27/2023-21:30:31] [I] Tactic sources: Using default tactic sources +[12/27/2023-21:30:31] [I] 
timingCacheMode: local +[12/27/2023-21:30:31] [I] timingCacheFile: +[12/27/2023-21:30:31] [I] Heuristic: Disabled +[12/27/2023-21:30:31] [I] Preview Features: Use default preview flags. +[12/27/2023-21:30:31] [I] Input(s)s format: fp32:CHW +[12/27/2023-21:30:31] [I] Output(s)s format: fp32:CHW +[12/27/2023-21:30:31] [I] Input build shapes: model +[12/27/2023-21:30:31] [I] Input calibration shapes: model +[12/27/2023-21:30:31] [I] === System Options === +[12/27/2023-21:30:31] [I] Device: 0 +[12/27/2023-21:30:31] [I] DLACore: +[12/27/2023-21:30:31] [I] Plugins: +[12/27/2023-21:30:31] [I] === Inference Options === +[12/27/2023-21:30:31] [I] Batch: Explicit +[12/27/2023-21:30:31] [I] Input inference shapes: model +[12/27/2023-21:30:31] [I] Iterations: 10 +[12/27/2023-21:30:31] [I] Duration: 15s (+ 200ms warm up) +[12/27/2023-21:30:31] [I] Sleep time: 0ms +[12/27/2023-21:30:31] [I] Idle time: 0ms +[12/27/2023-21:30:31] [I] Streams: 1 +[12/27/2023-21:30:31] [I] ExposeDMA: Disabled +[12/27/2023-21:30:31] [I] Data transfers: Enabled +[12/27/2023-21:30:31] [I] Spin-wait: Disabled +[12/27/2023-21:30:31] [I] Multithreading: Disabled +[12/27/2023-21:30:31] [I] CUDA Graph: Disabled +[12/27/2023-21:30:31] [I] Separate profiling: Disabled +[12/27/2023-21:30:31] [I] Time Deserialize: Disabled +[12/27/2023-21:30:31] [I] Time Refit: Disabled +[12/27/2023-21:30:31] [I] NVTX verbosity: 0 +[12/27/2023-21:30:31] [I] Persistent Cache Ratio: 0 +[12/27/2023-21:30:31] [I] Inputs: +[12/27/2023-21:30:31] [I] === Reporting Options === +[12/27/2023-21:30:31] [I] Verbose: Disabled +[12/27/2023-21:30:31] [I] Averages: 100 inferences +[12/27/2023-21:30:31] [I] Percentiles: 90,95,99 +[12/27/2023-21:30:31] [I] Dump refittable layers:Disabled +[12/27/2023-21:30:31] [I] Dump output: Disabled +[12/27/2023-21:30:31] [I] Profile: Disabled +[12/27/2023-21:30:31] [I] Export timing to JSON file: +[12/27/2023-21:30:31] [I] Export output to JSON file: +[12/27/2023-21:30:31] [I] Export profile to JSON file: 
+[12/27/2023-21:30:31] [I] +[12/27/2023-21:30:31] [I] === Device Information === +[12/27/2023-21:30:31] [I] Selected Device: Orin +[12/27/2023-21:30:31] [I] Compute Capability: 8.7 +[12/27/2023-21:30:31] [I] SMs: 8 +[12/27/2023-21:30:31] [I] Compute Clock Rate: 0.624 GHz +[12/27/2023-21:30:31] [I] Device Global Memory: 7471 MiB +[12/27/2023-21:30:31] [I] Shared Memory per SM: 164 KiB +[12/27/2023-21:30:31] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/27/2023-21:30:31] [I] Memory Clock Rate: 0.624 GHz +[12/27/2023-21:30:31] [I] +[12/27/2023-21:30:31] [I] TensorRT version: 8.5.2 +[12/27/2023-21:30:32] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 3028 (MiB) +[12/27/2023-21:30:34] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +283, now: CPU 574, GPU 3333 (MiB) +[12/27/2023-21:30:34] [I] Start parsing network model +[12/27/2023-21:30:35] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-21:30:35] [I] [TRT] Input filename: yolo_nas_pose_s_fp32.onnx +[12/27/2023-21:30:35] [I] [TRT] ONNX IR version: 0.0.8 +[12/27/2023-21:30:35] [I] [TRT] Opset version: 17 +[12/27/2023-21:30:35] [I] [TRT] Producer name: pytorch +[12/27/2023-21:30:35] [I] [TRT] Producer version: 2.1.2 +[12/27/2023-21:30:35] [I] [TRT] Domain: +[12/27/2023-21:30:35] [I] [TRT] Model version: 0 +[12/27/2023-21:30:35] [I] [TRT] Doc string: +[12/27/2023-21:30:35] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-21:30:35] [I] Finish parsing network model +[12/27/2023-21:30:36] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/27/2023-21:30:36] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 391) 
[Constant] +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 392) [Constant] +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 393) [Constant] +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stem/conv/rbr_reparam/Conv + /model/backbone/stem/conv/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/downsample/rbr_reparam/Conv + /model/backbone/stage1/downsample/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv2/conv/Conv + /model/backbone/stage1/blocks/conv2/act/Relu || /model/backbone/stage1/blocks/conv1/conv/Conv + /model/backbone/stage1/blocks/conv1/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 15) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 23) [Shuffle] + 
/model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv3/conv/Conv + /model/backbone/stage1/blocks/conv3/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_skip2/conv/Conv + /model/neck/neck2/reduce_skip2/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/downsample/rbr_reparam/Conv + /model/backbone/stage2/downsample/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/downsample/conv/Conv + /model/neck/neck2/downsample/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv2/conv/Conv + /model/backbone/stage2/blocks/conv2/act/Relu || /model/backbone/stage2/blocks/conv1/conv/Conv + /model/backbone/stage2/blocks/conv1/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 44) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-21:30:36] [I] 
[TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 52) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 60) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv3/conv/Conv + /model/backbone/stage2/blocks/conv3/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip2/conv/Conv + /model/neck/neck1/reduce_skip2/act/Relu || /model/neck/neck2/reduce_skip1/conv/Conv + /model/neck/neck2/reduce_skip1/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/downsample/rbr_reparam/Conv + /model/backbone/stage3/downsample/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/downsample/conv/Conv + /model/neck/neck1/downsample/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/backbone/stage3/blocks/conv2/conv/Conv + /model/backbone/stage3/blocks/conv2/act/Relu || /model/backbone/stage3/blocks/conv1/conv/Conv + /model/backbone/stage3/blocks/conv1/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 83) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 91) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 99) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 107) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.4.alpha + (Unnamed Layer* 115) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Add) +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv3/conv/Conv + /model/backbone/stage3/blocks/conv3/act/Relu 
+[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip1/conv/Conv + /model/neck/neck1/reduce_skip1/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/downsample/rbr_reparam/Conv + /model/backbone/stage4/downsample/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv2/conv/Conv + /model/backbone/stage4/blocks/conv2/act/Relu || /model/backbone/stage4/blocks/conv1/conv/Conv + /model/backbone/stage4/blocks/conv1/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 134) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 142) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] COPY: 
/model/backbone/stage4/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv3/conv/Conv + /model/backbone/stage4/blocks/conv3/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv1/conv/Conv + /model/backbone/context_module/cv1/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/cv1/act/Relu_output_0 copy +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv2/conv/Conv + /model/backbone/context_module/cv2/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/conv/conv/Conv + /model/neck/neck1/conv/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck1/upsample/ConvTranspose +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_after_concat/conv/Conv + /model/neck/neck1/reduce_after_concat/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv2/conv/Conv + /model/neck/neck1/blocks/conv2/act/Relu || /model/neck/neck1/blocks/conv1/conv/Conv + /model/neck/neck1/blocks/conv1/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] POINTWISE: 
PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 171) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 179) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv3/conv/Conv + /model/neck/neck1/blocks/conv3/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/conv/conv/Conv + /model/neck/neck2/conv/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck2/upsample/ConvTranspose +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/reduce_skip1/act/Relu_output_0 copy +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_after_concat/conv/Conv + /model/neck/neck2/reduce_after_concat/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv2/conv/Conv + /model/neck/neck2/blocks/conv2/act/Relu || /model/neck/neck2/blocks/conv1/conv/Conv + /model/neck/neck2/blocks/conv1/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + 
/model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 200) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 208) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv3/conv/Conv + /model/neck/neck2/blocks/conv3/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/bbox_stem/seq/conv/Conv + /model/heads/head1/bbox_stem/seq/act/Relu || /model/heads/head1/pose_stem/seq/conv/Conv + /model/heads/head1/pose_stem/seq/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/conv/conv/Conv + /model/neck/neck3/conv/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head1/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head1/cls_convs/cls_convs.0/seq/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.0/seq/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv2/conv/Conv + /model/neck/neck3/blocks/conv2/act/Relu || /model/neck/neck3/blocks/conv1/conv/Conv + /model/neck/neck3/blocks/conv1/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/cls_pred/Conv +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_pred/Conv +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.1/seq/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_pred/Conv +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 239) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 262) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv3/conv/Conv + /model/neck/neck3/blocks/conv3/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_stem/seq/conv/Conv + /model/heads/head2/pose_stem/seq/act/Relu || /model/heads/head2/bbox_stem/seq/conv/Conv + /model/heads/head2/bbox_stem/seq/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/conv/conv/Conv + /model/neck/neck4/conv/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head2/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head2/cls_convs/cls_convs.0/seq/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.0/seq/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv2/conv/Conv + /model/neck/neck4/blocks/conv2/act/Relu || /model/neck/neck4/blocks/conv1/conv/Conv + /model/neck/neck4/blocks/conv1/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/cls_pred/Conv +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/heads/head2/reg_pred/Conv +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.1/seq/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_pred/Conv +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 293) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 316) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/neck/neck4/blocks/conv3/conv/Conv + /model/neck/neck4/blocks/conv3/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/bbox_stem/seq/conv/Conv + /model/heads/head3/bbox_stem/seq/act/Relu || /model/heads/head3/pose_stem/seq/conv/Conv + /model/heads/head3/pose_stem/seq/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head3/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head3/cls_convs/cls_convs.0/seq/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.0/seq/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/cls_pred/Conv +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_pred/Conv +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.1/seq/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.2/seq/act/Relu +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_pred/Conv +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice_1.../post_process/Reshape_2]} +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] NMS: batched_nms_26 +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 395) [NMS]_1_output[DevicetoShapeHostCopy] +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] 
TRAIN_STATION: [trainStation2] +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/27/2023-21:30:36] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] +[12/27/2023-21:30:37] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +540, now: CPU 1179, GPU 3981 (MiB) +[12/27/2023-21:30:37] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +82, GPU +84, now: CPU 1261, GPU 4065 (MiB) +[12/27/2023-21:30:37] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored. +[12/27/2023-21:47:29] [I] [TRT] Total Activation Memory: 8066936832 +[12/27/2023-21:47:29] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/27/2023-21:47:33] [I] [TRT] Total Host Persistent Memory: 323888 +[12/27/2023-21:47:33] [I] [TRT] Total Device Persistent Memory: 41472 +[12/27/2023-21:47:33] [I] [TRT] Total Scratch Memory: 134217728 +[12/27/2023-21:47:33] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 19 MiB, GPU 2396 MiB +[12/27/2023-21:47:33] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 167 steps to complete. +[12/27/2023-21:47:33] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 195.625ms to assign 14 blocks to 167 nodes requiring 157423104 bytes. +[12/27/2023-21:47:33] [I] [TRT] Total Activation Memory: 157423104 +[12/27/2023-21:47:34] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1605, GPU 5465 (MiB) +[12/27/2023-21:47:34] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +9, GPU +64, now: CPU 9, GPU 64 (MiB) +[12/27/2023-21:47:35] [I] Engine built in 1023.74 sec. 
+[12/27/2023-21:47:35] [I] [TRT] Loaded engine size: 60 MiB +[12/27/2023-21:47:35] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1301, GPU 5236 (MiB) +[12/27/2023-21:47:35] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +58, now: CPU 0, GPU 58 (MiB) +[12/27/2023-21:47:35] [I] Engine deserialized in 0.101763 sec. +[12/27/2023-21:47:35] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1301, GPU 5236 (MiB) +[12/27/2023-21:47:36] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +150, now: CPU 0, GPU 208 (MiB) +[12/27/2023-21:47:36] [I] Setting persistentCacheLimit to 0 bytes. +[12/27/2023-21:47:36] [I] Using random values for input onnx::Cast_0 +[12/27/2023-21:47:36] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/27/2023-21:47:36] [I] Using random values for output graph2_flat_predictions +[12/27/2023-21:47:36] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/27/2023-21:47:36] [I] Starting inference +[12/27/2023-21:47:51] [I] Warmup completed 6 queries over 200 ms +[12/27/2023-21:47:51] [I] Timing trace has 506 queries over 15.0612 s +[12/27/2023-21:47:51] [I] +[12/27/2023-21:47:51] [I] === Trace details === +[12/27/2023-21:47:51] [I] Trace averages of 100 runs: +[12/27/2023-21:47:51] [I] Average on 100 runs - GPU latency: 29.9546 ms - Host latency: 30.0697 ms (enqueue 30.0061 ms) +[12/27/2023-21:47:51] [I] Average on 100 runs - GPU latency: 30.1741 ms - Host latency: 30.2899 ms (enqueue 30.2167 ms) +[12/27/2023-21:47:51] [I] Average on 100 runs - GPU latency: 29.3044 ms - Host latency: 29.414 ms (enqueue 29.3649 ms) +[12/27/2023-21:47:51] [I] Average on 100 runs - GPU latency: 29.3066 ms - Host latency: 29.4167 ms (enqueue 29.3737 ms) +[12/27/2023-21:47:51] [I] Average on 100 runs - GPU latency: 29.4091 ms - Host latency: 29.5185 ms (enqueue 29.4701 ms) +[12/27/2023-21:47:51] [I] 
+[12/27/2023-21:47:51] [I] === Performance summary === +[12/27/2023-21:47:51] [I] Throughput: 33.5963 qps +[12/27/2023-21:47:51] [I] Latency: min = 27.8343 ms, max = 34.8818 ms, mean = 29.7414 ms, median = 29.5557 ms, percentile(90%) = 30.7683 ms, percentile(95%) = 31.2769 ms, percentile(99%) = 32.2954 ms +[12/27/2023-21:47:51] [I] Enqueue Time: min = 27.8016 ms, max = 34.8301 ms, mean = 29.6862 ms, median = 29.5234 ms, percentile(90%) = 30.7906 ms, percentile(95%) = 31.0035 ms, percentile(99%) = 32.2573 ms +[12/27/2023-21:47:51] [I] H2D Latency: min = 0.0800781 ms, max = 0.115234 ms, mean = 0.0949853 ms, median = 0.0966797 ms, percentile(90%) = 0.0986328 ms, percentile(95%) = 0.0996094 ms, percentile(99%) = 0.0998535 ms +[12/27/2023-21:47:51] [I] GPU Compute Time: min = 27.7263 ms, max = 34.7671 ms, mean = 29.6293 ms, median = 29.4424 ms, percentile(90%) = 30.6462 ms, percentile(95%) = 31.178 ms, percentile(99%) = 32.1846 ms +[12/27/2023-21:47:51] [I] D2H Latency: min = 0.00292969 ms, max = 0.0610352 ms, mean = 0.017042 ms, median = 0.015625 ms, percentile(90%) = 0.02771 ms, percentile(95%) = 0.0292969 ms, percentile(99%) = 0.034668 ms +[12/27/2023-21:47:51] [I] Total Host Walltime: 15.0612 s +[12/27/2023-21:47:51] [I] Total GPU Compute Time: 14.9924 s +[12/27/2023-21:47:51] [I] Explanations of the performance metrics are printed in the verbose logs. 
+[12/27/2023-21:47:51] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_fp32.onnx --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_fp32.onnx.engine diff --git a/yolo_nas_pose_s_fp32.onnx.fp16.engine b/yolo_nas_pose_s_fp32.onnx.fp16.engine new file mode 100644 index 0000000000000000000000000000000000000000..f16f7933733adb13c485cdc2c3807434084150ef --- /dev/null +++ b/yolo_nas_pose_s_fp32.onnx.fp16.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f6d886e78627d213d14919f900ede76bc153af095a370cda9f3c9a24c5e93c6 +size 32544217 diff --git a/yolo_nas_pose_s_fp32.onnx.fp16.engine.err b/yolo_nas_pose_s_fp32.onnx.fp16.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..5bf516bb2b6e450744c634903b92f58288e07501 --- /dev/null +++ b/yolo_nas_pose_s_fp32.onnx.fp16.engine.err @@ -0,0 +1,230 @@ +[12/27/2023-21:47:58] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/27/2023-21:47:58] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/27/2023-21:50:12] [W] [TRT] Tactic Device request: 3160MB Available: 3035MB. Device memory is insufficient to use tactic. +[12/27/2023-21:50:12] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3160 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:50:12] [W] [TRT] Tactic Device request: 3160MB Available: 3035MB. Device memory is insufficient to use tactic. +[12/27/2023-21:50:12] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3160 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/27/2023-21:50:13] [W] [TRT] Tactic Device request: 3160MB Available: 3034MB. Device memory is insufficient to use tactic. +[12/27/2023-21:50:13] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3160 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:50:14] [W] [TRT] Tactic Device request: 3152MB Available: 3029MB. Device memory is insufficient to use tactic. +[12/27/2023-21:50:14] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3152 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:50:14] [W] [TRT] Tactic Device request: 3152MB Available: 3029MB. Device memory is insufficient to use tactic. +[12/27/2023-21:50:14] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3152 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:54:49] [W] [TRT] Tactic Device request: 3143MB Available: 2454MB. Device memory is insufficient to use tactic. +[12/27/2023-21:54:49] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3143 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:54:49] [W] [TRT] Tactic Device request: 3143MB Available: 2454MB. Device memory is insufficient to use tactic. +[12/27/2023-21:54:49] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3143 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:54:49] [W] [TRT] Tactic Device request: 3143MB Available: 2454MB. Device memory is insufficient to use tactic. 
+[12/27/2023-21:54:49] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3143 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:54:51] [W] [TRT] Tactic Device request: 3136MB Available: 2453MB. Device memory is insufficient to use tactic. +[12/27/2023-21:54:51] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3136 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:54:51] [W] [TRT] Tactic Device request: 3136MB Available: 2452MB. Device memory is insufficient to use tactic. +[12/27/2023-21:54:51] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3136 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:54:52] [W] [TRT] Tactic Device request: 4711MB Available: 2453MB. Device memory is insufficient to use tactic. +[12/27/2023-21:54:52] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:54:52] [W] [TRT] Tactic Device request: 4711MB Available: 2453MB. Device memory is insufficient to use tactic. +[12/27/2023-21:54:52] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4711 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:54:52] [W] [TRT] Tactic Device request: 4711MB Available: 2453MB. Device memory is insufficient to use tactic. +[12/27/2023-21:54:52] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/27/2023-21:54:54] [W] [TRT] Tactic Device request: 4701MB Available: 2453MB. Device memory is insufficient to use tactic. +[12/27/2023-21:54:54] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4701 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:54:54] [W] [TRT] Tactic Device request: 4701MB Available: 2453MB. Device memory is insufficient to use tactic. +[12/27/2023-21:54:54] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 4701 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:55:10] [W] [TRT] Tactic Device request: 3152MB Available: 2452MB. Device memory is insufficient to use tactic. +[12/27/2023-21:55:10] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3152 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:55:10] [W] [TRT] Tactic Device request: 3152MB Available: 2452MB. Device memory is insufficient to use tactic. +[12/27/2023-21:55:10] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3152 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:55:10] [W] [TRT] Tactic Device request: 3152MB Available: 2452MB. Device memory is insufficient to use tactic. +[12/27/2023-21:55:10] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3152 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:55:11] [W] [TRT] Tactic Device request: 3148MB Available: 2453MB. Device memory is insufficient to use tactic. 
+[12/27/2023-21:55:11] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3148 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-21:55:11] [W] [TRT] Tactic Device request: 3148MB Available: 2453MB. Device memory is insufficient to use tactic. +[12/27/2023-21:55:11] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3148 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:01:01] [W] [TRT] Tactic Device request: 3144MB Available: 2902MB. Device memory is insufficient to use tactic. +[12/27/2023-22:01:01] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3144 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:01:01] [W] [TRT] Tactic Device request: 3144MB Available: 2901MB. Device memory is insufficient to use tactic. +[12/27/2023-22:01:01] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3144 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:01:01] [W] [TRT] Tactic Device request: 3144MB Available: 2901MB. Device memory is insufficient to use tactic. +[12/27/2023-22:01:01] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3144 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:01:03] [W] [TRT] Tactic Device request: 3140MB Available: 2902MB. Device memory is insufficient to use tactic. +[12/27/2023-22:01:03] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3140 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/27/2023-22:01:03] [W] [TRT] Tactic Device request: 3140MB Available: 2902MB. Device memory is insufficient to use tactic. +[12/27/2023-22:01:03] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3140 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:01:03] [W] [TRT] Tactic Device request: 7056MB Available: 2908MB. Device memory is insufficient to use tactic. +[12/27/2023-22:01:03] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:01:03] [W] [TRT] Tactic Device request: 7056MB Available: 2908MB. Device memory is insufficient to use tactic. +[12/27/2023-22:01:04] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 7056 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:01:04] [W] [TRT] Tactic Device request: 7056MB Available: 2908MB. Device memory is insufficient to use tactic. +[12/27/2023-22:01:04] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:01:05] [W] [TRT] Tactic Device request: 7050MB Available: 2907MB. Device memory is insufficient to use tactic. +[12/27/2023-22:01:06] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7050 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:01:06] [W] [TRT] Tactic Device request: 7050MB Available: 2907MB. Device memory is insufficient to use tactic. 
+[12/27/2023-22:01:06] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 7050 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:01:24] [W] [TRT] Tactic Device request: 2392MB Available: 2325MB. Device memory is insufficient to use tactic. +[12/27/2023-22:01:24] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2392 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:01:24] [W] [TRT] Tactic Device request: 2392MB Available: 2325MB. Device memory is insufficient to use tactic. +[12/27/2023-22:01:24] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:01:25] [W] [TRT] Tactic Device request: 2390MB Available: 2324MB. Device memory is insufficient to use tactic. +[12/27/2023-22:01:25] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2390 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:01:25] [W] [TRT] Tactic Device request: 2390MB Available: 2324MB. Device memory is insufficient to use tactic. +[12/27/2023-22:01:25] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2390 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:09:35] [W] [TRT] Tactic Device request: 2385MB Available: 2159MB. Device memory is insufficient to use tactic. +[12/27/2023-22:09:35] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2385 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/27/2023-22:09:35] [W] [TRT] Tactic Device request: 2385MB Available: 2159MB. Device memory is insufficient to use tactic. +[12/27/2023-22:09:35] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2385 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:09:35] [W] [TRT] Tactic Device request: 2385MB Available: 2159MB. Device memory is insufficient to use tactic. +[12/27/2023-22:09:35] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2385 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:09:36] [W] [TRT] Tactic Device request: 2384MB Available: 2159MB. Device memory is insufficient to use tactic. +[12/27/2023-22:09:36] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2384 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:09:36] [W] [TRT] Tactic Device request: 2384MB Available: 2159MB. Device memory is insufficient to use tactic. +[12/27/2023-22:09:36] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2384 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:09:37] [W] [TRT] Tactic Device request: 2394MB Available: 2159MB. Device memory is insufficient to use tactic. +[12/27/2023-22:09:37] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:09:37] [W] [TRT] Tactic Device request: 2394MB Available: 2159MB. Device memory is insufficient to use tactic. 
+[12/27/2023-22:09:37] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2394 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:09:37] [W] [TRT] Tactic Device request: 2394MB Available: 2159MB. Device memory is insufficient to use tactic. +[12/27/2023-22:09:37] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:09:38] [W] [TRT] Tactic Device request: 2391MB Available: 2159MB. Device memory is insufficient to use tactic. +[12/27/2023-22:09:38] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2391 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:09:38] [W] [TRT] Tactic Device request: 2391MB Available: 2159MB. Device memory is insufficient to use tactic. +[12/27/2023-22:09:38] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2391 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:09:51] [W] [TRT] Tactic Device request: 2457MB Available: 2131MB. Device memory is insufficient to use tactic. +[12/27/2023-22:09:51] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:09:51] [W] [TRT] Tactic Device request: 2457MB Available: 2131MB. Device memory is insufficient to use tactic. +[12/27/2023-22:09:51] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2457 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/27/2023-22:09:51] [W] [TRT] Tactic Device request: 2457MB Available: 2131MB. Device memory is insufficient to use tactic. +[12/27/2023-22:09:51] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:09:52] [W] [TRT] Tactic Device request: 2456MB Available: 2130MB. Device memory is insufficient to use tactic. +[12/27/2023-22:09:52] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2456 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:09:52] [W] [TRT] Tactic Device request: 2456MB Available: 2130MB. Device memory is insufficient to use tactic. +[12/27/2023-22:09:52] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2456 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:14:38] [W] [TRT] Tactic Device request: 2454MB Available: 2315MB. Device memory is insufficient to use tactic. +[12/27/2023-22:14:38] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2454 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:14:38] [W] [TRT] Tactic Device request: 2454MB Available: 2315MB. Device memory is insufficient to use tactic. +[12/27/2023-22:14:38] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2454 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:14:39] [W] [TRT] Tactic Device request: 2453MB Available: 2313MB. Device memory is insufficient to use tactic. 
+[12/27/2023-22:14:39] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2453 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:14:39] [W] [TRT] Tactic Device request: 2453MB Available: 2313MB. Device memory is insufficient to use tactic. +[12/27/2023-22:14:39] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 2453 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:14:59] [W] [TRT] Tactic Device request: 3587MB Available: 2312MB. Device memory is insufficient to use tactic. +[12/27/2023-22:14:59] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:14:59] [W] [TRT] Tactic Device request: 3587MB Available: 2312MB. Device memory is insufficient to use tactic. +[12/27/2023-22:14:59] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3587 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:15:00] [W] [TRT] Tactic Device request: 3587MB Available: 2312MB. Device memory is insufficient to use tactic. +[12/27/2023-22:15:00] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:15:01] [W] [TRT] Tactic Device request: 3585MB Available: 2311MB. Device memory is insufficient to use tactic. +[12/27/2023-22:15:01] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3585 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/27/2023-22:15:01] [W] [TRT] Tactic Device request: 3585MB Available: 2311MB. Device memory is insufficient to use tactic. +[12/27/2023-22:15:01] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3585 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:20:00] [W] [TRT] Tactic Device request: 3556MB Available: 2213MB. Device memory is insufficient to use tactic. +[12/27/2023-22:20:00] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:20:00] [W] [TRT] Tactic Device request: 3556MB Available: 2213MB. Device memory is insufficient to use tactic. +[12/27/2023-22:20:00] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3556 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:20:00] [W] [TRT] Tactic Device request: 3556MB Available: 2212MB. Device memory is insufficient to use tactic. +[12/27/2023-22:20:00] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:20:01] [W] [TRT] Tactic Device request: 3551MB Available: 2212MB. Device memory is insufficient to use tactic. +[12/27/2023-22:20:01] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3551 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:20:01] [W] [TRT] Tactic Device request: 3551MB Available: 2212MB. Device memory is insufficient to use tactic. 
+[12/27/2023-22:20:01] [W] [TRT] Skipping tactic 7 due to insufficient memory on requested size of 3551 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:29:49] [W] [TRT] Tactic Device request: 2126MB Available: 2084MB. Device memory is insufficient to use tactic. +[12/27/2023-22:29:49] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2126 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:29:50] [W] [TRT] Tactic Device request: 2126MB Available: 2084MB. Device memory is insufficient to use tactic. +[12/27/2023-22:29:50] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2126 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:29:50] [W] [TRT] Tactic Device request: 2126MB Available: 2084MB. Device memory is insufficient to use tactic. +[12/27/2023-22:29:50] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2126 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:29:53] [W] [TRT] Tactic Device request: 2124MB Available: 2085MB. Device memory is insufficient to use tactic. +[12/27/2023-22:29:53] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2124 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:29:53] [W] [TRT] Tactic Device request: 2124MB Available: 2083MB. Device memory is insufficient to use tactic. +[12/27/2023-22:29:53] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2124 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/27/2023-22:29:55] [W] [TRT] Tactic Device request: 2125MB Available: 2082MB. Device memory is insufficient to use tactic. +[12/27/2023-22:29:55] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2125 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:29:55] [W] [TRT] Tactic Device request: 2125MB Available: 2081MB. Device memory is insufficient to use tactic. +[12/27/2023-22:29:55] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2125 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:29:56] [W] [TRT] Tactic Device request: 2125MB Available: 2081MB. Device memory is insufficient to use tactic. +[12/27/2023-22:29:56] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2125 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:29:58] [W] [TRT] Tactic Device request: 2124MB Available: 2088MB. Device memory is insufficient to use tactic. +[12/27/2023-22:29:58] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2124 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:29:59] [W] [TRT] Tactic Device request: 2124MB Available: 2087MB. Device memory is insufficient to use tactic. +[12/27/2023-22:29:59] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2124 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/27/2023-22:35:59] [W] [TRT] TensorRT encountered issues when converting weights between types and that could affect accuracy. 
+[12/27/2023-22:35:59] [W] [TRT] If this is not the desired behavior, please modify the weights or retrain with regularization to adjust the magnitude of the weights. +[12/27/2023-22:35:59] [W] [TRT] Check verbose logs for the list of affected weights. +[12/27/2023-22:35:59] [W] [TRT] - 96 weights are affected by this issue: Detected subnormal FP16 values. +[12/27/2023-22:35:59] [W] [TRT] - 17 weights are affected by this issue: Detected values less than smallest positive FP16 subnormal value and converted them to the FP16 minimum subnormalized value. +[12/27/2023-22:36:16] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. +[12/27/2023-22:36:16] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/27/2023-22:36:16] [W] * GPU compute time is unstable, with coefficient of variance = 3.51322%. +[12/27/2023-22:36:16] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. 
diff --git a/yolo_nas_pose_s_fp32.onnx.fp16.engine.log b/yolo_nas_pose_s_fp32.onnx.fp16.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..c4e29f7e047c978f6ce2bcdb789b5664d6580be7 --- /dev/null +++ b/yolo_nas_pose_s_fp32.onnx.fp16.engine.log @@ -0,0 +1,298 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_fp32.onnx --fp16 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_fp32.onnx.fp16.engine +[12/27/2023-21:47:53] [I] === Model Options === +[12/27/2023-21:47:53] [I] Format: ONNX +[12/27/2023-21:47:53] [I] Model: yolo_nas_pose_s_fp32.onnx +[12/27/2023-21:47:53] [I] Output: +[12/27/2023-21:47:53] [I] === Build Options === +[12/27/2023-21:47:53] [I] Max batch: explicit batch +[12/27/2023-21:47:53] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/27/2023-21:47:53] [I] minTiming: 1 +[12/27/2023-21:47:53] [I] avgTiming: 8 +[12/27/2023-21:47:53] [I] Precision: FP32+FP16 +[12/27/2023-21:47:53] [I] LayerPrecisions: +[12/27/2023-21:47:53] [I] Calibration: +[12/27/2023-21:47:53] [I] Refit: Disabled +[12/27/2023-21:47:53] [I] Sparsity: Disabled +[12/27/2023-21:47:53] [I] Safe mode: Disabled +[12/27/2023-21:47:53] [I] DirectIO mode: Disabled +[12/27/2023-21:47:53] [I] Restricted mode: Disabled +[12/27/2023-21:47:53] [I] Build only: Disabled +[12/27/2023-21:47:53] [I] Save engine: yolo_nas_pose_s_fp32.onnx.fp16.engine +[12/27/2023-21:47:53] [I] Load engine: +[12/27/2023-21:47:53] [I] Profiling verbosity: 0 +[12/27/2023-21:47:53] [I] Tactic sources: Using default tactic sources +[12/27/2023-21:47:53] [I] timingCacheMode: local +[12/27/2023-21:47:53] [I] timingCacheFile: +[12/27/2023-21:47:53] [I] Heuristic: Disabled +[12/27/2023-21:47:53] [I] Preview Features: Use default preview flags. 
+[12/27/2023-21:47:53] [I] Input(s)s format: fp32:CHW +[12/27/2023-21:47:53] [I] Output(s)s format: fp32:CHW +[12/27/2023-21:47:53] [I] Input build shapes: model +[12/27/2023-21:47:53] [I] Input calibration shapes: model +[12/27/2023-21:47:53] [I] === System Options === +[12/27/2023-21:47:53] [I] Device: 0 +[12/27/2023-21:47:53] [I] DLACore: +[12/27/2023-21:47:53] [I] Plugins: +[12/27/2023-21:47:53] [I] === Inference Options === +[12/27/2023-21:47:53] [I] Batch: Explicit +[12/27/2023-21:47:53] [I] Input inference shapes: model +[12/27/2023-21:47:53] [I] Iterations: 10 +[12/27/2023-21:47:53] [I] Duration: 15s (+ 200ms warm up) +[12/27/2023-21:47:53] [I] Sleep time: 0ms +[12/27/2023-21:47:53] [I] Idle time: 0ms +[12/27/2023-21:47:53] [I] Streams: 1 +[12/27/2023-21:47:53] [I] ExposeDMA: Disabled +[12/27/2023-21:47:53] [I] Data transfers: Enabled +[12/27/2023-21:47:53] [I] Spin-wait: Disabled +[12/27/2023-21:47:53] [I] Multithreading: Disabled +[12/27/2023-21:47:53] [I] CUDA Graph: Disabled +[12/27/2023-21:47:53] [I] Separate profiling: Disabled +[12/27/2023-21:47:53] [I] Time Deserialize: Disabled +[12/27/2023-21:47:53] [I] Time Refit: Disabled +[12/27/2023-21:47:53] [I] NVTX verbosity: 0 +[12/27/2023-21:47:53] [I] Persistent Cache Ratio: 0 +[12/27/2023-21:47:53] [I] Inputs: +[12/27/2023-21:47:53] [I] === Reporting Options === +[12/27/2023-21:47:53] [I] Verbose: Disabled +[12/27/2023-21:47:53] [I] Averages: 100 inferences +[12/27/2023-21:47:53] [I] Percentiles: 90,95,99 +[12/27/2023-21:47:53] [I] Dump refittable layers:Disabled +[12/27/2023-21:47:53] [I] Dump output: Disabled +[12/27/2023-21:47:53] [I] Profile: Disabled +[12/27/2023-21:47:53] [I] Export timing to JSON file: +[12/27/2023-21:47:53] [I] Export output to JSON file: +[12/27/2023-21:47:53] [I] Export profile to JSON file: +[12/27/2023-21:47:53] [I] +[12/27/2023-21:47:53] [I] === Device Information === +[12/27/2023-21:47:53] [I] Selected Device: Orin +[12/27/2023-21:47:53] [I] Compute Capability: 8.7 
+[12/27/2023-21:47:53] [I] SMs: 8 +[12/27/2023-21:47:53] [I] Compute Clock Rate: 0.624 GHz +[12/27/2023-21:47:53] [I] Device Global Memory: 7471 MiB +[12/27/2023-21:47:53] [I] Shared Memory per SM: 164 KiB +[12/27/2023-21:47:53] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/27/2023-21:47:53] [I] Memory Clock Rate: 0.624 GHz +[12/27/2023-21:47:53] [I] +[12/27/2023-21:47:53] [I] TensorRT version: 8.5.2 +[12/27/2023-21:47:54] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 2971 (MiB) +[12/27/2023-21:47:57] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +286, now: CPU 574, GPU 3278 (MiB) +[12/27/2023-21:47:57] [I] Start parsing network model +[12/27/2023-21:47:58] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-21:47:58] [I] [TRT] Input filename: yolo_nas_pose_s_fp32.onnx +[12/27/2023-21:47:58] [I] [TRT] ONNX IR version: 0.0.8 +[12/27/2023-21:47:58] [I] [TRT] Opset version: 17 +[12/27/2023-21:47:58] [I] [TRT] Producer name: pytorch +[12/27/2023-21:47:58] [I] [TRT] Producer version: 2.1.2 +[12/27/2023-21:47:58] [I] [TRT] Domain: +[12/27/2023-21:47:58] [I] [TRT] Model version: 0 +[12/27/2023-21:47:58] [I] [TRT] Doc string: +[12/27/2023-21:47:58] [I] [TRT] ---------------------------------------------------------------- +[12/27/2023-21:47:59] [I] Finish parsing network model +[12/27/2023-21:47:59] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/27/2023-21:47:59] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 391) [Constant] +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 392) [Constant] +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 393) 
[Constant] +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stem/conv/rbr_reparam/Conv + /model/backbone/stem/conv/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/downsample/rbr_reparam/Conv + /model/backbone/stage1/downsample/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv2/conv/Conv + /model/backbone/stage1/blocks/conv2/act/Relu || /model/backbone/stage1/blocks/conv1/conv/Conv + /model/backbone/stage1/blocks/conv1/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 15) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 23) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] COPY: 
/model/backbone/stage1/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv3/conv/Conv + /model/backbone/stage1/blocks/conv3/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_skip2/conv/Conv + /model/neck/neck2/reduce_skip2/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/downsample/rbr_reparam/Conv + /model/backbone/stage2/downsample/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/downsample/conv/Conv + /model/neck/neck2/downsample/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv2/conv/Conv + /model/backbone/stage2/blocks/conv2/act/Relu || /model/backbone/stage2/blocks/conv1/conv/Conv + /model/backbone/stage2/blocks/conv1/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 44) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + 
/model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 52) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 60) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv3/conv/Conv + /model/backbone/stage2/blocks/conv3/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip2/conv/Conv + /model/neck/neck1/reduce_skip2/act/Relu || /model/neck/neck2/reduce_skip1/conv/Conv + /model/neck/neck2/reduce_skip1/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/downsample/rbr_reparam/Conv + /model/backbone/stage3/downsample/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/downsample/conv/Conv + /model/neck/neck1/downsample/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv2/conv/Conv + /model/backbone/stage3/blocks/conv2/act/Relu || 
/model/backbone/stage3/blocks/conv1/conv/Conv + /model/backbone/stage3/blocks/conv1/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 83) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 91) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] POINTWISE: 
PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 99) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 107) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.4.alpha + (Unnamed Layer* 115) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Add) +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv3/conv/Conv + /model/backbone/stage3/blocks/conv3/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip1/conv/Conv + /model/neck/neck1/reduce_skip1/act/Relu 
+[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/downsample/rbr_reparam/Conv + /model/backbone/stage4/downsample/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv2/conv/Conv + /model/backbone/stage4/blocks/conv2/act/Relu || /model/backbone/stage4/blocks/conv1/conv/Conv + /model/backbone/stage4/blocks/conv1/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 134) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 142) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/backbone/stage4/blocks/conv3/conv/Conv + /model/backbone/stage4/blocks/conv3/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv1/conv/Conv + /model/backbone/context_module/cv1/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/cv1/act/Relu_output_0 copy +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv2/conv/Conv + /model/backbone/context_module/cv2/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/conv/conv/Conv + /model/neck/neck1/conv/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck1/upsample/ConvTranspose +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_after_concat/conv/Conv + /model/neck/neck1/reduce_after_concat/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv2/conv/Conv + /model/neck/neck1/blocks/conv2/act/Relu || /model/neck/neck1/blocks/conv1/conv/Conv + /model/neck/neck1/blocks/conv1/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 171) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, 
/model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 179) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv3/conv/Conv + /model/neck/neck1/blocks/conv3/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/conv/conv/Conv + /model/neck/neck2/conv/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck2/upsample/ConvTranspose +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/reduce_skip1/act/Relu_output_0 copy +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_after_concat/conv/Conv + /model/neck/neck2/reduce_after_concat/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv2/conv/Conv + /model/neck/neck2/blocks/conv2/act/Relu || /model/neck/neck2/blocks/conv1/conv/Conv + /model/neck/neck2/blocks/conv1/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv 
+ /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 200) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 208) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv3/conv/Conv + /model/neck/neck2/blocks/conv3/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/bbox_stem/seq/conv/Conv + /model/heads/head1/bbox_stem/seq/act/Relu || /model/heads/head1/pose_stem/seq/conv/Conv + /model/heads/head1/pose_stem/seq/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/conv/conv/Conv + /model/neck/neck3/conv/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head1/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head1/cls_convs/cls_convs.0/seq/act/Relu 
+[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.0/seq/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv2/conv/Conv + /model/neck/neck3/blocks/conv2/act/Relu || /model/neck/neck3/blocks/conv1/conv/Conv + /model/neck/neck3/blocks/conv1/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/cls_pred/Conv +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_pred/Conv +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.1/seq/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_pred/Conv +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 239) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 262) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv3/conv/Conv + /model/neck/neck3/blocks/conv3/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_stem/seq/conv/Conv + /model/heads/head2/pose_stem/seq/act/Relu || /model/heads/head2/bbox_stem/seq/conv/Conv + /model/heads/head2/bbox_stem/seq/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/conv/conv/Conv + /model/neck/neck4/conv/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head2/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head2/cls_convs/cls_convs.0/seq/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.0/seq/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv2/conv/Conv + /model/neck/neck4/blocks/conv2/act/Relu || /model/neck/neck4/blocks/conv1/conv/Conv + /model/neck/neck4/blocks/conv1/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/cls_pred/Conv +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_pred/Conv +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv + 
/model/heads/head2/pose_convs/pose_convs.1/seq/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_pred/Conv +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 293) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 316) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/conv2/act/Relu_output_0 copy +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv3/conv/Conv + /model/neck/neck4/blocks/conv3/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/bbox_stem/seq/conv/Conv 
+ /model/heads/head3/bbox_stem/seq/act/Relu || /model/heads/head3/pose_stem/seq/conv/Conv + /model/heads/head3/pose_stem/seq/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head3/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head3/cls_convs/cls_convs.0/seq/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.0/seq/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/cls_pred/Conv +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_pred/Conv +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.1/seq/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.2/seq/act/Relu +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_pred/Conv +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice_1.../post_process/Reshape_2]} +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] NMS: batched_nms_26 +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 395) [NMS]_1_output[DevicetoShapeHostCopy] +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/27/2023-21:47:59] [I] [TRT] [GpuLayer] 
TRAIN_STATION: [trainStation3] +[12/27/2023-21:48:00] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +457, now: CPU 1179, GPU 3799 (MiB) +[12/27/2023-21:48:00] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +82, GPU +129, now: CPU 1261, GPU 3928 (MiB) +[12/27/2023-21:48:00] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored. +[12/27/2023-22:35:46] [I] [TRT] Total Activation Memory: 7948043264 +[12/27/2023-22:35:46] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/27/2023-22:35:55] [I] [TRT] Total Host Persistent Memory: 313696 +[12/27/2023-22:35:55] [I] [TRT] Total Device Persistent Memory: 139264 +[12/27/2023-22:35:55] [I] [TRT] Total Scratch Memory: 134217728 +[12/27/2023-22:35:55] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 39 MiB, GPU 2458 MiB +[12/27/2023-22:35:55] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 156 steps to complete. +[12/27/2023-22:35:55] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 64.046ms to assign 16 blocks to 156 nodes requiring 145578496 bytes. +[12/27/2023-22:35:55] [I] [TRT] Total Activation Memory: 145578496 +[12/27/2023-22:35:59] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1637, GPU 5552 (MiB) +[12/27/2023-22:35:59] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +30, GPU +32, now: CPU 30, GPU 32 (MiB) +[12/27/2023-22:36:00] [I] Engine built in 2886.7 sec. +[12/27/2023-22:36:00] [I] [TRT] Loaded engine size: 31 MiB +[12/27/2023-22:36:01] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1272, GPU 4953 (MiB) +[12/27/2023-22:36:01] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +29, now: CPU 0, GPU 29 (MiB) +[12/27/2023-22:36:01] [I] Engine deserialized in 0.247625 sec. 
+[12/27/2023-22:36:01] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1272, GPU 4953 (MiB) +[12/27/2023-22:36:01] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +139, now: CPU 0, GPU 168 (MiB) +[12/27/2023-22:36:01] [I] Setting persistentCacheLimit to 0 bytes. +[12/27/2023-22:36:01] [I] Using random values for input onnx::Cast_0 +[12/27/2023-22:36:01] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/27/2023-22:36:01] [I] Using random values for output graph2_flat_predictions +[12/27/2023-22:36:01] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/27/2023-22:36:01] [I] Starting inference +[12/27/2023-22:36:16] [I] Warmup completed 12 queries over 200 ms +[12/27/2023-22:36:16] [I] Timing trace has 983 queries over 15.0224 s +[12/27/2023-22:36:16] [I] +[12/27/2023-22:36:16] [I] === Trace details === +[12/27/2023-22:36:16] [I] Trace averages of 100 runs: +[12/27/2023-22:36:16] [I] Average on 100 runs - GPU latency: 14.9352 ms - Host latency: 15.0469 ms (enqueue 15.0055 ms) +[12/27/2023-22:36:16] [I] Average on 100 runs - GPU latency: 14.9493 ms - Host latency: 15.0623 ms (enqueue 15.0171 ms) +[12/27/2023-22:36:16] [I] Average on 100 runs - GPU latency: 15.2153 ms - Host latency: 15.333 ms (enqueue 15.2708 ms) +[12/27/2023-22:36:16] [I] Average on 100 runs - GPU latency: 15.3467 ms - Host latency: 15.465 ms (enqueue 15.3946 ms) +[12/27/2023-22:36:16] [I] Average on 100 runs - GPU latency: 14.9039 ms - Host latency: 15.0135 ms (enqueue 14.9702 ms) +[12/27/2023-22:36:16] [I] Average on 100 runs - GPU latency: 15.0013 ms - Host latency: 15.1112 ms (enqueue 15.0729 ms) +[12/27/2023-22:36:16] [I] Average on 100 runs - GPU latency: 15.1277 ms - Host latency: 15.2458 ms (enqueue 15.1859 ms) +[12/27/2023-22:36:16] [I] Average on 100 runs - GPU latency: 15.3136 ms - Host latency: 15.4334 ms (enqueue 15.3714 ms) +[12/27/2023-22:36:16] [I] Average on 100 
runs - GPU latency: 15.4663 ms - Host latency: 15.5831 ms (enqueue 15.5286 ms) +[12/27/2023-22:36:16] [I] +[12/27/2023-22:36:16] [I] === Performance summary === +[12/27/2023-22:36:16] [I] Throughput: 65.4357 qps +[12/27/2023-22:36:16] [I] Latency: min = 14.2769 ms, max = 19.2117 ms, mean = 15.2511 ms, median = 15.1333 ms, percentile(90%) = 16.042 ms, percentile(95%) = 16.2092 ms, percentile(99%) = 17.0708 ms +[12/27/2023-22:36:16] [I] Enqueue Time: min = 14.2293 ms, max = 19.1707 ms, mean = 15.198 ms, median = 15.0898 ms, percentile(90%) = 15.9346 ms, percentile(95%) = 16.0596 ms, percentile(99%) = 16.7104 ms +[12/27/2023-22:36:16] [I] H2D Latency: min = 0.0800781 ms, max = 0.125977 ms, mean = 0.0962097 ms, median = 0.0966797 ms, percentile(90%) = 0.0993652 ms, percentile(95%) = 0.0996094 ms, percentile(99%) = 0.110352 ms +[12/27/2023-22:36:16] [I] GPU Compute Time: min = 14.1544 ms, max = 19.0962 ms, mean = 15.1363 ms, median = 15.0176 ms, percentile(90%) = 15.9209 ms, percentile(95%) = 16.1016 ms, percentile(99%) = 16.9819 ms +[12/27/2023-22:36:16] [I] D2H Latency: min = 0.00292969 ms, max = 0.0537109 ms, mean = 0.0186334 ms, median = 0.0205078 ms, percentile(90%) = 0.0263672 ms, percentile(95%) = 0.0283203 ms, percentile(99%) = 0.0332031 ms +[12/27/2023-22:36:16] [I] Total Host Walltime: 15.0224 s +[12/27/2023-22:36:16] [I] Total GPU Compute Time: 14.879 s +[12/27/2023-22:36:16] [I] Explanations of the performance metrics are printed in the verbose logs. 
+[12/27/2023-22:36:16] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_fp32.onnx --fp16 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_fp32.onnx.fp16.engine diff --git a/yolo_nas_pose_s_fp32.onnx.int8.engine b/yolo_nas_pose_s_fp32.onnx.int8.engine new file mode 100644 index 0000000000000000000000000000000000000000..82e30a099f64ff094f634e12ca0a599a5d231eb5 --- /dev/null +++ b/yolo_nas_pose_s_fp32.onnx.int8.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00495785abd2b7cc73a782844279735de3541225e84d4b1516c3c4d439f1e3f1 +size 17760505 diff --git a/yolo_nas_pose_s_fp32.onnx.int8.engine.err b/yolo_nas_pose_s_fp32.onnx.int8.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..6ba854d3919e9a4c41a24060747c74cdd5c569ad --- /dev/null +++ b/yolo_nas_pose_s_fp32.onnx.int8.engine.err @@ -0,0 +1,166 @@ +[12/28/2023-00:03:40] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/28/2023-00:03:40] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-00:03:41] [W] [TRT] Calibrator is not being used. Users must provide dynamic range for all tensors that are not Int32 or Bool. +[12/28/2023-00:06:40] [W] [TRT] Tactic Device request: 3160MB Available: 3073MB. Device memory is insufficient to use tactic. +[12/28/2023-00:06:40] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3160 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:06:41] [W] [TRT] Tactic Device request: 3160MB Available: 3076MB. Device memory is insufficient to use tactic. +[12/28/2023-00:06:41] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3160 detected for tactic 0x000000000000003c. 
+Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:06:41] [W] [TRT] Tactic Device request: 3160MB Available: 3076MB. Device memory is insufficient to use tactic. +[12/28/2023-00:06:41] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3160 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:10:28] [W] [TRT] Tactic Device request: 3143MB Available: 2841MB. Device memory is insufficient to use tactic. +[12/28/2023-00:10:28] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3143 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:10:28] [W] [TRT] Tactic Device request: 3143MB Available: 2841MB. Device memory is insufficient to use tactic. +[12/28/2023-00:10:28] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3143 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:10:28] [W] [TRT] Tactic Device request: 3143MB Available: 2841MB. Device memory is insufficient to use tactic. +[12/28/2023-00:10:28] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3143 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:10:31] [W] [TRT] Tactic Device request: 4711MB Available: 2850MB. Device memory is insufficient to use tactic. +[12/28/2023-00:10:31] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:10:31] [W] [TRT] Tactic Device request: 4711MB Available: 2850MB. Device memory is insufficient to use tactic. 
+[12/28/2023-00:10:31] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 4711 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:10:31] [W] [TRT] Tactic Device request: 4711MB Available: 2850MB. Device memory is insufficient to use tactic. +[12/28/2023-00:10:31] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 4711 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:10:46] [W] [TRT] Tactic Device request: 3152MB Available: 2848MB. Device memory is insufficient to use tactic. +[12/28/2023-00:10:46] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3152 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:10:46] [W] [TRT] Tactic Device request: 3152MB Available: 2848MB. Device memory is insufficient to use tactic. +[12/28/2023-00:10:46] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3152 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:10:46] [W] [TRT] Tactic Device request: 3152MB Available: 2848MB. Device memory is insufficient to use tactic. +[12/28/2023-00:10:46] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3152 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:15:43] [W] [TRT] Tactic Device request: 3144MB Available: 2724MB. Device memory is insufficient to use tactic. +[12/28/2023-00:15:44] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3144 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-00:15:44] [W] [TRT] Tactic Device request: 3144MB Available: 2724MB. Device memory is insufficient to use tactic. +[12/28/2023-00:15:44] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3144 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:15:44] [W] [TRT] Tactic Device request: 3144MB Available: 2724MB. Device memory is insufficient to use tactic. +[12/28/2023-00:15:44] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3144 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:15:46] [W] [TRT] Tactic Device request: 7056MB Available: 2726MB. Device memory is insufficient to use tactic. +[12/28/2023-00:15:46] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:15:46] [W] [TRT] Tactic Device request: 7056MB Available: 2726MB. Device memory is insufficient to use tactic. +[12/28/2023-00:15:46] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 7056 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:15:46] [W] [TRT] Tactic Device request: 7056MB Available: 2726MB. Device memory is insufficient to use tactic. +[12/28/2023-00:15:46] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 7056 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:23:28] [W] [TRT] Tactic Device request: 2385MB Available: 2290MB. Device memory is insufficient to use tactic. 
+[12/28/2023-00:23:28] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2385 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:23:28] [W] [TRT] Tactic Device request: 2385MB Available: 2289MB. Device memory is insufficient to use tactic. +[12/28/2023-00:23:28] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2385 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:23:28] [W] [TRT] Tactic Device request: 2385MB Available: 2289MB. Device memory is insufficient to use tactic. +[12/28/2023-00:23:28] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2385 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:23:30] [W] [TRT] Tactic Device request: 2394MB Available: 2289MB. Device memory is insufficient to use tactic. +[12/28/2023-00:23:30] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:23:30] [W] [TRT] Tactic Device request: 2394MB Available: 2289MB. Device memory is insufficient to use tactic. +[12/28/2023-00:23:30] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2394 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:23:30] [W] [TRT] Tactic Device request: 2394MB Available: 2289MB. Device memory is insufficient to use tactic. +[12/28/2023-00:23:30] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2394 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-00:23:31] [W] [TRT] Tactic Device request: 2392MB Available: 2289MB. Device memory is insufficient to use tactic. +[12/28/2023-00:23:31] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:23:31] [W] [TRT] Tactic Device request: 2392MB Available: 2289MB. Device memory is insufficient to use tactic. +[12/28/2023-00:23:31] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2392 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:23:31] [W] [TRT] Tactic Device request: 2392MB Available: 2289MB. Device memory is insufficient to use tactic. +[12/28/2023-00:23:31] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2392 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:23:43] [W] [TRT] Tactic Device request: 2457MB Available: 2266MB. Device memory is insufficient to use tactic. +[12/28/2023-00:23:43] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:23:43] [W] [TRT] Tactic Device request: 2457MB Available: 2266MB. Device memory is insufficient to use tactic. +[12/28/2023-00:23:43] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2457 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:23:43] [W] [TRT] Tactic Device request: 2457MB Available: 2266MB. Device memory is insufficient to use tactic. 
+[12/28/2023-00:23:44] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:27:35] [W] [TRT] Tactic Device request: 2454MB Available: 2202MB. Device memory is insufficient to use tactic. +[12/28/2023-00:27:35] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2454 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:27:35] [W] [TRT] Tactic Device request: 2454MB Available: 2202MB. Device memory is insufficient to use tactic. +[12/28/2023-00:27:35] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2454 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:27:35] [W] [TRT] Tactic Device request: 2454MB Available: 2202MB. Device memory is insufficient to use tactic. +[12/28/2023-00:27:35] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2454 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:27:37] [W] [TRT] Tactic Device request: 2457MB Available: 2203MB. Device memory is insufficient to use tactic. +[12/28/2023-00:27:37] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:27:38] [W] [TRT] Tactic Device request: 2457MB Available: 2203MB. Device memory is insufficient to use tactic. +[12/28/2023-00:27:38] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2457 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-00:27:38] [W] [TRT] Tactic Device request: 2457MB Available: 2203MB. Device memory is insufficient to use tactic. +[12/28/2023-00:27:38] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2457 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:27:57] [W] [TRT] Tactic Device request: 3587MB Available: 2188MB. Device memory is insufficient to use tactic. +[12/28/2023-00:27:57] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:27:57] [W] [TRT] Tactic Device request: 3587MB Available: 2188MB. Device memory is insufficient to use tactic. +[12/28/2023-00:27:57] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3587 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:27:57] [W] [TRT] Tactic Device request: 3587MB Available: 2189MB. Device memory is insufficient to use tactic. +[12/28/2023-00:27:57] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3587 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:32:03] [W] [TRT] Tactic Device request: 3556MB Available: 2112MB. Device memory is insufficient to use tactic. +[12/28/2023-00:32:03] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:32:03] [W] [TRT] Tactic Device request: 3556MB Available: 2112MB. Device memory is insufficient to use tactic. 
+[12/28/2023-00:32:03] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 3556 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:32:03] [W] [TRT] Tactic Device request: 3556MB Available: 2112MB. Device memory is insufficient to use tactic. +[12/28/2023-00:32:03] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 3556 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:39:46] [W] [TRT] Tactic Device request: 2385MB Available: 2093MB. Device memory is insufficient to use tactic. +[12/28/2023-00:39:46] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2385 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:39:46] [W] [TRT] Tactic Device request: 2385MB Available: 2093MB. Device memory is insufficient to use tactic. +[12/28/2023-00:39:46] [W] [TRT] Skipping tactic 8 due to insufficient memory on requested size of 2385 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:39:46] [W] [TRT] Tactic Device request: 2385MB Available: 2094MB. Device memory is insufficient to use tactic. +[12/28/2023-00:39:46] [W] [TRT] Skipping tactic 13 due to insufficient memory on requested size of 2385 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:40:07] [W] [TRT] Tactic Device request: 2126MB Available: 2093MB. Device memory is insufficient to use tactic. +[12/28/2023-00:40:07] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2126 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). 
+[12/28/2023-00:40:07] [W] [TRT] Tactic Device request: 2126MB Available: 2093MB. Device memory is insufficient to use tactic. +[12/28/2023-00:40:07] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2126 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:40:11] [W] [TRT] Tactic Device request: 2125MB Available: 2092MB. Device memory is insufficient to use tactic. +[12/28/2023-00:40:11] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2125 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:40:12] [W] [TRT] Tactic Device request: 2125MB Available: 2092MB. Device memory is insufficient to use tactic. +[12/28/2023-00:40:12] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2125 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:40:12] [W] [TRT] Tactic Device request: 2125MB Available: 2093MB. Device memory is insufficient to use tactic. +[12/28/2023-00:40:12] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2125 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:40:22] [W] [TRT] Tactic Device request: 2125MB Available: 2094MB. Device memory is insufficient to use tactic. +[12/28/2023-00:40:22] [W] [TRT] Skipping tactic 3 due to insufficient memory on requested size of 2125 detected for tactic 0x0000000000000004. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:40:23] [W] [TRT] Tactic Device request: 2125MB Available: 2093MB. Device memory is insufficient to use tactic. 
+[12/28/2023-00:40:23] [W] [TRT] Skipping tactic 9 due to insufficient memory on requested size of 2125 detected for tactic 0x000000000000003c. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:40:23] [W] [TRT] Tactic Device request: 2125MB Available: 2094MB. Device memory is insufficient to use tactic. +[12/28/2023-00:40:23] [W] [TRT] Skipping tactic 15 due to insufficient memory on requested size of 2125 detected for tactic 0x0000000000000074. +Try decreasing the workspace size with IBuilderConfig::setMemoryPoolLimit(). +[12/28/2023-00:45:25] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. +[12/28/2023-00:45:25] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/28/2023-00:45:25] [W] * GPU compute time is unstable, with coefficient of variance = 4.17057%. +[12/28/2023-00:45:25] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. 
diff --git a/yolo_nas_pose_s_fp32.onnx.int8.engine.log b/yolo_nas_pose_s_fp32.onnx.int8.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..b6b3183d3e86773c0dcdb73d63708bbf71256656 --- /dev/null +++ b/yolo_nas_pose_s_fp32.onnx.int8.engine.log @@ -0,0 +1,302 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_fp32.onnx --int8 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_fp32.onnx.int8.engine +[12/28/2023-00:03:31] [I] === Model Options === +[12/28/2023-00:03:31] [I] Format: ONNX +[12/28/2023-00:03:31] [I] Model: yolo_nas_pose_s_fp32.onnx +[12/28/2023-00:03:31] [I] Output: +[12/28/2023-00:03:31] [I] === Build Options === +[12/28/2023-00:03:31] [I] Max batch: explicit batch +[12/28/2023-00:03:31] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-00:03:31] [I] minTiming: 1 +[12/28/2023-00:03:31] [I] avgTiming: 8 +[12/28/2023-00:03:31] [I] Precision: FP32+INT8 +[12/28/2023-00:03:31] [I] LayerPrecisions: +[12/28/2023-00:03:31] [I] Calibration: Dynamic +[12/28/2023-00:03:31] [I] Refit: Disabled +[12/28/2023-00:03:31] [I] Sparsity: Disabled +[12/28/2023-00:03:31] [I] Safe mode: Disabled +[12/28/2023-00:03:31] [I] DirectIO mode: Disabled +[12/28/2023-00:03:31] [I] Restricted mode: Disabled +[12/28/2023-00:03:31] [I] Build only: Disabled +[12/28/2023-00:03:31] [I] Save engine: yolo_nas_pose_s_fp32.onnx.int8.engine +[12/28/2023-00:03:31] [I] Load engine: +[12/28/2023-00:03:31] [I] Profiling verbosity: 0 +[12/28/2023-00:03:31] [I] Tactic sources: Using default tactic sources +[12/28/2023-00:03:31] [I] timingCacheMode: local +[12/28/2023-00:03:31] [I] timingCacheFile: +[12/28/2023-00:03:31] [I] Heuristic: Disabled +[12/28/2023-00:03:31] [I] Preview Features: Use default preview flags. 
+[12/28/2023-00:03:31] [I] Input(s)s format: fp32:CHW +[12/28/2023-00:03:31] [I] Output(s)s format: fp32:CHW +[12/28/2023-00:03:31] [I] Input build shapes: model +[12/28/2023-00:03:31] [I] Input calibration shapes: model +[12/28/2023-00:03:31] [I] === System Options === +[12/28/2023-00:03:31] [I] Device: 0 +[12/28/2023-00:03:31] [I] DLACore: +[12/28/2023-00:03:31] [I] Plugins: +[12/28/2023-00:03:31] [I] === Inference Options === +[12/28/2023-00:03:31] [I] Batch: Explicit +[12/28/2023-00:03:31] [I] Input inference shapes: model +[12/28/2023-00:03:31] [I] Iterations: 10 +[12/28/2023-00:03:31] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-00:03:31] [I] Sleep time: 0ms +[12/28/2023-00:03:31] [I] Idle time: 0ms +[12/28/2023-00:03:31] [I] Streams: 1 +[12/28/2023-00:03:31] [I] ExposeDMA: Disabled +[12/28/2023-00:03:31] [I] Data transfers: Enabled +[12/28/2023-00:03:31] [I] Spin-wait: Disabled +[12/28/2023-00:03:31] [I] Multithreading: Disabled +[12/28/2023-00:03:31] [I] CUDA Graph: Disabled +[12/28/2023-00:03:31] [I] Separate profiling: Disabled +[12/28/2023-00:03:31] [I] Time Deserialize: Disabled +[12/28/2023-00:03:31] [I] Time Refit: Disabled +[12/28/2023-00:03:31] [I] NVTX verbosity: 0 +[12/28/2023-00:03:31] [I] Persistent Cache Ratio: 0 +[12/28/2023-00:03:31] [I] Inputs: +[12/28/2023-00:03:31] [I] === Reporting Options === +[12/28/2023-00:03:31] [I] Verbose: Disabled +[12/28/2023-00:03:31] [I] Averages: 100 inferences +[12/28/2023-00:03:31] [I] Percentiles: 90,95,99 +[12/28/2023-00:03:31] [I] Dump refittable layers:Disabled +[12/28/2023-00:03:31] [I] Dump output: Disabled +[12/28/2023-00:03:31] [I] Profile: Disabled +[12/28/2023-00:03:31] [I] Export timing to JSON file: +[12/28/2023-00:03:31] [I] Export output to JSON file: +[12/28/2023-00:03:31] [I] Export profile to JSON file: +[12/28/2023-00:03:31] [I] +[12/28/2023-00:03:31] [I] === Device Information === +[12/28/2023-00:03:31] [I] Selected Device: Orin +[12/28/2023-00:03:31] [I] Compute Capability: 8.7 
+[12/28/2023-00:03:31] [I] SMs: 8 +[12/28/2023-00:03:31] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-00:03:31] [I] Device Global Memory: 7471 MiB +[12/28/2023-00:03:31] [I] Shared Memory per SM: 164 KiB +[12/28/2023-00:03:31] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-00:03:31] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-00:03:31] [I] +[12/28/2023-00:03:31] [I] TensorRT version: 8.5.2 +[12/28/2023-00:03:35] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 3004 (MiB) +[12/28/2023-00:03:39] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +285, now: CPU 574, GPU 3310 (MiB) +[12/28/2023-00:03:39] [I] Start parsing network model +[12/28/2023-00:03:40] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-00:03:40] [I] [TRT] Input filename: yolo_nas_pose_s_fp32.onnx +[12/28/2023-00:03:40] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-00:03:40] [I] [TRT] Opset version: 17 +[12/28/2023-00:03:40] [I] [TRT] Producer name: pytorch +[12/28/2023-00:03:40] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-00:03:40] [I] [TRT] Domain: +[12/28/2023-00:03:40] [I] [TRT] Model version: 0 +[12/28/2023-00:03:40] [I] [TRT] Doc string: +[12/28/2023-00:03:40] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-00:03:41] [I] Finish parsing network model +[12/28/2023-00:03:41] [I] FP32 and INT8 precisions have been specified - more performance might be enabled by additionally specifying --fp16 or --best +[12/28/2023-00:03:41] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/28/2023-00:03:41] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 391) [Constant] 
+[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 392) [Constant] +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 393) [Constant] +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stem/conv/rbr_reparam/Conv + /model/backbone/stem/conv/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/downsample/rbr_reparam/Conv + /model/backbone/stage1/downsample/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv2/conv/Conv + /model/backbone/stage1/blocks/conv2/act/Relu || /model/backbone/stage1/blocks/conv1/conv/Conv + /model/backbone/stage1/blocks/conv1/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 15) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 23) [Shuffle] + 
/model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage1/blocks/conv3/conv/Conv + /model/backbone/stage1/blocks/conv3/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_skip2/conv/Conv + /model/neck/neck2/reduce_skip2/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/downsample/rbr_reparam/Conv + /model/backbone/stage2/downsample/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/downsample/conv/Conv + /model/neck/neck2/downsample/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv2/conv/Conv + /model/backbone/stage2/blocks/conv2/act/Relu || /model/backbone/stage2/blocks/conv1/conv/Conv + /model/backbone/stage2/blocks/conv1/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 44) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-00:03:41] [I] 
[TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 52) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 60) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage2/blocks/conv3/conv/Conv + /model/backbone/stage2/blocks/conv3/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip2/conv/Conv + /model/neck/neck1/reduce_skip2/act/Relu || /model/neck/neck2/reduce_skip1/conv/Conv + /model/neck/neck2/reduce_skip1/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/downsample/rbr_reparam/Conv + /model/backbone/stage3/downsample/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/downsample/conv/Conv + /model/neck/neck1/downsample/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/backbone/stage3/blocks/conv2/conv/Conv + /model/backbone/stage3/blocks/conv2/act/Relu || /model/backbone/stage3/blocks/conv1/conv/Conv + /model/backbone/stage3/blocks/conv1/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 83) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 91) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv + 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 99) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 107) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/Conv + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.4.alpha + (Unnamed Layer* 115) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Add) +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage3/blocks/conv3/conv/Conv + /model/backbone/stage3/blocks/conv3/act/Relu 
+[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_skip1/conv/Conv + /model/neck/neck1/reduce_skip1/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/downsample/rbr_reparam/Conv + /model/backbone/stage4/downsample/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv2/conv/Conv + /model/backbone/stage4/blocks/conv2/act/Relu || /model/backbone/stage4/blocks/conv1/conv/Conv + /model/backbone/stage4/blocks/conv1/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 134) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 142) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] COPY: 
/model/backbone/stage4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/stage4/blocks/conv3/conv/Conv + /model/backbone/stage4/blocks/conv3/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv1/conv/Conv + /model/backbone/context_module/cv1/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/cv1/act/Relu_output_0 copy +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/backbone/context_module/cv2/conv/Conv + /model/backbone/context_module/cv2/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/conv/conv/Conv + /model/neck/neck1/conv/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck1/upsample/ConvTranspose +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/reduce_after_concat/conv/Conv + /model/neck/neck1/reduce_after_concat/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv2/conv/Conv + /model/neck/neck1/blocks/conv2/act/Relu || /model/neck/neck1/blocks/conv1/conv/Conv + /model/neck/neck1/blocks/conv1/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] POINTWISE: 
PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 171) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 179) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck1/blocks/conv3/conv/Conv + /model/neck/neck1/blocks/conv3/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/conv/conv/Conv + /model/neck/neck2/conv/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] DECONVOLUTION: /model/neck/neck2/upsample/ConvTranspose +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/reduce_skip1/act/Relu_output_0 copy +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/reduce_after_concat/conv/Conv + /model/neck/neck2/reduce_after_concat/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv2/conv/Conv + /model/neck/neck2/blocks/conv2/act/Relu || /model/neck/neck2/blocks/conv1/conv/Conv + /model/neck/neck2/blocks/conv1/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv + 
/model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 200) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/nonlinearity/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 208) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add_output_0 copy +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck2/blocks/conv3/conv/Conv + /model/neck/neck2/blocks/conv3/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/bbox_stem/seq/conv/Conv + /model/heads/head1/bbox_stem/seq/act/Relu || /model/heads/head1/pose_stem/seq/conv/Conv + /model/heads/head1/pose_stem/seq/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/conv/conv/Conv + /model/neck/neck3/conv/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head1/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head1/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv2/conv/Conv + /model/neck/neck3/blocks/conv2/act/Relu || /model/neck/neck3/blocks/conv1/conv/Conv + /model/neck/neck3/blocks/conv1/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/cls_pred/Conv +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/reg_pred/Conv +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head1/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head1/pose_pred/Conv +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 239) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 262) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck3/blocks/conv3/conv/Conv + /model/neck/neck3/blocks/conv3/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_stem/seq/conv/Conv + /model/heads/head2/pose_stem/seq/act/Relu || /model/heads/head2/bbox_stem/seq/conv/Conv + /model/heads/head2/bbox_stem/seq/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/conv/conv/Conv + /model/neck/neck4/conv/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head2/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head2/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/conv2/conv/Conv + /model/neck/neck4/blocks/conv2/act/Relu || /model/neck/neck4/blocks/conv1/conv/Conv + /model/neck/neck4/blocks/conv1/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/cls_pred/Conv +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/heads/head2/reg_pred/Conv +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head2/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head2/pose_pred/Conv +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 293) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 316) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/conv2/act/Relu_output_0 copy +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: 
/model/neck/neck4/blocks/conv3/conv/Conv + /model/neck/neck4/blocks/conv3/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/bbox_stem/seq/conv/Conv + /model/heads/head3/bbox_stem/seq/act/Relu || /model/heads/head3/pose_stem/seq/conv/Conv + /model/heads/head3/pose_stem/seq/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv + /model/heads/head3/reg_convs/reg_convs.0/seq/act/Relu || /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv + /model/heads/head3/cls_convs/cls_convs.0/seq/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.0/seq/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/cls_pred/Conv +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/reg_pred/Conv +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.1/seq/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv + /model/heads/head3/pose_convs/pose_convs.2/seq/act/Relu +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/head3/pose_pred/Conv +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice_1.../post_process/Reshape_2]} +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] NMS: batched_nms_26 +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 395) [NMS]_1_output[DevicetoShapeHostCopy] +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] 
TRAIN_STATION: [trainStation2] +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/28/2023-00:03:41] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] +[12/28/2023-00:03:52] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +357, now: CPU 1179, GPU 3774 (MiB) +[12/28/2023-00:03:54] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +83, GPU +72, now: CPU 1262, GPU 3846 (MiB) +[12/28/2023-00:03:54] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored. +[12/28/2023-00:44:55] [I] [TRT] Total Activation Memory: 7904091648 +[12/28/2023-00:44:55] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/28/2023-00:45:04] [I] [TRT] Total Host Persistent Memory: 285376 +[12/28/2023-00:45:04] [I] [TRT] Total Device Persistent Memory: 77824 +[12/28/2023-00:45:04] [I] [TRT] Total Scratch Memory: 134217728 +[12/28/2023-00:45:04] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 25 MiB, GPU 2396 MiB +[12/28/2023-00:45:04] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 139 steps to complete. +[12/28/2023-00:45:04] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 42.0375ms to assign 13 blocks to 139 nodes requiring 140113920 bytes. +[12/28/2023-00:45:04] [I] [TRT] Total Activation Memory: 140113920 +[12/28/2023-00:45:08] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU -1, now: CPU 1625, GPU 5602 (MiB) +[12/28/2023-00:45:08] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +15, GPU +16, now: CPU 15, GPU 16 (MiB) +[12/28/2023-00:45:08] [I] Engine built in 2496.59 sec. 
+[12/28/2023-00:45:09] [I] [TRT] Loaded engine size: 16 MiB +[12/28/2023-00:45:09] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +1, GPU +0, now: CPU 1262, GPU 5453 (MiB) +[12/28/2023-00:45:09] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +15, now: CPU 0, GPU 15 (MiB) +[12/28/2023-00:45:09] [I] Engine deserialized in 0.14892 sec. +[12/28/2023-00:45:09] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1262, GPU 5453 (MiB) +[12/28/2023-00:45:09] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +133, now: CPU 0, GPU 148 (MiB) +[12/28/2023-00:45:09] [I] Setting persistentCacheLimit to 0 bytes. +[12/28/2023-00:45:09] [I] Using random values for input onnx::Cast_0 +[12/28/2023-00:45:09] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/28/2023-00:45:09] [I] Using random values for output graph2_flat_predictions +[12/28/2023-00:45:09] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/28/2023-00:45:09] [I] Starting inference +[12/28/2023-00:45:25] [I] Warmup completed 2 queries over 200 ms +[12/28/2023-00:45:25] [I] Timing trace has 1297 queries over 15.0254 s +[12/28/2023-00:45:25] [I] +[12/28/2023-00:45:25] [I] === Trace details === +[12/28/2023-00:45:25] [I] Trace averages of 100 runs: +[12/28/2023-00:45:25] [I] Average on 100 runs - GPU latency: 11.1621 ms - Host latency: 11.2749 ms (enqueue 11.2437 ms) +[12/28/2023-00:45:25] [I] Average on 100 runs - GPU latency: 11.0409 ms - Host latency: 11.151 ms (enqueue 11.1179 ms) +[12/28/2023-00:45:25] [I] Average on 100 runs - GPU latency: 10.9223 ms - Host latency: 11.0317 ms (enqueue 10.9957 ms) +[12/28/2023-00:45:25] [I] Average on 100 runs - GPU latency: 11.4173 ms - Host latency: 11.5324 ms (enqueue 11.4842 ms) +[12/28/2023-00:45:25] [I] Average on 100 runs - GPU latency: 11.5522 ms - Host latency: 11.6689 ms (enqueue 11.6133 ms) +[12/28/2023-00:45:25] [I] 
Average on 100 runs - GPU latency: 11.5825 ms - Host latency: 11.6978 ms (enqueue 11.6486 ms) +[12/28/2023-00:45:25] [I] Average on 100 runs - GPU latency: 11.598 ms - Host latency: 11.7137 ms (enqueue 11.6601 ms) +[12/28/2023-00:45:25] [I] Average on 100 runs - GPU latency: 11.5367 ms - Host latency: 11.6521 ms (enqueue 11.6049 ms) +[12/28/2023-00:45:25] [I] Average on 100 runs - GPU latency: 11.6019 ms - Host latency: 11.7186 ms (enqueue 11.6624 ms) +[12/28/2023-00:45:25] [I] Average on 100 runs - GPU latency: 11.6033 ms - Host latency: 11.7186 ms (enqueue 11.6635 ms) +[12/28/2023-00:45:25] [I] Average on 100 runs - GPU latency: 11.4973 ms - Host latency: 11.6131 ms (enqueue 11.5614 ms) +[12/28/2023-00:45:25] [I] Average on 100 runs - GPU latency: 11.5793 ms - Host latency: 11.6959 ms (enqueue 11.6461 ms) +[12/28/2023-00:45:25] [I] +[12/28/2023-00:45:25] [I] === Performance summary === +[12/28/2023-00:45:25] [I] Throughput: 86.3202 qps +[12/28/2023-00:45:25] [I] Latency: min = 10.6523 ms, max = 16.295 ms, mean = 11.5466 ms, median = 11.6436 ms, percentile(90%) = 11.7739 ms, percentile(95%) = 11.8501 ms, percentile(99%) = 13.1489 ms +[12/28/2023-00:45:25] [I] Enqueue Time: min = 10.6284 ms, max = 16.268 ms, mean = 11.499 ms, median = 11.5928 ms, percentile(90%) = 11.7246 ms, percentile(95%) = 11.8066 ms, percentile(99%) = 12.9248 ms +[12/28/2023-00:45:25] [I] H2D Latency: min = 0.0810547 ms, max = 0.161957 ms, mean = 0.096772 ms, median = 0.0966797 ms, percentile(90%) = 0.0996094 ms, percentile(95%) = 0.0998535 ms, percentile(99%) = 0.109375 ms +[12/28/2023-00:45:25] [I] GPU Compute Time: min = 10.5449 ms, max = 16.1248 ms, mean = 11.4319 ms, median = 11.5303 ms, percentile(90%) = 11.6597 ms, percentile(95%) = 11.7344 ms, percentile(99%) = 13.0488 ms +[12/28/2023-00:45:25] [I] D2H Latency: min = 0.00292969 ms, max = 0.0498047 ms, mean = 0.0179352 ms, median = 0.0175781 ms, percentile(90%) = 0.0244141 ms, percentile(95%) = 0.0253906 ms, percentile(99%) = 0.0400391 
ms +[12/28/2023-00:45:25] [I] Total Host Walltime: 15.0254 s +[12/28/2023-00:45:25] [I] Total GPU Compute Time: 14.8272 s +[12/28/2023-00:45:25] [I] Explanations of the performance metrics are printed in the verbose logs. +[12/28/2023-00:45:25] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_fp32.onnx --int8 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_fp32.onnx.int8.engine diff --git a/yolo_nas_pose_s_fp32.onnx.usage.txt b/yolo_nas_pose_s_fp32.onnx.usage.txt new file mode 100644 index 0000000000000000000000000000000000000000..dd9a62298c8c8a3705aa31c75668a7873fcc557c --- /dev/null +++ b/yolo_nas_pose_s_fp32.onnx.usage.txt @@ -0,0 +1,58 @@ + +Model exported successfully to yolo_nas_pose_s_fp32.onnx +Model expects input image of shape [1, 3, 640, 640] +Input image dtype is torch.uint8 + +Exported model already contains preprocessing (normalization) step, so you don't need to do it manually. +Preprocessing steps to be applied to input image are: +Sequential( + (0): CastTensorTo(dtype=torch.float32) + (1): ChannelSelect(channels_indexes=tensor([2, 1, 0])) + (2): ApplyMeanStd(mean=[0.], scale=[255.]) +) + + +Exported model contains postprocessing (NMS) step with the following parameters: + num_pre_nms_predictions=1000 + max_predictions_per_image=10 + nms_threshold=0.5 + confidence_threshold=0.15 + output_predictions_format=flat + + +Exported model is in ONNX format and can be used with ONNXRuntime +To run inference with ONNXRuntime, please use the following code snippet: + + import onnxruntime + import numpy as np + session = onnxruntime.InferenceSession("yolo_nas_pose_s_fp32.onnx", providers=["CUDAExecutionProvider", "CPUExecutionProvider"]) + inputs = [o.name for o in session.get_inputs()] + outputs = [o.name for o in session.get_outputs()] + + example_input_image = np.zeros((1, 3, 640, 640)).astype(np.uint8) + predictions = session.run(outputs, {inputs[0]: example_input_image}) + +Exported model 
can also be used with TensorRT +To run inference with TensorRT, please see TensorRT deployment documentation +You can benchmark the model using the following code snippet: + + trtexec --onnx=yolo_nas_pose_s_fp32.onnx --fp16 --avgRuns=100 --duration=15 + + +Exported model has predictions in flat format: + +# flat_predictions is a 2D array of [N,K] shape +# Each row represents (image_index, x_min, y_min, x_max, y_max, confidence, joints...) +# Please note all values are floats, so you have to convert them to integers if needed + +[flat_predictions] = predictions +pred_bboxes = flat_predictions[:, 1:5] +pred_scores = flat_predictions[:, 5] +pred_joints = flat_predictions[:, 6:].reshape((len(pred_bboxes), -1, 3)) +for i in range(len(pred_bboxes)): + confidence = pred_scores[i] + x_min, y_min, x_max, y_max = pred_bboxes[i] + print(f"Detected pose with confidence={{confidence}}, x_min={{x_min}}, y_min={{y_min}}, x_max={{x_max}}, y_max={{y_max}}") + for joint_index, (x, y, confidence) in enumerate(pred_joints[i]):") + print(f"Joint {{joint_index}} has coordinates x={{x}}, y={{y}}, confidence={{confidence}}") + diff --git a/yolo_nas_pose_s_int8.onnx b/yolo_nas_pose_s_int8.onnx new file mode 100644 index 0000000000000000000000000000000000000000..423f93f48a34d7cb33c4deed638de9c7f4d1fd95 --- /dev/null +++ b/yolo_nas_pose_s_int8.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:371f621790e231135ba258da2f5607d0df9c4505109cd07f4bb1a392dee03da2 +size 62288937 diff --git a/yolo_nas_pose_s_int8.onnx.best.engine b/yolo_nas_pose_s_int8.onnx.best.engine new file mode 100644 index 0000000000000000000000000000000000000000..925cec6792d36d4adc2fe0dfc256e19e763c3378 --- /dev/null +++ b/yolo_nas_pose_s_int8.onnx.best.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19f0e5f7536fe7f1f802536c4f9de7a56072c2f442043fe76c02813262b89a90 +size 18008336 diff --git a/yolo_nas_pose_s_int8.onnx.best.engine.err 
b/yolo_nas_pose_s_int8.onnx.best.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..3a596b3807c3cfa9aa59eb305a2cf02a25181afa --- /dev/null +++ b/yolo_nas_pose_s_int8.onnx.best.engine.err @@ -0,0 +1,7 @@ +[12/28/2023-03:03:31] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/28/2023-03:03:31] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-03:03:34] [W] [TRT] Calibrator won't be used in explicit precision mode. Use quantization aware training to generate network with Quantize/Dequantize nodes. +[12/28/2023-03:36:12] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. +[12/28/2023-03:36:12] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/28/2023-03:36:12] [W] * GPU compute time is unstable, with coefficient of variance = 3.47567%. +[12/28/2023-03:36:12] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. 
diff --git a/yolo_nas_pose_s_int8.onnx.best.engine.log b/yolo_nas_pose_s_int8.onnx.best.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..460f9f253d30528b24063d2a3f5f970208f0a27f --- /dev/null +++ b/yolo_nas_pose_s_int8.onnx.best.engine.log @@ -0,0 +1,323 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_int8.onnx --best --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_int8.onnx.best.engine +[12/28/2023-03:03:27] [I] === Model Options === +[12/28/2023-03:03:27] [I] Format: ONNX +[12/28/2023-03:03:27] [I] Model: yolo_nas_pose_s_int8.onnx +[12/28/2023-03:03:27] [I] Output: +[12/28/2023-03:03:27] [I] === Build Options === +[12/28/2023-03:03:27] [I] Max batch: explicit batch +[12/28/2023-03:03:27] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-03:03:27] [I] minTiming: 1 +[12/28/2023-03:03:27] [I] avgTiming: 8 +[12/28/2023-03:03:27] [I] Precision: FP32+FP16+INT8 +[12/28/2023-03:03:27] [I] LayerPrecisions: +[12/28/2023-03:03:27] [I] Calibration: Dynamic +[12/28/2023-03:03:27] [I] Refit: Disabled +[12/28/2023-03:03:27] [I] Sparsity: Disabled +[12/28/2023-03:03:27] [I] Safe mode: Disabled +[12/28/2023-03:03:27] [I] DirectIO mode: Disabled +[12/28/2023-03:03:27] [I] Restricted mode: Disabled +[12/28/2023-03:03:27] [I] Build only: Disabled +[12/28/2023-03:03:27] [I] Save engine: yolo_nas_pose_s_int8.onnx.best.engine +[12/28/2023-03:03:27] [I] Load engine: +[12/28/2023-03:03:27] [I] Profiling verbosity: 0 +[12/28/2023-03:03:27] [I] Tactic sources: Using default tactic sources +[12/28/2023-03:03:27] [I] timingCacheMode: local +[12/28/2023-03:03:27] [I] timingCacheFile: +[12/28/2023-03:03:27] [I] Heuristic: Disabled +[12/28/2023-03:03:27] [I] Preview Features: Use default preview flags. 
+[12/28/2023-03:03:27] [I] Input(s)s format: fp32:CHW +[12/28/2023-03:03:27] [I] Output(s)s format: fp32:CHW +[12/28/2023-03:03:27] [I] Input build shapes: model +[12/28/2023-03:03:27] [I] Input calibration shapes: model +[12/28/2023-03:03:27] [I] === System Options === +[12/28/2023-03:03:27] [I] Device: 0 +[12/28/2023-03:03:27] [I] DLACore: +[12/28/2023-03:03:27] [I] Plugins: +[12/28/2023-03:03:27] [I] === Inference Options === +[12/28/2023-03:03:27] [I] Batch: Explicit +[12/28/2023-03:03:27] [I] Input inference shapes: model +[12/28/2023-03:03:27] [I] Iterations: 10 +[12/28/2023-03:03:27] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-03:03:27] [I] Sleep time: 0ms +[12/28/2023-03:03:27] [I] Idle time: 0ms +[12/28/2023-03:03:27] [I] Streams: 1 +[12/28/2023-03:03:27] [I] ExposeDMA: Disabled +[12/28/2023-03:03:27] [I] Data transfers: Enabled +[12/28/2023-03:03:27] [I] Spin-wait: Disabled +[12/28/2023-03:03:27] [I] Multithreading: Disabled +[12/28/2023-03:03:27] [I] CUDA Graph: Disabled +[12/28/2023-03:03:27] [I] Separate profiling: Disabled +[12/28/2023-03:03:27] [I] Time Deserialize: Disabled +[12/28/2023-03:03:27] [I] Time Refit: Disabled +[12/28/2023-03:03:27] [I] NVTX verbosity: 0 +[12/28/2023-03:03:27] [I] Persistent Cache Ratio: 0 +[12/28/2023-03:03:27] [I] Inputs: +[12/28/2023-03:03:27] [I] === Reporting Options === +[12/28/2023-03:03:27] [I] Verbose: Disabled +[12/28/2023-03:03:27] [I] Averages: 100 inferences +[12/28/2023-03:03:27] [I] Percentiles: 90,95,99 +[12/28/2023-03:03:27] [I] Dump refittable layers:Disabled +[12/28/2023-03:03:27] [I] Dump output: Disabled +[12/28/2023-03:03:27] [I] Profile: Disabled +[12/28/2023-03:03:27] [I] Export timing to JSON file: +[12/28/2023-03:03:27] [I] Export output to JSON file: +[12/28/2023-03:03:27] [I] Export profile to JSON file: +[12/28/2023-03:03:27] [I] +[12/28/2023-03:03:27] [I] === Device Information === +[12/28/2023-03:03:27] [I] Selected Device: Orin +[12/28/2023-03:03:27] [I] Compute Capability: 8.7 
+[12/28/2023-03:03:27] [I] SMs: 8 +[12/28/2023-03:03:27] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-03:03:27] [I] Device Global Memory: 7471 MiB +[12/28/2023-03:03:27] [I] Shared Memory per SM: 164 KiB +[12/28/2023-03:03:27] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-03:03:27] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-03:03:27] [I] +[12/28/2023-03:03:27] [I] TensorRT version: 8.5.2 +[12/28/2023-03:03:28] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 3018 (MiB) +[12/28/2023-03:03:31] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +284, now: CPU 574, GPU 3324 (MiB) +[12/28/2023-03:03:31] [I] Start parsing network model +[12/28/2023-03:03:31] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-03:03:31] [I] [TRT] Input filename: yolo_nas_pose_s_int8.onnx +[12/28/2023-03:03:31] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-03:03:31] [I] [TRT] Opset version: 17 +[12/28/2023-03:03:31] [I] [TRT] Producer name: pytorch +[12/28/2023-03:03:31] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-03:03:31] [I] [TRT] Domain: +[12/28/2023-03:03:31] [I] [TRT] Model version: 0 +[12/28/2023-03:03:31] [I] [TRT] Doc string: +[12/28/2023-03:03:31] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-03:03:34] [I] Finish parsing network model +[12/28/2023-03:03:38] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/28/2023-03:03:38] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 1228) [Constant] +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 1229) [Constant] +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 1230) 
[Constant] +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] COPY: /model/backbone/stem/conv/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stem.conv.rbr_reparam.weight + /model/backbone/stem/conv/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stem/conv/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.downsample.rbr_reparam.weight + /model/backbone/stage1/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/downsample/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.conv2.conv.weight + /model/backbone/stage1/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/conv2/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.conv1.conv.weight + /model/backbone/stage1/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/conv1/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 494) [Shuffle] + 
/model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 510) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.conv3.conv.weight + /model/backbone/stage1/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/conv3/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.reduce_skip2.conv.weight + /model/neck/neck2/reduce_skip2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/reduce_skip2/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.downsample.rbr_reparam.weight + /model/backbone/stage2/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/downsample/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.downsample.conv.weight + 
/model/neck/neck2/downsample/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/downsample/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.conv2.conv.weight + /model/backbone/stage2/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/conv2/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.conv1.conv.weight + /model/backbone/stage2/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/conv1/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 557) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + 
/model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 573) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.2.cv1.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.2.cv2.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 589) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.conv3.conv.weight + /model/backbone/stage2/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/conv3/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: 
model.neck.neck1.reduce_skip2.conv.weight + /model/neck/neck1/reduce_skip2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/reduce_skip2/conv/Conv || model.neck.neck2.reduce_skip1.conv.weight + /model/neck/neck2/reduce_skip1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/reduce_skip1/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.downsample.rbr_reparam.weight + /model/backbone/stage3/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/downsample/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.downsample.conv.weight + /model/neck/neck1/downsample/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/downsample/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.conv2.conv.weight + /model/backbone/stage3/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/conv2/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.conv1.conv.weight + /model/backbone/stage3/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/conv1/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv 
+[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 639) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 655) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.2.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.2.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 671) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.3.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.3.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 687) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.4.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.4.cv2.rbr_reparam.weight + 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.4.alpha + (Unnamed Layer* 703) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Add) +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.conv3.conv.weight + /model/backbone/stage3/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/conv3/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.reduce_skip1.conv.weight + /model/neck/neck1/reduce_skip1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/reduce_skip1/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.downsample.rbr_reparam.weight + /model/backbone/stage4/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/downsample/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.conv2.conv.weight + /model/backbone/stage4/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/conv2/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.conv1.conv.weight + /model/backbone/stage4/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/conv1/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + 
/model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 744) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 760) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.conv3.conv.weight + /model/backbone/stage4/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/conv3/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: 
model.backbone.context_module.cv1.conv.weight + /model/backbone/context_module/cv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/context_module/cv1/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/m.2/MaxPool_output_0 copy +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.context_module.cv2.conv.weight + /model/backbone/context_module/cv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/context_module/cv2/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.conv.conv.weight + /model/neck/neck1/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/conv/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] DECONVOLUTION: model.neck.neck1.upsample.weight + /model/neck/neck1/upsample/_weight_quantizer/QuantizeLinear + /model/neck/neck1/upsample/ConvTranspose +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.reduce_after_concat.conv.weight + /model/neck/neck1/reduce_after_concat/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/reduce_after_concat/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.conv2.conv.weight + /model/neck/neck1/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/conv2/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.conv1.conv.weight + /model/neck/neck1/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/conv1/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear 
+[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 825) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 841) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.conv3.conv.weight + /model/neck/neck1/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + 
/model/neck/neck1/blocks/conv3/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.conv.conv.weight + /model/neck/neck2/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/conv/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/upsample/_input_quantizer/QuantizeLinear +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] DECONVOLUTION: model.neck.neck2.upsample.weight + /model/neck/neck2/upsample/_weight_quantizer/QuantizeLinear + /model/neck/neck2/upsample/ConvTranspose +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/Concat_/model/neck/neck2/reduce_skip1/act/Relu_output_0_clone_1 copy +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.reduce_after_concat.conv.weight + /model/neck/neck2/reduce_after_concat/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/reduce_after_concat/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.conv2.conv.weight + /model/neck/neck2/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/conv2/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.conv1.conv.weight + /model/neck/neck2/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/conv1/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + 
/model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 890) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 906) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/Concat_/model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add_output_0_clone_0 copy +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.conv3.conv.weight + /model/neck/neck2/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/conv3/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.bbox_stem.seq.conv.weight + /model/heads/head1/bbox_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/bbox_stem/seq/conv/Conv || model.heads.head1.pose_stem.seq.conv.weight + 
/model/heads/head1/pose_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_stem/seq/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.conv.conv.weight + /model/neck/neck3/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/conv/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.reg_convs.0.seq.conv.weight + /model/heads/head1/reg_convs/reg_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv || model.heads.head1.cls_convs.0.seq.conv.weight + /model/heads/head1/cls_convs/cls_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.pose_convs.0.seq.conv.weight + /model/heads/head1/pose_convs/pose_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/conv1/conv/_input_quantizer/QuantizeLinear_clone_1 +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.cls_pred.weight + /model/heads/head1/cls_pred/_weight_quantizer/QuantizeLinear + /model/heads/head1/cls_pred/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.reg_pred.weight + /model/heads/head1/reg_pred/_weight_quantizer/QuantizeLinear + /model/heads/head1/reg_pred/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.pose_convs.1.seq.conv.weight + /model/heads/head1/pose_convs/pose_convs.1/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.conv2.conv.weight + /model/neck/neck3/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/conv2/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] 
[GpuLayer] CONVOLUTION: model.neck.neck3.blocks.conv1.conv.weight + /model/neck/neck3/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/conv1/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.pose_pred.weight + /model/heads/head1/pose_pred/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_pred/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.0.cv1.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.0.cv2.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 972) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.1.cv1.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv +[12/28/2023-03:03:38] [I] 
[TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.1.cv2.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 1013) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.conv3.conv.weight + /model/neck/neck3/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/conv3/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.pose_stem.seq.conv.weight + /model/heads/head2/pose_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_stem/seq/conv/Conv || model.heads.head2.bbox_stem.seq.conv.weight + /model/heads/head2/bbox_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/bbox_stem/seq/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.conv.conv.weight + /model/neck/neck4/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/conv/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.reg_convs.0.seq.conv.weight + /model/heads/head2/reg_convs/reg_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv || model.heads.head2.cls_convs.0.seq.conv.weight + /model/heads/head2/cls_convs/cls_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.pose_convs.0.seq.conv.weight + /model/heads/head2/pose_convs/pose_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv +[12/28/2023-03:03:38] [I] 
[TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.conv2.conv.weight + /model/neck/neck4/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/conv2/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.conv1.conv.weight + /model/neck/neck4/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/conv1/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.cls_pred.weight + /model/heads/head2/cls_pred/_weight_quantizer/QuantizeLinear + /model/heads/head2/cls_pred/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.reg_pred.weight + /model/heads/head2/reg_pred/_weight_quantizer/QuantizeLinear + /model/heads/head2/reg_pred/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.pose_convs.1.seq.conv.weight + /model/heads/head2/pose_convs/pose_convs.1/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.pose_pred.weight + /model/heads/head2/pose_pred/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_pred/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.0.cv1.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.0.cv2.conv.weight + 
/model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 1078) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.1.cv1.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.1.cv2.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 1119) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.conv3.conv.weight + /model/neck/neck4/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/conv3/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.bbox_stem.seq.conv.weight + /model/heads/head3/bbox_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/bbox_stem/seq/conv/Conv || model.heads.head3.pose_stem.seq.conv.weight + /model/heads/head3/pose_stem/seq/conv/_weight_quantizer/QuantizeLinear + 
/model/heads/head3/pose_stem/seq/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.reg_convs.0.seq.conv.weight + /model/heads/head3/reg_convs/reg_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv || model.heads.head3.cls_convs.0.seq.conv.weight + /model/heads/head3/cls_convs/cls_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_convs.0.seq.conv.weight + /model/heads/head3/pose_convs/pose_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.cls_pred.weight + /model/heads/head3/cls_pred/_weight_quantizer/QuantizeLinear + /model/heads/head3/cls_pred/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.reg_pred.weight + /model/heads/head3/reg_pred/_weight_quantizer/QuantizeLinear + /model/heads/head3/reg_pred/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_convs.1.seq.conv.weight + /model/heads/head3/pose_convs/pose_convs.1/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_convs.2.seq.conv.weight + /model/heads/head3/pose_convs/pose_convs.2/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_2 +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_pred.weight + /model/heads/head3/pose_pred/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_pred/Conv 
+[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice_1.../post_process/Reshape_2]} +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] NMS: batched_nms_243 +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 1232) [NMS]_1_output[DevicetoShapeHostCopy] +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/28/2023-03:03:38] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] +[12/28/2023-03:03:51] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +341, now: CPU 1233, GPU 3794 (MiB) +[12/28/2023-03:03:53] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +82, GPU +76, now: CPU 1315, GPU 3870 (MiB) +[12/28/2023-03:03:53] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored. +[12/28/2023-03:35:46] [I] [TRT] Total Activation Memory: 7917157888 +[12/28/2023-03:35:46] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/28/2023-03:35:53] [I] [TRT] Total Host Persistent Memory: 308256 +[12/28/2023-03:35:53] [I] [TRT] Total Device Persistent Memory: 38912 +[12/28/2023-03:35:53] [I] [TRT] Total Scratch Memory: 134217728 +[12/28/2023-03:35:53] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 74 MiB, GPU 154 MiB +[12/28/2023-03:35:53] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 164 steps to complete. +[12/28/2023-03:35:53] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 59.5318ms to assign 13 blocks to 164 nodes requiring 142029824 bytes. 
+[12/28/2023-03:35:53] [I] [TRT] Total Activation Memory: 142029824 +[12/28/2023-03:35:56] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1667, GPU 5515 (MiB) +[12/28/2023-03:35:56] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +15, GPU +16, now: CPU 15, GPU 16 (MiB) +[12/28/2023-03:35:56] [I] Engine built in 1949.31 sec. +[12/28/2023-03:35:57] [I] [TRT] Loaded engine size: 17 MiB +[12/28/2023-03:35:57] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1252, GPU 5486 (MiB) +[12/28/2023-03:35:57] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +15, now: CPU 0, GPU 15 (MiB) +[12/28/2023-03:35:57] [I] Engine deserialized in 0.208393 sec. +[12/28/2023-03:35:57] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 1252, GPU 5486 (MiB) +[12/28/2023-03:35:57] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +135, now: CPU 0, GPU 150 (MiB) +[12/28/2023-03:35:57] [I] Setting persistentCacheLimit to 0 bytes. 
+[12/28/2023-03:35:57] [I] Using random values for input onnx::Cast_0 +[12/28/2023-03:35:57] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/28/2023-03:35:57] [I] Using random values for output graph2_flat_predictions +[12/28/2023-03:35:57] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/28/2023-03:35:57] [I] Starting inference +[12/28/2023-03:36:12] [I] Warmup completed 11 queries over 200 ms +[12/28/2023-03:36:12] [I] Timing trace has 1116 queries over 15.0304 s +[12/28/2023-03:36:12] [I] +[12/28/2023-03:36:12] [I] === Trace details === +[12/28/2023-03:36:12] [I] Trace averages of 100 runs: +[12/28/2023-03:36:12] [I] Average on 100 runs - GPU latency: 13.3509 ms - Host latency: 13.4657 ms (enqueue 13.4291 ms) +[12/28/2023-03:36:12] [I] Average on 100 runs - GPU latency: 13.6114 ms - Host latency: 13.7267 ms (enqueue 13.6814 ms) +[12/28/2023-03:36:12] [I] Average on 100 runs - GPU latency: 13.2966 ms - Host latency: 13.4103 ms (enqueue 13.3777 ms) +[12/28/2023-03:36:12] [I] Average on 100 runs - GPU latency: 13.3229 ms - Host latency: 13.4369 ms (enqueue 13.4032 ms) +[12/28/2023-03:36:12] [I] Average on 100 runs - GPU latency: 13.3265 ms - Host latency: 13.4397 ms (enqueue 13.4066 ms) +[12/28/2023-03:36:12] [I] Average on 100 runs - GPU latency: 13.3048 ms - Host latency: 13.4183 ms (enqueue 13.3859 ms) +[12/28/2023-03:36:12] [I] Average on 100 runs - GPU latency: 13.2897 ms - Host latency: 13.4033 ms (enqueue 13.375 ms) +[12/28/2023-03:36:12] [I] Average on 100 runs - GPU latency: 13.2914 ms - Host latency: 13.4051 ms (enqueue 13.3713 ms) +[12/28/2023-03:36:12] [I] Average on 100 runs - GPU latency: 13.2786 ms - Host latency: 13.3924 ms (enqueue 13.3595 ms) +[12/28/2023-03:36:12] [I] Average on 100 runs - GPU latency: 13.2361 ms - Host latency: 13.3498 ms (enqueue 13.3173 ms) +[12/28/2023-03:36:12] [I] Average on 100 runs - GPU latency: 13.2792 ms - Host latency: 13.3929 ms (enqueue 13.3575 ms) 
+[12/28/2023-03:36:12] [I] +[12/28/2023-03:36:12] [I] === Performance summary === +[12/28/2023-03:36:12] [I] Throughput: 74.2494 qps +[12/28/2023-03:36:12] [I] Latency: min = 12.5967 ms, max = 18.459 ms, mean = 13.4387 ms, median = 13.2627 ms, percentile(90%) = 13.9451 ms, percentile(95%) = 14.0898 ms, percentile(99%) = 15.3137 ms +[12/28/2023-03:36:12] [I] Enqueue Time: min = 12.5688 ms, max = 18.406 ms, mean = 13.4045 ms, median = 13.2305 ms, percentile(90%) = 13.9082 ms, percentile(95%) = 14.0646 ms, percentile(99%) = 14.8955 ms +[12/28/2023-03:36:12] [I] H2D Latency: min = 0.0820312 ms, max = 0.120605 ms, mean = 0.100692 ms, median = 0.101074 ms, percentile(90%) = 0.102295 ms, percentile(95%) = 0.102539 ms, percentile(99%) = 0.113281 ms +[12/28/2023-03:36:12] [I] GPU Compute Time: min = 12.4849 ms, max = 18.3318 ms, mean = 13.3247 ms, median = 13.1485 ms, percentile(90%) = 13.8306 ms, percentile(95%) = 13.973 ms, percentile(99%) = 15.1929 ms +[12/28/2023-03:36:12] [I] D2H Latency: min = 0.00292969 ms, max = 0.0505371 ms, mean = 0.0132362 ms, median = 0.0117188 ms, percentile(90%) = 0.0186768 ms, percentile(95%) = 0.0222168 ms, percentile(99%) = 0.0280762 ms +[12/28/2023-03:36:12] [I] Total Host Walltime: 15.0304 s +[12/28/2023-03:36:12] [I] Total GPU Compute Time: 14.8704 s +[12/28/2023-03:36:12] [I] Explanations of the performance metrics are printed in the verbose logs. 
+[12/28/2023-03:36:12] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_int8.onnx --best --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_int8.onnx.best.engine diff --git a/yolo_nas_pose_s_int8.onnx.engine.err b/yolo_nas_pose_s_int8.onnx.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..b743c66ec07f96d293adbe78044576ba95d54aa1 --- /dev/null +++ b/yolo_nas_pose_s_int8.onnx.engine.err @@ -0,0 +1,8 @@ +[12/28/2023-03:03:16] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/28/2023-03:03:16] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-03:03:19] [E] Error[4]: [network.cpp::validate::2922] Error Code 4: Internal Error (Int8 precision has been set for a layer or layer output, but int8 is not configured in the builder) +[12/28/2023-03:03:19] [E] Error[2]: [builder.cpp::buildSerializedNetwork::751] Error Code 2: Internal Error (Assertion engine != nullptr failed. ) +[12/28/2023-03:03:19] [E] Engine could not be created from network +[12/28/2023-03:03:19] [E] Building engine failed +[12/28/2023-03:03:19] [E] Failed to create engine from model or file. 
+[12/28/2023-03:03:19] [E] Engine set up failed diff --git a/yolo_nas_pose_s_int8.onnx.engine.log b/yolo_nas_pose_s_int8.onnx.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..fbb8f9deb22ed614aba257abb704e1c05e5f5d7e --- /dev/null +++ b/yolo_nas_pose_s_int8.onnx.engine.log @@ -0,0 +1,91 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_int8.onnx --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_int8.onnx.engine +[12/28/2023-03:03:12] [I] === Model Options === +[12/28/2023-03:03:12] [I] Format: ONNX +[12/28/2023-03:03:12] [I] Model: yolo_nas_pose_s_int8.onnx +[12/28/2023-03:03:12] [I] Output: +[12/28/2023-03:03:12] [I] === Build Options === +[12/28/2023-03:03:12] [I] Max batch: explicit batch +[12/28/2023-03:03:12] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-03:03:12] [I] minTiming: 1 +[12/28/2023-03:03:12] [I] avgTiming: 8 +[12/28/2023-03:03:12] [I] Precision: FP32 +[12/28/2023-03:03:12] [I] LayerPrecisions: +[12/28/2023-03:03:12] [I] Calibration: +[12/28/2023-03:03:12] [I] Refit: Disabled +[12/28/2023-03:03:12] [I] Sparsity: Disabled +[12/28/2023-03:03:12] [I] Safe mode: Disabled +[12/28/2023-03:03:12] [I] DirectIO mode: Disabled +[12/28/2023-03:03:12] [I] Restricted mode: Disabled +[12/28/2023-03:03:12] [I] Build only: Disabled +[12/28/2023-03:03:12] [I] Save engine: yolo_nas_pose_s_int8.onnx.engine +[12/28/2023-03:03:12] [I] Load engine: +[12/28/2023-03:03:12] [I] Profiling verbosity: 0 +[12/28/2023-03:03:12] [I] Tactic sources: Using default tactic sources +[12/28/2023-03:03:12] [I] timingCacheMode: local +[12/28/2023-03:03:12] [I] timingCacheFile: +[12/28/2023-03:03:12] [I] Heuristic: Disabled +[12/28/2023-03:03:12] [I] Preview Features: Use default preview flags. 
+[12/28/2023-03:03:12] [I] Input(s)s format: fp32:CHW +[12/28/2023-03:03:12] [I] Output(s)s format: fp32:CHW +[12/28/2023-03:03:12] [I] Input build shapes: model +[12/28/2023-03:03:12] [I] Input calibration shapes: model +[12/28/2023-03:03:12] [I] === System Options === +[12/28/2023-03:03:12] [I] Device: 0 +[12/28/2023-03:03:12] [I] DLACore: +[12/28/2023-03:03:12] [I] Plugins: +[12/28/2023-03:03:12] [I] === Inference Options === +[12/28/2023-03:03:12] [I] Batch: Explicit +[12/28/2023-03:03:12] [I] Input inference shapes: model +[12/28/2023-03:03:12] [I] Iterations: 10 +[12/28/2023-03:03:12] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-03:03:12] [I] Sleep time: 0ms +[12/28/2023-03:03:12] [I] Idle time: 0ms +[12/28/2023-03:03:12] [I] Streams: 1 +[12/28/2023-03:03:12] [I] ExposeDMA: Disabled +[12/28/2023-03:03:12] [I] Data transfers: Enabled +[12/28/2023-03:03:12] [I] Spin-wait: Disabled +[12/28/2023-03:03:12] [I] Multithreading: Disabled +[12/28/2023-03:03:12] [I] CUDA Graph: Disabled +[12/28/2023-03:03:12] [I] Separate profiling: Disabled +[12/28/2023-03:03:12] [I] Time Deserialize: Disabled +[12/28/2023-03:03:12] [I] Time Refit: Disabled +[12/28/2023-03:03:12] [I] NVTX verbosity: 0 +[12/28/2023-03:03:12] [I] Persistent Cache Ratio: 0 +[12/28/2023-03:03:12] [I] Inputs: +[12/28/2023-03:03:12] [I] === Reporting Options === +[12/28/2023-03:03:12] [I] Verbose: Disabled +[12/28/2023-03:03:12] [I] Averages: 100 inferences +[12/28/2023-03:03:12] [I] Percentiles: 90,95,99 +[12/28/2023-03:03:12] [I] Dump refittable layers:Disabled +[12/28/2023-03:03:12] [I] Dump output: Disabled +[12/28/2023-03:03:12] [I] Profile: Disabled +[12/28/2023-03:03:12] [I] Export timing to JSON file: +[12/28/2023-03:03:12] [I] Export output to JSON file: +[12/28/2023-03:03:12] [I] Export profile to JSON file: +[12/28/2023-03:03:12] [I] +[12/28/2023-03:03:12] [I] === Device Information === +[12/28/2023-03:03:12] [I] Selected Device: Orin +[12/28/2023-03:03:12] [I] Compute Capability: 8.7 
+[12/28/2023-03:03:12] [I] SMs: 8 +[12/28/2023-03:03:12] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-03:03:12] [I] Device Global Memory: 7471 MiB +[12/28/2023-03:03:12] [I] Shared Memory per SM: 164 KiB +[12/28/2023-03:03:12] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-03:03:12] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-03:03:12] [I] +[12/28/2023-03:03:12] [I] TensorRT version: 8.5.2 +[12/28/2023-03:03:12] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 3021 (MiB) +[12/28/2023-03:03:15] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +285, now: CPU 574, GPU 3326 (MiB) +[12/28/2023-03:03:15] [I] Start parsing network model +[12/28/2023-03:03:16] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-03:03:16] [I] [TRT] Input filename: yolo_nas_pose_s_int8.onnx +[12/28/2023-03:03:16] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-03:03:16] [I] [TRT] Opset version: 17 +[12/28/2023-03:03:16] [I] [TRT] Producer name: pytorch +[12/28/2023-03:03:16] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-03:03:16] [I] [TRT] Domain: +[12/28/2023-03:03:16] [I] [TRT] Model version: 0 +[12/28/2023-03:03:16] [I] [TRT] Doc string: +[12/28/2023-03:03:16] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-03:03:19] [I] Finish parsing network model +&&&& FAILED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_int8.onnx --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_int8.onnx.engine diff --git a/yolo_nas_pose_s_int8.onnx.fp16.engine.err b/yolo_nas_pose_s_int8.onnx.fp16.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..efe5da28e0258b8a82f836971900fa5679048962 --- /dev/null +++ b/yolo_nas_pose_s_int8.onnx.fp16.engine.err @@ -0,0 +1,8 @@ +[12/28/2023-03:03:24] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not 
natively support INT64. Attempting to cast down to INT32. +[12/28/2023-03:03:24] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-03:03:27] [E] Error[4]: [network.cpp::validate::2922] Error Code 4: Internal Error (Int8 precision has been set for a layer or layer output, but int8 is not configured in the builder) +[12/28/2023-03:03:27] [E] Error[2]: [builder.cpp::buildSerializedNetwork::751] Error Code 2: Internal Error (Assertion engine != nullptr failed. ) +[12/28/2023-03:03:27] [E] Engine could not be created from network +[12/28/2023-03:03:27] [E] Building engine failed +[12/28/2023-03:03:27] [E] Failed to create engine from model or file. +[12/28/2023-03:03:27] [E] Engine set up failed diff --git a/yolo_nas_pose_s_int8.onnx.fp16.engine.log b/yolo_nas_pose_s_int8.onnx.fp16.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..b4ec6479e3a9445b947485924aab0363dbd8e3dc --- /dev/null +++ b/yolo_nas_pose_s_int8.onnx.fp16.engine.log @@ -0,0 +1,91 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_int8.onnx --fp16 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_int8.onnx.fp16.engine +[12/28/2023-03:03:20] [I] === Model Options === +[12/28/2023-03:03:20] [I] Format: ONNX +[12/28/2023-03:03:20] [I] Model: yolo_nas_pose_s_int8.onnx +[12/28/2023-03:03:20] [I] Output: +[12/28/2023-03:03:20] [I] === Build Options === +[12/28/2023-03:03:20] [I] Max batch: explicit batch +[12/28/2023-03:03:20] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-03:03:20] [I] minTiming: 1 +[12/28/2023-03:03:20] [I] avgTiming: 8 +[12/28/2023-03:03:20] [I] Precision: FP32+FP16 +[12/28/2023-03:03:20] [I] LayerPrecisions: +[12/28/2023-03:03:20] [I] Calibration: +[12/28/2023-03:03:20] [I] Refit: Disabled +[12/28/2023-03:03:20] [I] Sparsity: Disabled +[12/28/2023-03:03:20] [I] Safe mode: 
Disabled +[12/28/2023-03:03:20] [I] DirectIO mode: Disabled +[12/28/2023-03:03:20] [I] Restricted mode: Disabled +[12/28/2023-03:03:20] [I] Build only: Disabled +[12/28/2023-03:03:20] [I] Save engine: yolo_nas_pose_s_int8.onnx.fp16.engine +[12/28/2023-03:03:20] [I] Load engine: +[12/28/2023-03:03:20] [I] Profiling verbosity: 0 +[12/28/2023-03:03:20] [I] Tactic sources: Using default tactic sources +[12/28/2023-03:03:20] [I] timingCacheMode: local +[12/28/2023-03:03:20] [I] timingCacheFile: +[12/28/2023-03:03:20] [I] Heuristic: Disabled +[12/28/2023-03:03:20] [I] Preview Features: Use default preview flags. +[12/28/2023-03:03:20] [I] Input(s)s format: fp32:CHW +[12/28/2023-03:03:20] [I] Output(s)s format: fp32:CHW +[12/28/2023-03:03:20] [I] Input build shapes: model +[12/28/2023-03:03:20] [I] Input calibration shapes: model +[12/28/2023-03:03:20] [I] === System Options === +[12/28/2023-03:03:20] [I] Device: 0 +[12/28/2023-03:03:20] [I] DLACore: +[12/28/2023-03:03:20] [I] Plugins: +[12/28/2023-03:03:20] [I] === Inference Options === +[12/28/2023-03:03:20] [I] Batch: Explicit +[12/28/2023-03:03:20] [I] Input inference shapes: model +[12/28/2023-03:03:20] [I] Iterations: 10 +[12/28/2023-03:03:20] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-03:03:20] [I] Sleep time: 0ms +[12/28/2023-03:03:20] [I] Idle time: 0ms +[12/28/2023-03:03:20] [I] Streams: 1 +[12/28/2023-03:03:20] [I] ExposeDMA: Disabled +[12/28/2023-03:03:20] [I] Data transfers: Enabled +[12/28/2023-03:03:20] [I] Spin-wait: Disabled +[12/28/2023-03:03:20] [I] Multithreading: Disabled +[12/28/2023-03:03:20] [I] CUDA Graph: Disabled +[12/28/2023-03:03:20] [I] Separate profiling: Disabled +[12/28/2023-03:03:20] [I] Time Deserialize: Disabled +[12/28/2023-03:03:20] [I] Time Refit: Disabled +[12/28/2023-03:03:20] [I] NVTX verbosity: 0 +[12/28/2023-03:03:20] [I] Persistent Cache Ratio: 0 +[12/28/2023-03:03:20] [I] Inputs: +[12/28/2023-03:03:20] [I] === Reporting Options === +[12/28/2023-03:03:20] [I] Verbose: 
Disabled +[12/28/2023-03:03:20] [I] Averages: 100 inferences +[12/28/2023-03:03:20] [I] Percentiles: 90,95,99 +[12/28/2023-03:03:20] [I] Dump refittable layers:Disabled +[12/28/2023-03:03:20] [I] Dump output: Disabled +[12/28/2023-03:03:20] [I] Profile: Disabled +[12/28/2023-03:03:20] [I] Export timing to JSON file: +[12/28/2023-03:03:20] [I] Export output to JSON file: +[12/28/2023-03:03:20] [I] Export profile to JSON file: +[12/28/2023-03:03:20] [I] +[12/28/2023-03:03:20] [I] === Device Information === +[12/28/2023-03:03:20] [I] Selected Device: Orin +[12/28/2023-03:03:20] [I] Compute Capability: 8.7 +[12/28/2023-03:03:20] [I] SMs: 8 +[12/28/2023-03:03:20] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-03:03:20] [I] Device Global Memory: 7471 MiB +[12/28/2023-03:03:20] [I] Shared Memory per SM: 164 KiB +[12/28/2023-03:03:20] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-03:03:20] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-03:03:20] [I] +[12/28/2023-03:03:20] [I] TensorRT version: 8.5.2 +[12/28/2023-03:03:20] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 3022 (MiB) +[12/28/2023-03:03:23] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +285, now: CPU 574, GPU 3327 (MiB) +[12/28/2023-03:03:23] [I] Start parsing network model +[12/28/2023-03:03:23] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-03:03:23] [I] [TRT] Input filename: yolo_nas_pose_s_int8.onnx +[12/28/2023-03:03:23] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-03:03:23] [I] [TRT] Opset version: 17 +[12/28/2023-03:03:23] [I] [TRT] Producer name: pytorch +[12/28/2023-03:03:23] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-03:03:23] [I] [TRT] Domain: +[12/28/2023-03:03:23] [I] [TRT] Model version: 0 +[12/28/2023-03:03:23] [I] [TRT] Doc string: +[12/28/2023-03:03:23] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-03:03:27] [I] Finish parsing network model +&&&& 
FAILED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_int8.onnx --fp16 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_int8.onnx.fp16.engine diff --git a/yolo_nas_pose_s_int8.onnx.int8.engine b/yolo_nas_pose_s_int8.onnx.int8.engine new file mode 100644 index 0000000000000000000000000000000000000000..549821982a96a767297f1ec53d638370de77a2db --- /dev/null +++ b/yolo_nas_pose_s_int8.onnx.int8.engine @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b03544a46558d98432ec484cef728f0be750bcbb79762856b78125143998b2d +size 17958627 diff --git a/yolo_nas_pose_s_int8.onnx.int8.engine.err b/yolo_nas_pose_s_int8.onnx.int8.engine.err new file mode 100644 index 0000000000000000000000000000000000000000..b37b01dab390ef8d0907486ee3ef6b474d5fe436 --- /dev/null +++ b/yolo_nas_pose_s_int8.onnx.int8.engine.err @@ -0,0 +1,7 @@ +[12/28/2023-03:36:18] [W] [TRT] onnx2trt_utils.cpp:375: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32. +[12/28/2023-03:36:18] [W] [TRT] onnx2trt_utils.cpp:403: One or more weights outside the range of INT32 was clamped +[12/28/2023-03:36:21] [W] [TRT] Calibrator won't be used in explicit precision mode. Use quantization aware training to generate network with Quantize/Dequantize nodes. +[12/28/2023-03:46:46] [W] * Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized. +[12/28/2023-03:46:46] [W] If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the throughput. +[12/28/2023-03:46:46] [W] * GPU compute time is unstable, with coefficient of variance = 3.57305%. +[12/28/2023-03:46:46] [W] If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the stability. 
diff --git a/yolo_nas_pose_s_int8.onnx.int8.engine.log b/yolo_nas_pose_s_int8.onnx.int8.engine.log new file mode 100644 index 0000000000000000000000000000000000000000..8750356854c2e87b1b9e1db554a8605602fccb15 --- /dev/null +++ b/yolo_nas_pose_s_int8.onnx.int8.engine.log @@ -0,0 +1,322 @@ +&&&& RUNNING TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_int8.onnx --int8 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_int8.onnx.int8.engine +[12/28/2023-03:36:14] [I] === Model Options === +[12/28/2023-03:36:14] [I] Format: ONNX +[12/28/2023-03:36:14] [I] Model: yolo_nas_pose_s_int8.onnx +[12/28/2023-03:36:14] [I] Output: +[12/28/2023-03:36:14] [I] === Build Options === +[12/28/2023-03:36:14] [I] Max batch: explicit batch +[12/28/2023-03:36:14] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default +[12/28/2023-03:36:14] [I] minTiming: 1 +[12/28/2023-03:36:14] [I] avgTiming: 8 +[12/28/2023-03:36:14] [I] Precision: FP32+INT8 +[12/28/2023-03:36:14] [I] LayerPrecisions: +[12/28/2023-03:36:14] [I] Calibration: Dynamic +[12/28/2023-03:36:14] [I] Refit: Disabled +[12/28/2023-03:36:14] [I] Sparsity: Disabled +[12/28/2023-03:36:14] [I] Safe mode: Disabled +[12/28/2023-03:36:14] [I] DirectIO mode: Disabled +[12/28/2023-03:36:14] [I] Restricted mode: Disabled +[12/28/2023-03:36:14] [I] Build only: Disabled +[12/28/2023-03:36:14] [I] Save engine: yolo_nas_pose_s_int8.onnx.int8.engine +[12/28/2023-03:36:14] [I] Load engine: +[12/28/2023-03:36:14] [I] Profiling verbosity: 0 +[12/28/2023-03:36:14] [I] Tactic sources: Using default tactic sources +[12/28/2023-03:36:14] [I] timingCacheMode: local +[12/28/2023-03:36:14] [I] timingCacheFile: +[12/28/2023-03:36:14] [I] Heuristic: Disabled +[12/28/2023-03:36:14] [I] Preview Features: Use default preview flags. 
+[12/28/2023-03:36:14] [I] Input(s)s format: fp32:CHW +[12/28/2023-03:36:14] [I] Output(s)s format: fp32:CHW +[12/28/2023-03:36:14] [I] Input build shapes: model +[12/28/2023-03:36:14] [I] Input calibration shapes: model +[12/28/2023-03:36:14] [I] === System Options === +[12/28/2023-03:36:14] [I] Device: 0 +[12/28/2023-03:36:14] [I] DLACore: +[12/28/2023-03:36:14] [I] Plugins: +[12/28/2023-03:36:14] [I] === Inference Options === +[12/28/2023-03:36:14] [I] Batch: Explicit +[12/28/2023-03:36:14] [I] Input inference shapes: model +[12/28/2023-03:36:14] [I] Iterations: 10 +[12/28/2023-03:36:14] [I] Duration: 15s (+ 200ms warm up) +[12/28/2023-03:36:14] [I] Sleep time: 0ms +[12/28/2023-03:36:14] [I] Idle time: 0ms +[12/28/2023-03:36:14] [I] Streams: 1 +[12/28/2023-03:36:14] [I] ExposeDMA: Disabled +[12/28/2023-03:36:14] [I] Data transfers: Enabled +[12/28/2023-03:36:14] [I] Spin-wait: Disabled +[12/28/2023-03:36:14] [I] Multithreading: Disabled +[12/28/2023-03:36:14] [I] CUDA Graph: Disabled +[12/28/2023-03:36:14] [I] Separate profiling: Disabled +[12/28/2023-03:36:14] [I] Time Deserialize: Disabled +[12/28/2023-03:36:14] [I] Time Refit: Disabled +[12/28/2023-03:36:14] [I] NVTX verbosity: 0 +[12/28/2023-03:36:14] [I] Persistent Cache Ratio: 0 +[12/28/2023-03:36:14] [I] Inputs: +[12/28/2023-03:36:14] [I] === Reporting Options === +[12/28/2023-03:36:14] [I] Verbose: Disabled +[12/28/2023-03:36:14] [I] Averages: 100 inferences +[12/28/2023-03:36:14] [I] Percentiles: 90,95,99 +[12/28/2023-03:36:14] [I] Dump refittable layers:Disabled +[12/28/2023-03:36:14] [I] Dump output: Disabled +[12/28/2023-03:36:14] [I] Profile: Disabled +[12/28/2023-03:36:14] [I] Export timing to JSON file: +[12/28/2023-03:36:14] [I] Export output to JSON file: +[12/28/2023-03:36:14] [I] Export profile to JSON file: +[12/28/2023-03:36:14] [I] +[12/28/2023-03:36:14] [I] === Device Information === +[12/28/2023-03:36:14] [I] Selected Device: Orin +[12/28/2023-03:36:14] [I] Compute Capability: 8.7 
+[12/28/2023-03:36:14] [I] SMs: 8 +[12/28/2023-03:36:14] [I] Compute Clock Rate: 0.624 GHz +[12/28/2023-03:36:14] [I] Device Global Memory: 7471 MiB +[12/28/2023-03:36:14] [I] Shared Memory per SM: 164 KiB +[12/28/2023-03:36:14] [I] Memory Bus Width: 128 bits (ECC disabled) +[12/28/2023-03:36:14] [I] Memory Clock Rate: 0.624 GHz +[12/28/2023-03:36:14] [I] +[12/28/2023-03:36:14] [I] TensorRT version: 8.5.2 +[12/28/2023-03:36:15] [I] [TRT] [MemUsageChange] Init CUDA: CPU +220, GPU +0, now: CPU 249, GPU 2931 (MiB) +[12/28/2023-03:36:17] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +302, GPU +285, now: CPU 574, GPU 3236 (MiB) +[12/28/2023-03:36:18] [I] Start parsing network model +[12/28/2023-03:36:18] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-03:36:18] [I] [TRT] Input filename: yolo_nas_pose_s_int8.onnx +[12/28/2023-03:36:18] [I] [TRT] ONNX IR version: 0.0.8 +[12/28/2023-03:36:18] [I] [TRT] Opset version: 17 +[12/28/2023-03:36:18] [I] [TRT] Producer name: pytorch +[12/28/2023-03:36:18] [I] [TRT] Producer version: 2.1.2 +[12/28/2023-03:36:18] [I] [TRT] Domain: +[12/28/2023-03:36:18] [I] [TRT] Model version: 0 +[12/28/2023-03:36:18] [I] [TRT] Doc string: +[12/28/2023-03:36:18] [I] [TRT] ---------------------------------------------------------------- +[12/28/2023-03:36:21] [I] Finish parsing network model +[12/28/2023-03:36:21] [I] FP32 and INT8 precisions have been specified - more performance might be enabled by additionally specifying --fp16 or --best +[12/28/2023-03:36:24] [I] [TRT] ---------- Layers Running on DLA ---------- +[12/28/2023-03:36:24] [I] [TRT] ---------- Layers Running on GPU ---------- +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation1] +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/pre_process/pre_process.0/Cast.../pre_process/pre_process.2/Mul]} +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 1228) [Constant] 
+[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 1229) [Constant] +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONSTANT: (Unnamed Layer* 1230) [Constant] +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/stem/conv/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stem.conv.rbr_reparam.weight + /model/backbone/stem/conv/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stem/conv/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.downsample.rbr_reparam.weight + /model/backbone/stage1/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/downsample/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.conv2.conv.weight + /model/backbone/stage1/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/conv2/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.conv1.conv.weight + /model/backbone/stage1/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/conv1/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv 
+[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 494) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 510) [Shuffle] + /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage1.blocks.conv3.conv.weight + /model/backbone/stage1/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage1/blocks/conv3/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.reduce_skip2.conv.weight + /model/neck/neck2/reduce_skip2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/reduce_skip2/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.downsample.rbr_reparam.weight + /model/backbone/stage2/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/downsample/rbr_reparam/Conv 
+[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.downsample.conv.weight + /model/neck/neck2/downsample/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/downsample/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.conv2.conv.weight + /model/backbone/stage2/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/conv2/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.conv1.conv.weight + /model/backbone/stage2/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/conv1/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 557) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.1.cv1.rbr_reparam.weight + 
/model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 573) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.2.cv1.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.bottlenecks.2.cv2.rbr_reparam.weight + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage2.blocks.bottlenecks.2.alpha + (Unnamed Layer* 589) [Shuffle] + /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage2/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage2.blocks.conv3.conv.weight + /model/backbone/stage2/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + 
/model/backbone/stage2/blocks/conv3/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.reduce_skip2.conv.weight + /model/neck/neck1/reduce_skip2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/reduce_skip2/conv/Conv || model.neck.neck2.reduce_skip1.conv.weight + /model/neck/neck2/reduce_skip1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/reduce_skip1/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.downsample.rbr_reparam.weight + /model/backbone/stage3/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/downsample/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.downsample.conv.weight + /model/neck/neck1/downsample/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/downsample/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.conv2.conv.weight + /model/backbone/stage3/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/conv2/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.conv1.conv.weight + /model/backbone/stage3/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/conv1/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.0.cv2.rbr_reparam.weight + 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 639) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 655) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.2.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv1/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] 
CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.2.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/cv2/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.2.alpha + (Unnamed Layer* 671) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.2/Add) +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.3.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv1/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.3.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/cv2/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.3.alpha + (Unnamed Layer* 687) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.3/Add) +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.4.cv1.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + 
/model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv1/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.bottlenecks.4.cv2.rbr_reparam.weight + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/cv2/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage3.blocks.bottlenecks.4.alpha + (Unnamed Layer* 703) [Shuffle] + /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Mul, /model/backbone/stage3/blocks/bottlenecks/bottlenecks.4/Add) +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage3.blocks.conv3.conv.weight + /model/backbone/stage3/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage3/blocks/conv3/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.reduce_skip1.conv.weight + /model/neck/neck1/reduce_skip1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/reduce_skip1/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.downsample.rbr_reparam.weight + /model/backbone/stage4/downsample/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/downsample/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.conv2.conv.weight + /model/backbone/stage4/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/conv2/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.conv1.conv.weight + /model/backbone/stage4/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/conv1/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] 
CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 744) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.backbone.stage4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 760) [Shuffle] + /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Mul, /model/backbone/stage4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.stage4.blocks.conv3.conv.weight + 
/model/backbone/stage4/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/backbone/stage4/blocks/conv3/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.context_module.cv1.conv.weight + /model/backbone/context_module/cv1/conv/_weight_quantizer/QuantizeLinear + /model/backbone/context_module/cv1/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.2/MaxPool +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.1/MaxPool +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] POOLING: /model/backbone/context_module/m.0/MaxPool +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] COPY: /model/backbone/context_module/m.2/MaxPool_output_0 copy +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.backbone.context_module.cv2.conv.weight + /model/backbone/context_module/cv2/conv/_weight_quantizer/QuantizeLinear + /model/backbone/context_module/cv2/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.conv.conv.weight + /model/neck/neck1/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/conv/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] DECONVOLUTION: model.neck.neck1.upsample.weight + /model/neck/neck1/upsample/_weight_quantizer/QuantizeLinear + /model/neck/neck1/upsample/ConvTranspose +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.reduce_after_concat.conv.weight + /model/neck/neck1/reduce_after_concat/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/reduce_after_concat/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.conv2.conv.weight + /model/neck/neck1/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/conv2/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.conv1.conv.weight + /model/neck/neck1/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + 
/model/neck/neck1/blocks/conv1/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.0.alpha + (Unnamed Layer* 825) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck1/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] COPY: /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck1.blocks.bottlenecks.1.alpha + (Unnamed Layer* 841) [Shuffle] + /model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Mul, 
/model/neck/neck1/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck1.blocks.conv3.conv.weight + /model/neck/neck1/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck1/blocks/conv3/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.conv.conv.weight + /model/neck/neck2/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/conv/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/upsample/_input_quantizer/QuantizeLinear +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] DECONVOLUTION: model.neck.neck2.upsample.weight + /model/neck/neck2/upsample/_weight_quantizer/QuantizeLinear + /model/neck/neck2/upsample/ConvTranspose +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/Concat_/model/neck/neck2/reduce_skip1/act/Relu_output_0_clone_1 copy +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.reduce_after_concat.conv.weight + /model/neck/neck2/reduce_after_concat/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/reduce_after_concat/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.conv2.conv.weight + /model/neck/neck2/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/conv2/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.conv1.conv.weight + /model/neck/neck2/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/conv1/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.0.cv1.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + 
/model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv1/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.0.cv2.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/cv2/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.0.alpha + (Unnamed Layer* 890) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_input_quantizer/QuantizeLinear +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.1.cv1.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv1/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.bottlenecks.1.cv2.rbr_reparam.weight + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/cv2/rbr_reparam/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck2.blocks.bottlenecks.1.alpha + (Unnamed Layer* 906) [Shuffle] + /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] COPY: /model/neck/neck2/blocks/Concat_/model/neck/neck2/blocks/bottlenecks/bottlenecks.1/Add_output_0_clone_0 copy +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck2.blocks.conv3.conv.weight + /model/neck/neck2/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck2/blocks/conv3/conv/Conv +[12/28/2023-03:36:24] 
[I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.bbox_stem.seq.conv.weight + /model/heads/head1/bbox_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/bbox_stem/seq/conv/Conv || model.heads.head1.pose_stem.seq.conv.weight + /model/heads/head1/pose_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_stem/seq/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.conv.conv.weight + /model/neck/neck3/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/conv/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.reg_convs.0.seq.conv.weight + /model/heads/head1/reg_convs/reg_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/reg_convs/reg_convs.0/seq/conv/Conv || model.heads.head1.cls_convs.0.seq.conv.weight + /model/heads/head1/cls_convs/cls_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/cls_convs/cls_convs.0/seq/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.pose_convs.0.seq.conv.weight + /model/heads/head1/pose_convs/pose_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_convs/pose_convs.0/seq/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/conv1/conv/_input_quantizer/QuantizeLinear_clone_1 +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.cls_pred.weight + /model/heads/head1/cls_pred/_weight_quantizer/QuantizeLinear + /model/heads/head1/cls_pred/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.reg_pred.weight + /model/heads/head1/reg_pred/_weight_quantizer/QuantizeLinear + /model/heads/head1/reg_pred/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.pose_convs.1.seq.conv.weight + /model/heads/head1/pose_convs/pose_convs.1/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_convs/pose_convs.1/seq/conv/Conv 
+[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.conv2.conv.weight + /model/neck/neck3/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/conv2/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.conv1.conv.weight + /model/neck/neck3/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/conv1/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape + /model/heads/Transpose +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head1.pose_pred.weight + /model/heads/head1/pose_pred/_weight_quantizer/QuantizeLinear + /model/heads/head1/pose_pred/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.0.cv1.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.0.cv2.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.0.alpha + (Unnamed Layer* 972) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] COPY: /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] 
CONVOLUTION: model.neck.neck3.blocks.bottlenecks.1.cv1.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.bottlenecks.1.cv2.conv.weight + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck3.blocks.bottlenecks.1.alpha + (Unnamed Layer* 1013) [Shuffle] + /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck3/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck3.blocks.conv3.conv.weight + /model/neck/neck3/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck3/blocks/conv3/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.pose_stem.seq.conv.weight + /model/heads/head2/pose_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_stem/seq/conv/Conv || model.heads.head2.bbox_stem.seq.conv.weight + /model/heads/head2/bbox_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/bbox_stem/seq/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.conv.conv.weight + /model/neck/neck4/conv/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/conv/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.reg_convs.0.seq.conv.weight + /model/heads/head2/reg_convs/reg_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/reg_convs/reg_convs.0/seq/conv/Conv || model.heads.head2.cls_convs.0.seq.conv.weight + /model/heads/head2/cls_convs/cls_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/cls_convs/cls_convs.0/seq/conv/Conv +[12/28/2023-03:36:24] 
[I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.pose_convs.0.seq.conv.weight + /model/heads/head2/pose_convs/pose_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_convs/pose_convs.0/seq/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.conv2.conv.weight + /model/neck/neck4/blocks/conv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/conv2/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.conv1.conv.weight + /model/neck/neck4/blocks/conv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/conv1/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.cls_pred.weight + /model/heads/head2/cls_pred/_weight_quantizer/QuantizeLinear + /model/heads/head2/cls_pred/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.reg_pred.weight + /model/heads/head2/reg_pred/_weight_quantizer/QuantizeLinear + /model/heads/head2/reg_pred/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.pose_convs.1.seq.conv.weight + /model/heads/head2/pose_convs/pose_convs.1/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_convs/pose_convs.1/seq/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_4 + /model/heads/Transpose_3 +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head2.pose_pred.weight + /model/heads/head2/pose_pred/_weight_quantizer/QuantizeLinear + /model/heads/head2/pose_pred/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.0.cv1.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv1/conv/Conv 
+[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] SOFTMAX: /model/heads/Softmax_1 +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.0.cv2.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/cv2/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_1 +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.0.alpha + (Unnamed Layer* 1078) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.0/Add) +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] COPY: /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/_input_quantizer/QuantizeLinear +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.1.cv1.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv1/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.bottlenecks.1.cv2.conv.weight + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/cv2/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] POINTWISE: PWN(model.neck.neck4.blocks.bottlenecks.1.alpha + (Unnamed Layer* 1119) [Shuffle] + /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Mul, /model/neck/neck4/blocks/bottlenecks/bottlenecks.1/Add) +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.neck.neck4.blocks.conv3.conv.weight + /model/neck/neck4/blocks/conv3/conv/_weight_quantizer/QuantizeLinear + /model/neck/neck4/blocks/conv3/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.bbox_stem.seq.conv.weight + 
/model/heads/head3/bbox_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/bbox_stem/seq/conv/Conv || model.heads.head3.pose_stem.seq.conv.weight + /model/heads/head3/pose_stem/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_stem/seq/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.reg_convs.0.seq.conv.weight + /model/heads/head3/reg_convs/reg_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/reg_convs/reg_convs.0/seq/conv/Conv || model.heads.head3.cls_convs.0.seq.conv.weight + /model/heads/head3/cls_convs/cls_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/cls_convs/cls_convs.0/seq/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_convs.0.seq.conv.weight + /model/heads/head3/pose_convs/pose_convs.0/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_convs/pose_convs.0/seq/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.cls_pred.weight + /model/heads/head3/cls_pred/_weight_quantizer/QuantizeLinear + /model/heads/head3/cls_pred/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.reg_pred.weight + /model/heads/head3/reg_pred/_weight_quantizer/QuantizeLinear + /model/heads/head3/reg_pred/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_convs.1.seq.conv.weight + /model/heads/head3/pose_convs/pose_convs.1/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_convs/pose_convs.1/seq/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] SHUFFLE: /model/heads/Reshape_8 + /model/heads/Transpose_6 +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_convs.2.seq.conv.weight + /model/heads/head3/pose_convs/pose_convs.2/seq/conv/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_convs/pose_convs.2/seq/conv/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] 
SOFTMAX: /model/heads/Softmax_2 +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: model.heads.head3.pose_pred.weight + /model/heads/head3/pose_pred/_weight_quantizer/QuantizeLinear + /model/heads/head3/pose_pred/Conv +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] CONVOLUTION: /model/heads/Conv_2 +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice_1.../post_process/Reshape_2]} +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] NMS: batched_nms_243 +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] DEVICE_TO_SHAPE_HOST: (Unnamed Layer* 1232) [NMS]_1_output[DevicetoShapeHostCopy] +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation2] +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] MYELIN: {ForeignNode[/model/heads/head1/Slice...graph2_/Concat_5]} +[12/28/2023-03:36:24] [I] [TRT] [GpuLayer] TRAIN_STATION: [trainStation3] +[12/28/2023-03:36:26] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +534, GPU +773, now: CPU 1233, GPU 4134 (MiB) +[12/28/2023-03:36:26] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +82, GPU +118, now: CPU 1315, GPU 4252 (MiB) +[12/28/2023-03:36:26] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored. +[12/28/2023-03:46:28] [I] [TRT] Total Activation Memory: 7940674048 +[12/28/2023-03:46:28] [I] [TRT] Detected 1 inputs and 1 output network tensors. +[12/28/2023-03:46:29] [I] [TRT] Total Host Persistent Memory: 307232 +[12/28/2023-03:46:29] [I] [TRT] Total Device Persistent Memory: 38912 +[12/28/2023-03:46:29] [I] [TRT] Total Scratch Memory: 134217728 +[12/28/2023-03:46:29] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 74 MiB, GPU 132 MiB +[12/28/2023-03:46:29] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 173 steps to complete. +[12/28/2023-03:46:29] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 42.4134ms to assign 13 blocks to 173 nodes requiring 144692224 bytes. 
+[12/28/2023-03:46:29] [I] [TRT] Total Activation Memory: 144692224 +[12/28/2023-03:46:30] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +6, now: CPU 1665, GPU 5446 (MiB) +[12/28/2023-03:46:30] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +15, GPU +16, now: CPU 15, GPU 16 (MiB) +[12/28/2023-03:46:31] [I] Engine built in 616.071 sec. +[12/28/2023-03:46:31] [I] [TRT] Loaded engine size: 17 MiB +[12/28/2023-03:46:31] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +12, now: CPU 1250, GPU 5268 (MiB) +[12/28/2023-03:46:31] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +15, now: CPU 0, GPU 15 (MiB) +[12/28/2023-03:46:31] [I] Engine deserialized in 0.135656 sec. +[12/28/2023-03:46:31] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +0, GPU +7, now: CPU 1251, GPU 5268 (MiB) +[12/28/2023-03:46:31] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +138, now: CPU 0, GPU 153 (MiB) +[12/28/2023-03:46:31] [I] Setting persistentCacheLimit to 0 bytes. 
+[12/28/2023-03:46:31] [I] Using random values for input onnx::Cast_0 +[12/28/2023-03:46:31] [I] Created input binding for onnx::Cast_0 with dimensions 1x3x640x640 +[12/28/2023-03:46:31] [I] Using random values for output graph2_flat_predictions +[12/28/2023-03:46:31] [I] Created output binding for graph2_flat_predictions with dimensions -1x57 +[12/28/2023-03:46:31] [I] Starting inference +[12/28/2023-03:46:46] [I] Warmup completed 8 queries over 200 ms +[12/28/2023-03:46:46] [I] Timing trace has 959 queries over 15.0421 s +[12/28/2023-03:46:46] [I] +[12/28/2023-03:46:46] [I] === Trace details === +[12/28/2023-03:46:46] [I] Trace averages of 100 runs: +[12/28/2023-03:46:46] [I] Average on 100 runs - GPU latency: 15.6862 ms - Host latency: 15.799 ms (enqueue 15.7577 ms) +[12/28/2023-03:46:46] [I] Average on 100 runs - GPU latency: 15.8638 ms - Host latency: 15.9795 ms (enqueue 15.9265 ms) +[12/28/2023-03:46:46] [I] Average on 100 runs - GPU latency: 16.1783 ms - Host latency: 16.2971 ms (enqueue 16.24 ms) +[12/28/2023-03:46:46] [I] Average on 100 runs - GPU latency: 15.4097 ms - Host latency: 15.5201 ms (enqueue 15.487 ms) +[12/28/2023-03:46:46] [I] Average on 100 runs - GPU latency: 15.3654 ms - Host latency: 15.4758 ms (enqueue 15.4444 ms) +[12/28/2023-03:46:46] [I] Average on 100 runs - GPU latency: 15.4303 ms - Host latency: 15.5405 ms (enqueue 15.5048 ms) +[12/28/2023-03:46:46] [I] Average on 100 runs - GPU latency: 15.388 ms - Host latency: 15.5003 ms (enqueue 15.463 ms) +[12/28/2023-03:46:46] [I] Average on 100 runs - GPU latency: 15.3653 ms - Host latency: 15.4758 ms (enqueue 15.4443 ms) +[12/28/2023-03:46:46] [I] Average on 100 runs - GPU latency: 15.3726 ms - Host latency: 15.4828 ms (enqueue 15.4505 ms) +[12/28/2023-03:46:46] [I] +[12/28/2023-03:46:46] [I] === Performance summary === +[12/28/2023-03:46:46] [I] Throughput: 63.7546 qps +[12/28/2023-03:46:46] [I] Latency: min = 14.4111 ms, max = 20.8093 ms, mean = 15.6545 ms, median = 15.4062 ms, 
percentile(90%) = 16.3127 ms, percentile(95%) = 16.4324 ms, percentile(99%) = 17.4062 ms +[12/28/2023-03:46:46] [I] Enqueue Time: min = 14.3828 ms, max = 20.7617 ms, mean = 15.6158 ms, median = 15.3755 ms, percentile(90%) = 16.2577 ms, percentile(95%) = 16.3701 ms, percentile(99%) = 17.3357 ms +[12/28/2023-03:46:46] [I] H2D Latency: min = 0.0805664 ms, max = 0.117188 ms, mean = 0.0972066 ms, median = 0.0976562 ms, percentile(90%) = 0.0996094 ms, percentile(95%) = 0.0998535 ms, percentile(99%) = 0.111328 ms +[12/28/2023-03:46:46] [I] GPU Compute Time: min = 14.3018 ms, max = 20.6875 ms, mean = 15.5423 ms, median = 15.2939 ms, percentile(90%) = 16.1956 ms, percentile(95%) = 16.3105 ms, percentile(99%) = 17.293 ms +[12/28/2023-03:46:46] [I] D2H Latency: min = 0.00292969 ms, max = 0.0727539 ms, mean = 0.014973 ms, median = 0.0120239 ms, percentile(90%) = 0.0234375 ms, percentile(95%) = 0.0253906 ms, percentile(99%) = 0.0373535 ms +[12/28/2023-03:46:46] [I] Total Host Walltime: 15.0421 s +[12/28/2023-03:46:46] [I] Total GPU Compute Time: 14.9051 s +[12/28/2023-03:46:46] [I] Explanations of the performance metrics are printed in the verbose logs. +[12/28/2023-03:46:46] [I] +&&&& PASSED TensorRT.trtexec [TensorRT v8502] # /usr/src/tensorrt/bin/trtexec --onnx=yolo_nas_pose_s_int8.onnx --int8 --avgRuns=100 --duration=15 --saveEngine=yolo_nas_pose_s_int8.onnx.int8.engine diff --git a/yolo_nas_pose_s_int8.onnx.usage.txt b/yolo_nas_pose_s_int8.onnx.usage.txt new file mode 100644 index 0000000000000000000000000000000000000000..1f2065bbce872894f5c77cb7aa72b0b6d8426166 --- /dev/null +++ b/yolo_nas_pose_s_int8.onnx.usage.txt @@ -0,0 +1,58 @@ + +Model exported successfully to yolo_nas_pose_s_int8.onnx +Model expects input image of shape [1, 3, 640, 640] +Input image dtype is torch.uint8 + +Exported model already contains preprocessing (normalization) step, so you don't need to do it manually. 
+Preprocessing steps to be applied to input image are: +Sequential( + (0): CastTensorTo(dtype=torch.float32) + (1): ChannelSelect(channels_indexes=tensor([2, 1, 0])) + (2): ApplyMeanStd(mean=[0.], scale=[255.]) +) + + +Exported model contains postprocessing (NMS) step with the following parameters: + num_pre_nms_predictions=1000 + max_predictions_per_image=10 + nms_threshold=0.5 + confidence_threshold=0.15 + output_predictions_format=flat + + +Exported model is in ONNX format and can be used with ONNXRuntime +To run inference with ONNXRuntime, please use the following code snippet: + + import onnxruntime + import numpy as np + session = onnxruntime.InferenceSession("yolo_nas_pose_s_int8.onnx", providers=["CUDAExecutionProvider", "CPUExecutionProvider"]) + inputs = [o.name for o in session.get_inputs()] + outputs = [o.name for o in session.get_outputs()] + + example_input_image = np.zeros((1, 3, 640, 640)).astype(np.uint8) + predictions = session.run(outputs, {inputs[0]: example_input_image}) + +Exported model can also be used with TensorRT +To run inference with TensorRT, please see TensorRT deployment documentation +You can benchmark the model using the following code snippet: + + trtexec --onnx=yolo_nas_pose_s_int8.onnx --int8 --avgRuns=100 --duration=15 + + +Exported model has predictions in flat format: + +# flat_predictions is a 2D array of [N,K] shape +# Each row represents (image_index, x_min, y_min, x_max, y_max, confidence, joints...) 
+# Please note all values are floats, so you have to convert them to integers if needed + +[flat_predictions] = predictions +pred_bboxes = flat_predictions[:, 1:5] +pred_scores = flat_predictions[:, 5] +pred_joints = flat_predictions[:, 6:].reshape((len(pred_bboxes), -1, 3)) +for i in range(len(pred_bboxes)): + confidence = pred_scores[i] + x_min, y_min, x_max, y_max = pred_bboxes[i] + print(f"Detected pose with confidence={{confidence}}, x_min={{x_min}}, y_min={{y_min}}, x_max={{x_max}}, y_max={{y_max}}") + for joint_index, (x, y, confidence) in enumerate(pred_joints[i]): + print(f"Joint {{joint_index}} has coordinates x={{x}}, y={{y}}, confidence={{confidence}}") + diff --git a/yolo_nas_pose_to_onnx.py b/yolo_nas_pose_to_onnx.py new file mode 100644 index 0000000000000000000000000000000000000000..74a8ba28542fd636d5987bf7d4f1689d4e369909 --- /dev/null +++ b/yolo_nas_pose_to_onnx.py @@ -0,0 +1,151 @@ +#! /usr/bin/python3 + +from termcolor import cprint, colored +from super_gradients.common.object_names import Models +from super_gradients.training import models +from super_gradients.conversion import ExportTargetBackend, ExportQuantizationMode, DetectionOutputFormatMode +import time +import cv2 +import numpy as np +from super_gradients.training.utils.media.image import load_image +import onnxruntime +import os +from super_gradients.training.utils.visualization.pose_estimation import PoseVisualization +import matplotlib.pyplot as plt + +os.environ['CRASH_HANDLER']='0' + +# Conversion Setting + +CONVERSION = True +input_image_shape = [640, 640] +quantization_modes = [None, ExportQuantizationMode.INT8, ExportQuantizationMode.FP16] +output_predictions_format=DetectionOutputFormatMode.FLAT_FORMAT + +# NMS-related Setting +confidence_threshold=.15 +nms_threshold=.5 +num_pre_nms_predictions=1000 +max_predictions_per_image=10 + +# ONNXruntime Benchmark Setting +BENCHMARK=True +n_run = 1000 +n_warm_up = 200 +image_name = 
"https://deci-pretrained-models.s3.amazonaws.com/sample_images/beatles-abbeyroad.jpg"

# Check
SHAPE_CHECK = True
VISUAL_CHECK = True


def iterate_over_flat_predictions(predictions, batch_size):
    """Yield per-image detections from flat-format model output.

    The flat format is a single [N, K] array: column 0 is the image index
    within the batch, columns 1:5 are xyxy boxes, column 5 is the pose
    confidence, and columns 6: are the flattened (x, y, conf) joint triplets.

    Yields (image_index, pred_bboxes, pred_scores, pred_joints) for each
    image_index in range(batch_size).
    """
    [flat_predictions] = predictions

    for image_index in range(batch_size):
        mask = flat_predictions[:, 0] == image_index
        pred_bboxes = flat_predictions[mask, 1:5]
        pred_scores = flat_predictions[mask, 5]
        pred_joints = flat_predictions[mask, 6:].reshape((len(pred_bboxes), -1, 3))
        yield image_index, pred_bboxes, pred_scores, pred_joints


def show_predictions_from_flat_format(image, predictions):
    """Draw the poses predicted for the first image and display them with pyplot."""
    # iterate_over_flat_predictions is already a generator, so next() on it
    # directly yields the first image's detections (was next(iter(...))).
    image_index, pred_boxes, pred_scores, pred_joints = next(
        iterate_over_flat_predictions(predictions, 1)
    )

    image = PoseVisualization.draw_poses(
        image=image, poses=pred_joints, scores=pred_scores, boxes=pred_boxes,
        edge_links=None, edge_colors=None, keypoint_colors=None, is_crowd=None
    )

    plt.figure(figsize=(8, 8))
    plt.imshow(image)
    plt.tight_layout()
    plt.show()


# Single test image, resized to the network input and laid out as BCHW uint8
# (the exported model embeds preprocessing, so no normalization is done here).
image = load_image(image_name)
image = cv2.resize(image, (input_image_shape[1], input_image_shape[0]))
image_bchw = np.transpose(np.expand_dims(image, 0), (0, 3, 1, 2))

for model_name in [Models.YOLO_NAS_POSE_L, Models.YOLO_NAS_POSE_M, Models.YOLO_NAS_POSE_N, Models.YOLO_NAS_POSE_S]:
    for q in quantization_modes:

        # Map the quantization mode to the label used in the exported file name.
        if q is None:
            q_label = 'fp32'
        elif q == ExportQuantizationMode.INT8:
            q_label = 'int8'
        elif q == ExportQuantizationMode.FP16:
            q_label = 'fp16'
        else:
            # Fail loudly on an unknown mode. The original bare `raise` outside
            # an except block would itself crash with
            # "RuntimeError: No active exception to re-raise".
            raise ValueError(f"Unsupported quantization mode: {q}")

        export_name = f"{model_name}_{q_label}.onnx"

        # Perform Model Conversion from PyTorch to ONNX using Super-Gradient's official export()
        print(f"1. Convert {colored(model_name,'blue')} from PyTorch to ONNX format using {colored(q_label,'red')} precision, saved as {colored(export_name,'green')}")

        if CONVERSION:
            model = models.get(model_name, pretrained_weights="coco_pose")

            # Remaining export() kwargs (calibration_*, device, dtypes, ...)
            # are left at their library defaults.
            export_result = model.export(
                output=export_name,
                confidence_threshold=confidence_threshold,
                nms_threshold=nms_threshold,
                engine=ExportTargetBackend.ONNXRUNTIME,
                quantization_mode=q,
                preprocessing=True,
                postprocessing=True,
                batch_size=1,
                input_image_shape=input_image_shape,
                max_predictions_per_image=max_predictions_per_image,
                onnx_simplify=True,
                output_predictions_format=output_predictions_format,
                num_pre_nms_predictions=num_pre_nms_predictions,
            )

            # Persist the export() usage report next to the exported model.
            usage_name = export_name + '.usage.txt'
            with open(usage_name, 'w') as f:
                f.write(str(export_result))
            print(f"1.1 Related usage to {colored(export_name, 'green')} has been stored to {colored(usage_name,'yellow')}")

        if BENCHMARK:
            # Perform inference on ONNXRuntime, preferring the CUDA provider.
            session = onnxruntime.InferenceSession(export_name, providers=['CUDAExecutionProvider', "CPUExecutionProvider"])
            inputs = [o.name for o in session.get_inputs()]
            outputs = [o.name for o in session.get_outputs()]

            # Warm-up runs are excluded from the timing below.
            for i in range(n_warm_up):
                result = session.run(outputs, {inputs[0]: image_bchw})

            t = time.time()
            for i in range(n_run):
                result = session.run(outputs, {inputs[0]: image_bchw})
            latency = (time.time() - t) / n_run
            fps = round(1 / latency, 2)

            print(f'2. Averaged FPS: {colored(fps, "red")}')

            if SHAPE_CHECK:
                # Dump every detection of the (single-image) batch for a sanity check.
                for image_index, pred_bboxes, pred_scores, pred_joints in iterate_over_flat_predictions(result, batch_size=1):

                    N = pred_scores.shape[0]

                    for i in range(N):
                        print(f'Detected Object {colored(i,"green")}')
                        print(f'Predicted Bounding Box (Dimension: 1 x 4)', pred_bboxes[i,:])
                        print(f'Pose Confidence (scalar)', pred_scores[i])
                        print(f'Predicted Joints (Dimension: 3 x 17)', pred_joints[i,:,:])

            if VISUAL_CHECK:
                # Detection Result Visual Check
                show_predictions_from_flat_format(image, result)