From 6999467d92c35b7fc364042b274dd28ac14dd473 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Wed, 24 Dec 2025 14:02:46 +0800 Subject: [PATCH 01/19] fix 4.4.0 rc ci error --- .../object_detection/yolov3/igie/ci/prepare.sh | 6 ++++++ .../object_detection/yolov3/ixrt/ci/prepare.sh | 6 ++++++ .../object_detection/yolov5/igie/ci/prepare.sh | 6 ++++++ .../object_detection/yolov5/ixrt/ci/prepare.sh | 6 ++++++ .../object_detection/yolov5s/igie/ci/prepare.sh | 6 ++++++ .../object_detection/yolov5s/ixrt/ci/prepare.sh | 6 ++++++ models/cv/object_detection/yolov7/igie/README.md | 4 +++- .../object_detection/yolov7/igie/ci/prepare.sh | 16 ++++++++-------- .../object_detection/yolov7/ixrt/ci/prepare.sh | 6 ++++++ models/cv/object_detection/yolox/igie/README.md | 4 ++-- .../cv/object_detection/yolox/igie/ci/prepare.sh | 4 ++-- models/cv/object_detection/yolox/ixrt/README.md | 3 ++- .../cv/object_detection/yolox/ixrt/ci/prepare.sh | 3 ++- models/nlp/plm/transformer/igie/ci/prepare.sh | 1 + models/nlp/plm/transformer/ixrt/ci/prepare.sh | 1 + 15 files changed, 63 insertions(+), 15 deletions(-) diff --git a/models/cv/object_detection/yolov3/igie/ci/prepare.sh b/models/cv/object_detection/yolov3/igie/ci/prepare.sh index 73e4fd9e..dea400d2 100644 --- a/models/cv/object_detection/yolov3/igie/ci/prepare.sh +++ b/models/cv/object_detection/yolov3/igie/ci/prepare.sh @@ -25,6 +25,12 @@ else echo "Not Support Os" fi +if [ -f /etc/system-release ]; then + if grep -qi "Kylin" /etc/system-release; then + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD + fi +fi + pip3 install -r requirements.txt python3 export.py --weight yolov3.pt --output yolov3.onnx diff --git a/models/cv/object_detection/yolov3/ixrt/ci/prepare.sh b/models/cv/object_detection/yolov3/ixrt/ci/prepare.sh index c67a36fb..342732ce 100644 --- a/models/cv/object_detection/yolov3/ixrt/ci/prepare.sh +++ b/models/cv/object_detection/yolov3/ixrt/ci/prepare.sh @@ -25,6 +25,12 @@ else echo "Not Support Os" fi +if [ -f /etc/system-release ]; then + if grep -qi "Kylin" /etc/system-release; then + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD + fi +fi + pip3 install -r ../../ixrt_common/requirements.txt mkdir checkpoints unzip -q /root/data/3rd_party/onnx_tflite_yolov3.zip -d ./ diff --git a/models/cv/object_detection/yolov5/igie/ci/prepare.sh b/models/cv/object_detection/yolov5/igie/ci/prepare.sh index fcf24d4c..2232a189 100644 --- a/models/cv/object_detection/yolov5/igie/ci/prepare.sh +++ b/models/cv/object_detection/yolov5/igie/ci/prepare.sh @@ -16,6 +16,12 @@ set -x +if [ -f /etc/system-release ]; then + if grep -qi "Kylin" /etc/system-release; then + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD + fi +fi + pip3 install -r requirements.txt python3 export.py --weight yolov5m.pt --output yolov5m.onnx diff --git a/models/cv/object_detection/yolov5/ixrt/ci/prepare.sh b/models/cv/object_detection/yolov5/ixrt/ci/prepare.sh index 23b04131..a41f8c6b 100644 --- a/models/cv/object_detection/yolov5/ixrt/ci/prepare.sh +++ b/models/cv/object_detection/yolov5/ixrt/ci/prepare.sh @@ -16,6 +16,12 @@ set -x +if [ -f /etc/system-release ]; then + if grep -qi "Kylin" /etc/system-release; then + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD + fi +fi + pip3 install -r ../../ixrt_common/requirements.txt mkdir checkpoints diff --git a/models/cv/object_detection/yolov5s/igie/ci/prepare.sh b/models/cv/object_detection/yolov5s/igie/ci/prepare.sh index 
b53ca6d1..1ae5ca61 100644 --- a/models/cv/object_detection/yolov5s/igie/ci/prepare.sh +++ b/models/cv/object_detection/yolov5s/igie/ci/prepare.sh @@ -16,6 +16,12 @@ set -x +if [ -f /etc/system-release ]; then + if grep -qi "Kylin" /etc/system-release; then + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD + fi +fi + pip3 install -r ../../ixrt_common/requirements.txt mkdir -p checkpoints diff --git a/models/cv/object_detection/yolov5s/ixrt/ci/prepare.sh b/models/cv/object_detection/yolov5s/ixrt/ci/prepare.sh index a08c47d7..61148ce9 100644 --- a/models/cv/object_detection/yolov5s/ixrt/ci/prepare.sh +++ b/models/cv/object_detection/yolov5s/ixrt/ci/prepare.sh @@ -25,6 +25,12 @@ else echo "Not Support Os" fi +if [ -f /etc/system-release ]; then + if grep -qi "Kylin" /etc/system-release; then + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD + fi +fi + pip3 install -r ../../ixrt_common/requirements.txt mkdir -p checkpoints diff --git a/models/cv/object_detection/yolov7/igie/README.md b/models/cv/object_detection/yolov7/igie/README.md index 5b0ce587..992cf7ce 100644 --- a/models/cv/object_detection/yolov7/igie/README.md +++ b/models/cv/object_detection/yolov7/igie/README.md @@ -68,7 +68,9 @@ pip3 install -r requirements.txt # clone yolov7 git clone https://github.com/WongKinYiu/yolov7.git cd yolov7 - +git checkout a207844b1ce82d204ab36d87d496728d3d2348e7 +# set weights_only=False to be compatible with pytorch 2.7 +sed -i '252 s/map_location)/map_location, weights_only=False)/' ./models/experimental.py # export onnx model python3 export.py --weights ../yolov7.pt --simplify --img-size 640 640 --dynamic-batch --grid diff --git a/models/cv/object_detection/yolov7/igie/ci/prepare.sh b/models/cv/object_detection/yolov7/igie/ci/prepare.sh index e73fc3cc..7524c273 100644 --- a/models/cv/object_detection/yolov7/igie/ci/prepare.sh +++ b/models/cv/object_detection/yolov7/igie/ci/prepare.sh @@ -25,15 +25,15 @@ else echo "Not Support Os" fi -pip3 install -r requirements.txt - -# clone yolov7 -REPO_URL="https://gitee.com/monkeycc/yolov7.git" -TARGET_DIR="yolov7" -if [ ! 
-d "$TARGET_DIR" ]; then - git clone --depth 1 "$REPO_URL" "$TARGET_DIR" +if [ -f /etc/system-release ]; then + if grep -qi "Kylin" /etc/system-release; then + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD + fi fi -cd $TARGET_DIR +pip3 install -r requirements.txt + +cp -r /mnt/deepspark/data/3rd_party/yolov7 ./ +cd yolov7 # export onnx model python3 export.py --weights ../yolov7.pt --simplify --img-size 640 640 --dynamic-batch --grid diff --git a/models/cv/object_detection/yolov7/ixrt/ci/prepare.sh b/models/cv/object_detection/yolov7/ixrt/ci/prepare.sh index 88caeb5e..4bca6ec1 100644 --- a/models/cv/object_detection/yolov7/ixrt/ci/prepare.sh +++ b/models/cv/object_detection/yolov7/ixrt/ci/prepare.sh @@ -16,6 +16,12 @@ set -x +if [ -f /etc/system-release ]; then + if grep -qi "Kylin" /etc/system-release; then + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD + fi +fi + pip3 install -r ../../ixrt_common/requirements.txt mkdir -p checkpoints cp -r /root/data/3rd_party/yolov7 ./ diff --git a/models/cv/object_detection/yolox/igie/README.md b/models/cv/object_detection/yolox/igie/README.md index 260755a2..4099dacd 100644 --- a/models/cv/object_detection/yolox/igie/README.md +++ b/models/cv/object_detection/yolox/igie/README.md @@ -65,8 +65,8 @@ source /opt/rh/devtoolset-7/enable # install yolox git clone https://github.com/Megvii-BaseDetection/YOLOX.git cd YOLOX/ -python3 setup.py install - +pip3 install -v -e . --no-build-isolation +sed -i 's/torch.onnx._export/torch.onnx.export/g' tools/export_onnx.py # export onnx model python3 tools/export_onnx.py -c ../yolox_m.pth -o 13 -n yolox-m --input input --output output --dynamic --output-name ../yolox.onnx diff --git a/models/cv/object_detection/yolox/igie/ci/prepare.sh b/models/cv/object_detection/yolox/igie/ci/prepare.sh index 4a229e8e..5e02bc2c 100644 --- a/models/cv/object_detection/yolox/igie/ci/prepare.sh +++ b/models/cv/object_detection/yolox/igie/ci/prepare.sh @@ -33,7 +33,7 @@ source /opt/rh/devtoolset-7/enable # install yolox cp -r /mnt/deepspark/data/repos/YOLOX ./ cd YOLOX -python3 setup.py develop - +pip3 install -v -e . --no-build-isolation +sed -i 's/torch.onnx._export/torch.onnx.export/g' tools/export_onnx.py # export onnx model python3 tools/export_onnx.py -c ../yolox_m.pth -o 13 -n yolox-m --input input --output output --dynamic --output-name ../yolox.onnx diff --git a/models/cv/object_detection/yolox/ixrt/README.md b/models/cv/object_detection/yolox/ixrt/README.md index 156eed0f..485c4245 100644 --- a/models/cv/object_detection/yolox/ixrt/README.md +++ b/models/cv/object_detection/yolox/ixrt/README.md @@ -65,7 +65,8 @@ pip3 install -r requirements.txt git clone https://github.com/Megvii-BaseDetection/YOLOX.git --depth=1 cd YOLOX -python3 setup.py install +pip3 install -v -e . 
--no-build-isolation +sed -i 's/torch.onnx._export/torch.onnx.export/g' tools/export_onnx.py # export onnx model python3 tools/export_onnx.py --output-name ../yolox.onnx -n yolox-m -c yolox_m.pth --batch-size 32 ``` diff --git a/models/cv/object_detection/yolox/ixrt/ci/prepare.sh b/models/cv/object_detection/yolox/ixrt/ci/prepare.sh index d8fe97f5..14852b0b 100644 --- a/models/cv/object_detection/yolox/ixrt/ci/prepare.sh +++ b/models/cv/object_detection/yolox/ixrt/ci/prepare.sh @@ -28,4 +28,5 @@ fi pip install -r requirements.txt cp -r /root/data/repos/YOLOX ./ ln -s /root/data/checkpoints/yolox_m.pth ./YOLOX/ -cd YOLOX && python3 setup.py develop && python3 tools/export_onnx.py --output-name ../yolox.onnx -n yolox-m -c yolox_m.pth --batch-size 32 \ No newline at end of file +sed -i 's/torch.onnx._export/torch.onnx.export/g' ./YOLOX/tools/export_onnx.py +cd YOLOX && pip3 install -v -e . --no-build-isolation && python3 tools/export_onnx.py --output-name ../yolox.onnx -n yolox-m -c yolox_m.pth --batch-size 32 \ No newline at end of file diff --git a/models/nlp/plm/transformer/igie/ci/prepare.sh b/models/nlp/plm/transformer/igie/ci/prepare.sh index 7df2d9d9..18375541 100644 --- a/models/nlp/plm/transformer/igie/ci/prepare.sh +++ b/models/nlp/plm/transformer/igie/ci/prepare.sh @@ -31,6 +31,7 @@ if [ -f /etc/system-release ]; then pip3 install numpy==1.26.4 yum install -y libgomp export LD_PRELOAD=$(find /usr/local/lib/python3.10/site-packages/scikit_learn.libs -name "libgomp*.so.1.0.0" | head -n1) + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD fi fi diff --git a/models/nlp/plm/transformer/ixrt/ci/prepare.sh b/models/nlp/plm/transformer/ixrt/ci/prepare.sh index 9a3a30cd..3eb341e4 100644 --- a/models/nlp/plm/transformer/ixrt/ci/prepare.sh +++ b/models/nlp/plm/transformer/ixrt/ci/prepare.sh @@ -31,6 +31,7 @@ if [ -f /etc/system-release ]; then pip3 install numpy==1.26.4 yum install -y libgomp export LD_PRELOAD=$(find /usr/local/lib/python3.10/site-packages/scikit_learn.libs -name "libgomp*.so.1.0.0" | head -n1) + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD fi fi -- Gitee From 383506d2955b56b4695ced32b0c76d574d17b913 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Wed, 24 Dec 2025 16:40:24 +0800 Subject: [PATCH 02/19] use default transformers version --- .../vision_language_model/qwen2_5_vl/vllm/README.md | 4 ---- .../vision_language_model/qwen2_5_vl/vllm/ci/prepare.sh | 3 +-- .../multimodal/vision_language_model/qwen2_vl/vllm/README.md | 4 ---- .../vision_language_model/qwen2_vl/vllm/ci/prepare.sh | 1 - 4 files changed, 1 insertion(+), 11 deletions(-) diff --git a/models/multimodal/vision_language_model/qwen2_5_vl/vllm/README.md b/models/multimodal/vision_language_model/qwen2_5_vl/vllm/README.md index 2092dcc6..bf11ca80 100644 --- a/models/multimodal/vision_language_model/qwen2_5_vl/vllm/README.md +++ b/models/multimodal/vision_language_model/qwen2_5_vl/vllm/README.md @@ -24,10 +24,6 @@ cp -r ../../vllm_public_assets/ ./ In order to run the model smoothly, you need to get the sdk from [resource center](https://support.iluvatar.com/#/ProductLine?id=2) of Iluvatar CoreX official website. 
-```bash -pip install transformers==4.50.3 -``` - ## Model Inference ```bash diff --git a/models/multimodal/vision_language_model/qwen2_5_vl/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/qwen2_5_vl/vllm/ci/prepare.sh index cc6608c2..1ce243cb 100644 --- a/models/multimodal/vision_language_model/qwen2_5_vl/vllm/ci/prepare.sh +++ b/models/multimodal/vision_language_model/qwen2_5_vl/vllm/ci/prepare.sh @@ -16,5 +16,4 @@ set -x -cp -r ../../vllm_public_assets/ ./ -pip install transformers==4.50.3 +cp -r ../../vllm_public_assets/ ./ \ No newline at end of file diff --git a/models/multimodal/vision_language_model/qwen2_vl/vllm/README.md b/models/multimodal/vision_language_model/qwen2_vl/vllm/README.md index c694ff03..ab5fee09 100644 --- a/models/multimodal/vision_language_model/qwen2_vl/vllm/README.md +++ b/models/multimodal/vision_language_model/qwen2_vl/vllm/README.md @@ -24,10 +24,6 @@ cp -r ../../vllm_public_assets/ ./ In order to run the model smoothly, you need to get the sdk from [resource center](https://support.iluvatar.com/#/ProductLine?id=2) of Iluvatar CoreX official website. -```bash -pip install transformers==4.50.3 -``` - ## Model Inference ```bash diff --git a/models/multimodal/vision_language_model/qwen2_vl/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/qwen2_vl/vllm/ci/prepare.sh index cc6608c2..b54c8d39 100644 --- a/models/multimodal/vision_language_model/qwen2_vl/vllm/ci/prepare.sh +++ b/models/multimodal/vision_language_model/qwen2_vl/vllm/ci/prepare.sh @@ -17,4 +17,3 @@ set -x cp -r ../../vllm_public_assets/ ./ -pip install transformers==4.50.3 -- Gitee From c0a7184e20723b48ace5da5c6842a79bdb37daec Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Thu, 25 Dec 2025 13:33:12 +0800 Subject: [PATCH 03/19] fix igie resnet vgg16 install tf error --- models/cv/classification/resnet50_sample/igie/ci/prepare.sh | 1 - models/cv/classification/vgg16_sample/igie/ci/prepare.sh | 1 - tests/run_igie.py | 4 ++++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/models/cv/classification/resnet50_sample/igie/ci/prepare.sh b/models/cv/classification/resnet50_sample/igie/ci/prepare.sh index 92b57ede..da1c63b8 100644 --- a/models/cv/classification/resnet50_sample/igie/ci/prepare.sh +++ b/models/cv/classification/resnet50_sample/igie/ci/prepare.sh @@ -29,6 +29,5 @@ else fi pip3 install pycocotools pytest opencv-python==4.6.0.66 tqdm -pip3 install /mnt/deepspark/data/install/tensorflow-2.16.2+corex.4.3.0-cp310-cp310-linux_x86_64.whl ln -s /mnt/deepspark/data/checkpoints/resnet50.onnx ./ ln -s /mnt/deepspark/data/checkpoints/resnet50-fp32.pt ./ \ No newline at end of file diff --git a/models/cv/classification/vgg16_sample/igie/ci/prepare.sh b/models/cv/classification/vgg16_sample/igie/ci/prepare.sh index 72f8042d..5d55a371 100644 --- a/models/cv/classification/vgg16_sample/igie/ci/prepare.sh +++ b/models/cv/classification/vgg16_sample/igie/ci/prepare.sh @@ -29,5 +29,4 @@ else fi pip3 install pycocotools pytest opencv-python==4.6.0.66 tqdm -pip3 install /mnt/deepspark/data/install/tensorflow-2.16.2+corex.4.3.0-cp310-cp310-linux_x86_64.whl ln -s /mnt/deepspark/data/checkpoints/vgg16.onnx ./ \ No newline at end of file diff --git a/tests/run_igie.py b/tests/run_igie.py index 355aa1eb..c6045095 100644 --- a/tests/run_igie.py +++ b/tests/run_igie.py @@ -172,6 +172,10 @@ def run_clf_testcase(model, batch_size, whl_url): prepare_script += f""" pip install {whl_url}`curl -s {whl_url} | grep -o 'tensorflow-[^"]*\.whl' | head -n1` """ + else: + prepare_script += 
""" + pip install /mnt/deepspark/data/install/tensorflow-2.16.2+corex.4.3.0-cp310-cp310-linux_x86_64.whl + """ prepare_script += f""" bash ci/prepare.sh ls -l | grep onnx -- Gitee From 3310d6e28668252158ba24a2c4abc1c1707aa886 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Thu, 25 Dec 2025 14:04:03 +0800 Subject: [PATCH 04/19] fix protobuf --- models/cv/object_detection/yolox/ixrt/ci/prepare.sh | 3 ++- models/cv/object_detection/yolox/ixrt/requirements.txt | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/models/cv/object_detection/yolox/ixrt/ci/prepare.sh b/models/cv/object_detection/yolox/ixrt/ci/prepare.sh index 14852b0b..a505c44b 100644 --- a/models/cv/object_detection/yolox/ixrt/ci/prepare.sh +++ b/models/cv/object_detection/yolox/ixrt/ci/prepare.sh @@ -29,4 +29,5 @@ pip install -r requirements.txt cp -r /root/data/repos/YOLOX ./ ln -s /root/data/checkpoints/yolox_m.pth ./YOLOX/ sed -i 's/torch.onnx._export/torch.onnx.export/g' ./YOLOX/tools/export_onnx.py -cd YOLOX && pip3 install -v -e . --no-build-isolation && python3 tools/export_onnx.py --output-name ../yolox.onnx -n yolox-m -c yolox_m.pth --batch-size 32 \ No newline at end of file +cd YOLOX && pip3 install -v -e . --no-build-isolation && python3 tools/export_onnx.py --output-name ../yolox.onnx -n yolox-m -c yolox_m.pth --batch-size 32 +pip install protobuf==3.20.0 \ No newline at end of file diff --git a/models/cv/object_detection/yolox/ixrt/requirements.txt b/models/cv/object_detection/yolox/ixrt/requirements.txt index a64772f7..e8789518 100644 --- a/models/cv/object_detection/yolox/ixrt/requirements.txt +++ b/models/cv/object_detection/yolox/ixrt/requirements.txt @@ -1,10 +1,9 @@ tqdm -onnx +onnx==1.18.0 onnxsim tabulate pycocotools ppq pycuda -protobuf==3.20.0 opencv-python==4.6.0.66 ninja==1.11.1.3 \ No newline at end of file -- Gitee From 344fb101c9296daad1355701e792678b35004a81 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Fri, 26 Dec 2025 10:42:25 +0800 Subject: [PATCH 05/19] use uname -m to check aarch --- .../resnet50_sample/igie/ci/prepare.sh | 15 ++++++--------- .../vgg16_sample/igie/ci/prepare.sh | 15 ++++++--------- .../object_detection/yolov3/igie/ci/prepare.sh | 16 +++------------- .../object_detection/yolov3/ixrt/ci/prepare.sh | 16 +++------------- .../object_detection/yolov5/igie/ci/prepare.sh | 7 +++---- .../object_detection/yolov5/ixrt/ci/prepare.sh | 7 +++---- .../object_detection/yolov5s/igie/ci/prepare.sh | 7 +++---- .../object_detection/yolov5s/ixrt/ci/prepare.sh | 16 +++------------- .../object_detection/yolov7/igie/ci/prepare.sh | 16 +++------------- .../object_detection/yolov7/ixrt/ci/prepare.sh | 7 +++---- .../cv/object_detection/yolov8n/igie/README.md | 2 +- models/cv/object_detection/yolox/ixrt/README.md | 1 + .../bert_base_squad_sample/igie/ci/prepare.sh | 15 ++++++--------- .../bert_large_squad_sample/igie/ci/prepare.sh | 15 ++++++--------- models/nlp/plm/transformer/igie/ci/prepare.sh | 15 +++++++-------- models/nlp/plm/transformer/ixrt/ci/prepare.sh | 15 +++++++-------- 16 files changed, 64 insertions(+), 121 deletions(-) diff --git a/models/cv/classification/resnet50_sample/igie/ci/prepare.sh b/models/cv/classification/resnet50_sample/igie/ci/prepare.sh index da1c63b8..fb64d7d1 100644 --- a/models/cv/classification/resnet50_sample/igie/ci/prepare.sh +++ b/models/cv/classification/resnet50_sample/igie/ci/prepare.sh @@ -16,16 +16,13 @@ set -x -if [ -f /etc/redhat-release ]; then - if grep -qi "CentOS" /etc/redhat-release; then - yum install -y numactl - fi -elif 
[ -f /etc/system-release ]; then - if grep -qi "Kylin" /etc/system-release; then - yum install -y numactl - fi -else +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then apt install numactl +elif [[ ${ID} == "centos" ]]; then + yum install -y numactl +else + echo "Not Support Os" fi pip3 install pycocotools pytest opencv-python==4.6.0.66 tqdm diff --git a/models/cv/classification/vgg16_sample/igie/ci/prepare.sh b/models/cv/classification/vgg16_sample/igie/ci/prepare.sh index 5d55a371..484e403e 100644 --- a/models/cv/classification/vgg16_sample/igie/ci/prepare.sh +++ b/models/cv/classification/vgg16_sample/igie/ci/prepare.sh @@ -16,16 +16,13 @@ set -x -if [ -f /etc/redhat-release ]; then - if grep -qi "CentOS" /etc/redhat-release; then - yum install -y numactl - fi -elif [ -f /etc/system-release ]; then - if grep -qi "Kylin" /etc/system-release; then - yum install -y numactl - fi -else +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then apt install numactl +elif [[ ${ID} == "centos" ]]; then + yum install -y numactl +else + echo "Not Support Os" fi pip3 install pycocotools pytest opencv-python==4.6.0.66 tqdm diff --git a/models/cv/object_detection/yolov3/igie/ci/prepare.sh b/models/cv/object_detection/yolov3/igie/ci/prepare.sh index dea400d2..94c7df37 100644 --- a/models/cv/object_detection/yolov3/igie/ci/prepare.sh +++ b/models/cv/object_detection/yolov3/igie/ci/prepare.sh @@ -16,19 +16,9 @@ set -x -ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') -if [[ ${ID} == "ubuntu" ]]; then - apt install -y libgl1-mesa-glx -elif [[ ${ID} == "centos" ]]; then - yum install -y mesa-libGL -else - echo "Not Support Os" -fi - -if [ -f /etc/system-release ]; then - if grep -qi "Kylin" /etc/system-release; then - export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD - fi +if [[ $(uname -m) == "aarch64" ]]; then + echo "Architecture is aarch64." + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD fi pip3 install -r requirements.txt diff --git a/models/cv/object_detection/yolov3/ixrt/ci/prepare.sh b/models/cv/object_detection/yolov3/ixrt/ci/prepare.sh index 342732ce..8fca8883 100644 --- a/models/cv/object_detection/yolov3/ixrt/ci/prepare.sh +++ b/models/cv/object_detection/yolov3/ixrt/ci/prepare.sh @@ -16,19 +16,9 @@ set -x -ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') -if [[ ${ID} == "ubuntu" ]]; then - apt install -y libgl1-mesa-glx -elif [[ ${ID} == "centos" ]]; then - yum install -y mesa-libGL -else - echo "Not Support Os" -fi - -if [ -f /etc/system-release ]; then - if grep -qi "Kylin" /etc/system-release; then - export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD - fi +if [[ $(uname -m) == "aarch64" ]]; then + echo "Architecture is aarch64." + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD fi pip3 install -r ../../ixrt_common/requirements.txt diff --git a/models/cv/object_detection/yolov5/igie/ci/prepare.sh b/models/cv/object_detection/yolov5/igie/ci/prepare.sh index 2232a189..59810367 100644 --- a/models/cv/object_detection/yolov5/igie/ci/prepare.sh +++ b/models/cv/object_detection/yolov5/igie/ci/prepare.sh @@ -16,10 +16,9 @@ set -x -if [ -f /etc/system-release ]; then - if grep -qi "Kylin" /etc/system-release; then - export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD - fi +if [[ $(uname -m) == "aarch64" ]]; then + echo "Architecture is aarch64." 
+ export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD fi pip3 install -r requirements.txt diff --git a/models/cv/object_detection/yolov5/ixrt/ci/prepare.sh b/models/cv/object_detection/yolov5/ixrt/ci/prepare.sh index a41f8c6b..44ff8a0f 100644 --- a/models/cv/object_detection/yolov5/ixrt/ci/prepare.sh +++ b/models/cv/object_detection/yolov5/ixrt/ci/prepare.sh @@ -16,10 +16,9 @@ set -x -if [ -f /etc/system-release ]; then - if grep -qi "Kylin" /etc/system-release; then - export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD - fi +if [[ $(uname -m) == "aarch64" ]]; then + echo "Architecture is aarch64." + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD fi pip3 install -r ../../ixrt_common/requirements.txt diff --git a/models/cv/object_detection/yolov5s/igie/ci/prepare.sh b/models/cv/object_detection/yolov5s/igie/ci/prepare.sh index 1ae5ca61..10a61bac 100644 --- a/models/cv/object_detection/yolov5s/igie/ci/prepare.sh +++ b/models/cv/object_detection/yolov5s/igie/ci/prepare.sh @@ -16,10 +16,9 @@ set -x -if [ -f /etc/system-release ]; then - if grep -qi "Kylin" /etc/system-release; then - export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD - fi +if [[ $(uname -m) == "aarch64" ]]; then + echo "Architecture is aarch64." + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD fi pip3 install -r ../../ixrt_common/requirements.txt diff --git a/models/cv/object_detection/yolov5s/ixrt/ci/prepare.sh b/models/cv/object_detection/yolov5s/ixrt/ci/prepare.sh index 61148ce9..ae59373c 100644 --- a/models/cv/object_detection/yolov5s/ixrt/ci/prepare.sh +++ b/models/cv/object_detection/yolov5s/ixrt/ci/prepare.sh @@ -16,19 +16,9 @@ set -x -ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') -if [[ ${ID} == "ubuntu" ]]; then - apt install -y libgl1-mesa-glx -elif [[ ${ID} == "centos" ]]; then - yum install -y mesa-libGL -else - echo "Not Support Os" -fi - -if [ -f /etc/system-release ]; then - if grep -qi "Kylin" /etc/system-release; then - export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD - fi +if [[ $(uname -m) == "aarch64" ]]; then + echo "Architecture is aarch64." + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD fi pip3 install -r ../../ixrt_common/requirements.txt diff --git a/models/cv/object_detection/yolov7/igie/ci/prepare.sh b/models/cv/object_detection/yolov7/igie/ci/prepare.sh index 7524c273..0ebe1285 100644 --- a/models/cv/object_detection/yolov7/igie/ci/prepare.sh +++ b/models/cv/object_detection/yolov7/igie/ci/prepare.sh @@ -16,19 +16,9 @@ set -x -ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') -if [[ ${ID} == "ubuntu" ]]; then - apt install -y libgl1-mesa-glx -elif [[ ${ID} == "centos" ]]; then - yum install -y mesa-libGL -else - echo "Not Support Os" -fi - -if [ -f /etc/system-release ]; then - if grep -qi "Kylin" /etc/system-release; then - export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD - fi +if [[ $(uname -m) == "aarch64" ]]; then + echo "Architecture is aarch64." 
+ export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD fi pip3 install -r requirements.txt diff --git a/models/cv/object_detection/yolov7/ixrt/ci/prepare.sh b/models/cv/object_detection/yolov7/ixrt/ci/prepare.sh index 4bca6ec1..514a10ff 100644 --- a/models/cv/object_detection/yolov7/ixrt/ci/prepare.sh +++ b/models/cv/object_detection/yolov7/ixrt/ci/prepare.sh @@ -16,10 +16,9 @@ set -x -if [ -f /etc/system-release ]; then - if grep -qi "Kylin" /etc/system-release; then - export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD - fi +if [[ $(uname -m) == "aarch64" ]]; then + echo "Architecture is aarch64." + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD fi pip3 install -r ../../ixrt_common/requirements.txt diff --git a/models/cv/object_detection/yolov8n/igie/README.md b/models/cv/object_detection/yolov8n/igie/README.md index dc481a4d..fed89f98 100644 --- a/models/cv/object_detection/yolov8n/igie/README.md +++ b/models/cv/object_detection/yolov8n/igie/README.md @@ -8,7 +8,7 @@ YOLOv8n combines exceptional speed and competitive accuracy in real-time object | GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | | :----: | :----: | :----: | -| MR-V100 | 4.3.0 | 25.23 | +| MR-V100 | 4.3.0 | 25.12 | ## Model Preparation diff --git a/models/cv/object_detection/yolox/ixrt/README.md b/models/cv/object_detection/yolox/ixrt/README.md index 485c4245..1558ddaa 100644 --- a/models/cv/object_detection/yolox/ixrt/README.md +++ b/models/cv/object_detection/yolox/ixrt/README.md @@ -69,6 +69,7 @@ pip3 install -v -e . --no-build-isolation sed -i 's/torch.onnx._export/torch.onnx.export/g' tools/export_onnx.py # export onnx model python3 tools/export_onnx.py --output-name ../yolox.onnx -n yolox-m -c yolox_m.pth --batch-size 32 +pip install protobuf==3.20.0 ``` ## Model Inference diff --git a/models/nlp/plm/bert_base_squad_sample/igie/ci/prepare.sh b/models/nlp/plm/bert_base_squad_sample/igie/ci/prepare.sh index 41a05302..a9227cdd 100644 --- a/models/nlp/plm/bert_base_squad_sample/igie/ci/prepare.sh +++ b/models/nlp/plm/bert_base_squad_sample/igie/ci/prepare.sh @@ -21,16 +21,13 @@ mkdir -p ./data/datasets/bert_base_squad ln -s /mnt/deepspark/data/checkpoints/bert_base_uncased_squad ./data/checkpoints/bert_base_squad/ ln -s /mnt/deepspark/data/datasets/squad ./data/datasets/bert_base_squad/ -if [ -f /etc/redhat-release ]; then - if grep -qi "CentOS" /etc/redhat-release; then - yum install -y numactl - fi -elif [ -f /etc/system-release ]; then - if grep -qi "Kylin" /etc/system-release; then - yum install -y numactl - fi +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install numactl +elif [[ ${ID} == "centos" ]]; then + yum install -y numactl else - apt install -y numactl + echo "Not Support Os" fi pip3 install --no-dependencies transformers diff --git a/models/nlp/plm/bert_large_squad_sample/igie/ci/prepare.sh b/models/nlp/plm/bert_large_squad_sample/igie/ci/prepare.sh index b5ea193c..c8a5bb56 100644 --- a/models/nlp/plm/bert_large_squad_sample/igie/ci/prepare.sh +++ b/models/nlp/plm/bert_large_squad_sample/igie/ci/prepare.sh @@ -21,16 +21,13 @@ mkdir -p ./data/datasets/bert_large_squad ln -s /mnt/deepspark/data/checkpoints/bert-large-uncased ./data/checkpoints/bert_large_squad/ ln -s /mnt/deepspark/data/datasets/squad ./data/datasets/bert_large_squad/ -if [ -f /etc/redhat-release ]; then - if grep -qi 
"CentOS" /etc/redhat-release; then - yum install -y numactl - fi -elif [ -f /etc/system-release ]; then - if grep -qi "Kylin" /etc/system-release; then - yum install -y numactl - fi +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install numactl +elif [[ ${ID} == "centos" ]]; then + yum install -y numactl else - apt install -y numactl + echo "Not Support Os" fi pip3 install --no-dependencies transformers diff --git a/models/nlp/plm/transformer/igie/ci/prepare.sh b/models/nlp/plm/transformer/igie/ci/prepare.sh index 18375541..5b075204 100644 --- a/models/nlp/plm/transformer/igie/ci/prepare.sh +++ b/models/nlp/plm/transformer/igie/ci/prepare.sh @@ -25,14 +25,13 @@ else echo "Not Support Os" fi -if [ -f /etc/system-release ]; then - if grep -qi "Kylin" /etc/system-release; then - pip3 install --no-cache-dir --force-reinstall --upgrade --index-url https://pypi.tuna.tsinghua.edu.cn/simple --trusted-host pypi.tuna.tsinghua.edu.cn scikit-learn - pip3 install numpy==1.26.4 - yum install -y libgomp - export LD_PRELOAD=$(find /usr/local/lib/python3.10/site-packages/scikit_learn.libs -name "libgomp*.so.1.0.0" | head -n1) - export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD - fi +if [[ $(uname -m) == "aarch64" ]]; then + echo "Architecture is aarch64." + pip3 install --no-cache-dir --force-reinstall --upgrade --index-url https://pypi.tuna.tsinghua.edu.cn/simple --trusted-host pypi.tuna.tsinghua.edu.cn scikit-learn + pip3 install numpy==1.26.4 + yum install -y libgomp + export LD_PRELOAD=$(find /usr/local/lib/python3.10/site-packages/scikit_learn.libs -name "libgomp*.so.1.0.0" | head -n1) + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD fi pip3 install -r requirements.txt diff --git a/models/nlp/plm/transformer/ixrt/ci/prepare.sh b/models/nlp/plm/transformer/ixrt/ci/prepare.sh index 3eb341e4..6a7c66be 100644 --- a/models/nlp/plm/transformer/ixrt/ci/prepare.sh +++ b/models/nlp/plm/transformer/ixrt/ci/prepare.sh @@ -25,14 +25,13 @@ else echo "Not Support Os" fi -if [ -f /etc/system-release ]; then - if grep -qi "Kylin" /etc/system-release; then - pip3 install --no-cache-dir --force-reinstall --upgrade --index-url https://pypi.tuna.tsinghua.edu.cn/simple --trusted-host pypi.tuna.tsinghua.edu.cn scikit-learn - pip3 install numpy==1.26.4 - yum install -y libgomp - export LD_PRELOAD=$(find /usr/local/lib/python3.10/site-packages/scikit_learn.libs -name "libgomp*.so.1.0.0" | head -n1) - export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD - fi +if [[ $(uname -m) == "aarch64" ]]; then + echo "Architecture is aarch64." 
+ pip3 install --no-cache-dir --force-reinstall --upgrade --index-url https://pypi.tuna.tsinghua.edu.cn/simple --trusted-host pypi.tuna.tsinghua.edu.cn scikit-learn + pip3 install numpy==1.26.4 + yum install -y libgomp + export LD_PRELOAD=$(find /usr/local/lib/python3.10/site-packages/scikit_learn.libs -name "libgomp*.so.1.0.0" | head -n1) + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD fi pip3 install -r requirements.txt -- Gitee From 94a5a76f0b0c1a738a050b2cd909fe6e6e4c61a3 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Fri, 26 Dec 2025 13:39:18 +0800 Subject: [PATCH 06/19] move export from prepare into execute --- .../yolov3/igie/ci/prepare.sh | 5 --- .../yolov3/ixrt/ci/prepare.sh | 5 --- .../yolov5/igie/ci/prepare.sh | 5 --- .../yolov5/ixrt/ci/prepare.sh | 5 --- .../yolov5s/igie/ci/prepare.sh | 5 --- .../yolov5s/ixrt/ci/prepare.sh | 5 --- .../yolov7/igie/ci/prepare.sh | 5 --- .../yolov7/ixrt/ci/prepare.sh | 5 --- models/nlp/plm/transformer/igie/ci/prepare.sh | 4 +-- models/nlp/plm/transformer/ixrt/ci/prepare.sh | 4 +-- tests/run_igie.py | 12 ++++++- tests/run_ixrt.py | 34 ++++++++++++++++--- 12 files changed, 43 insertions(+), 51 deletions(-) diff --git a/models/cv/object_detection/yolov3/igie/ci/prepare.sh b/models/cv/object_detection/yolov3/igie/ci/prepare.sh index 94c7df37..ca323384 100644 --- a/models/cv/object_detection/yolov3/igie/ci/prepare.sh +++ b/models/cv/object_detection/yolov3/igie/ci/prepare.sh @@ -16,11 +16,6 @@ set -x -if [[ $(uname -m) == "aarch64" ]]; then - echo "Architecture is aarch64." - export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD -fi - pip3 install -r requirements.txt python3 export.py --weight yolov3.pt --output yolov3.onnx diff --git a/models/cv/object_detection/yolov3/ixrt/ci/prepare.sh b/models/cv/object_detection/yolov3/ixrt/ci/prepare.sh index 8fca8883..219cffa0 100644 --- a/models/cv/object_detection/yolov3/ixrt/ci/prepare.sh +++ b/models/cv/object_detection/yolov3/ixrt/ci/prepare.sh @@ -16,11 +16,6 @@ set -x -if [[ $(uname -m) == "aarch64" ]]; then - echo "Architecture is aarch64." - export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD -fi - pip3 install -r ../../ixrt_common/requirements.txt mkdir checkpoints unzip -q /root/data/3rd_party/onnx_tflite_yolov3.zip -d ./ diff --git a/models/cv/object_detection/yolov5/igie/ci/prepare.sh b/models/cv/object_detection/yolov5/igie/ci/prepare.sh index 59810367..fcf24d4c 100644 --- a/models/cv/object_detection/yolov5/igie/ci/prepare.sh +++ b/models/cv/object_detection/yolov5/igie/ci/prepare.sh @@ -16,11 +16,6 @@ set -x -if [[ $(uname -m) == "aarch64" ]]; then - echo "Architecture is aarch64." - export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD -fi - pip3 install -r requirements.txt python3 export.py --weight yolov5m.pt --output yolov5m.onnx diff --git a/models/cv/object_detection/yolov5/ixrt/ci/prepare.sh b/models/cv/object_detection/yolov5/ixrt/ci/prepare.sh index 44ff8a0f..23b04131 100644 --- a/models/cv/object_detection/yolov5/ixrt/ci/prepare.sh +++ b/models/cv/object_detection/yolov5/ixrt/ci/prepare.sh @@ -16,11 +16,6 @@ set -x -if [[ $(uname -m) == "aarch64" ]]; then - echo "Architecture is aarch64." 
- export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD -fi - pip3 install -r ../../ixrt_common/requirements.txt mkdir checkpoints diff --git a/models/cv/object_detection/yolov5s/igie/ci/prepare.sh b/models/cv/object_detection/yolov5s/igie/ci/prepare.sh index 10a61bac..b53ca6d1 100644 --- a/models/cv/object_detection/yolov5s/igie/ci/prepare.sh +++ b/models/cv/object_detection/yolov5s/igie/ci/prepare.sh @@ -16,11 +16,6 @@ set -x -if [[ $(uname -m) == "aarch64" ]]; then - echo "Architecture is aarch64." - export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD -fi - pip3 install -r ../../ixrt_common/requirements.txt mkdir -p checkpoints diff --git a/models/cv/object_detection/yolov5s/ixrt/ci/prepare.sh b/models/cv/object_detection/yolov5s/ixrt/ci/prepare.sh index ae59373c..4672c649 100644 --- a/models/cv/object_detection/yolov5s/ixrt/ci/prepare.sh +++ b/models/cv/object_detection/yolov5s/ixrt/ci/prepare.sh @@ -16,11 +16,6 @@ set -x -if [[ $(uname -m) == "aarch64" ]]; then - echo "Architecture is aarch64." - export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD -fi - pip3 install -r ../../ixrt_common/requirements.txt mkdir -p checkpoints diff --git a/models/cv/object_detection/yolov7/igie/ci/prepare.sh b/models/cv/object_detection/yolov7/igie/ci/prepare.sh index 0ebe1285..e0237872 100644 --- a/models/cv/object_detection/yolov7/igie/ci/prepare.sh +++ b/models/cv/object_detection/yolov7/igie/ci/prepare.sh @@ -16,11 +16,6 @@ set -x -if [[ $(uname -m) == "aarch64" ]]; then - echo "Architecture is aarch64." - export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD -fi - pip3 install -r requirements.txt cp -r /mnt/deepspark/data/3rd_party/yolov7 ./ diff --git a/models/cv/object_detection/yolov7/ixrt/ci/prepare.sh b/models/cv/object_detection/yolov7/ixrt/ci/prepare.sh index 514a10ff..88caeb5e 100644 --- a/models/cv/object_detection/yolov7/ixrt/ci/prepare.sh +++ b/models/cv/object_detection/yolov7/ixrt/ci/prepare.sh @@ -16,11 +16,6 @@ set -x -if [[ $(uname -m) == "aarch64" ]]; then - echo "Architecture is aarch64." - export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD -fi - pip3 install -r ../../ixrt_common/requirements.txt mkdir -p checkpoints cp -r /root/data/3rd_party/yolov7 ./ diff --git a/models/nlp/plm/transformer/igie/ci/prepare.sh b/models/nlp/plm/transformer/igie/ci/prepare.sh index 5b075204..f1f93e80 100644 --- a/models/nlp/plm/transformer/igie/ci/prepare.sh +++ b/models/nlp/plm/transformer/igie/ci/prepare.sh @@ -29,9 +29,7 @@ if [[ $(uname -m) == "aarch64" ]]; then echo "Architecture is aarch64." pip3 install --no-cache-dir --force-reinstall --upgrade --index-url https://pypi.tuna.tsinghua.edu.cn/simple --trusted-host pypi.tuna.tsinghua.edu.cn scikit-learn pip3 install numpy==1.26.4 - yum install -y libgomp - export LD_PRELOAD=$(find /usr/local/lib/python3.10/site-packages/scikit_learn.libs -name "libgomp*.so.1.0.0" | head -n1) - export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD + apt install -y libgomp1 fi pip3 install -r requirements.txt diff --git a/models/nlp/plm/transformer/ixrt/ci/prepare.sh b/models/nlp/plm/transformer/ixrt/ci/prepare.sh index 6a7c66be..46fe1a4d 100644 --- a/models/nlp/plm/transformer/ixrt/ci/prepare.sh +++ b/models/nlp/plm/transformer/ixrt/ci/prepare.sh @@ -29,9 +29,7 @@ if [[ $(uname -m) == "aarch64" ]]; then echo "Architecture is aarch64." 
pip3 install --no-cache-dir --force-reinstall --upgrade --index-url https://pypi.tuna.tsinghua.edu.cn/simple --trusted-host pypi.tuna.tsinghua.edu.cn scikit-learn pip3 install numpy==1.26.4 - yum install -y libgomp - export LD_PRELOAD=$(find /usr/local/lib/python3.10/site-packages/scikit_learn.libs -name "libgomp*.so.1.0.0" | head -n1) - export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD + apt install -y libgomp1 fi pip3 install -r requirements.txt diff --git a/tests/run_igie.py b/tests/run_igie.py index c6045095..7a339a43 100644 --- a/tests/run_igie.py +++ b/tests/run_igie.py @@ -21,7 +21,7 @@ import logging import os import sys import argparse - +import platform import utils # 配置日志 @@ -305,6 +305,11 @@ def run_detec_testcase(model, batch_size): export DATASETS_DIR=./{dataset_n}/ """ + if platform.machine() == "aarch64": + base_script += """ + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD + """ + for prec in model["precisions"]: result["result"].setdefault(prec, {"status": "FAIL"}) for bs in batch_size_list: @@ -586,6 +591,11 @@ def run_nlp_testcase(model, batch_size): export DATASETS_DIR=/mnt/deepspark/data/datasets/{dataset_n} cd ../{model['model_path']} """ + if model_name == "transformer" and platform.machine() == "aarch64": + base_script += """ + export LD_PRELOAD=$(find /usr/local/lib/python3.10/site-packages/scikit_learn.libs -name "libgomp*.so.1.0.0" | head -n1) + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD + """ for prec in model["precisions"]: result["result"].setdefault(prec, {"status": "FAIL"}) for bs in batch_size_list: diff --git a/tests/run_ixrt.py b/tests/run_ixrt.py index 6d0e7ef9..aa96b99b 100644 --- a/tests/run_ixrt.py +++ b/tests/run_ixrt.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import platform import yaml import subprocess import json @@ -328,6 +329,11 @@ def run_detec_testcase(model, batch_size): export CONFIG_DIR=config/{config_name}_CONFIG """ + if platform.machine() == "aarch64": + base_script += """ + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD + """ + for prec in model["precisions"]: result["result"].setdefault(prec, {"status": "FAIL"}) for bs in batch_size_list: @@ -609,20 +615,40 @@ def run_nlp_testcase(model, batch_size): if bs == "None": bs = "Default" if model_name in ["bert_base_squad", "bert_large_squad", "transformer"]: - script = f""" + if model_name == "transformer" and platform.machine() == "aarch64": + script = """ set -x + export LD_PRELOAD=$(find /usr/local/lib/python3.10/site-packages/scikit_learn.libs -name "libgomp*.so.1.0.0" | head -n1) + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD cd ../{model['model_path']}/ bash scripts/infer_{model_name}_{prec}_accuracy.sh bash scripts/infer_{model_name}_{prec}_performance.sh - """ + """ + else: + script = f""" + set -x + cd ../{model['model_path']}/ + bash scripts/infer_{model_name}_{prec}_accuracy.sh + bash scripts/infer_{model_name}_{prec}_performance.sh + """ else: if model_name in ["bert_base_squad", "bert_large_squad", "transformer"]: - script = f""" + if model_name == "transformer" and platform.machine() == "aarch64": + script = """ set -x + export LD_PRELOAD=$(find /usr/local/lib/python3.10/site-packages/scikit_learn.libs -name "libgomp*.so.1.0.0" | head -n1) + export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD cd ../{model['model_path']}/ bash scripts/infer_{model_name}_{prec}_accuracy.sh --bs {bs} bash scripts/infer_{model_name}_{prec}_performance.sh --bs {bs} - """ + """ + else: + script = f""" + set -x + cd ../{model['model_path']}/ + bash scripts/infer_{model_name}_{prec}_accuracy.sh --bs {bs} + bash scripts/infer_{model_name}_{prec}_performance.sh --bs {bs} + """ result["result"][prec].setdefault(bs, {}) logging.info(f"Start running {model_name} {prec} bs: {bs} test case") -- Gitee From 5311038c6b2a68ad1f0fb51b2ca5dea636d998fe Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Fri, 26 Dec 2025 13:54:01 +0800 Subject: [PATCH 07/19] use default script for CosyVoice2 in 4.4.0 --- .../cosyvoice/pytorch/README.md | 7 +-- .../cosyvoice/pytorch/inference_test.py | 46 ------------------- 2 files changed, 4 insertions(+), 49 deletions(-) delete mode 100644 models/speech/speech_synthesis/cosyvoice/pytorch/inference_test.py diff --git a/models/speech/speech_synthesis/cosyvoice/pytorch/README.md b/models/speech/speech_synthesis/cosyvoice/pytorch/README.md index 677f5919..6b8319c7 100644 --- a/models/speech/speech_synthesis/cosyvoice/pytorch/README.md +++ b/models/speech/speech_synthesis/cosyvoice/pytorch/README.md @@ -24,6 +24,7 @@ pip3 install onnxruntime==1.18.0 git clone --recursive https://github.com/FunAudioLLM/CosyVoice.git # If you failed to clone the submodule due to network failures, please run the following command until success cd CosyVoice +git checkout 2db78e705835e56778f69b80cae51e4314d635b0 git submodule update --init --recursive mkdir -p pretrained_models @@ -39,12 +40,12 @@ sudo yum install sox sox-devel ## Model Inference ```bash -cp ../inference_test.py ./ -python3 inference_test.py +# make sure run cosyvoice2_example() in example.py +python3 example.py ``` ## Model Results ## References -- [CosyVoice](https://github.com/FunAudioLLM/CosyVoice/commit/0a496c18f78ca993c63f6d880fcc60778bfc85c1) 
\ No newline at end of file +- [CosyVoice](https://github.com/FunAudioLLM/CosyVoice/commit/2db78e705835e56778f69b80cae51e4314d635b0) \ No newline at end of file diff --git a/models/speech/speech_synthesis/cosyvoice/pytorch/inference_test.py b/models/speech/speech_synthesis/cosyvoice/pytorch/inference_test.py deleted file mode 100644 index 66cfac19..00000000 --- a/models/speech/speech_synthesis/cosyvoice/pytorch/inference_test.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -sys.path.append('third_party/Matcha-TTS') -from cosyvoice.cli.cosyvoice import CosyVoice, CosyVoice2 -from cosyvoice.utils.file_utils import load_wav -import torchaudio -cosyvoice = CosyVoice2('pretrained_models/CosyVoice2-0.5B', load_jit=False, load_trt=False, fp16=False) - -# NOTE if you want to reproduce the results on https://funaudiollm.github.io/cosyvoice2, please add text_frontend=False during inference -# zero_shot usage -prompt_speech_16k = load_wav('./asset/zero_shot_prompt.wav', 16000) -text = ('收到好友从远方寄来的生日礼物,' - '那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐,' - '笑容如花儿般绽放。') -text_1 = '希望你以后能够做的比我还好呦。' -for i, j in enumerate(cosyvoice.inference_zero_shot(text, text_1, prompt_speech_16k, stream=False)): - torchaudio.save('zero_shot_{}.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate) - -# fine grained control, for supported control, check cosyvoice/tokenizer/tokenizer.py#L248 -text = ('在他讲述那个荒诞故事的过程中,' - '他突然[laughter]停下来,' - '因为他自己也被逗笑了[laughter]。') -for i, j in enumerate(cosyvoice.inference_cross_lingual(text, prompt_speech_16k, stream=False)): - torchaudio.save('fine_grained_control_{}.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate) - -# instruct usage -text = ('收到好友从远方寄来的生日礼物,' - '那份意外的惊喜与深深的祝福让我心中充满了甜蜜的快乐,' - '笑容如花儿般绽放。') -for i, j in enumerate(cosyvoice.inference_instruct2(text, '用四川话说这句话', prompt_speech_16k, stream=False)): - torchaudio.save('instruct_{}.wav'.format(i), j['tts_speech'], cosyvoice.sample_rate) -print("Offline inference is successful!") \ No newline at end of file -- Gitee From 866f0fb6c0f3941bab59cea568f4011c44647cde Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Fri, 26 Dec 2025 14:37:05 +0800 Subject: [PATCH 08/19] fix comma error --- tests/run_ixrt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/run_ixrt.py b/tests/run_ixrt.py index aa96b99b..eb3f5d6d 100644 --- a/tests/run_ixrt.py +++ b/tests/run_ixrt.py @@ -616,7 +616,7 @@ def run_nlp_testcase(model, batch_size): bs = "Default" if model_name in ["bert_base_squad", "bert_large_squad", "transformer"]: if model_name == "transformer" and platform.machine() == "aarch64": - script = """ + script = f""" set -x export LD_PRELOAD=$(find /usr/local/lib/python3.10/site-packages/scikit_learn.libs -name "libgomp*.so.1.0.0" | head -n1) export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD @@ -634,7 +634,7 @@ def run_nlp_testcase(model, batch_size): else: 
if model_name in ["bert_base_squad", "bert_large_squad", "transformer"]: if model_name == "transformer" and platform.machine() == "aarch64": - script = """ + script = f""" set -x export LD_PRELOAD=$(find /usr/local/lib/python3.10/site-packages/scikit_learn.libs -name "libgomp*.so.1.0.0" | head -n1) export LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD -- Gitee From 3e30ab07727b0243f6f4900e41938215a6fa4a89 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Fri, 26 Dec 2025 15:01:28 +0800 Subject: [PATCH 09/19] update whl dependent all from whl_url --- tests/run_igie.py | 29 ++++++++++++----------------- tests/run_ixrt.py | 37 +++++++++++++++++++------------------ tests/run_trtllm.py | 12 ++++++------ tests/run_vllm.py | 7 ++++--- 4 files changed, 41 insertions(+), 44 deletions(-) diff --git a/tests/run_igie.py b/tests/run_igie.py index 7a339a43..38bbb966 100644 --- a/tests/run_igie.py +++ b/tests/run_igie.py @@ -74,7 +74,7 @@ def main(): logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}") d_url = model["download_url"] if d_url is not None: - result = run_detec_testcase(model, batch_size) + result = run_detec_testcase(model, batch_size, whl_url) check_model_result(result) logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, indent=4)}") logging.info(f"End running {model['model_name']} test case.") @@ -84,7 +84,7 @@ def main(): logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}") d_url = model["download_url"] if d_url is not None: - result = run_ocr_testcase(model) + result = run_ocr_testcase(model, whl_url) check_model_result(result) logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, indent=4)}") logging.info(f"End running {model['model_name']} test case.") @@ -164,18 +164,13 @@ def run_clf_testcase(model, batch_size, whl_url): ln -s /mnt/deepspark/data/checkpoints/{checkpoint_n} ./ """ if model["category"] == "cv/semantic_segmentation": - prepare_script += """ - pip install /mnt/deepspark/install/mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl + prepare_script += f""" + pip install {whl_url}`curl -s {whl_url} | grep -o 'mmcv-[^"]*\.whl' | head -n1` """ if model_name in ["resnet50_sample", "vgg16_sample"]: - if whl_url and whl_url != "None": - prepare_script += f""" - pip install {whl_url}`curl -s {whl_url} | grep -o 'tensorflow-[^"]*\.whl' | head -n1` - """ - else: - prepare_script += """ - pip install /mnt/deepspark/data/install/tensorflow-2.16.2+corex.4.3.0-cp310-cp310-linux_x86_64.whl - """ + prepare_script += f""" + pip install {whl_url}`curl -s {whl_url} | grep -o 'tensorflow-[^"]*\.whl' | head -n1` + """ prepare_script += f""" bash ci/prepare.sh ls -l | grep onnx @@ -268,7 +263,7 @@ def run_clf_testcase(model, batch_size, whl_url): logging.debug(f"matchs:\n{matchs}") return result -def run_detec_testcase(model, batch_size): +def run_detec_testcase(model, batch_size, whl_url): batch_size_list = batch_size.split(",") if batch_size else [] model_name = model["model_name"] result = { @@ -284,8 +279,8 @@ def run_detec_testcase(model, batch_size): ln -s /mnt/deepspark/data/datasets/{dataset_n} ./ """ # for 4.3.0 sdk need pre install mmcv - prepare_script += """ - pip install /mnt/deepspark/install/mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl + prepare_script += f""" + pip install {whl_url}`curl -s {whl_url} | grep -o 'mmcv-[^"]*\.whl' | head -n1` """ # if model["need_third_part"] and model["3rd_party_repo"]: @@ -375,7 +370,7 
@@ def run_detec_testcase(model, batch_size): return result -def run_ocr_testcase(model): +def run_ocr_testcase(model, whl_url): model_name = model["model_name"] result = { "name": model_name, @@ -389,7 +384,7 @@ def run_ocr_testcase(model): cd ../{model['model_path']} ln -s /mnt/deepspark/data/checkpoints/{checkpoint_n} ./ ln -s /mnt/deepspark/data/datasets/{dataset_n} ./ - pip install /mnt/deepspark/install/paddlepaddle-3.0.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl + pip install {whl_url}`curl -s {whl_url} | grep -o 'paddlepaddle-[^"]*\.whl' | head -n1` unzip -q /mnt/deepspark/data/3rd_party/PaddleOCR-release-2.6.zip -d ./PaddleOCR bash ci/prepare.sh """ diff --git a/tests/run_ixrt.py b/tests/run_ixrt.py index eb3f5d6d..5c387887 100644 --- a/tests/run_ixrt.py +++ b/tests/run_ixrt.py @@ -59,12 +59,14 @@ def main(): logging.error(f"model name {model['model_name']} is not support for IXUCA SDK v4.3.0.") sys.exit(-1) + whl_url = os.environ.get("WHL_URL") + result = {} if model["category"] == "cv/classification": logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}") d_url = model["download_url"] if d_url is not None: - result = run_clf_testcase(model, batch_size) + result = run_clf_testcase(model, batch_size, whl_url) check_model_result(result) logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, indent=4)}") logging.info(f"End running {model['model_name']} test case.") @@ -74,7 +76,7 @@ def main(): logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}") d_url = model["download_url"] if d_url is not None: - result = run_detec_testcase(model, batch_size) + result = run_detec_testcase(model, batch_size, whl_url) check_model_result(result) logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, indent=4)}") logging.info(f"End running {model['model_name']} test case.") @@ -104,7 +106,7 @@ def main(): logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}") d_url = model["download_url"] if d_url is not None: - result = run_instance_segmentation_testcase(model) + result = run_instance_segmentation_testcase(model, whl_url) check_model_result(result) logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, indent=4)}") logging.info(f"End running {model['model_name']} test case.") @@ -114,7 +116,7 @@ def main(): logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}") d_url = model["download_url"] if d_url is not None: - result = run_nlp_testcase(model, batch_size) + result = run_nlp_testcase(model, batch_size, whl_url) check_model_result(result) logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, indent=4)}") logging.info(f"End running {model['model_name']} test case.") @@ -149,7 +151,7 @@ def check_model_result(result): break result["status"] = status -def run_clf_testcase(model, batch_size): +def run_clf_testcase(model, batch_size, whl_url): batch_size_list = batch_size.split(",") if batch_size else [] model_name = model["model_name"] result = { @@ -164,8 +166,8 @@ def run_clf_testcase(model, batch_size): ln -s /root/data/checkpoints/{checkpoint_n} ./ """ if model_name == "swin_transformer_large": - prepare_script += """ - pip install /root/data/install/tensorflow-2.16.2+corex.4.3.0-cp310-cp310-linux_x86_64.whl + prepare_script += f""" + pip install {whl_url}`curl -s {whl_url} | grep -o 'tensorflow-[^"]*\.whl' | head -n1` """ prepare_script += """ bash 
ci/prepare.sh @@ -275,7 +277,7 @@ def run_clf_testcase(model, batch_size): logging.debug(f"matchs:\n{matchs}") return result -def run_detec_testcase(model, batch_size): +def run_detec_testcase(model, batch_size, whl_url): batch_size_list = batch_size.split(",") if batch_size else [] model_name = model["model_name"] result = { @@ -289,7 +291,7 @@ def run_detec_testcase(model, batch_size): cd ../{model['model_path']} ln -s /root/data/checkpoints/{checkpoint_n} ./ ln -s /root/data/datasets/{dataset_n} ./ - pip install /root/data/install/mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl + pip install {whl_url}`curl -s {whl_url} | grep -o 'mmcv-[^"]*\.whl' | head -n1` bash ci/prepare.sh """ @@ -550,7 +552,7 @@ def run_multi_object_tracking_testcase(model): return result # BERT series models -def run_nlp_testcase(model, batch_size): +def run_nlp_testcase(model, batch_size, whl_url): batch_size_list = batch_size.split(",") if batch_size else [] model_name = model["model_name"] result = { @@ -561,18 +563,17 @@ def run_nlp_testcase(model, batch_size): prepare_script = f""" set -x cd ../{model['model_path']} - pip install /root/data/install/tensorflow-2.16.2+corex.4.3.0-cp310-cp310-linux_x86_64.whl - pip install /root/data/install/ixrt-1.0.0a0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - pip install /root/data/install/cuda_python-11.8.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - bash /root/data/install/ixrt-1.0.0.alpha+corex.4.3.0-linux_x86_64.run + pip install {whl_url}`curl -s {whl_url} | grep -o 'tensorflow-[^"]*\.whl' | head -n1` + pip install {whl_url}`curl -s {whl_url} | grep -o 'ixrt-[^"]*\.whl' | head -n1` + pip install {whl_url}`curl -s {whl_url} | grep -o 'cuda_python-[^"]*\.whl' | head -n1` bash ci/prepare.sh """ else: prepare_script = f""" set -x cd ../{model['model_path']} - pip install /root/data/install/ixrt-1.0.0a0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - pip install /root/data/install/cuda_python-11.8.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl + pip install {whl_url}`curl -s {whl_url} | grep -o 'ixrt-[^"]*\.whl' | head -n1` + pip install {whl_url}`curl -s {whl_url} | grep -o 'cuda_python-[^"]*\.whl' | head -n1` bash ci/prepare.sh """ @@ -752,7 +753,7 @@ def run_speech_testcase(model, batch_size): logging.debug(f"matchs:\n{matchs}") return result -def run_instance_segmentation_testcase(model): +def run_instance_segmentation_testcase(model, whl_url): model_name = model["model_name"] result = { "name": model_name, @@ -765,7 +766,7 @@ def run_instance_segmentation_testcase(model): cd ../{model['model_path']} ln -s /root/data/checkpoints/{checkpoint_n} ./ ln -s /root/data/datasets/{dataset_n} ./ - pip install /root/data/install/mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl + pip install {whl_url}`curl -s {whl_url} | grep -o 'mmcv-[^"]*\.whl' | head -n1` bash ci/prepare.sh ls -l | grep onnx """ diff --git a/tests/run_trtllm.py b/tests/run_trtllm.py index f8a684c3..a7cf1963 100644 --- a/tests/run_trtllm.py +++ b/tests/run_trtllm.py @@ -58,13 +58,14 @@ def main(): logging.error(f"model name {model['model_name']} is not support for IXUCA SDK v4.3.0.") sys.exit(-1) + whl_url = os.environ.get("WHL_URL") result = {} # NLP模型 if model["category"] in ["nlp/llm"]: logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}") d_url = model["download_url"] if d_url is not None: - result = run_nlp_testcase(model) + result = run_nlp_testcase(model, whl_url) check_model_result(result) logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, 
indent=4)}") logging.info(f"End running {model['model_name']} test case.") @@ -89,7 +90,7 @@ def check_model_result(result): break result["status"] = status -def run_nlp_testcase(model): +def run_nlp_testcase(model, whl_url): model_name = model["model_name"] result = { "name": model_name, @@ -101,10 +102,9 @@ def run_nlp_testcase(model): prepare_script = f""" set -x cd ../{model['model_path']} - pip install /mnt/deepspark/install/tensorrt_llm-0.12.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - pip install /mnt/deepspark/install/ixrt-1.0.0a0+corex.4.3.0-cp310-cp310-linux_x86_64.whl - bash /mnt/deepspark/install/ixrt-1.0.0.alpha+corex.4.3.0-linux_x86_64.run - pip install /mnt/deepspark/install/cuda_python-11.8.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl + pip install {whl_url}`curl -s {whl_url} | grep -o 'tensorrt_llm-[^"]*\.whl' | head -n1` + pip install {whl_url}`curl -s {whl_url} | grep -o 'ixrt-[^"]*\.whl' | head -n1` + pip install {whl_url}`curl -s {whl_url} | grep -o 'cuda_python-[^"]*\.whl' | head -n1` bash ci/prepare.sh """ diff --git a/tests/run_vllm.py b/tests/run_vllm.py index d2617b91..ccb28573 100644 --- a/tests/run_vllm.py +++ b/tests/run_vllm.py @@ -57,13 +57,14 @@ def main(): logging.error(f"model name {model['model_name']} is not support for IXUCA SDK v4.3.0.") sys.exit(-1) + whl_url = os.environ.get("WHL_URL") result = {} # NLP模型 if model["category"] in ["nlp/llm", "multimodal/vision_language_model", "speech/asr", "speech/speech_synthesis"]: logging.info(f"Start running {model['model_name']} test case:\n{json.dumps(model, indent=4)}") d_url = model["download_url"] if d_url is not None: - result = run_nlp_testcase(model) + result = run_nlp_testcase(model, whl_url) check_model_result(result) logging.debug(f"The result of {model['model_name']} is\n{json.dumps(result, indent=4)}") logging.info(f"End running {model['model_name']} test case.") @@ -299,7 +300,7 @@ def _parse_script_output(sout: str, prec: str, display_name: str) -> Dict[str, A # --- Main function (now simple and low complexity) --- -def run_nlp_testcase(model: Dict[str, Any]) -> Dict[str, Any]: +def run_nlp_testcase(model: Dict[str, Any], whl_url: str) -> Dict[str, Any]: get_num_devices_script = "ixsmi -L | wc -l" result, _ = run_script(get_num_devices_script) num_devices = int(result.stdout.strip()) @@ -320,7 +321,7 @@ def run_nlp_testcase(model: Dict[str, Any]) -> Dict[str, Any]: set -x cd ../{model['model_path']} ln -s /mnt/deepspark/data/checkpoints/{checkpoint_n} ./{model_name} -pip install /mnt/deepspark/install/xformers-0.0.26.post1+corex.4.3.0-cp310-cp310-linux_x86_64.whl +pip install {whl_url}`curl -s {whl_url} | grep -o 'xformers-[^"]*\.whl' | head -n1` bash ci/prepare.sh """ -- Gitee From 8948be4b32e92243049a6469078a1b2b846da4cf Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Fri, 26 Dec 2025 15:26:14 +0800 Subject: [PATCH 10/19] del pip install in ci/prepare --- models/cv/instance_segmentation/solov1/ixrt/ci/prepare.sh | 1 + models/cv/object_detection/fcos/ixrt/ci/prepare.sh | 1 - models/cv/ocr/svtr/igie/ci/prepare.sh | 2 -- .../vision_language_model/fuyu_8b/vllm/ci/prepare.sh | 3 +-- .../vision_language_model/minicpm_o/vllm/ci/prepare.sh | 1 - models/nlp/llm/internlm3/lmdeploy/ci/prepare.sh | 3 +-- tests/run_vllm.py | 4 ++++ 7 files changed, 7 insertions(+), 8 deletions(-) diff --git a/models/cv/instance_segmentation/solov1/ixrt/ci/prepare.sh b/models/cv/instance_segmentation/solov1/ixrt/ci/prepare.sh index 107ffda4..75fa99d6 100644 --- a/models/cv/instance_segmentation/solov1/ixrt/ci/prepare.sh 
+++ b/models/cv/instance_segmentation/solov1/ixrt/ci/prepare.sh @@ -27,6 +27,7 @@ fi pip install -r requirements.txt +# it need low mmcv version pip install /root/data/install/mmcv_full-1.7.0+corex.20250108131027-cp310-cp310-linux_x86_64.whl mkdir -p checkpoints diff --git a/models/cv/object_detection/fcos/ixrt/ci/prepare.sh b/models/cv/object_detection/fcos/ixrt/ci/prepare.sh index 633e4f20..1a22e892 100644 --- a/models/cv/object_detection/fcos/ixrt/ci/prepare.sh +++ b/models/cv/object_detection/fcos/ixrt/ci/prepare.sh @@ -25,6 +25,5 @@ else echo "Not Support Os" fi pip3 install -r requirements.txt -pip install /root/data/install/mmcv-2.1.0+corex.4.3.0-cp310-cp310-linux_x86_64.whl mkdir -p checkpoints cp /root/data/checkpoints/fcos_opt.onnx checkpoints/ diff --git a/models/cv/ocr/svtr/igie/ci/prepare.sh b/models/cv/ocr/svtr/igie/ci/prepare.sh index 3c222716..0eb71f7e 100644 --- a/models/cv/ocr/svtr/igie/ci/prepare.sh +++ b/models/cv/ocr/svtr/igie/ci/prepare.sh @@ -42,5 +42,3 @@ cd .. # Use onnxsim optimize onnx model onnxsim SVTR.onnx SVTR_opt.onnx -# should update igie -pip install http://sw.iluvatar.ai/download/corex/daily_packages/ivcore11/x86_64/20250220/apps/py3.10/igie-0.18.0+corex.4.2.0.20250220-cp310-cp310-linux_x86_64.whl \ No newline at end of file diff --git a/models/multimodal/vision_language_model/fuyu_8b/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/fuyu_8b/vllm/ci/prepare.sh index a3756a47..003a1508 100644 --- a/models/multimodal/vision_language_model/fuyu_8b/vllm/ci/prepare.sh +++ b/models/multimodal/vision_language_model/fuyu_8b/vllm/ci/prepare.sh @@ -24,5 +24,4 @@ else echo "Not Support Os" fi -cp -r ../../vllm_public_assets/ ./ -pip install /mnt/deepspark/install/transformers-4.45.2+corex.4.3.0-py3-none-any.whl \ No newline at end of file +cp -r ../../vllm_public_assets/ ./ \ No newline at end of file diff --git a/models/multimodal/vision_language_model/minicpm_o/vllm/ci/prepare.sh b/models/multimodal/vision_language_model/minicpm_o/vllm/ci/prepare.sh index 072ab438..ec26235f 100644 --- a/models/multimodal/vision_language_model/minicpm_o/vllm/ci/prepare.sh +++ b/models/multimodal/vision_language_model/minicpm_o/vllm/ci/prepare.sh @@ -16,4 +16,3 @@ set -x cp -r ../../vllm_public_assets/ ./ -pip install /mnt/deepspark/install/transformers-4.45.2+corex.4.3.0-py3-none-any.whl diff --git a/models/nlp/llm/internlm3/lmdeploy/ci/prepare.sh b/models/nlp/llm/internlm3/lmdeploy/ci/prepare.sh index 34aa71ce..d6fa2d8c 100644 --- a/models/nlp/llm/internlm3/lmdeploy/ci/prepare.sh +++ b/models/nlp/llm/internlm3/lmdeploy/ci/prepare.sh @@ -14,5 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-set -x -pip install /mnt/deepspark/data/install/lmdeploy-0.7.2+corex.4.3.0-py3-none-any.whl \ No newline at end of file +set -x \ No newline at end of file diff --git a/tests/run_vllm.py b/tests/run_vllm.py index ccb28573..79b7cbba 100644 --- a/tests/run_vllm.py +++ b/tests/run_vllm.py @@ -324,6 +324,10 @@ ln -s /mnt/deepspark/data/checkpoints/{checkpoint_n} ./{model_name} pip install {whl_url}`curl -s {whl_url} | grep -o 'xformers-[^"]*\.whl' | head -n1` bash ci/prepare.sh """ + if model_name == "internlm3": + prepare_script += f""" + pip install {whl_url}`curl -s {whl_url} | grep -o 'lmdeploy-[^"]*\.whl' | head -n1` + """ if utils.is_debug(): pip_list = "pip list | grep -E 'numpy|transformer|igie|mmcv|onnx'\n" -- Gitee From dc088d8d1792a32f4bcc0adb40c84f9538d7373f Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Mon, 29 Dec 2025 10:09:36 +0800 Subject: [PATCH 11/19] add ixrt yolox build engine set precision fp16 when int8 --- models/cv/object_detection/yolox/ixrt/build_engine.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/models/cv/object_detection/yolox/ixrt/build_engine.py b/models/cv/object_detection/yolox/ixrt/build_engine.py index e61f82a3..7b847477 100644 --- a/models/cv/object_detection/yolox/ixrt/build_engine.py +++ b/models/cv/object_detection/yolox/ixrt/build_engine.py @@ -32,6 +32,9 @@ def main(config): # print("precision : ", precision) build_config.set_flag(precision) + if config.precision == "int8": + build_config.set_flag(tensorrt.BuilderFlag.FP16) + plan = builder.build_serialized_network(network, build_config) engine_file_path = config.engine with open(engine_file_path, "wb") as f: -- Gitee From 3eda89767df8027cb7657b2bb461651b5230275f Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Mon, 29 Dec 2025 17:26:14 +0800 Subject: [PATCH 12/19] update igie yolov3 yolov5 yolov7 yolox sample --- .../yolov3_sample/igie/build_engine.py | 33 +++ .../yolov3_sample/igie/build_nms_engine.py | 82 ++++++ .../yolov3_sample/igie/calibration_dataset.py | 31 ++ .../yolov3_sample/igie/ci/prepare.sh | 22 ++ .../yolov3_sample/igie/coco_labels.py | 89 ++++++ .../yolov3_sample/igie/common.py | 86 ++++++ .../yolov3_sample/igie/config/YOLOV3_CONFIG | 49 ++++ .../yolov3_sample/igie/cut_model.py | 16 ++ .../yolov3_sample/igie/datasets/__init__.py | 0 .../yolov3_sample/igie/datasets/coco.py | 116 ++++++++ .../yolov3_sample/igie/datasets/common.py | 66 +++++ .../igie/datasets/post_process.py | 115 ++++++++ .../igie/datasets/pre_process.py | 56 ++++ .../yolov3_sample/igie/datasets/vision.py | 136 +++++++++ .../yolov3_sample/igie/deploy.py | 134 +++++++++ .../yolov3_sample/igie/inference.py | 267 +++++++++++++++++ .../yolov3_sample/igie/load_ixrt_plugin.py | 12 + .../yolov3_sample/igie/modify_batchsize.py | 54 ++++ .../yolov3_sample/igie/quant.py | 55 ++++ .../infer_yolov3_sample_int8_accuracy.sh | 209 ++++++++++++++ .../infer_yolov3_sample_int8_performance.sh | 210 ++++++++++++++ .../yolov3_sample/igie/simplify_model.py | 21 ++ .../yolov5_sample/igie/build_engine.py | 39 +++ .../yolov5_sample/igie/build_nms_engine.py | 82 ++++++ .../yolov5_sample/igie/calibration_dataset.py | 30 ++ .../yolov5_sample/igie/ci/prepare.sh | 24 ++ .../yolov5_sample/igie/coco_labels.py | 89 ++++++ .../yolov5_sample/igie/common.py | 86 ++++++ .../yolov5_sample/igie/config/YOLOV5M_CONFIG | 49 ++++ .../yolov5_sample/igie/cut_model.py | 16 ++ .../yolov5_sample/igie/datasets/__init__.py | 0 .../yolov5_sample/igie/datasets/coco.py | 116 ++++++++ .../yolov5_sample/igie/datasets/common.py | 66 +++++ 
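For context on the build_engine.py change in the yolox patch above: enabling the FP16 builder flag together with INT8 is a common TensorRT-style idiom so that layers without INT8 kernels can fall back to FP16 rather than FP32; that this is exactly how IxRT resolves the fallback is an assumption here, not something stated in the patch. A minimal sketch of the flag combination:

import tensorrt

logger = tensorrt.Logger(tensorrt.Logger.WARNING)
builder = tensorrt.Builder(logger)
build_config = builder.create_builder_config()
build_config.set_flag(tensorrt.BuilderFlag.INT8)
# Also allow FP16 so layers that cannot run in INT8 may fall back to FP16 instead of FP32.
build_config.set_flag(tensorrt.BuilderFlag.FP16)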
.../igie/datasets/post_process.py | 115 ++++++++ .../igie/datasets/pre_process.py | 56 ++++ .../yolov5_sample/igie/datasets/vision.py | 136 +++++++++ .../yolov5_sample/igie/deploy.py | 134 +++++++++ .../yolov5_sample/igie/inference.py | 263 +++++++++++++++++ .../yolov5_sample/igie/load_ixrt_plugin.py | 12 + .../yolov5_sample/igie/modify_batchsize.py | 37 +++ .../yolov5_sample/igie/quant.py | 55 ++++ .../infer_yolov5_sample_int8_accuracy.sh | 209 ++++++++++++++ .../infer_yolov5_sample_int8_performance.sh | 210 ++++++++++++++ .../yolov5_sample/igie/simplify_model.py | 21 ++ .../yolov7_sample/igie/build_engine.py | 39 +++ .../yolov7_sample/igie/build_nms_engine.py | 82 ++++++ .../yolov7_sample/igie/calibration_dataset.py | 31 ++ .../yolov7_sample/igie/ci/prepare.sh | 24 ++ .../yolov7_sample/igie/coco_labels.py | 89 ++++++ .../yolov7_sample/igie/common.py | 86 ++++++ .../yolov7_sample/igie/config/YOLOV7M_CONFIG | 49 ++++ .../yolov7_sample/igie/cut_model.py | 16 ++ .../yolov7_sample/igie/datasets/__init__.py | 0 .../yolov7_sample/igie/datasets/coco.py | 116 ++++++++ .../yolov7_sample/igie/datasets/common.py | 66 +++++ .../igie/datasets/post_process.py | 115 ++++++++ .../igie/datasets/pre_process.py | 56 ++++ .../yolov7_sample/igie/datasets/vision.py | 136 +++++++++ .../yolov7_sample/igie/deploy.py | 125 ++++++++ .../yolov7_sample/igie/inference.py | 268 +++++++++++++++++ .../yolov7_sample/igie/load_ixrt_plugin.py | 12 + .../yolov7_sample/igie/modify_batchsize.py | 37 +++ .../yolov7_sample/igie/quant.py | 55 ++++ .../infer_yolov7_sample_int8_accuracy.sh | 208 ++++++++++++++ .../infer_yolov7_sample_int8_performance.sh | 209 ++++++++++++++ .../yolov7_sample/igie/simplify_model.py | 21 ++ .../yolox_sample/igie/build_engine.py | 39 +++ .../yolox_sample/igie/build_nms_engine.py | 82 ++++++ .../yolox_sample/igie/calibration_dataset.py | 29 ++ .../yolox_sample/igie/ci/prepare.sh | 36 +++ .../yolox_sample/igie/coco_labels.py | 89 ++++++ .../yolox_sample/igie/common.py | 89 ++++++ .../yolox_sample/igie/config/YOLOXM_CONFIG | 56 ++++ .../yolox_sample/igie/cut_model.py | 16 ++ .../yolox_sample/igie/datasets/__init__.py | 0 .../yolox_sample/igie/datasets/coco.py | 116 ++++++++ .../yolox_sample/igie/datasets/common.py | 66 +++++ .../igie/datasets/post_process.py | 115 ++++++++ .../yolox_sample/igie/datasets/pre_process.py | 56 ++++ .../yolox_sample/igie/datasets/vision.py | 136 +++++++++ .../yolox_sample/igie/deploy.py | 135 +++++++++ .../yolox_sample/igie/inference.py | 269 ++++++++++++++++++ .../yolox_sample/igie/load_ixrt_plugin.py | 12 + .../yolox_sample/igie/modify_batchsize.py | 37 +++ .../yolox_sample/igie/quant.py | 55 ++++ .../infer_yolox_sample_int8_accuracy.sh | 210 ++++++++++++++ .../infer_yolox_sample_int8_performance.sh | 211 ++++++++++++++ .../yolox_sample/igie/simplify_model.py | 21 ++ tests/model_info.json | 132 +++++++++ 89 files changed, 7551 insertions(+) create mode 100644 models/cv/object_detection/yolov3_sample/igie/build_engine.py create mode 100644 models/cv/object_detection/yolov3_sample/igie/build_nms_engine.py create mode 100644 models/cv/object_detection/yolov3_sample/igie/calibration_dataset.py create mode 100644 models/cv/object_detection/yolov3_sample/igie/ci/prepare.sh create mode 100644 models/cv/object_detection/yolov3_sample/igie/coco_labels.py create mode 100644 models/cv/object_detection/yolov3_sample/igie/common.py create mode 100644 models/cv/object_detection/yolov3_sample/igie/config/YOLOV3_CONFIG create mode 100644 
models/cv/object_detection/yolov3_sample/igie/cut_model.py create mode 100644 models/cv/object_detection/yolov3_sample/igie/datasets/__init__.py create mode 100644 models/cv/object_detection/yolov3_sample/igie/datasets/coco.py create mode 100644 models/cv/object_detection/yolov3_sample/igie/datasets/common.py create mode 100644 models/cv/object_detection/yolov3_sample/igie/datasets/post_process.py create mode 100644 models/cv/object_detection/yolov3_sample/igie/datasets/pre_process.py create mode 100644 models/cv/object_detection/yolov3_sample/igie/datasets/vision.py create mode 100644 models/cv/object_detection/yolov3_sample/igie/deploy.py create mode 100644 models/cv/object_detection/yolov3_sample/igie/inference.py create mode 100644 models/cv/object_detection/yolov3_sample/igie/load_ixrt_plugin.py create mode 100644 models/cv/object_detection/yolov3_sample/igie/modify_batchsize.py create mode 100644 models/cv/object_detection/yolov3_sample/igie/quant.py create mode 100644 models/cv/object_detection/yolov3_sample/igie/scripts/infer_yolov3_sample_int8_accuracy.sh create mode 100644 models/cv/object_detection/yolov3_sample/igie/scripts/infer_yolov3_sample_int8_performance.sh create mode 100644 models/cv/object_detection/yolov3_sample/igie/simplify_model.py create mode 100644 models/cv/object_detection/yolov5_sample/igie/build_engine.py create mode 100644 models/cv/object_detection/yolov5_sample/igie/build_nms_engine.py create mode 100644 models/cv/object_detection/yolov5_sample/igie/calibration_dataset.py create mode 100644 models/cv/object_detection/yolov5_sample/igie/ci/prepare.sh create mode 100644 models/cv/object_detection/yolov5_sample/igie/coco_labels.py create mode 100644 models/cv/object_detection/yolov5_sample/igie/common.py create mode 100644 models/cv/object_detection/yolov5_sample/igie/config/YOLOV5M_CONFIG create mode 100644 models/cv/object_detection/yolov5_sample/igie/cut_model.py create mode 100644 models/cv/object_detection/yolov5_sample/igie/datasets/__init__.py create mode 100644 models/cv/object_detection/yolov5_sample/igie/datasets/coco.py create mode 100644 models/cv/object_detection/yolov5_sample/igie/datasets/common.py create mode 100644 models/cv/object_detection/yolov5_sample/igie/datasets/post_process.py create mode 100644 models/cv/object_detection/yolov5_sample/igie/datasets/pre_process.py create mode 100644 models/cv/object_detection/yolov5_sample/igie/datasets/vision.py create mode 100644 models/cv/object_detection/yolov5_sample/igie/deploy.py create mode 100644 models/cv/object_detection/yolov5_sample/igie/inference.py create mode 100644 models/cv/object_detection/yolov5_sample/igie/load_ixrt_plugin.py create mode 100644 models/cv/object_detection/yolov5_sample/igie/modify_batchsize.py create mode 100644 models/cv/object_detection/yolov5_sample/igie/quant.py create mode 100644 models/cv/object_detection/yolov5_sample/igie/scripts/infer_yolov5_sample_int8_accuracy.sh create mode 100644 models/cv/object_detection/yolov5_sample/igie/scripts/infer_yolov5_sample_int8_performance.sh create mode 100644 models/cv/object_detection/yolov5_sample/igie/simplify_model.py create mode 100644 models/cv/object_detection/yolov7_sample/igie/build_engine.py create mode 100644 models/cv/object_detection/yolov7_sample/igie/build_nms_engine.py create mode 100644 models/cv/object_detection/yolov7_sample/igie/calibration_dataset.py create mode 100644 models/cv/object_detection/yolov7_sample/igie/ci/prepare.sh create mode 100644 
models/cv/object_detection/yolov7_sample/igie/coco_labels.py create mode 100644 models/cv/object_detection/yolov7_sample/igie/common.py create mode 100644 models/cv/object_detection/yolov7_sample/igie/config/YOLOV7M_CONFIG create mode 100644 models/cv/object_detection/yolov7_sample/igie/cut_model.py create mode 100644 models/cv/object_detection/yolov7_sample/igie/datasets/__init__.py create mode 100644 models/cv/object_detection/yolov7_sample/igie/datasets/coco.py create mode 100644 models/cv/object_detection/yolov7_sample/igie/datasets/common.py create mode 100644 models/cv/object_detection/yolov7_sample/igie/datasets/post_process.py create mode 100644 models/cv/object_detection/yolov7_sample/igie/datasets/pre_process.py create mode 100644 models/cv/object_detection/yolov7_sample/igie/datasets/vision.py create mode 100644 models/cv/object_detection/yolov7_sample/igie/deploy.py create mode 100644 models/cv/object_detection/yolov7_sample/igie/inference.py create mode 100644 models/cv/object_detection/yolov7_sample/igie/load_ixrt_plugin.py create mode 100644 models/cv/object_detection/yolov7_sample/igie/modify_batchsize.py create mode 100644 models/cv/object_detection/yolov7_sample/igie/quant.py create mode 100644 models/cv/object_detection/yolov7_sample/igie/scripts/infer_yolov7_sample_int8_accuracy.sh create mode 100644 models/cv/object_detection/yolov7_sample/igie/scripts/infer_yolov7_sample_int8_performance.sh create mode 100644 models/cv/object_detection/yolov7_sample/igie/simplify_model.py create mode 100644 models/cv/object_detection/yolox_sample/igie/build_engine.py create mode 100644 models/cv/object_detection/yolox_sample/igie/build_nms_engine.py create mode 100644 models/cv/object_detection/yolox_sample/igie/calibration_dataset.py create mode 100644 models/cv/object_detection/yolox_sample/igie/ci/prepare.sh create mode 100644 models/cv/object_detection/yolox_sample/igie/coco_labels.py create mode 100644 models/cv/object_detection/yolox_sample/igie/common.py create mode 100644 models/cv/object_detection/yolox_sample/igie/config/YOLOXM_CONFIG create mode 100644 models/cv/object_detection/yolox_sample/igie/cut_model.py create mode 100644 models/cv/object_detection/yolox_sample/igie/datasets/__init__.py create mode 100644 models/cv/object_detection/yolox_sample/igie/datasets/coco.py create mode 100644 models/cv/object_detection/yolox_sample/igie/datasets/common.py create mode 100644 models/cv/object_detection/yolox_sample/igie/datasets/post_process.py create mode 100644 models/cv/object_detection/yolox_sample/igie/datasets/pre_process.py create mode 100644 models/cv/object_detection/yolox_sample/igie/datasets/vision.py create mode 100644 models/cv/object_detection/yolox_sample/igie/deploy.py create mode 100644 models/cv/object_detection/yolox_sample/igie/inference.py create mode 100644 models/cv/object_detection/yolox_sample/igie/load_ixrt_plugin.py create mode 100644 models/cv/object_detection/yolox_sample/igie/modify_batchsize.py create mode 100644 models/cv/object_detection/yolox_sample/igie/quant.py create mode 100644 models/cv/object_detection/yolox_sample/igie/scripts/infer_yolox_sample_int8_accuracy.sh create mode 100644 models/cv/object_detection/yolox_sample/igie/scripts/infer_yolox_sample_int8_performance.sh create mode 100644 models/cv/object_detection/yolox_sample/igie/simplify_model.py diff --git a/models/cv/object_detection/yolov3_sample/igie/build_engine.py b/models/cv/object_detection/yolov3_sample/igie/build_engine.py new file mode 100644 index 00000000..7b11d92d --- 
/dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/build_engine.py @@ -0,0 +1,33 @@ +import argparse +import numpy as np +import tvm +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + + +def main(config): + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + precision = config.precision + + inputs_info = {"images": ([config.bsz, 3, 416, 416], "float32")} + mod, params = import_model_to_igie(config.model, inputs_info, precision=precision, backend="tensorrt") + lib = relay.build(mod, target=target, params=params, precision=precision, device=device) + lib.export_library(config.engine) + print("Build engine done!") + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--bsz", type=int) + # engine args + parser.add_argument("--engine", type=str, default=None) + + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolov3_sample/igie/build_nms_engine.py b/models/cv/object_detection/yolov3_sample/igie/build_nms_engine.py new file mode 100644 index 00000000..51d70747 --- /dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/build_nms_engine.py @@ -0,0 +1,82 @@ +import os +import argparse +import torch +import onnx +from onnx import helper +from onnx import TensorProto, numpy_helper +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +def create_onnx(args): + nms = helper.make_node( + "DetectionNMS_IxRT", + name="NMS", + inputs=["nms_input"], + outputs=["nms_output0", "nms_output1"], + nMaxKeep=args.max_box_pre_img, + fIoUThresh=args.iou_thresh, + fScoreThresh=args.score_thresh + ) + graph = helper.make_graph( + nodes=[nms], + name="gpu_nms", + inputs=[ + helper.make_tensor_value_info( + "nms_input", onnx.TensorProto.FLOAT, (args.bsz, args.all_box_num, 6) + ) + ], + outputs=[ + helper.make_tensor_value_info( + "nms_output0", onnx.TensorProto.FLOAT, (args.bsz, args.max_box_pre_img, 6) + ), + helper.make_tensor_value_info( + "nms_output1", onnx.TensorProto.INT32, (args.bsz,) + ) + ], + initializer=[] + ) + + op = onnx.OperatorSetIdProto() + op.version = 13 + model = onnx.helper.make_model(graph) + + model = onnx.helper.make_model(graph, opset_imports=[op]) + onnx_path = args.path + "/nms.onnx" + onnx.save(model, onnx_path) + +def build_engine(args): + onnx_path = args.path + "/nms.onnx" + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(onnx_path) + plan = builder.build_serialized_network(network, build_config) + + engine_path = args.path + "/nms.engine" + with open(engine_path, "wb") as f: + f.write(plan) + +def main(args): + create_onnx(args) + build_engine(args) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--bsz", type=int, default=1, help="batch size") + parser.add_argument("--path", type=str) + parser.add_argument("--all_box_num", type=int, default=25200) + 
parser.add_argument("--max_box_pre_img", type=int, default=1000) + parser.add_argument("--iou_thresh", type=float, default=0.6) + parser.add_argument("--score_thresh", type=float, default=0.001) + + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolov3_sample/igie/calibration_dataset.py b/models/cv/object_detection/yolov3_sample/igie/calibration_dataset.py new file mode 100644 index 00000000..578e013d --- /dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/calibration_dataset.py @@ -0,0 +1,31 @@ +import os +import torch +import torchvision.datasets +from torch.utils.data import DataLoader + + + +from datasets.coco import CocoDetection + +def create_dataloaders(data_path, annFile, img_sz=640, batch_size=32, step=32, workers=2, data_process_type="yolov5"): + dataset = CocoDetection( + root=data_path, + annFile=annFile, + img_size=img_sz, + data_process_type=data_process_type + ) + calibration_dataset = dataset + num_samples = min(5000, batch_size * step) + if num_samples > 0: + calibration_dataset = torch.utils.data.Subset( + dataset, indices=range(num_samples) + ) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + return calibration_dataloader \ No newline at end of file diff --git a/models/cv/object_detection/yolov3_sample/igie/ci/prepare.sh b/models/cv/object_detection/yolov3_sample/igie/ci/prepare.sh new file mode 100644 index 00000000..4cfeb6e4 --- /dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/ci/prepare.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +pip3 install tqdm +pip3 install pycocotools opencv_python==4.6.0.66 +mkdir -p checkpoints +ln -s /mnt/deepspark/data/datasets/corex-inference-data-4.0.0/checkpoints/yolov3/yolov3.onnx ./checkpoints/ \ No newline at end of file diff --git a/models/cv/object_detection/yolov3_sample/igie/coco_labels.py b/models/cv/object_detection/yolov3_sample/igie/coco_labels.py new file mode 100644 index 00000000..69d38878 --- /dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/coco_labels.py @@ -0,0 +1,89 @@ +labels = [ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", +] +def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) + return [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + +__all__ = ["labels"] diff --git a/models/cv/object_detection/yolov3_sample/igie/common.py b/models/cv/object_detection/yolov3_sample/igie/common.py new file mode 100644 index 00000000..5f543555 --- /dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/common.py @@ -0,0 +1,86 @@ +import numpy as np +from tqdm import tqdm + +import tensorrt +import cuda.cuda as cuda +import cuda.cudart as cudart + +# input : [bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)] +# output : [bsz, box_num, 6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)] +def box_class85to6(input): + center_x_y = input[:, :2] + side = input[:, 2:4] + conf = input[:, 4:5] + class_id = np.argmax(input[:, 5:], axis = -1) + class_id = class_id.astype(np.float32).reshape(-1, 1) + 1 + max_prob = np.max(input[:, 5:], axis = -1).reshape(-1, 1) + x1_y1 = center_x_y - 0.5 * side + x2_y2 = center_x_y + 0.5 * side + nms_input = np.concatenate([x1_y1, x2_y2, class_id, max_prob*conf], axis = -1) + return nms_input + +def save2json(batch_img_id, pred_boxes, json_result, class_trans): + for i, boxes in enumerate(pred_boxes): + if boxes is not None: + image_id = int(batch_img_id[i]) + # have no target + if image_id == -1: + continue + for x, y, w, h, c, p in boxes: + x, y, w, h, p = float(x), float(y), float(w), float(h), float(p) + c = int(c) + json_result.append( + { + "image_id": image_id, + "category_id": class_trans[c - 1], + "bbox": [x, y, w, h], + "score": p, + } + ) + +def create_engine_context(engine_path, logger): + with open(engine_path, 
"rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + err, allocation = cudart.cudaMalloc(size) + assert(err == cuda.CUresult.CUDA_SUCCESS) + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + "nbytes": size, + } + print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations \ No newline at end of file diff --git a/models/cv/object_detection/yolov3_sample/igie/config/YOLOV3_CONFIG b/models/cv/object_detection/yolov3_sample/igie/config/YOLOV3_CONFIG new file mode 100644 index 00000000..828be17d --- /dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/config/YOLOV3_CONFIG @@ -0,0 +1,49 @@ +# BSZ : 构建engine以及推理时的batchsize +# IMGSIZE : 模型输入hw大小 +# RUN_MODE : [FPS, MAP] +# PRECISION : [float16, int8] +# MODEL_NAME : 生成onnx/engine的basename +# ORIGINE_MODEL : 原始onnx文件 +# COCO_GT : COCOEVAL标签文件 +# DATASET_DIR : 量化/推理数据集路径 +# CHECKPOINTS_DIR : 存放生成的onnx/engine路径 +# LAYER_FUSION : decoder部分走融合算子实现 0不融合 1融合 +# DECODER_FASTER : 有两种融合实现,faster版本速度快且可以直接对接gpu nms;另一种实现的输出和onnx保持一致. 
1:faster +IMGSIZE=416 +MODEL_NAME=yolov3 +ORIGINE_MODEL=yolov3.onnx +DATA_PROCESS_TYPE=yolov3 +MODEL_INPUT_NAMES=(images) + +LAYER_FUSION=1 +DECODER_FASTER=1 +DECODER_NUM_CLASS=80 +DECODER_INPUT_NAMES=(416 353 290) +DECODER_8_ANCHOR=(10 13 16 30 33 23) +DECODER_16_ANCHOR=(30 61 62 45 59 119) +DECODER_32_ANCHOR=(116 90 156 198 373 326) + +# NMS CONFIG + # IOU_THRESH : iou阈值 + # SCORE_THRESH : bbox置信度阈值 + # MAX_BOX_PRE_IMG : 每张图片预测bbox的数量上限 + # ALL_BOX_NUM : nms接收每张图片的box数量 + # NMS_TYPE : GPU/CPU(TODO) +IOU_THRESH=0.6 +SCORE_THRESH=0.001 +MAX_BOX_PRE_IMG=1000 +ALL_BOX_NUM=10647 +NMS_TYPE=GPU + +# QUANT CONFIG (仅PRECISION为int8时生效) + # QUANT_OBSERVER : 量化策略,可选 [hist_percentile, percentile, minmax, entropy, ema] + # QUANT_BATCHSIZE : 量化时组dataloader的batchsize, 最好和onnx中的batchsize保持一致,有些op可能推导shape错误(比如Reshape) + # QUANT_STEP : 量化步数 + # QUANT_SEED : 随机种子 保证量化结果可复现 + # QUANT_EXIST_ONNX : 如果有其他来源的量化模型则填写 +QUANT_OBSERVER=hist_percentile +QUANT_BATCHSIZE=1 +QUANT_STEP=32 +QUANT_SEED=42 +DISABLE_QUANT_LIST=() +QUANT_EXIST_ONNX= \ No newline at end of file diff --git a/models/cv/object_detection/yolov3_sample/igie/cut_model.py b/models/cv/object_detection/yolov3_sample/igie/cut_model.py new file mode 100644 index 00000000..af0a3a4f --- /dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/cut_model.py @@ -0,0 +1,16 @@ +import onnx +import argparse +from onnxsim import simplify + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--input_model", type=str) + parser.add_argument("--output_model", type=str) + parser.add_argument("--input_names", nargs='+', type=str) + parser.add_argument("--output_names", nargs='+', type=str) + args = parser.parse_args() + return args + +args = parse_args() +onnx.utils.extract_model(args.input_model, args.output_model, args.input_names, args.output_names) +print(" Cut Model Done.") \ No newline at end of file diff --git a/models/cv/object_detection/yolov3_sample/igie/datasets/__init__.py b/models/cv/object_detection/yolov3_sample/igie/datasets/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/models/cv/object_detection/yolov3_sample/igie/datasets/coco.py b/models/cv/object_detection/yolov3_sample/igie/datasets/coco.py new file mode 100644 index 00000000..7f355b84 --- /dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/datasets/coco.py @@ -0,0 +1,116 @@ +import os.path +from typing import Any, Callable, List, Optional, Tuple + +import cv2 + +from .vision import VisionDataset +from .pre_process import get_post_process +class CocoDetection(VisionDataset): + """`MS Coco Detection `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. 
+ """ + + def __init__( + self, + root: str, + annFile: str, + img_size: int, + data_process_type: str, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + transforms: Optional[Callable] = None, + + ) -> None: + super().__init__(root, transforms, transform, target_transform) + from pycocotools.coco import COCO + + self.coco = COCO(annFile) + self.ids = list(sorted(self.coco.imgs.keys())) + self.img_size = img_size + + self.transforms = get_post_process(data_process_type) + + def _load_image(self, id: int): + path = self.coco.loadImgs(id)[0]["file_name"] + data = cv2.imread(os.path.join(self.root, path)) + return data + + def _load_target(self, id: int) -> List[Any]: + return self.coco.loadAnns(self.coco.getAnnIds(id)) + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + id = self.ids[index] + image = self._load_image(id) + target = self._load_target(id) + origin_shape = image.shape[:2] + + if self.transforms is not None: + image = self.transforms(image, self.img_size) + + if len(target) > 0: + image_id = target[0]["image_id"] + else: + # have no target + image_id = -1 + return image, origin_shape, image_id + + def __len__(self) -> int: + return len(self.ids) + + +class CocoCaptions(CocoDetection): + """`MS Coco Captions `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + + Example: + + .. 
code:: python + + import torchvision.datasets as dset + import torchvision.transforms as transforms + cap = dset.CocoCaptions(root = 'dir where images are', + annFile = 'json annotation file', + transform=transforms.PILToTensor()) + + print('Number of samples: ', len(cap)) + img, target = cap[3] # load 4th sample + + print("Image Size: ", img.size()) + print(target) + + Output: :: + + Number of samples: 82783 + Image Size: (3L, 427L, 640L) + [u'A plane emitting smoke stream flying over a mountain.', + u'A plane darts across a bright blue sky behind a mountain covered in snow', + u'A plane leaves a contrail above the snowy mountain top.', + u'A mountain that has a plane flying overheard in the distance.', + u'A mountain view with a plume of smoke in the background'] + + """ + + def _load_target(self, id: int) -> List[str]: + return [ann["caption"] for ann in super()._load_target(id)] diff --git a/models/cv/object_detection/yolov3_sample/igie/datasets/common.py b/models/cv/object_detection/yolov3_sample/igie/datasets/common.py new file mode 100644 index 00000000..e120e00f --- /dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/datasets/common.py @@ -0,0 +1,66 @@ +import cv2 +import math +import numpy as np + +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + +def scale_boxes(net_shape, boxes, ori_shape, use_letterbox=False): + # Rescale boxes (xyxy) from net_shape to ori_shape + + if use_letterbox: + + gain = min( + net_shape[0] / ori_shape[0], net_shape[1] / ori_shape[1] + ) # gain = new / old + pad = (net_shape[1] - ori_shape[1] * gain) / 2, ( + net_shape[0] - ori_shape[0] * gain + ) / 2.0 + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + else: + x_scale, y_scale = net_shape[1] / ori_shape[1], net_shape[0] / ori_shape[0] + + boxes[:, 0] /= x_scale + boxes[:, 1] /= y_scale + boxes[:, 2] /= x_scale + boxes[:, 3] /= y_scale + + clip_boxes(boxes, ori_shape) + return boxes + +def clip_boxes(boxes, shape): + + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 \ No newline at end of file diff --git 
a/models/cv/object_detection/yolov3_sample/igie/datasets/post_process.py b/models/cv/object_detection/yolov3_sample/igie/datasets/post_process.py new file mode 100644 index 00000000..a58c02f8 --- /dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/datasets/post_process.py @@ -0,0 +1,115 @@ +import cv2 +import math +import numpy as np + +from .common import letterbox, scale_boxes, clip_boxes + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Postprocess + elif data_process_type == "yolov3": + return Yolov3Postprocess + elif data_process_type == "yolox": + return YoloxPostprocess + return None + +def Yolov3Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=False + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def Yolov5Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=True + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def YoloxPostprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + boxes = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + r = min(imgsz[0]/ori_img_shape[0][i], imgsz[1]/ori_img_shape[1][i]) + boxes[:, :4] /= r + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + clip_boxes(boxes, (ori_img_shape[0][i], ori_img_shape[1][i])) + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box \ No newline at end of file diff --git a/models/cv/object_detection/yolov3_sample/igie/datasets/pre_process.py b/models/cv/object_detection/yolov3_sample/igie/datasets/pre_process.py new file mode 100644 index 00000000..8cc643a8 --- /dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/datasets/pre_process.py @@ -0,0 +1,56 @@ +import cv2 +import math +import numpy as np + +from .common import letterbox + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Preprocess + elif data_process_type == "yolov3": + return Yolov3Preprocess + elif data_process_type == "yolox": + return YoloxPreprocess + return None + +def Yolov3Preprocess(image, img_size): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / max(h0, w0) # ratio + + image = cv2.resize(image, (img_size, img_size)) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR 
to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def Yolov5Preprocess(image, img_size, augment=False): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / max(h0, w0) # ratio + + if r != 1: # if sizes are not equal + interp = cv2.INTER_LINEAR if (augment or r > 1) else cv2.INTER_AREA + image = cv2.resize(image, (math.ceil(w0 * r), math.ceil(h0 * r)), interpolation=interp) + + # shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size rect == True + + image, ratio, dwdh = letterbox(image, new_shape=img_size, auto=False, scaleup=False) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def YoloxPreprocess(img, img_size, swap=(2,0,1)): + + padded_img = np.ones((img_size, img_size, 3), dtype=np.uint8) * 114 + r = min(img_size / img.shape[0], img_size / img.shape[1]) + resized_img = cv2.resize( + img, + (int(img.shape[1] * r), int(img.shape[0] * r)), + interpolation=cv2.INTER_LINEAR, + ).astype(np.uint8) + + padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img + padded_img = padded_img.transpose(swap) + padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) + + return padded_img \ No newline at end of file diff --git a/models/cv/object_detection/yolov3_sample/igie/datasets/vision.py b/models/cv/object_detection/yolov3_sample/igie/datasets/vision.py new file mode 100644 index 00000000..32da4a78 --- /dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/datasets/vision.py @@ -0,0 +1,136 @@ +import os +from typing import Any, Callable, List, Optional, Tuple + +import torch +import torch.utils.data as data + +from types import FunctionType + +def _log_api_usage_once(obj: Any) -> None: + + """ + Logs API usage(module and name) within an organization. + In a large ecosystem, it's often useful to track the PyTorch and + TorchVision APIs usage. This API provides the similar functionality to the + logging module in the Python stdlib. It can be used for debugging purpose + to log which methods are used and by default it is inactive, unless the user + manually subscribes a logger via the `SetAPIUsageLogger method `_. + Please note it is triggered only once for the same API call within a process. + It does not collect any data from open-source users since it is no-op by default. + For more information, please refer to + * PyTorch note: https://pytorch.org/docs/stable/notes/large_scale_deployments.html#api-usage-logging; + * Logging policy: https://github.com/pytorch/vision/issues/5052; + + Args: + obj (class instance or method): an object to extract info from. + """ + module = obj.__module__ + if not module.startswith("torchvision"): + module = f"torchvision.internal.{module}" + name = obj.__class__.__name__ + if isinstance(obj, FunctionType): + name = obj.__name__ + torch._C._log_api_usage_once(f"{module}.{name}") + +class VisionDataset(data.Dataset): + """ + Base Class For making datasets which are compatible with torchvision. + It is necessary to override the ``__getitem__`` and ``__len__`` method. + + Args: + root (string): Root directory of dataset. + transforms (callable, optional): A function/transforms that takes in + an image and a label and returns the transformed versions of both. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. 
E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + + .. note:: + + :attr:`transforms` and the combination of :attr:`transform` and :attr:`target_transform` are mutually exclusive. + """ + + _repr_indent = 4 + + def __init__( + self, + root: str, + transforms: Optional[Callable] = None, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + ) -> None: + _log_api_usage_once(self) + if isinstance(root, str): + root = os.path.expanduser(root) + self.root = root + + has_transforms = transforms is not None + has_separate_transform = transform is not None or target_transform is not None + if has_transforms and has_separate_transform: + raise ValueError("Only transforms or transform/target_transform can be passed as argument") + + # for backwards-compatibility + self.transform = transform + self.target_transform = target_transform + + if has_separate_transform: + transforms = StandardTransform(transform, target_transform) + self.transforms = transforms + + def __getitem__(self, index: int) -> Any: + """ + Args: + index (int): Index + + Returns: + (Any): Sample and meta data, optionally transformed by the respective transforms. + """ + raise NotImplementedError + + def __len__(self) -> int: + raise NotImplementedError + + def __repr__(self) -> str: + head = "Dataset " + self.__class__.__name__ + body = [f"Number of datapoints: {self.__len__()}"] + if self.root is not None: + body.append(f"Root location: {self.root}") + body += self.extra_repr().splitlines() + if hasattr(self, "transforms") and self.transforms is not None: + body += [repr(self.transforms)] + lines = [head] + [" " * self._repr_indent + line for line in body] + return "\n".join(lines) + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def extra_repr(self) -> str: + return "" + + +class StandardTransform: + def __init__(self, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None) -> None: + self.transform = transform + self.target_transform = target_transform + + def __call__(self, input: Any, target: Any) -> Tuple[Any, Any]: + if self.transform is not None: + input = self.transform(input) + if self.target_transform is not None: + target = self.target_transform(target) + return input, target + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def __repr__(self) -> str: + body = [self.__class__.__name__] + if self.transform is not None: + body += self._format_transform_repr(self.transform, "Transform: ") + if self.target_transform is not None: + body += self._format_transform_repr(self.target_transform, "Target transform: ") + + return "\n".join(body) diff --git a/models/cv/object_detection/yolov3_sample/igie/deploy.py b/models/cv/object_detection/yolov3_sample/igie/deploy.py new file mode 100644 index 00000000..ec56b7ab --- /dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/deploy.py @@ -0,0 +1,134 @@ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +import argparse +from tensorrt.deploy.api import GraphTransform, create_source, create_target + +class Transform: + def __init__(self, graph): + self.t = GraphTransform(graph) + self.graph 
= graph + + def ReplaceFocus(self, input_edge, outputs, to_op): + input_var = self.graph.get_variable(input_edge) + op = self.graph.get_operator(to_op) + self.t.delete_operators_between_var_op( + from_var=input_var, to_op=op + ) + self.t.make_operator( + "Focus", inputs=input_edge, outputs=outputs + ) + return self.graph + + def AddYoloDecoderOp(self, inputs: list, outputs: list, op_type, **attributes): + if attributes["anchor"] is None: + del attributes["anchor"] + self.t.make_operator( + op_type, inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + + def AddConcatOp(self, inputs: list, outputs, **attributes): + self.t.make_operator( + "Concat", inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + +def customize_ops(graph, args): + t = Transform(graph) + fuse_focus = args.focus_input is not None and args.focus_output is not None and args.focus_last_node is not None + if fuse_focus: + graph = t.ReplaceFocus( + input_edge=args.focus_input, + outputs=args.focus_output, + to_op=args.focus_last_node + ) + decoder_input = args.decoder_input_names + num = len(decoder_input) // 3 + graph = t.AddYoloDecoderOp( + inputs=decoder_input[:num], + outputs=["decoder_8"], + op_type=args.decoder_type, + anchor=args.decoder8_anchor, + num_class=args.num_class, + stride=8, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num:num*2], + outputs=["decoder_16"], + op_type=args.decoder_type, + anchor=args.decoder16_anchor, + num_class=args.num_class, + stride=16, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2:num*2+1], + outputs=["decoder_32"], + op_type=args.decoder_type, + anchor=args.decoder32_anchor, + num_class=args.num_class, + stride=32, + faster_impl=args.faster + ) + if args.decoder64_anchor is not None: + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2+1:], + outputs=["decoder_64"], + op_type=args.decoder_type, + anchor=args.decoder64_anchor, + num_class=args.num_class, + stride=64, + faster_impl=args.faster + ) + graph = t.AddConcatOp( + inputs=["decoder_8", "decoder_16", "decoder_32", "decoder_64"], + outputs=["output"], + axis=1 + ) + elif args.with_nms: + graph = t.AddConcatOp( + inputs=["decoder_32", "decoder_16", "decoder_8"], + outputs=["output"], + axis=1 + ) + + graph.outputs.clear() + graph.add_output("output") + graph.outputs["output"].dtype = "FLOAT" + else: + graph.outputs.clear() + graph.add_output("decoder_8") + graph.outputs["decoder_8"].dtype = "FLOAT" + graph.add_output("decoder_16") + graph.outputs["decoder_16"].dtype = "FLOAT" + graph.add_output("decoder_32") + graph.outputs["decoder_32"].dtype = "FLOAT" + return graph + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--src", type=str) + parser.add_argument("--dst", type=str) + parser.add_argument("--decoder_type", type=str, choices=["YoloV3Decoder", "YoloV5Decoder", "YoloV7Decoder", "YoloxDecoder"]) + parser.add_argument("--with_nms", type=bool, default=False, help="engine with nms") + parser.add_argument("--decoder_input_names", nargs='+', type=str) + parser.add_argument("--decoder8_anchor", nargs='*', type=int) + parser.add_argument("--decoder16_anchor", nargs='*', type=int) + parser.add_argument("--decoder32_anchor", nargs='*', type=int) + parser.add_argument("--decoder64_anchor", nargs='*', type=int, default=None) + parser.add_argument("--num_class", type=int, default=80) + parser.add_argument("--faster", type=int, default=1) + parser.add_argument("--focus_input", type=str, 
default=None) + parser.add_argument("--focus_output", type=str, default=None) + parser.add_argument("--focus_last_node", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + + args = parse_args() + graph = create_source(args.src)() + graph = customize_ops(graph, args) + create_target(saved_path=args.dst).export(graph) + print("Surged onnx lies on", args.dst) \ No newline at end of file diff --git a/models/cv/object_detection/yolov3_sample/igie/inference.py b/models/cv/object_detection/yolov3_sample/igie/inference.py new file mode 100644 index 00000000..1cfaa784 --- /dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/inference.py @@ -0,0 +1,267 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +import glob +import json +import os +import time +import sys + +import torch +import numpy as np +import cuda.cuda as cuda +import cuda.cudart as cudart + +from coco_labels import coco80_to_coco91_class, labels +from common import save2json, box_class85to6 +from common import create_engine_context, get_io_bindings +from calibration_dataset import create_dataloaders +from datasets.post_process import get_post_process + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from tqdm import tqdm +import tensorrt +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + + +import tvm +from tvm.contrib import graph_executor + +def init_by_igie(engine_path): + device = tvm.device("iluvatar", 0) + lib = tvm.runtime.load_module(engine_path) + module = graph_executor.GraphModule(lib["default"](device)) + # engine, context = module.engine, module.context + # inputs, outputs, allocations = module.inputs, module.outputs, module.allocations + return module + +def igie_infer(module, batch_data): + # set input + module.set_input(module.inputs[0]["name"], batch_data) + ### infer model + module.run() + # get output data + output = module.get_output(0) + return output + + +def main(config): + + # Load dataloader + dataloader = create_dataloaders( + data_path=config.eval_dir, + annFile=config.coco_gt, + img_sz=config.imgsz, + batch_size=config.bsz, + step=config.loop_count, + data_process_type=config.data_process_type + ) + + # Load post process func + if config.test_mode == "MAP": + post_process_func = get_post_process(config.data_process_type) + + bsz = config.bsz + num_samples = 5000 + if config.loop_count > 0: + num_samples = bsz * config.loop_count + num_batch = len(dataloader) + print("=" * 30) + print(f"Test Mode : {'Asynchronous' if config.use_async else 'Synchronous'}") + print(f"Total sample : {num_samples}\nBatch_size : {bsz}\nRun Batch : {num_batch}") + print("=" * 30) + + json_result = [] + forward_time = 0.0 + class_map = coco80_to_coco91_class() + + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + + module = init_by_igie(config.model_engine) + + # Load nms_engine + if config.test_mode == "MAP" and config.nms_type == "GPU": + nms_engine, nms_context = create_engine_context(config.nms_engine, logger) + nms_inputs, nms_outputs, nms_allocations = get_io_bindings(nms_engine) + nms_output0 = np.zeros(nms_outputs[0]["shape"], nms_outputs[0]["dtype"]) + nms_output1 = np.zeros(nms_outputs[1]["shape"], nms_outputs[1]["dtype"]) + print(f"nms_output0 shape : {nms_output0.shape} nms_output0 type : {nms_output0.dtype}") + print(f"nms_output1 shape : {nms_output1.shape} nms_output1 type : {nms_output1.dtype}") + + # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + 
for i in range(config.warm_up):
+            module.run()
+        print("Warm Done.")
+
+    for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader):
+        batch_data = batch_data.numpy()
+        batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()]
+        # batch_img_id = batch_img_id.numpy()
+        cur_bsz_sample = batch_data.shape[0]
+
+        if config.test_mode == "MAP":
+            # Run the IGIE module on this batch and fetch its output
+            output = igie_infer(module, batch_data)
+
+            # Step 1 : prepare data to nms
+            _, box_num, box_unit = output.shape
+            if config.debug:
+                print(f"[Debug] box_num(25200) : {box_num}, box_unit(6) : {box_unit}")
+
+            if config.decoder_faster == 0:
+                nms_input = box_class85to6(output.reshape(-1, box_unit))
+            else:
+                nms_input = output
+
+            # Step 2 : nms
+            # cpu nms(TODO)
+
+            # gpu nms
+            if config.nms_type == "GPU":
+
+                # Set nms input
+                err, = cuda.cuMemcpyHtoD(nms_inputs[0]["allocation"], nms_input, nms_input.nbytes)
+                assert(err == cuda.CUresult.CUDA_SUCCESS)
+                nms_context.execute_v2(nms_allocations)
+                err, = cuda.cuMemcpyDtoH(nms_output0, nms_outputs[0]["allocation"], nms_outputs[0]["nbytes"])
+                assert(err == cuda.CUresult.CUDA_SUCCESS)
+                err, = cuda.cuMemcpyDtoH(nms_output1, nms_outputs[1]["allocation"], nms_outputs[1]["nbytes"])
+                assert(err == cuda.CUresult.CUDA_SUCCESS)
+
+            # Step 3 : post process + save
+            pred_boxes = post_process_func(
+                ori_img_shape=batch_img_shape,
+                imgsz=(config.imgsz, config.imgsz),
+                box_datas=nms_output0,
+                box_nums=nms_output1,
+                sample_num=cur_bsz_sample,
+                max_det=config.max_det
+            )
+            save2json(batch_img_id, pred_boxes, json_result, class_map)
+
+    # fps = num_samples / forward_time
+
+    if config.test_mode == "FPS":
+        start_time = time.time()
+        for i in range(config.loop_count):
+            module.run()
+        end_time = time.time()
+        forward_time = end_time - start_time
+        fps = (config.loop_count*config.bsz) / forward_time
+        print("FPS : ", fps)
+        print(f"Performance Check : Test {fps} >= target {config.fps_target}")
+        if fps >= config.fps_target:
+            print("pass!")
+            exit()
+        else:
+            print("failed!")
+            exit(10)
+
+    if config.test_mode == "MAP":
+        if len(json_result) == 0:
+            print("Predict zero box!")
+            exit(10)
+
+        if not os.path.exists(config.pred_dir):
+            os.makedirs(config.pred_dir)
+
+        pred_json = os.path.join(
+            config.pred_dir, f"{config.model_name}_{config.precision}_preds.json"
+        )
+        with open(pred_json, "w") as f:
+            json.dump(json_result, f)
+
+        anno_json = config.coco_gt
+        anno = COCO(anno_json)  # init annotations api
+        pred = anno.loadRes(pred_json)  # init predictions api
+        eval = COCOeval(anno, pred, "bbox")
+
+        eval.evaluate()
+        eval.accumulate()
+        print(
+            f"==============================eval {config.model_name} {config.precision} coco map =============================="
+        )
+        eval.summarize()
+
+        map, map50 = eval.stats[:2]
+        print("MAP@0.5 : ", map50)
+        print(f"Accuracy Check : Test {map50} >= target {config.map_target}")
+        if map50 >= config.map_target:
+            print("pass!")
+            exit()
+        else:
+            print("failed!")
+            exit(10)
+
+def parse_config():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--model_name", type=str, default="YOLOV5s", help="YOLOV3 YOLOV5 YOLOV7 YOLOX"
+    )
+    parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8",
+                        help="The precision of datatype")
+    parser.add_argument("--test_mode", type=str, default="FPS", help="FPS
MAP") + parser.add_argument( + "--model_engine", + type=str, + default="", + help="model engine path", + ) + parser.add_argument( + "--nms_engine", + type=str, + default="", + help="nms engine path", + ) + parser.add_argument( + "--coco_gt", + type=str, + default="data/datasets/cv/coco2017/annotations/instances_val2017.json", + help="coco instances_val2017.json", + ) + parser.add_argument("--warm_up", type=int, default=3, help="warm_up count") + parser.add_argument("--loop_count", type=int, default=-1, help="loop count") + parser.add_argument( + "--eval_dir", + type=str, + default="data/datasets/cv/coco2017/val2017", + help="coco image dir", + ) + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=640, + help="inference size h,w", + ) + parser.add_argument("--max_det", type=int, default=1000, help="maximum detections per image") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--use_async", action="store_true") + parser.add_argument("--debug", action="store_true") + parser.add_argument("--pred_dir", type=str, default=".", help="pred save json dirs") + parser.add_argument("--map_target", type=float, default=0.56, help="target mAP") + parser.add_argument("--fps_target", type=float, default=-1.0, help="target fps") + parser.add_argument("--decoder_faster", type=int, default=0, help="decoder faster can use gpu nms directly") + parser.add_argument("--nms_type", type=str, default="GPU", help="GPU/CPU") + + config = parser.parse_args() + print("config:", config) + return config + +if __name__ == "__main__": + config = parse_config() + main(config) diff --git a/models/cv/object_detection/yolov3_sample/igie/load_ixrt_plugin.py b/models/cv/object_detection/yolov3_sample/igie/load_ixrt_plugin.py new file mode 100644 index 00000000..932efbdf --- /dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/load_ixrt_plugin.py @@ -0,0 +1,12 @@ +import ctypes +import tensorrt +from os.path import join, dirname, exists +def load_ixrt_plugin(logger=tensorrt.Logger(tensorrt.Logger.INFO), namespace="", dynamic_path=""): + if not dynamic_path: + dynamic_path = join(dirname(tensorrt.__file__), "lib", "libixrt_plugin.so") + if not exists(dynamic_path): + raise FileNotFoundError( + f"The ixrt_plugin lib {dynamic_path} is not existed, please provided effective plugin path!") + ctypes.CDLL(dynamic_path) + tensorrt.init_libnvinfer_plugins(logger, namespace) + print(f"Loaded plugin from {dynamic_path}") \ No newline at end of file diff --git a/models/cv/object_detection/yolov3_sample/igie/modify_batchsize.py b/models/cv/object_detection/yolov3_sample/igie/modify_batchsize.py new file mode 100644 index 00000000..f696ae55 --- /dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/modify_batchsize.py @@ -0,0 +1,54 @@ +import onnx +import argparse +import copy +import numpy as np + +def change_input_dim(model, bsz): + batch_size = bsz + + # The following code changes the first dimension of every input to be batch_size + # Modify as appropriate ... note that this requires all inputs to + # have the same batch_size + inputs = model.graph.input + for input in inputs: + # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. + # Add checks as needed. 
+ dim1 = input.type.tensor_type.shape.dim[0] + # update dim to be a symbolic value + if isinstance(batch_size, str): + # set dynamic batch size + dim1.dim_param = batch_size + elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): + # set given batch size + dim1.dim_value = int(batch_size) + else: + # set batch size of 1 + dim1.dim_value = 1 + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--batch_size", type=int) + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +def modify_resize_nodes(model, bsz): + print("modify resize") + for node in model.graph.node: + if node.op_type == "Resize": + if len(node.input) >= 4 and node.input[3]: + sizes_name = node.input[3] + for initializer in model.graph.initializer: + if initializer.name == sizes_name: + shape = copy.deepcopy(onnx.numpy_helper.to_array(initializer)) + shape[0] = shape[0] * bsz + new_sizes = np.array(shape, dtype=np.int64) + initializer.CopyFrom(onnx.numpy_helper.from_array(new_sizes, name=initializer.name)) + break + +args = parse_args() +model = onnx.load(args.origin_model) +change_input_dim(model, args.batch_size) +modify_resize_nodes(model, args.batch_size) +onnx.save(model, args.output_model) diff --git a/models/cv/object_detection/yolov3_sample/igie/quant.py b/models/cv/object_detection/yolov3_sample/igie/quant.py new file mode 100644 index 00000000..d73212ca --- /dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/quant.py @@ -0,0 +1,55 @@ +import os +import random +import argparse +import numpy as np +from tensorrt.deploy import static_quantize + +import torch +import sys +sys.path.append("/home/haoyuan.chen/temp/inferencesamples/benchmarks/cv/detection/yolov3/tensorrt") +print(sys.path) +from calibration_dataset import create_dataloaders + +def setseed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str) + parser.add_argument("--model", type=str, default="yolov5s_with_decoder.onnx") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--dataset_dir", type=str, default="./coco2017/val2017") + parser.add_argument("--ann_file", type=str, default="./coco2017/annotations/instances_val2017.json") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") + parser.add_argument("--disable_quant_names", nargs='*', type=str) + parser.add_argument("--save_dir", type=str, help="save path", default=None) + parser.add_argument("--bsz", type=int, default=32) + parser.add_argument("--step", type=int, default=20) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--imgsz", type=int, default=640) + args = parser.parse_args() + return args + +args = parse_args() +setseed(args.seed) +model_name = args.model_name + +out_dir = args.save_dir +dataloader = create_dataloaders( + data_path=args.dataset_dir, + annFile=args.ann_file, + img_sz=args.imgsz, + batch_size=args.bsz, + step=args.step, + data_process_type=args.data_process_type +) +# print("disable_quant_names : ", args.disable_quant_names) +static_quantize(args.model, + calibration_dataloader=dataloader, + save_quant_onnx_path=os.path.join(out_dir, f"quantized_{model_name}.onnx"), + observer=args.observer, + data_preprocess=lambda x: 
x[0].to("cuda"), + quant_format="qdq", + disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/object_detection/yolov3_sample/igie/scripts/infer_yolov3_sample_int8_accuracy.sh b/models/cv/object_detection/yolov3_sample/igie/scripts/infer_yolov3_sample_int8_accuracy.sh new file mode 100644 index 00000000..c2c76539 --- /dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/scripts/infer_yolov3_sample_int8_accuracy.sh @@ -0,0 +1,209 @@ +#!/bin/bash + +EXIT_STATUS=0 +check_status() +{ + ret_code=${PIPESTATUS[0]} + if [ ${ret_code} != 0 ]; then + [[ ${ret_code} -eq 10 && "${TEST_PERF:-1}" -eq 0 ]] || EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=-1 +TGT=0.65 +LOOP_COUNT=-1 +RUN_MODE=MAP +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=./ +DATASETS_DIR="${PROJ_DIR}/coco" +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR="${PROJ_DIR}/checkpoints" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${RUN_DIR}/config/YOLOV3_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +CHECKPOINTS_DIR=${CHECKPOINTS_DIR}/tmp +mkdir -p ${CHECKPOINTS_DIR} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ 
$LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_fusion_cancat.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloV3Decoder \ + --with_nms True \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --decoder8_anchor ${DECODER_8_ANCHOR[@]} \ + --decoder16_anchor ${DECODER_16_ANCHOR[@]} \ + --decoder32_anchor ${DECODER_32_ANCHOR[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_bs${BSZ}_with_nms.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}_with_nms.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --bsz ${BSZ} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --map_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/object_detection/yolov3_sample/igie/scripts/infer_yolov3_sample_int8_performance.sh b/models/cv/object_detection/yolov3_sample/igie/scripts/infer_yolov3_sample_int8_performance.sh new file mode 100644 index 00000000..d93e51e8 --- /dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/scripts/infer_yolov3_sample_int8_performance.sh @@ -0,0 +1,210 @@ +#!/bin/bash + +EXIT_STATUS=0 +check_status() +{ + ret_code=${PIPESTATUS[0]} + if [ ${ret_code} != 0 ]; then + [[ ${ret_code} -eq 10 && "${TEST_PERF:-1}" -eq 0 ]] || EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=3 +TGT=1010 +LOOP_COUNT=100 +RUN_MODE=FPS +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=./ +DATASETS_DIR="${PROJ_DIR}/coco" 
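+# Only --bs and --tgt are parsed from the command line, e.g.
+#   bash scripts/infer_yolov3_sample_int8_performance.sh --bs 32 --tgt 1010
+# everything else (model, decoder, NMS and quantization settings) comes from
+# config/YOLOV3_CONFIG sourced below.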
+COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR="${PROJ_DIR}/checkpoints" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${RUN_DIR}/config/YOLOV3_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +CHECKPOINTS_DIR=${CHECKPOINTS_DIR}/tmp +mkdir -p ${CHECKPOINTS_DIR} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_fusion_no_cancat.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloV3Decoder \ + --with_nms False \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --decoder8_anchor ${DECODER_8_ANCHOR[@]} \ + --decoder16_anchor ${DECODER_16_ANCHOR[@]} \ + --decoder32_anchor ${DECODER_32_ANCHOR[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_bs${BSZ}_without_nms.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model 
${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}_without_nms.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --bsz ${BSZ} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --fps_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/object_detection/yolov3_sample/igie/simplify_model.py b/models/cv/object_detection/yolov3_sample/igie/simplify_model.py new file mode 100644 index 00000000..b4254b6f --- /dev/null +++ b/models/cv/object_detection/yolov3_sample/igie/simplify_model.py @@ -0,0 +1,21 @@ +import onnx +import argparse +from onnxsim import simplify + +# Simplify +def simplify_model(args): + onnx_model = onnx.load(args.origin_model) + model_simp, check = simplify(onnx_model) + model_simp = onnx.shape_inference.infer_shapes(model_simp) + onnx.save(model_simp, args.output_model) + print(" Simplify onnx Done.") + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +simplify_model(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/build_engine.py b/models/cv/object_detection/yolov5_sample/igie/build_engine.py new file mode 100644 index 00000000..cdace84b --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/build_engine.py @@ -0,0 +1,39 @@ +import os +import cv2 +import argparse +import numpy as np + +import tvm +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + + +def main(config): + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + precision = config.precision + if config.precision == "float16": + precision = "fp16" + + inputs_info = {"images": ([config.bsz, 3, 640, 640], "float32")} + mod, params = import_model_to_igie(config.model, inputs_info, outputs_info=None, precision=precision, backend="tensorrt") + lib = relay.build(mod, target=target, params=params, precision=precision, device=device) + lib.export_library(config.engine) + print("Build engine done!") + + +def parse_args(): + parser = argparse.ArgumentParser() + 
parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--bsz", type=int) + # engine args + parser.add_argument("--engine", type=str, default=None) + + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/build_nms_engine.py b/models/cv/object_detection/yolov5_sample/igie/build_nms_engine.py new file mode 100644 index 00000000..51d70747 --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/build_nms_engine.py @@ -0,0 +1,82 @@ +import os +import argparse +import torch +import onnx +from onnx import helper +from onnx import TensorProto, numpy_helper +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +def create_onnx(args): + nms = helper.make_node( + "DetectionNMS_IxRT", + name="NMS", + inputs=["nms_input"], + outputs=["nms_output0", "nms_output1"], + nMaxKeep=args.max_box_pre_img, + fIoUThresh=args.iou_thresh, + fScoreThresh=args.score_thresh + ) + graph = helper.make_graph( + nodes=[nms], + name="gpu_nms", + inputs=[ + helper.make_tensor_value_info( + "nms_input", onnx.TensorProto.FLOAT, (args.bsz, args.all_box_num, 6) + ) + ], + outputs=[ + helper.make_tensor_value_info( + "nms_output0", onnx.TensorProto.FLOAT, (args.bsz, args.max_box_pre_img, 6) + ), + helper.make_tensor_value_info( + "nms_output1", onnx.TensorProto.INT32, (args.bsz,) + ) + ], + initializer=[] + ) + + op = onnx.OperatorSetIdProto() + op.version = 13 + model = onnx.helper.make_model(graph) + + model = onnx.helper.make_model(graph, opset_imports=[op]) + onnx_path = args.path + "/nms.onnx" + onnx.save(model, onnx_path) + +def build_engine(args): + onnx_path = args.path + "/nms.onnx" + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(onnx_path) + plan = builder.build_serialized_network(network, build_config) + + engine_path = args.path + "/nms.engine" + with open(engine_path, "wb") as f: + f.write(plan) + +def main(args): + create_onnx(args) + build_engine(args) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--bsz", type=int, default=1, help="batch size") + parser.add_argument("--path", type=str) + parser.add_argument("--all_box_num", type=int, default=25200) + parser.add_argument("--max_box_pre_img", type=int, default=1000) + parser.add_argument("--iou_thresh", type=float, default=0.6) + parser.add_argument("--score_thresh", type=float, default=0.001) + + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/calibration_dataset.py b/models/cv/object_detection/yolov5_sample/igie/calibration_dataset.py new file mode 100644 index 00000000..7d3e3e48 --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/calibration_dataset.py @@ -0,0 +1,30 @@ +import os +import torch +import torchvision.datasets +from torch.utils.data import DataLoader + + +from datasets.coco import CocoDetection + +def 
create_dataloaders(data_path, annFile, img_sz=640, batch_size=32, step=32, workers=2, data_process_type="yolov5"): + dataset = CocoDetection( + root=data_path, + annFile=annFile, + img_size=img_sz, + data_process_type=data_process_type + ) + calibration_dataset = dataset + num_samples = min(5000, batch_size * step) + if num_samples > 0: + calibration_dataset = torch.utils.data.Subset( + dataset, indices=range(num_samples) + ) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + return calibration_dataloader \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/ci/prepare.sh b/models/cv/object_detection/yolov5_sample/igie/ci/prepare.sh new file mode 100644 index 00000000..fb897d45 --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/ci/prepare.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +pip3 install pycocotools +pip3 install tqdm +pip3 install opencv-python==4.6.0.66 + +mkdir -p checkpoints +ln -s /mnt/deepspark/data/datasets/corex-inference-data-4.0.0/checkpoints/yolov5m/yolov5m.onnx ./checkpoints/ \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/coco_labels.py b/models/cv/object_detection/yolov5_sample/igie/coco_labels.py new file mode 100644 index 00000000..69d38878 --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/coco_labels.py @@ -0,0 +1,89 @@ +labels = [ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", +] +def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) + return [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 
88, 89, 90]
+
+__all__ = ["labels"]
diff --git a/models/cv/object_detection/yolov5_sample/igie/common.py b/models/cv/object_detection/yolov5_sample/igie/common.py
new file mode 100644
index 00000000..5f543555
--- /dev/null
+++ b/models/cv/object_detection/yolov5_sample/igie/common.py
@@ -0,0 +1,86 @@
+import numpy as np
+from tqdm import tqdm
+
+import tensorrt
+import cuda.cuda as cuda
+import cuda.cudart as cudart
+
+# input : [bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)]
+# output : [bsz, box_num, 6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)]
+def box_class85to6(input):
+    center_x_y = input[:, :2]
+    side = input[:, 2:4]
+    conf = input[:, 4:5]
+    class_id = np.argmax(input[:, 5:], axis = -1)
+    class_id = class_id.astype(np.float32).reshape(-1, 1) + 1
+    max_prob = np.max(input[:, 5:], axis = -1).reshape(-1, 1)
+    x1_y1 = center_x_y - 0.5 * side
+    x2_y2 = center_x_y + 0.5 * side
+    nms_input = np.concatenate([x1_y1, x2_y2, class_id, max_prob*conf], axis = -1)
+    return nms_input
+
+def save2json(batch_img_id, pred_boxes, json_result, class_trans):
+    for i, boxes in enumerate(pred_boxes):
+        if boxes is not None:
+            image_id = int(batch_img_id[i])
+            # have no target
+            if image_id == -1:
+                continue
+            for x, y, w, h, c, p in boxes:
+                x, y, w, h, p = float(x), float(y), float(w), float(h), float(p)
+                c = int(c)
+                json_result.append(
+                    {
+                        "image_id": image_id,
+                        "category_id": class_trans[c - 1],
+                        "bbox": [x, y, w, h],
+                        "score": p,
+                    }
+                )
+
+def create_engine_context(engine_path, logger):
+    with open(engine_path, "rb") as f:
+        runtime = tensorrt.Runtime(logger)
+        assert runtime
+        engine = runtime.deserialize_cuda_engine(f.read())
+        assert engine
+        context = engine.create_execution_context()
+        assert context
+
+    return engine, context
+
+def get_io_bindings(engine):
+    # Setup I/O bindings
+    inputs = []
+    outputs = []
+    allocations = []
+
+    for i in range(engine.num_bindings):
+        is_input = False
+        if engine.binding_is_input(i):
+            is_input = True
+        name = engine.get_binding_name(i)
+        dtype = engine.get_binding_dtype(i)
+        shape = engine.get_binding_shape(i)
+        if is_input:
+            batch_size = shape[0]
+        size = np.dtype(tensorrt.nptype(dtype)).itemsize
+        for s in shape:
+            size *= s
+        err, allocation = cudart.cudaMalloc(size)
+        assert(err == cuda.CUresult.CUDA_SUCCESS)
+        binding = {
+            "index": i,
+            "name": name,
+            "dtype": np.dtype(tensorrt.nptype(dtype)),
+            "shape": list(shape),
+            "allocation": allocation,
+            "nbytes": size,
+        }
+        print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}")
+        allocations.append(allocation)
+        if engine.binding_is_input(i):
+            inputs.append(binding)
+        else:
+            outputs.append(binding)
+    return inputs, outputs, allocations
\ No newline at end of file
diff --git a/models/cv/object_detection/yolov5_sample/igie/config/YOLOV5M_CONFIG b/models/cv/object_detection/yolov5_sample/igie/config/YOLOV5M_CONFIG
new file mode 100644
index 00000000..510b359a
--- /dev/null
+++ b/models/cv/object_detection/yolov5_sample/igie/config/YOLOV5M_CONFIG
@@ -0,0 +1,49 @@
+# BSZ : batch size used when building the engine and running inference
+# IMGSIZE : model input height/width
+# RUN_MODE : [FPS, MAP]
+# PRECISION : [float16, int8]
+# MODEL_NAME : basename for the generated onnx/engine files
+# ORIGINE_MODEL : original onnx file
+# COCO_GT : COCOEVAL annotation file
+# DATASET_DIR : dataset path used for quantization/inference
+# CHECKPOINTS_DIR : directory where generated onnx/engine files are stored
+# LAYER_FUSION : whether the decoder part uses fused operators; 0 = no fusion, 1 = fusion
+# DECODER_FASTER : there are two fused implementations; the faster one can feed GPU NMS directly, the other keeps its output identical to the onnx. 1 = faster
+IMGSIZE=640
+MODEL_NAME=yolov5m
+ORIGINE_MODEL=yolov5m.onnx
+DATA_PROCESS_TYPE=yolov5
+MODEL_INPUT_NAMES=(images)
+
+LAYER_FUSION=1
+DECODER_FASTER=1
+DECODER_NUM_CLASS=80
+DECODER_INPUT_NAMES=(443 482 521)
+DECODER_8_ANCHOR=(10 13 16 30 33 23)
+DECODER_16_ANCHOR=(30 61 62 45 59 119)
+DECODER_32_ANCHOR=(116 90 156 198 373 326)
+
+# NMS CONFIG
+    # IOU_THRESH : IoU threshold
+    # SCORE_THRESH : bbox confidence threshold
+    # MAX_BOX_PRE_IMG : maximum number of predicted bboxes per image
+    # ALL_BOX_NUM : number of boxes NMS receives per image
+    # NMS_TYPE : GPU/CPU(TODO)
+IOU_THRESH=0.6
+SCORE_THRESH=0.001
+MAX_BOX_PRE_IMG=1000
+ALL_BOX_NUM=25200
+NMS_TYPE=GPU
+
+# QUANT CONFIG (only takes effect when PRECISION is int8)
+    # QUANT_OBSERVER : quantization observer, one of [hist_percentile, percentile, minmax, entropy, ema]
+    # QUANT_BATCHSIZE : dataloader batch size used during quantization; keep it the same as the onnx batch size, otherwise some ops (e.g. Reshape) may infer wrong shapes
+    # QUANT_STEP : number of quantization steps
+    # QUANT_SEED : random seed, keeps the quantization result reproducible
+    # QUANT_EXIST_ONNX : set this if a quantized model from another source should be used
+QUANT_OBSERVER=hist_percentile
+QUANT_BATCHSIZE=1
+QUANT_STEP=32
+QUANT_SEED=42
+DISABLE_QUANT_LIST=()
+QUANT_EXIST_ONNX=
\ No newline at end of file
diff --git a/models/cv/object_detection/yolov5_sample/igie/cut_model.py b/models/cv/object_detection/yolov5_sample/igie/cut_model.py
new file mode 100644
index 00000000..af0a3a4f
--- /dev/null
+++ b/models/cv/object_detection/yolov5_sample/igie/cut_model.py
@@ -0,0 +1,16 @@
+import onnx
+import argparse
+from onnxsim import simplify
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--input_model", type=str)
+    parser.add_argument("--output_model", type=str)
+    parser.add_argument("--input_names", nargs='+', type=str)
+    parser.add_argument("--output_names", nargs='+', type=str)
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+onnx.utils.extract_model(args.input_model, args.output_model, args.input_names, args.output_names)
+print(" Cut Model Done.")
\ No newline at end of file
diff --git a/models/cv/object_detection/yolov5_sample/igie/datasets/__init__.py b/models/cv/object_detection/yolov5_sample/igie/datasets/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/models/cv/object_detection/yolov5_sample/igie/datasets/coco.py b/models/cv/object_detection/yolov5_sample/igie/datasets/coco.py
new file mode 100644
index 00000000..7f355b84
--- /dev/null
+++ b/models/cv/object_detection/yolov5_sample/igie/datasets/coco.py
@@ -0,0 +1,116 @@
+import os.path
+from typing import Any, Callable, List, Optional, Tuple
+
+import cv2
+
+from .vision import VisionDataset
+from .pre_process import get_post_process
+class CocoDetection(VisionDataset):
+    """`MS Coco Detection `_ Dataset.
+
+    It requires the `COCO API to be installed `_.
+
+    Args:
+        root (string): Root directory where images are downloaded to.
+        annFile (string): Path to json annotation file.
+        transform (callable, optional): A function/transform that takes in an PIL image
+            and returns a transformed version. E.g, ``transforms.PILToTensor``
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        transforms (callable, optional): A function/transform that takes input sample and its target as entry
+            and returns a transformed version.
+ """ + + def __init__( + self, + root: str, + annFile: str, + img_size: int, + data_process_type: str, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + transforms: Optional[Callable] = None, + + ) -> None: + super().__init__(root, transforms, transform, target_transform) + from pycocotools.coco import COCO + + self.coco = COCO(annFile) + self.ids = list(sorted(self.coco.imgs.keys())) + self.img_size = img_size + + self.transforms = get_post_process(data_process_type) + + def _load_image(self, id: int): + path = self.coco.loadImgs(id)[0]["file_name"] + data = cv2.imread(os.path.join(self.root, path)) + return data + + def _load_target(self, id: int) -> List[Any]: + return self.coco.loadAnns(self.coco.getAnnIds(id)) + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + id = self.ids[index] + image = self._load_image(id) + target = self._load_target(id) + origin_shape = image.shape[:2] + + if self.transforms is not None: + image = self.transforms(image, self.img_size) + + if len(target) > 0: + image_id = target[0]["image_id"] + else: + # have no target + image_id = -1 + return image, origin_shape, image_id + + def __len__(self) -> int: + return len(self.ids) + + +class CocoCaptions(CocoDetection): + """`MS Coco Captions `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + + Example: + + .. 
code:: python + + import torchvision.datasets as dset + import torchvision.transforms as transforms + cap = dset.CocoCaptions(root = 'dir where images are', + annFile = 'json annotation file', + transform=transforms.PILToTensor()) + + print('Number of samples: ', len(cap)) + img, target = cap[3] # load 4th sample + + print("Image Size: ", img.size()) + print(target) + + Output: :: + + Number of samples: 82783 + Image Size: (3L, 427L, 640L) + [u'A plane emitting smoke stream flying over a mountain.', + u'A plane darts across a bright blue sky behind a mountain covered in snow', + u'A plane leaves a contrail above the snowy mountain top.', + u'A mountain that has a plane flying overheard in the distance.', + u'A mountain view with a plume of smoke in the background'] + + """ + + def _load_target(self, id: int) -> List[str]: + return [ann["caption"] for ann in super()._load_target(id)] diff --git a/models/cv/object_detection/yolov5_sample/igie/datasets/common.py b/models/cv/object_detection/yolov5_sample/igie/datasets/common.py new file mode 100644 index 00000000..e120e00f --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/datasets/common.py @@ -0,0 +1,66 @@ +import cv2 +import math +import numpy as np + +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + +def scale_boxes(net_shape, boxes, ori_shape, use_letterbox=False): + # Rescale boxes (xyxy) from net_shape to ori_shape + + if use_letterbox: + + gain = min( + net_shape[0] / ori_shape[0], net_shape[1] / ori_shape[1] + ) # gain = new / old + pad = (net_shape[1] - ori_shape[1] * gain) / 2, ( + net_shape[0] - ori_shape[0] * gain + ) / 2.0 + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + else: + x_scale, y_scale = net_shape[1] / ori_shape[1], net_shape[0] / ori_shape[0] + + boxes[:, 0] /= x_scale + boxes[:, 1] /= y_scale + boxes[:, 2] /= x_scale + boxes[:, 3] /= y_scale + + clip_boxes(boxes, ori_shape) + return boxes + +def clip_boxes(boxes, shape): + + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 \ No newline at end of file diff --git 
a/models/cv/object_detection/yolov5_sample/igie/datasets/post_process.py b/models/cv/object_detection/yolov5_sample/igie/datasets/post_process.py new file mode 100644 index 00000000..a58c02f8 --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/datasets/post_process.py @@ -0,0 +1,115 @@ +import cv2 +import math +import numpy as np + +from .common import letterbox, scale_boxes, clip_boxes + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Postprocess + elif data_process_type == "yolov3": + return Yolov3Postprocess + elif data_process_type == "yolox": + return YoloxPostprocess + return None + +def Yolov3Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=False + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def Yolov5Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=True + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def YoloxPostprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + boxes = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + r = min(imgsz[0]/ori_img_shape[0][i], imgsz[1]/ori_img_shape[1][i]) + boxes[:, :4] /= r + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + clip_boxes(boxes, (ori_img_shape[0][i], ori_img_shape[1][i])) + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/datasets/pre_process.py b/models/cv/object_detection/yolov5_sample/igie/datasets/pre_process.py new file mode 100644 index 00000000..8cc643a8 --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/datasets/pre_process.py @@ -0,0 +1,56 @@ +import cv2 +import math +import numpy as np + +from .common import letterbox + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Preprocess + elif data_process_type == "yolov3": + return Yolov3Preprocess + elif data_process_type == "yolox": + return YoloxPreprocess + return None + +def Yolov3Preprocess(image, img_size): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / max(h0, w0) # ratio + + image = cv2.resize(image, (img_size, img_size)) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR 
to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def Yolov5Preprocess(image, img_size, augment=False): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / max(h0, w0) # ratio + + if r != 1: # if sizes are not equal + interp = cv2.INTER_LINEAR if (augment or r > 1) else cv2.INTER_AREA + image = cv2.resize(image, (math.ceil(w0 * r), math.ceil(h0 * r)), interpolation=interp) + + # shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size rect == True + + image, ratio, dwdh = letterbox(image, new_shape=img_size, auto=False, scaleup=False) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def YoloxPreprocess(img, img_size, swap=(2,0,1)): + + padded_img = np.ones((img_size, img_size, 3), dtype=np.uint8) * 114 + r = min(img_size / img.shape[0], img_size / img.shape[1]) + resized_img = cv2.resize( + img, + (int(img.shape[1] * r), int(img.shape[0] * r)), + interpolation=cv2.INTER_LINEAR, + ).astype(np.uint8) + + padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img + padded_img = padded_img.transpose(swap) + padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) + + return padded_img \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/datasets/vision.py b/models/cv/object_detection/yolov5_sample/igie/datasets/vision.py new file mode 100644 index 00000000..32da4a78 --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/datasets/vision.py @@ -0,0 +1,136 @@ +import os +from typing import Any, Callable, List, Optional, Tuple + +import torch +import torch.utils.data as data + +from types import FunctionType + +def _log_api_usage_once(obj: Any) -> None: + + """ + Logs API usage(module and name) within an organization. + In a large ecosystem, it's often useful to track the PyTorch and + TorchVision APIs usage. This API provides the similar functionality to the + logging module in the Python stdlib. It can be used for debugging purpose + to log which methods are used and by default it is inactive, unless the user + manually subscribes a logger via the `SetAPIUsageLogger method `_. + Please note it is triggered only once for the same API call within a process. + It does not collect any data from open-source users since it is no-op by default. + For more information, please refer to + * PyTorch note: https://pytorch.org/docs/stable/notes/large_scale_deployments.html#api-usage-logging; + * Logging policy: https://github.com/pytorch/vision/issues/5052; + + Args: + obj (class instance or method): an object to extract info from. + """ + module = obj.__module__ + if not module.startswith("torchvision"): + module = f"torchvision.internal.{module}" + name = obj.__class__.__name__ + if isinstance(obj, FunctionType): + name = obj.__name__ + torch._C._log_api_usage_once(f"{module}.{name}") + +class VisionDataset(data.Dataset): + """ + Base Class For making datasets which are compatible with torchvision. + It is necessary to override the ``__getitem__`` and ``__len__`` method. + + Args: + root (string): Root directory of dataset. + transforms (callable, optional): A function/transforms that takes in + an image and a label and returns the transformed versions of both. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. 
E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + + .. note:: + + :attr:`transforms` and the combination of :attr:`transform` and :attr:`target_transform` are mutually exclusive. + """ + + _repr_indent = 4 + + def __init__( + self, + root: str, + transforms: Optional[Callable] = None, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + ) -> None: + _log_api_usage_once(self) + if isinstance(root, str): + root = os.path.expanduser(root) + self.root = root + + has_transforms = transforms is not None + has_separate_transform = transform is not None or target_transform is not None + if has_transforms and has_separate_transform: + raise ValueError("Only transforms or transform/target_transform can be passed as argument") + + # for backwards-compatibility + self.transform = transform + self.target_transform = target_transform + + if has_separate_transform: + transforms = StandardTransform(transform, target_transform) + self.transforms = transforms + + def __getitem__(self, index: int) -> Any: + """ + Args: + index (int): Index + + Returns: + (Any): Sample and meta data, optionally transformed by the respective transforms. + """ + raise NotImplementedError + + def __len__(self) -> int: + raise NotImplementedError + + def __repr__(self) -> str: + head = "Dataset " + self.__class__.__name__ + body = [f"Number of datapoints: {self.__len__()}"] + if self.root is not None: + body.append(f"Root location: {self.root}") + body += self.extra_repr().splitlines() + if hasattr(self, "transforms") and self.transforms is not None: + body += [repr(self.transforms)] + lines = [head] + [" " * self._repr_indent + line for line in body] + return "\n".join(lines) + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def extra_repr(self) -> str: + return "" + + +class StandardTransform: + def __init__(self, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None) -> None: + self.transform = transform + self.target_transform = target_transform + + def __call__(self, input: Any, target: Any) -> Tuple[Any, Any]: + if self.transform is not None: + input = self.transform(input) + if self.target_transform is not None: + target = self.target_transform(target) + return input, target + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def __repr__(self) -> str: + body = [self.__class__.__name__] + if self.transform is not None: + body += self._format_transform_repr(self.transform, "Transform: ") + if self.target_transform is not None: + body += self._format_transform_repr(self.target_transform, "Target transform: ") + + return "\n".join(body) diff --git a/models/cv/object_detection/yolov5_sample/igie/deploy.py b/models/cv/object_detection/yolov5_sample/igie/deploy.py new file mode 100644 index 00000000..ec56b7ab --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/deploy.py @@ -0,0 +1,134 @@ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +import argparse +from tensorrt.deploy.api import GraphTransform, create_source, create_target + +class Transform: + def __init__(self, graph): + self.t = GraphTransform(graph) + self.graph 
= graph + + def ReplaceFocus(self, input_edge, outputs, to_op): + input_var = self.graph.get_variable(input_edge) + op = self.graph.get_operator(to_op) + self.t.delete_operators_between_var_op( + from_var=input_var, to_op=op + ) + self.t.make_operator( + "Focus", inputs=input_edge, outputs=outputs + ) + return self.graph + + def AddYoloDecoderOp(self, inputs: list, outputs: list, op_type, **attributes): + if attributes["anchor"] is None: + del attributes["anchor"] + self.t.make_operator( + op_type, inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + + def AddConcatOp(self, inputs: list, outputs, **attributes): + self.t.make_operator( + "Concat", inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + +def customize_ops(graph, args): + t = Transform(graph) + fuse_focus = args.focus_input is not None and args.focus_output is not None and args.focus_last_node is not None + if fuse_focus: + graph = t.ReplaceFocus( + input_edge=args.focus_input, + outputs=args.focus_output, + to_op=args.focus_last_node + ) + decoder_input = args.decoder_input_names + num = len(decoder_input) // 3 + graph = t.AddYoloDecoderOp( + inputs=decoder_input[:num], + outputs=["decoder_8"], + op_type=args.decoder_type, + anchor=args.decoder8_anchor, + num_class=args.num_class, + stride=8, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num:num*2], + outputs=["decoder_16"], + op_type=args.decoder_type, + anchor=args.decoder16_anchor, + num_class=args.num_class, + stride=16, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2:num*2+1], + outputs=["decoder_32"], + op_type=args.decoder_type, + anchor=args.decoder32_anchor, + num_class=args.num_class, + stride=32, + faster_impl=args.faster + ) + if args.decoder64_anchor is not None: + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2+1:], + outputs=["decoder_64"], + op_type=args.decoder_type, + anchor=args.decoder64_anchor, + num_class=args.num_class, + stride=64, + faster_impl=args.faster + ) + graph = t.AddConcatOp( + inputs=["decoder_8", "decoder_16", "decoder_32", "decoder_64"], + outputs=["output"], + axis=1 + ) + elif args.with_nms: + graph = t.AddConcatOp( + inputs=["decoder_32", "decoder_16", "decoder_8"], + outputs=["output"], + axis=1 + ) + + graph.outputs.clear() + graph.add_output("output") + graph.outputs["output"].dtype = "FLOAT" + else: + graph.outputs.clear() + graph.add_output("decoder_8") + graph.outputs["decoder_8"].dtype = "FLOAT" + graph.add_output("decoder_16") + graph.outputs["decoder_16"].dtype = "FLOAT" + graph.add_output("decoder_32") + graph.outputs["decoder_32"].dtype = "FLOAT" + return graph + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--src", type=str) + parser.add_argument("--dst", type=str) + parser.add_argument("--decoder_type", type=str, choices=["YoloV3Decoder", "YoloV5Decoder", "YoloV7Decoder", "YoloxDecoder"]) + parser.add_argument("--with_nms", type=bool, default=False, help="engine with nms") + parser.add_argument("--decoder_input_names", nargs='+', type=str) + parser.add_argument("--decoder8_anchor", nargs='*', type=int) + parser.add_argument("--decoder16_anchor", nargs='*', type=int) + parser.add_argument("--decoder32_anchor", nargs='*', type=int) + parser.add_argument("--decoder64_anchor", nargs='*', type=int, default=None) + parser.add_argument("--num_class", type=int, default=80) + parser.add_argument("--faster", type=int, default=1) + parser.add_argument("--focus_input", type=str, 
default=None) + parser.add_argument("--focus_output", type=str, default=None) + parser.add_argument("--focus_last_node", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + + args = parse_args() + graph = create_source(args.src)() + graph = customize_ops(graph, args) + create_target(saved_path=args.dst).export(graph) + print("Surged onnx lies on", args.dst) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/inference.py b/models/cv/object_detection/yolov5_sample/igie/inference.py new file mode 100644 index 00000000..8042fc56 --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/inference.py @@ -0,0 +1,263 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +import glob +import json +import os +import time +import sys + +import torch +import numpy as np +import cuda.cuda as cuda +import cuda.cudart as cudart + +from coco_labels import coco80_to_coco91_class, labels +from common import save2json, box_class85to6 +from common import create_engine_context, get_io_bindings +from calibration_dataset import create_dataloaders +from datasets.post_process import get_post_process + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from tqdm import tqdm +from tqdm.contrib import tzip + +import tensorrt +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +import tvm +from tvm.contrib import graph_executor + +def init_by_igie(engine_path): + device = tvm.device("iluvatar", 0) + lib = tvm.runtime.load_module(engine_path) + module = graph_executor.GraphModule(lib["default"](device)) + # engine, context = module.engine, module.context + # inputs, outputs, allocations = module.inputs, module.outputs, module.allocations + return module + +def igie_infer(module, batch_data): + # set input + module.set_input(module.inputs[0]["name"], batch_data) + ### infer model + module.run() + # get output data + output = module.get_output(0) + return output + +def main(config): + + # Load dataloader + dataloader = create_dataloaders( + data_path=config.eval_dir, + annFile=config.coco_gt, + img_sz=config.imgsz, + batch_size=config.bsz, + step=config.loop_count, + data_process_type=config.data_process_type + ) + + # Load post process func + if config.test_mode == "MAP": + post_process_func = get_post_process(config.data_process_type) + + bsz = config.bsz + num_samples = 5000 + if config.loop_count > 0: + num_samples = bsz * config.loop_count + num_batch = len(dataloader) + print("=" * 30) + print(f"Test Mode : {'Asynchronous' if config.use_async else 'Synchronous'}") + print(f"Total sample : {num_samples}\nBatch_size : {bsz}\nRun Batch : {num_batch}") + print("=" * 30) + + json_result = [] + forward_time = 0.0 + class_map = coco80_to_coco91_class() + + + # Load Engine + module = init_by_igie(config.model_engine) + + + # Load nms_engine + if config.test_mode == "MAP" and config.nms_type == "GPU": + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + nms_engine, nms_context = create_engine_context(config.nms_engine, logger) + nms_inputs, nms_outputs, nms_allocations = get_io_bindings(nms_engine) + nms_output0 = np.zeros(nms_outputs[0]["shape"], nms_outputs[0]["dtype"]) + nms_output1 = np.zeros(nms_outputs[1]["shape"], nms_outputs[1]["dtype"]) + print(f"nms_output0 shape : {nms_output0.shape} nms_output0 type : {nms_output0.dtype}") + print(f"nms_output1 shape : {nms_output1.shape} nms_output1 type : {nms_output1.dtype}") + + # Warm up + if config.warm_up > 0: + 
print("\nWarm Start.") + for i in range(config.warm_up): + module.run() + print("Warm Done.") + + for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader): + batch_data = batch_data.numpy() + batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()] + # batch_img_id = batch_img_id.numpy() + + cur_bsz_sample = batch_data.shape[0] + + if config.test_mode == "MAP": + # Fetch output + output = igie_infer(module, batch_data) + + # Step 1 : prepare data to nms + _, box_num, box_unit = output.shape + if config.debug: + print(f"[Debug] box_num(25200) : {box_num}, box_unit(6) : {box_unit}") + + if config.decoder_faster == 0: + nms_input = box_class85to6(output.reshape(-1, box_unit)) + else: + nms_input = output + + # Step 2 : nms + # cpu nms(TODO) + + # gpu nms + if config.nms_type == "GPU": + + # Set nms input + err, = cuda.cuMemcpyHtoD(nms_inputs[0]["allocation"], nms_input, nms_input.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + nms_context.execute_v2(nms_allocations) + err, = cuda.cuMemcpyDtoH(nms_output0, nms_outputs[0]["allocation"], nms_outputs[0]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + err, = cuda.cuMemcpyDtoH(nms_output1, nms_outputs[1]["allocation"], nms_outputs[1]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + # Step 3 : post process + save + pred_boxes = post_process_func( + ori_img_shape=batch_img_shape, + imgsz=(config.imgsz, config.imgsz), + box_datas=nms_output0, + box_nums=nms_output1, + sample_num=cur_bsz_sample, + max_det=config.max_det + ) + save2json(batch_img_id, pred_boxes, json_result, class_map) + + # fps = num_samples / forward_time + + if config.test_mode == "FPS": + start_time = time.time() + for i in range(config.loop_count): + module.run() + end_time = time.time() + forward_time = end_time - start_time + fps = (config.loop_count*config.bsz) / forward_time + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(10) + + if config.test_mode == "MAP": + if len(json_result) == 0: + print("Predict zero box!") + exit(10) + + if not os.path.exists(config.pred_dir): + os.makedirs(config.pred_dir) + + pred_json = os.path.join( + config.pred_dir, f"{config.model_name}_{config.precision}_preds.json" + ) + with open(pred_json, "w") as f: + json.dump(json_result, f) + + anno_json = config.coco_gt + anno = COCO(anno_json) # init annotations api + pred = anno.loadRes(pred_json) # init predictions api + eval = COCOeval(anno, pred, "bbox") + + eval.evaluate() + eval.accumulate() + print( + f"==============================eval {config.model_name} {config.precision} coco map ==============================" + ) + eval.summarize() + + map, map50 = eval.stats[:2] + print("MAP@0.5 : ", map50) + print(f"Accuracy Check : Test {map50} >= target {config.map_target}") + if map50 >= config.map_target: + print("pass!") + exit() + else: + print("failed!") + exit(10) + +def parse_config(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_name", type=str, default="YOLOV5s", help="YOLOV3 YOLOV5 YOLOV7 YOLOX" + ) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") + parser.add_argument( + "--model_engine", + type=str, + default="", + help="model engine path", + ) + parser.add_argument( + "--nms_engine", + type=str, + default="", + 
help="nms engine path", + ) + parser.add_argument( + "--coco_gt", + type=str, + default="data/datasets/cv/coco2017/annotations/instances_val2017.json", + help="coco instances_val2017.json", + ) + parser.add_argument("--warm_up", type=int, default=3, help="warm_up count") + parser.add_argument("--loop_count", type=int, default=-1, help="loop count") + parser.add_argument( + "--eval_dir", + type=str, + default="data/datasets/cv/coco2017/val2017", + help="coco image dir", + ) + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=640, + help="inference size h,w", + ) + parser.add_argument("--max_det", type=int, default=1000, help="maximum detections per image") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--use_async", action="store_true") + parser.add_argument("--debug", action="store_true") + parser.add_argument("--pred_dir", type=str, default=".", help="pred save json dirs") + parser.add_argument("--map_target", type=float, default=0.56, help="target mAP") + parser.add_argument("--fps_target", type=float, default=-1.0, help="target fps") + parser.add_argument("--decoder_faster", type=int, default=0, help="decoder faster can use gpu nms directly") + parser.add_argument("--nms_type", type=str, default="GPU", help="GPU/CPU") + + config = parser.parse_args() + print("config:", config) + return config + +if __name__ == "__main__": + config = parse_config() + main(config) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/load_ixrt_plugin.py b/models/cv/object_detection/yolov5_sample/igie/load_ixrt_plugin.py new file mode 100644 index 00000000..932efbdf --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/load_ixrt_plugin.py @@ -0,0 +1,12 @@ +import ctypes +import tensorrt +from os.path import join, dirname, exists +def load_ixrt_plugin(logger=tensorrt.Logger(tensorrt.Logger.INFO), namespace="", dynamic_path=""): + if not dynamic_path: + dynamic_path = join(dirname(tensorrt.__file__), "lib", "libixrt_plugin.so") + if not exists(dynamic_path): + raise FileNotFoundError( + f"The ixrt_plugin lib {dynamic_path} is not existed, please provided effective plugin path!") + ctypes.CDLL(dynamic_path) + tensorrt.init_libnvinfer_plugins(logger, namespace) + print(f"Loaded plugin from {dynamic_path}") \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/modify_batchsize.py b/models/cv/object_detection/yolov5_sample/igie/modify_batchsize.py new file mode 100644 index 00000000..00ed65dd --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/modify_batchsize.py @@ -0,0 +1,37 @@ +import onnx +import argparse + +def change_input_dim(model, bsz): + batch_size = bsz + + # The following code changes the first dimension of every input to be batch_size + # Modify as appropriate ... note that this requires all inputs to + # have the same batch_size + inputs = model.graph.input + for input in inputs: + # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. + # Add checks as needed. 
+ dim1 = input.type.tensor_type.shape.dim[0] + # update dim to be a symbolic value + if isinstance(batch_size, str): + # set dynamic batch size + dim1.dim_param = batch_size + elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): + # set given batch size + dim1.dim_value = int(batch_size) + else: + # set batch size of 1 + dim1.dim_value = 1 + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--batch_size", type=int) + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +model = onnx.load(args.origin_model) +change_input_dim(model, args.batch_size) +onnx.save(model, args.output_model) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/quant.py b/models/cv/object_detection/yolov5_sample/igie/quant.py new file mode 100644 index 00000000..d73212ca --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/quant.py @@ -0,0 +1,55 @@ +import os +import random +import argparse +import numpy as np +from tensorrt.deploy import static_quantize + +import torch +import sys +sys.path.append("/home/haoyuan.chen/temp/inferencesamples/benchmarks/cv/detection/yolov3/tensorrt") +print(sys.path) +from calibration_dataset import create_dataloaders + +def setseed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str) + parser.add_argument("--model", type=str, default="yolov5s_with_decoder.onnx") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--dataset_dir", type=str, default="./coco2017/val2017") + parser.add_argument("--ann_file", type=str, default="./coco2017/annotations/instances_val2017.json") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") + parser.add_argument("--disable_quant_names", nargs='*', type=str) + parser.add_argument("--save_dir", type=str, help="save path", default=None) + parser.add_argument("--bsz", type=int, default=32) + parser.add_argument("--step", type=int, default=20) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--imgsz", type=int, default=640) + args = parser.parse_args() + return args + +args = parse_args() +setseed(args.seed) +model_name = args.model_name + +out_dir = args.save_dir +dataloader = create_dataloaders( + data_path=args.dataset_dir, + annFile=args.ann_file, + img_sz=args.imgsz, + batch_size=args.bsz, + step=args.step, + data_process_type=args.data_process_type +) +# print("disable_quant_names : ", args.disable_quant_names) +static_quantize(args.model, + calibration_dataloader=dataloader, + save_quant_onnx_path=os.path.join(out_dir, f"quantized_{model_name}.onnx"), + observer=args.observer, + data_preprocess=lambda x: x[0].to("cuda"), + quant_format="qdq", + disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/scripts/infer_yolov5_sample_int8_accuracy.sh b/models/cv/object_detection/yolov5_sample/igie/scripts/infer_yolov5_sample_int8_accuracy.sh new file mode 100644 index 00000000..c8729053 --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/scripts/infer_yolov5_sample_int8_accuracy.sh @@ -0,0 +1,209 @@ +#!/bin/bash + +EXIT_STATUS=0 +check_status() +{ + 
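+    # inference.py exits with code 10 when the accuracy/FPS target is missed;
+    # treat that as non-fatal only when TEST_PERF=0, otherwise mark the run as failed.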
ret_code=${PIPESTATUS[0]} + if [ ${ret_code} != 0 ]; then + [[ ${ret_code} -eq 10 && "${TEST_PERF:-1}" -eq 0 ]] || EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=-1 +TGT=0.626 +LOOP_COUNT=-1 +RUN_MODE=MAP +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=./ +DATASETS_DIR="${PROJ_DIR}/coco" +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR="${PROJ_DIR}/checkpoints" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${RUN_DIR}/config/YOLOV5M_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +CHECKPOINTS_DIR=${CHECKPOINTS_DIR}/tmp +mkdir -p ${CHECKPOINTS_DIR} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_fusion_cancat.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloV5Decoder \ + --with_nms True \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --decoder8_anchor ${DECODER_8_ANCHOR[@]} \ + --decoder16_anchor ${DECODER_16_ANCHOR[@]} \ 
+ --decoder32_anchor ${DECODER_32_ANCHOR[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_bs${BSZ}}_with_nms.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}_with_nms.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --bsz ${BSZ} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --map_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/scripts/infer_yolov5_sample_int8_performance.sh b/models/cv/object_detection/yolov5_sample/igie/scripts/infer_yolov5_sample_int8_performance.sh new file mode 100644 index 00000000..a9db515a --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/scripts/infer_yolov5_sample_int8_performance.sh @@ -0,0 +1,210 @@ +#!/bin/bash + +EXIT_STATUS=0 +check_status() +{ + ret_code=${PIPESTATUS[0]} + if [ ${ret_code} != 0 ]; then + [[ ${ret_code} -eq 10 && "${TEST_PERF:-1}" -eq 0 ]] || EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=3 +TGT=735 +LOOP_COUNT=100 +RUN_MODE=FPS +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=./ +DATASETS_DIR="${PROJ_DIR}/coco" +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR="${PROJ_DIR}/checkpoints" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${RUN_DIR}/config/YOLOV5M_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + 
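+# Stage all intermediate ONNX files and built engines under a tmp/ subdirectory of CHECKPOINTS_DIR.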
+CHECKPOINTS_DIR=${CHECKPOINTS_DIR}/tmp +mkdir -p ${CHECKPOINTS_DIR} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_fusion_no_cancat.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloV5Decoder \ + --with_nms False \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --decoder8_anchor ${DECODER_8_ANCHOR[@]} \ + --decoder16_anchor ${DECODER_16_ANCHOR[@]} \ + --decoder32_anchor ${DECODER_32_ANCHOR[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_bs${BSZ}_without_nms.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}_without_nms.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --bsz ${BSZ} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ 
${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --fps_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/simplify_model.py b/models/cv/object_detection/yolov5_sample/igie/simplify_model.py new file mode 100644 index 00000000..b4254b6f --- /dev/null +++ b/models/cv/object_detection/yolov5_sample/igie/simplify_model.py @@ -0,0 +1,21 @@ +import onnx +import argparse +from onnxsim import simplify + +# Simplify +def simplify_model(args): + onnx_model = onnx.load(args.origin_model) + model_simp, check = simplify(onnx_model) + model_simp = onnx.shape_inference.infer_shapes(model_simp) + onnx.save(model_simp, args.output_model) + print(" Simplify onnx Done.") + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +simplify_model(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolov7_sample/igie/build_engine.py b/models/cv/object_detection/yolov7_sample/igie/build_engine.py new file mode 100644 index 00000000..cdace84b --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/build_engine.py @@ -0,0 +1,39 @@ +import os +import cv2 +import argparse +import numpy as np + +import tvm +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + + +def main(config): + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + precision = config.precision + if config.precision == "float16": + precision = "fp16" + + inputs_info = {"images": ([config.bsz, 3, 640, 640], "float32")} + mod, params = import_model_to_igie(config.model, inputs_info, outputs_info=None, precision=precision, backend="tensorrt") + lib = relay.build(mod, target=target, params=params, precision=precision, device=device) + lib.export_library(config.engine) + print("Build engine done!") + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--bsz", type=int) + # engine args + parser.add_argument("--engine", type=str, default=None) + + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolov7_sample/igie/build_nms_engine.py 
b/models/cv/object_detection/yolov7_sample/igie/build_nms_engine.py new file mode 100644 index 00000000..51d70747 --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/build_nms_engine.py @@ -0,0 +1,82 @@ +import os +import argparse +import torch +import onnx +from onnx import helper +from onnx import TensorProto, numpy_helper +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +def create_onnx(args): + nms = helper.make_node( + "DetectionNMS_IxRT", + name="NMS", + inputs=["nms_input"], + outputs=["nms_output0", "nms_output1"], + nMaxKeep=args.max_box_pre_img, + fIoUThresh=args.iou_thresh, + fScoreThresh=args.score_thresh + ) + graph = helper.make_graph( + nodes=[nms], + name="gpu_nms", + inputs=[ + helper.make_tensor_value_info( + "nms_input", onnx.TensorProto.FLOAT, (args.bsz, args.all_box_num, 6) + ) + ], + outputs=[ + helper.make_tensor_value_info( + "nms_output0", onnx.TensorProto.FLOAT, (args.bsz, args.max_box_pre_img, 6) + ), + helper.make_tensor_value_info( + "nms_output1", onnx.TensorProto.INT32, (args.bsz,) + ) + ], + initializer=[] + ) + + op = onnx.OperatorSetIdProto() + op.version = 13 + model = onnx.helper.make_model(graph) + + model = onnx.helper.make_model(graph, opset_imports=[op]) + onnx_path = args.path + "/nms.onnx" + onnx.save(model, onnx_path) + +def build_engine(args): + onnx_path = args.path + "/nms.onnx" + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(onnx_path) + plan = builder.build_serialized_network(network, build_config) + + engine_path = args.path + "/nms.engine" + with open(engine_path, "wb") as f: + f.write(plan) + +def main(args): + create_onnx(args) + build_engine(args) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--bsz", type=int, default=1, help="batch size") + parser.add_argument("--path", type=str) + parser.add_argument("--all_box_num", type=int, default=25200) + parser.add_argument("--max_box_pre_img", type=int, default=1000) + parser.add_argument("--iou_thresh", type=float, default=0.6) + parser.add_argument("--score_thresh", type=float, default=0.001) + + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolov7_sample/igie/calibration_dataset.py b/models/cv/object_detection/yolov7_sample/igie/calibration_dataset.py new file mode 100644 index 00000000..578e013d --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/calibration_dataset.py @@ -0,0 +1,31 @@ +import os +import torch +import torchvision.datasets +from torch.utils.data import DataLoader + + + +from datasets.coco import CocoDetection + +def create_dataloaders(data_path, annFile, img_sz=640, batch_size=32, step=32, workers=2, data_process_type="yolov5"): + dataset = CocoDetection( + root=data_path, + annFile=annFile, + img_size=img_sz, + data_process_type=data_process_type + ) + calibration_dataset = dataset + num_samples = min(5000, batch_size * step) + if num_samples > 0: + calibration_dataset = torch.utils.data.Subset( + dataset, indices=range(num_samples) + ) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=False, + batch_size=batch_size, 
+ drop_last=False, + num_workers=workers, + ) + return calibration_dataloader \ No newline at end of file diff --git a/models/cv/object_detection/yolov7_sample/igie/ci/prepare.sh b/models/cv/object_detection/yolov7_sample/igie/ci/prepare.sh new file mode 100644 index 00000000..79816c74 --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/ci/prepare.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +pip3 install pycocotools +pip3 install tqdm +pip3 install opencv-python==4.6.0.66 + +mkdir -p checkpoints +ln -s /mnt/deepspark/data/datasets/corex-inference-data-4.0.0/checkpoints/yolov7/yolov7m.onnx ./checkpoints/ \ No newline at end of file diff --git a/models/cv/object_detection/yolov7_sample/igie/coco_labels.py b/models/cv/object_detection/yolov7_sample/igie/coco_labels.py new file mode 100644 index 00000000..69d38878 --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/coco_labels.py @@ -0,0 +1,89 @@ +labels = [ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", +] +def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) + return [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + +__all__ = ["labels"] diff --git a/models/cv/object_detection/yolov7_sample/igie/common.py b/models/cv/object_detection/yolov7_sample/igie/common.py new file mode 100644 index 00000000..5f543555 --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/common.py @@ -0,0 +1,86 @@ +import numpy as np +from tqdm import tqdm + +import tensorrt +import cuda.cuda as cuda +import cuda.cudart as cudart + +# input : [bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)] +# output : [bsz, box_num, 
6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)] +def box_class85to6(input): + center_x_y = input[:, :2] + side = input[:, 2:4] + conf = input[:, 4:5] + class_id = np.argmax(input[:, 5:], axis = -1) + class_id = class_id.astype(np.float32).reshape(-1, 1) + 1 + max_prob = np.max(input[:, 5:], axis = -1).reshape(-1, 1) + x1_y1 = center_x_y - 0.5 * side + x2_y2 = center_x_y + 0.5 * side + nms_input = np.concatenate([x1_y1, x2_y2, class_id, max_prob*conf], axis = -1) + return nms_input + +def save2json(batch_img_id, pred_boxes, json_result, class_trans): + for i, boxes in enumerate(pred_boxes): + if boxes is not None: + image_id = int(batch_img_id[i]) + # have no target + if image_id == -1: + continue + for x, y, w, h, c, p in boxes: + x, y, w, h, p = float(x), float(y), float(w), float(h), float(p) + c = int(c) + json_result.append( + { + "image_id": image_id, + "category_id": class_trans[c - 1], + "bbox": [x, y, w, h], + "score": p, + } + ) + +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + err, allocation = cudart.cudaMalloc(size) + assert(err == cuda.CUresult.CUDA_SUCCESS) + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + "nbytes": size, + } + print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations \ No newline at end of file diff --git a/models/cv/object_detection/yolov7_sample/igie/config/YOLOV7M_CONFIG b/models/cv/object_detection/yolov7_sample/igie/config/YOLOV7M_CONFIG new file mode 100644 index 00000000..51bd2d14 --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/config/YOLOV7M_CONFIG @@ -0,0 +1,49 @@ +# BSZ : 构建engine以及推理时的batchsize +# IMGSIZE : 模型输入hw大小 +# RUN_MODE : [FPS, MAP] +# PRECISION : [float16, int8] +# MODEL_NAME : 生成onnx/engine的basename +# ORIGINE_MODEL : 原始onnx文件 +# COCO_GT : COCOEVAL标签文件 +# DATASET_DIR : 量化/推理数据集路径 +# CHECKPOINTS_DIR : 存放生成的onnx/engine路径 +# LAYER_FUSION : decoder部分走融合算子实现 0不融合 1融合 +# DECODER_FASTER : 有两种融合实现,faster版本速度快且可以直接对接gpu nms;另一种实现的输出和onnx保持一致. 
1:faster +IMGSIZE=640 +MODEL_NAME=yolov7m +ORIGINE_MODEL=yolov7m.onnx +DATA_PROCESS_TYPE=yolov5 +MODEL_INPUT_NAMES=(images) + +LAYER_FUSION=1 +DECODER_FASTER=1 +DECODER_NUM_CLASS=80 +DECODER_INPUT_NAMES=(/model.105/m.0/Conv_output_0 /model.105/m.1/Conv_output_0 /model.105/m.2/Conv_output_0) +DECODER_8_ANCHOR=(12 16 19 36 40 28) +DECODER_16_ANCHOR=(36 75 76 55 72 146) +DECODER_32_ANCHOR=(142 110 192 243 459 401) + +# NMS CONFIG + # IOU_THRESH : iou阈值 + # SCORE_THRESH : bbox置信度阈值 + # MAX_BOX_PRE_IMG : 每张图片预测bbox的数量上限 + # ALL_BOX_NUM : nms接收每张图片的box数量 + # NMS_TYPE : GPU/CPU(TODO) +IOU_THRESH=0.6 +SCORE_THRESH=0.001 +MAX_BOX_PRE_IMG=1000 +ALL_BOX_NUM=25200 +NMS_TYPE=GPU + +# QUANT CONFIG (仅PRECISION为int8时生效) + # QUANT_OBSERVER : 量化策略,可选 [hist_percentile, percentile, minmax, entropy, ema] + # QUANT_BATCHSIZE : 量化时组dataloader的batchsize, 最好和onnx中的batchsize保持一致,有些op可能推导shape错误(比如Reshape) + # QUANT_STEP : 量化步数 + # QUANT_SEED : 随机种子 保证量化结果可复现 + # QUANT_EXIST_ONNX : 如果有其他来源的量化模型则填写 +QUANT_OBSERVER=hist_percentile +QUANT_BATCHSIZE=1 +QUANT_STEP=32 +QUANT_SEED=42 +DISABLE_QUANT_LIST=() +QUANT_EXIST_ONNX= \ No newline at end of file diff --git a/models/cv/object_detection/yolov7_sample/igie/cut_model.py b/models/cv/object_detection/yolov7_sample/igie/cut_model.py new file mode 100644 index 00000000..af0a3a4f --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/cut_model.py @@ -0,0 +1,16 @@ +import onnx +import argparse +from onnxsim import simplify + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--input_model", type=str) + parser.add_argument("--output_model", type=str) + parser.add_argument("--input_names", nargs='+', type=str) + parser.add_argument("--output_names", nargs='+', type=str) + args = parser.parse_args() + return args + +args = parse_args() +onnx.utils.extract_model(args.input_model, args.output_model, args.input_names, args.output_names) +print(" Cut Model Done.") \ No newline at end of file diff --git a/models/cv/object_detection/yolov7_sample/igie/datasets/__init__.py b/models/cv/object_detection/yolov7_sample/igie/datasets/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/models/cv/object_detection/yolov7_sample/igie/datasets/coco.py b/models/cv/object_detection/yolov7_sample/igie/datasets/coco.py new file mode 100644 index 00000000..7f355b84 --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/datasets/coco.py @@ -0,0 +1,116 @@ +import os.path +from typing import Any, Callable, List, Optional, Tuple + +import cv2 + +from .vision import VisionDataset +from .pre_process import get_post_process +class CocoDetection(VisionDataset): + """`MS Coco Detection `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. 
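+        img_size (int): Square input resolution the images are resized/padded to (e.g. 640).
+        data_process_type (string): Preprocessing variant to apply, one of
+            ``yolov5``, ``yolov3`` or ``yolox``.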
+ """ + + def __init__( + self, + root: str, + annFile: str, + img_size: int, + data_process_type: str, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + transforms: Optional[Callable] = None, + + ) -> None: + super().__init__(root, transforms, transform, target_transform) + from pycocotools.coco import COCO + + self.coco = COCO(annFile) + self.ids = list(sorted(self.coco.imgs.keys())) + self.img_size = img_size + + self.transforms = get_post_process(data_process_type) + + def _load_image(self, id: int): + path = self.coco.loadImgs(id)[0]["file_name"] + data = cv2.imread(os.path.join(self.root, path)) + return data + + def _load_target(self, id: int) -> List[Any]: + return self.coco.loadAnns(self.coco.getAnnIds(id)) + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + id = self.ids[index] + image = self._load_image(id) + target = self._load_target(id) + origin_shape = image.shape[:2] + + if self.transforms is not None: + image = self.transforms(image, self.img_size) + + if len(target) > 0: + image_id = target[0]["image_id"] + else: + # have no target + image_id = -1 + return image, origin_shape, image_id + + def __len__(self) -> int: + return len(self.ids) + + +class CocoCaptions(CocoDetection): + """`MS Coco Captions `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + + Example: + + .. 
code:: python + + import torchvision.datasets as dset + import torchvision.transforms as transforms + cap = dset.CocoCaptions(root = 'dir where images are', + annFile = 'json annotation file', + transform=transforms.PILToTensor()) + + print('Number of samples: ', len(cap)) + img, target = cap[3] # load 4th sample + + print("Image Size: ", img.size()) + print(target) + + Output: :: + + Number of samples: 82783 + Image Size: (3L, 427L, 640L) + [u'A plane emitting smoke stream flying over a mountain.', + u'A plane darts across a bright blue sky behind a mountain covered in snow', + u'A plane leaves a contrail above the snowy mountain top.', + u'A mountain that has a plane flying overheard in the distance.', + u'A mountain view with a plume of smoke in the background'] + + """ + + def _load_target(self, id: int) -> List[str]: + return [ann["caption"] for ann in super()._load_target(id)] diff --git a/models/cv/object_detection/yolov7_sample/igie/datasets/common.py b/models/cv/object_detection/yolov7_sample/igie/datasets/common.py new file mode 100644 index 00000000..e120e00f --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/datasets/common.py @@ -0,0 +1,66 @@ +import cv2 +import math +import numpy as np + +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + +def scale_boxes(net_shape, boxes, ori_shape, use_letterbox=False): + # Rescale boxes (xyxy) from net_shape to ori_shape + + if use_letterbox: + + gain = min( + net_shape[0] / ori_shape[0], net_shape[1] / ori_shape[1] + ) # gain = new / old + pad = (net_shape[1] - ori_shape[1] * gain) / 2, ( + net_shape[0] - ori_shape[0] * gain + ) / 2.0 + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + else: + x_scale, y_scale = net_shape[1] / ori_shape[1], net_shape[0] / ori_shape[0] + + boxes[:, 0] /= x_scale + boxes[:, 1] /= y_scale + boxes[:, 2] /= x_scale + boxes[:, 3] /= y_scale + + clip_boxes(boxes, ori_shape) + return boxes + +def clip_boxes(boxes, shape): + + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 \ No newline at end of file diff --git 
a/models/cv/object_detection/yolov7_sample/igie/datasets/post_process.py b/models/cv/object_detection/yolov7_sample/igie/datasets/post_process.py new file mode 100644 index 00000000..a58c02f8 --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/datasets/post_process.py @@ -0,0 +1,115 @@ +import cv2 +import math +import numpy as np + +from .common import letterbox, scale_boxes, clip_boxes + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Postprocess + elif data_process_type == "yolov3": + return Yolov3Postprocess + elif data_process_type == "yolox": + return YoloxPostprocess + return None + +def Yolov3Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=False + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def Yolov5Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=True + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def YoloxPostprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + boxes = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + r = min(imgsz[0]/ori_img_shape[0][i], imgsz[1]/ori_img_shape[1][i]) + boxes[:, :4] /= r + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + clip_boxes(boxes, (ori_img_shape[0][i], ori_img_shape[1][i])) + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box \ No newline at end of file diff --git a/models/cv/object_detection/yolov7_sample/igie/datasets/pre_process.py b/models/cv/object_detection/yolov7_sample/igie/datasets/pre_process.py new file mode 100644 index 00000000..8cc643a8 --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/datasets/pre_process.py @@ -0,0 +1,56 @@ +import cv2 +import math +import numpy as np + +from .common import letterbox + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Preprocess + elif data_process_type == "yolov3": + return Yolov3Preprocess + elif data_process_type == "yolox": + return YoloxPreprocess + return None + +def Yolov3Preprocess(image, img_size): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / max(h0, w0) # ratio + + image = cv2.resize(image, (img_size, img_size)) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR 
to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def Yolov5Preprocess(image, img_size, augment=False): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / max(h0, w0) # ratio + + if r != 1: # if sizes are not equal + interp = cv2.INTER_LINEAR if (augment or r > 1) else cv2.INTER_AREA + image = cv2.resize(image, (math.ceil(w0 * r), math.ceil(h0 * r)), interpolation=interp) + + # shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size rect == True + + image, ratio, dwdh = letterbox(image, new_shape=img_size, auto=False, scaleup=False) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def YoloxPreprocess(img, img_size, swap=(2,0,1)): + + padded_img = np.ones((img_size, img_size, 3), dtype=np.uint8) * 114 + r = min(img_size / img.shape[0], img_size / img.shape[1]) + resized_img = cv2.resize( + img, + (int(img.shape[1] * r), int(img.shape[0] * r)), + interpolation=cv2.INTER_LINEAR, + ).astype(np.uint8) + + padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img + padded_img = padded_img.transpose(swap) + padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) + + return padded_img \ No newline at end of file diff --git a/models/cv/object_detection/yolov7_sample/igie/datasets/vision.py b/models/cv/object_detection/yolov7_sample/igie/datasets/vision.py new file mode 100644 index 00000000..32da4a78 --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/datasets/vision.py @@ -0,0 +1,136 @@ +import os +from typing import Any, Callable, List, Optional, Tuple + +import torch +import torch.utils.data as data + +from types import FunctionType + +def _log_api_usage_once(obj: Any) -> None: + + """ + Logs API usage(module and name) within an organization. + In a large ecosystem, it's often useful to track the PyTorch and + TorchVision APIs usage. This API provides the similar functionality to the + logging module in the Python stdlib. It can be used for debugging purpose + to log which methods are used and by default it is inactive, unless the user + manually subscribes a logger via the `SetAPIUsageLogger method `_. + Please note it is triggered only once for the same API call within a process. + It does not collect any data from open-source users since it is no-op by default. + For more information, please refer to + * PyTorch note: https://pytorch.org/docs/stable/notes/large_scale_deployments.html#api-usage-logging; + * Logging policy: https://github.com/pytorch/vision/issues/5052; + + Args: + obj (class instance or method): an object to extract info from. + """ + module = obj.__module__ + if not module.startswith("torchvision"): + module = f"torchvision.internal.{module}" + name = obj.__class__.__name__ + if isinstance(obj, FunctionType): + name = obj.__name__ + torch._C._log_api_usage_once(f"{module}.{name}") + +class VisionDataset(data.Dataset): + """ + Base Class For making datasets which are compatible with torchvision. + It is necessary to override the ``__getitem__`` and ``__len__`` method. + + Args: + root (string): Root directory of dataset. + transforms (callable, optional): A function/transforms that takes in + an image and a label and returns the transformed versions of both. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. 
E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + + .. note:: + + :attr:`transforms` and the combination of :attr:`transform` and :attr:`target_transform` are mutually exclusive. + """ + + _repr_indent = 4 + + def __init__( + self, + root: str, + transforms: Optional[Callable] = None, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + ) -> None: + _log_api_usage_once(self) + if isinstance(root, str): + root = os.path.expanduser(root) + self.root = root + + has_transforms = transforms is not None + has_separate_transform = transform is not None or target_transform is not None + if has_transforms and has_separate_transform: + raise ValueError("Only transforms or transform/target_transform can be passed as argument") + + # for backwards-compatibility + self.transform = transform + self.target_transform = target_transform + + if has_separate_transform: + transforms = StandardTransform(transform, target_transform) + self.transforms = transforms + + def __getitem__(self, index: int) -> Any: + """ + Args: + index (int): Index + + Returns: + (Any): Sample and meta data, optionally transformed by the respective transforms. + """ + raise NotImplementedError + + def __len__(self) -> int: + raise NotImplementedError + + def __repr__(self) -> str: + head = "Dataset " + self.__class__.__name__ + body = [f"Number of datapoints: {self.__len__()}"] + if self.root is not None: + body.append(f"Root location: {self.root}") + body += self.extra_repr().splitlines() + if hasattr(self, "transforms") and self.transforms is not None: + body += [repr(self.transforms)] + lines = [head] + [" " * self._repr_indent + line for line in body] + return "\n".join(lines) + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def extra_repr(self) -> str: + return "" + + +class StandardTransform: + def __init__(self, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None) -> None: + self.transform = transform + self.target_transform = target_transform + + def __call__(self, input: Any, target: Any) -> Tuple[Any, Any]: + if self.transform is not None: + input = self.transform(input) + if self.target_transform is not None: + target = self.target_transform(target) + return input, target + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def __repr__(self) -> str: + body = [self.__class__.__name__] + if self.transform is not None: + body += self._format_transform_repr(self.transform, "Transform: ") + if self.target_transform is not None: + body += self._format_transform_repr(self.target_transform, "Target transform: ") + + return "\n".join(body) diff --git a/models/cv/object_detection/yolov7_sample/igie/deploy.py b/models/cv/object_detection/yolov7_sample/igie/deploy.py new file mode 100644 index 00000000..83f80a9e --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/deploy.py @@ -0,0 +1,125 @@ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +import argparse +from tensorrt.deploy.api import GraphTransform, create_source, create_target + +class Transform: + def __init__(self, graph): + self.t = GraphTransform(graph) + self.graph 
= graph + + def ReplaceFocus(self, input_edge, outputs, to_op): + input_var = self.graph.get_variable(input_edge) + op = self.graph.get_operator(to_op) + self.t.delete_operators_between_var_op( + from_var=input_var, to_op=op + ) + self.t.make_operator( + "Focus", inputs=input_edge, outputs=outputs + ) + return self.graph + + def AddYoloDecoderOp(self, inputs: list, outputs: list, op_type, **attributes): + if attributes["anchor"] is None: + del attributes["anchor"] + self.t.make_operator( + op_type, inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + + def AddConcatOp(self, inputs: list, outputs, **attributes): + self.t.make_operator( + "Concat", inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + +def customize_ops(graph, args): + t = Transform(graph) + fuse_focus = args.focus_input is not None and args.focus_output is not None and args.focus_last_node is not None + if fuse_focus: + graph = t.ReplaceFocus( + input_edge=args.focus_input, + outputs=args.focus_output, + to_op=args.focus_last_node + ) + decoder_input = args.decoder_input_names + num = len(decoder_input) // 3 + graph = t.AddYoloDecoderOp( + inputs=decoder_input[:num], + outputs=["decoder_8"], + op_type=args.decoder_type, + anchor=args.decoder8_anchor, + num_class=args.num_class, + stride=8, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num:num*2], + outputs=["decoder_16"], + op_type=args.decoder_type, + anchor=args.decoder16_anchor, + num_class=args.num_class, + stride=16, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2:num*2+1], + outputs=["decoder_32"], + op_type=args.decoder_type, + anchor=args.decoder32_anchor, + num_class=args.num_class, + stride=32, + faster_impl=args.faster + ) + if args.decoder64_anchor is not None: + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2+1:], + outputs=["decoder_64"], + op_type=args.decoder_type, + anchor=args.decoder64_anchor, + num_class=args.num_class, + stride=64, + faster_impl=args.faster + ) + graph = t.AddConcatOp( + inputs=["decoder_8", "decoder_16", "decoder_32", "decoder_64"], + outputs=["output"], + axis=1 + ) + else: + graph = t.AddConcatOp( + inputs=["decoder_32", "decoder_16", "decoder_8"], + outputs=["output"], + axis=1 + ) + + graph.outputs.clear() + graph.add_output("output") + graph.outputs["output"].dtype = "FLOAT" + return graph + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--src", type=str) + parser.add_argument("--dst", type=str) + parser.add_argument("--decoder_type", type=str, choices=["YoloV3Decoder", "YoloV5Decoder", "YoloV7Decoder", "YoloxDecoder"]) + parser.add_argument("--decoder_input_names", nargs='+', type=str) + parser.add_argument("--decoder8_anchor", nargs='*', type=int) + parser.add_argument("--decoder16_anchor", nargs='*', type=int) + parser.add_argument("--decoder32_anchor", nargs='*', type=int) + parser.add_argument("--decoder64_anchor", nargs='*', type=int, default=None) + parser.add_argument("--num_class", type=int, default=80) + parser.add_argument("--faster", type=int, default=1) + parser.add_argument("--focus_input", type=str, default=None) + parser.add_argument("--focus_output", type=str, default=None) + parser.add_argument("--focus_last_node", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + + args = parse_args() + graph = create_source(args.src)() + graph = customize_ops(graph, args) + create_target(saved_path=args.dst).export(graph) + 
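+    # Illustrative invocation only: the tensor names below are placeholders and the
+    # anchors are the commonly quoted YOLOv7 defaults, not values read from this
+    # repo's config files (the run scripts pass the real names from *_CONFIG):
+    #   python3 deploy.py --src quantized_yolov7.onnx --dst yolov7_fusion.onnx \
+    #       --decoder_type YoloV5Decoder --decoder_input_names conv_out_8 conv_out_16 conv_out_32 \
+    #       --decoder8_anchor 12 16 19 36 40 28 --decoder16_anchor 36 75 76 55 72 146 \
+    #       --decoder32_anchor 142 110 192 243 459 401 --num_class 80 --faster 1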
print("Surged onnx lies on", args.dst) \ No newline at end of file diff --git a/models/cv/object_detection/yolov7_sample/igie/inference.py b/models/cv/object_detection/yolov7_sample/igie/inference.py new file mode 100644 index 00000000..517a3f6d --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/inference.py @@ -0,0 +1,268 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +import glob +import json +import os +import time +import sys + +import torch +import numpy as np +import cuda.cuda as cuda +import cuda.cudart as cudart + +from coco_labels import coco80_to_coco91_class, labels +from common import save2json, box_class85to6 +from common import create_engine_context, get_io_bindings +from calibration_dataset import create_dataloaders +from datasets.post_process import get_post_process + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from tqdm import tqdm +from tqdm.contrib import tzip + +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + + +import tvm +from tvm.contrib import graph_executor + +def init_by_igie(engine_path): + device = tvm.device("iluvatar", 0) + lib = tvm.runtime.load_module(engine_path) + module = graph_executor.GraphModule(lib["default"](device)) + # engine, context = module.engine, module.context + # inputs, outputs, allocations = module.inputs, module.outputs, module.allocations + return module + +def igie_infer(module, batch_data): + # set input + module.set_input(module.inputs[0]["name"], batch_data) + ### infer model + module.run() + # get output data + output = module.get_output(0) + return output + + +def main(config): + + # Load dataloader + dataloader = create_dataloaders( + data_path=config.eval_dir, + annFile=config.coco_gt, + img_sz=config.imgsz, + batch_size=config.bsz, + step=config.loop_count, + data_process_type=config.data_process_type + ) + + # Load post process func + if config.test_mode == "MAP": + post_process_func = get_post_process(config.data_process_type) + + bsz = config.bsz + num_samples = 5000 + if config.loop_count > 0: + num_samples = bsz * config.loop_count + num_batch = len(dataloader) + print("=" * 30) + print(f"Test Mode : {'Asynchronous' if config.use_async else 'Synchronous'}") + print(f"Total sample : {num_samples}\nBatch_size : {bsz}\nRun Batch : {num_batch}") + print("=" * 30) + + json_result = [] + forward_time = 0.0 + class_map = coco80_to_coco91_class() + + # Load Engine + module = init_by_igie(config.model_engine) + + # Load nms_engine + if config.test_mode == "MAP" and config.nms_type == "GPU": + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + nms_engine, nms_context = create_engine_context(config.nms_engine, logger) + nms_inputs, nms_outputs, nms_allocations = get_io_bindings(nms_engine) + nms_output0 = np.zeros(nms_outputs[0]["shape"], nms_outputs[0]["dtype"]) + nms_output1 = np.zeros(nms_outputs[1]["shape"], nms_outputs[1]["dtype"]) + print(f"nms_output0 shape : {nms_output0.shape} nms_output0 type : {nms_output0.dtype}") + print(f"nms_output1 shape : {nms_output1.shape} nms_output1 type : {nms_output1.dtype}") + + # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + module.run() + print("Warm Done.") + + + for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader): + batch_data = batch_data.numpy() + batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()] + # batch_img_id = batch_img_id.numpy() + cur_bsz_sample = batch_data.shape[0] + + err, = 
cuda.cuMemcpyHtoD(module.inputs[0]["allocation"], batch_data, batch_data.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + module.run() + + if config.test_mode == "MAP": + # Fetch output + output = igie_infer(module, batch_data) + + # Step 1 : prepare data to nms + _, box_num, box_unit = output.shape + if config.debug: + print(f"[Debug] box_num(25200) : {box_num}, box_unit(6) : {box_unit}") + + if config.decoder_faster == 0: + nms_input = box_class85to6(output.reshape(-1, box_unit)) + else: + nms_input = output + + # Step 2 : nms + # cpu nms(TODO) + + # gpu nms + if config.nms_type == "GPU": + + # Set nms input + err, = cuda.cuMemcpyHtoD(nms_inputs[0]["allocation"], nms_input, nms_input.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + nms_context.execute_v2(nms_allocations) + err, = cuda.cuMemcpyDtoH(nms_output0, nms_outputs[0]["allocation"], nms_outputs[0]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + err, = cuda.cuMemcpyDtoH(nms_output1, nms_outputs[1]["allocation"], nms_outputs[1]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + # Step 3 : post process + save + pred_boxes = post_process_func( + ori_img_shape=batch_img_shape, + imgsz=(config.imgsz, config.imgsz), + box_datas=nms_output0, + box_nums=nms_output1, + sample_num=cur_bsz_sample, + max_det=config.max_det + ) + save2json(batch_img_id, pred_boxes, json_result, class_map) + + + if config.test_mode == "FPS": + start_time = time.time() + for i in range(config.loop_count): + module.run() + end_time = time.time() + forward_time = end_time - start_time + fps = (config.loop_count*config.bsz) / forward_time + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(10) + + if config.test_mode == "MAP": + if len(json_result) == 0: + print("Predict zero box!") + exit(10) + + if not os.path.exists(config.pred_dir): + os.makedirs(config.pred_dir) + + pred_json = os.path.join( + config.pred_dir, f"{config.model_name}_{config.precision}_preds.json" + ) + with open(pred_json, "w") as f: + json.dump(json_result, f) + + anno_json = config.coco_gt + anno = COCO(anno_json) # init annotations api + pred = anno.loadRes(pred_json) # init predictions api + eval = COCOeval(anno, pred, "bbox") + + eval.evaluate() + eval.accumulate() + print( + f"==============================eval {config.model_name} {config.precision} coco map ==============================" + ) + eval.summarize() + + map, map50 = eval.stats[:2] + print("MAP@0.5 : ", map50) + print(f"Accuracy Check : Test {map50} >= target {config.map_target}") + if map50 >= config.map_target: + print("pass!") + exit() + else: + print("failed!") + exit(10) + +def parse_config(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_name", type=str, default="YOLOV5s", help="YOLOV3 YOLOV5 YOLOV7 YOLOX" + ) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") + parser.add_argument( + "--model_engine", + type=str, + default="", + help="model engine path", + ) + parser.add_argument( + "--nms_engine", + type=str, + default="", + help="nms engine path", + ) + parser.add_argument( + "--coco_gt", + type=str, + default="data/datasets/cv/coco2017/annotations/instances_val2017.json", + help="coco instances_val2017.json", + ) + parser.add_argument("--warm_up", type=int, default=3, 
help="warm_up count") + parser.add_argument("--loop_count", type=int, default=-1, help="loop count") + parser.add_argument( + "--eval_dir", + type=str, + default="data/datasets/cv/coco2017/val2017", + help="coco image dir", + ) + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=640, + help="inference size h,w", + ) + parser.add_argument("--max_det", type=int, default=1000, help="maximum detections per image") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--use_async", action="store_true") + parser.add_argument("--debug", action="store_true") + parser.add_argument("--pred_dir", type=str, default=".", help="pred save json dirs") + parser.add_argument("--map_target", type=float, default=0.56, help="target mAP") + parser.add_argument("--fps_target", type=float, default=-1.0, help="target fps") + parser.add_argument("--decoder_faster", type=int, default=0, help="decoder faster can use gpu nms directly") + parser.add_argument("--nms_type", type=str, default="GPU", help="GPU/CPU") + + config = parser.parse_args() + print("config:", config) + return config + +if __name__ == "__main__": + config = parse_config() + main(config) diff --git a/models/cv/object_detection/yolov7_sample/igie/load_ixrt_plugin.py b/models/cv/object_detection/yolov7_sample/igie/load_ixrt_plugin.py new file mode 100644 index 00000000..932efbdf --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/load_ixrt_plugin.py @@ -0,0 +1,12 @@ +import ctypes +import tensorrt +from os.path import join, dirname, exists +def load_ixrt_plugin(logger=tensorrt.Logger(tensorrt.Logger.INFO), namespace="", dynamic_path=""): + if not dynamic_path: + dynamic_path = join(dirname(tensorrt.__file__), "lib", "libixrt_plugin.so") + if not exists(dynamic_path): + raise FileNotFoundError( + f"The ixrt_plugin lib {dynamic_path} is not existed, please provided effective plugin path!") + ctypes.CDLL(dynamic_path) + tensorrt.init_libnvinfer_plugins(logger, namespace) + print(f"Loaded plugin from {dynamic_path}") \ No newline at end of file diff --git a/models/cv/object_detection/yolov7_sample/igie/modify_batchsize.py b/models/cv/object_detection/yolov7_sample/igie/modify_batchsize.py new file mode 100644 index 00000000..00ed65dd --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/modify_batchsize.py @@ -0,0 +1,37 @@ +import onnx +import argparse + +def change_input_dim(model, bsz): + batch_size = bsz + + # The following code changes the first dimension of every input to be batch_size + # Modify as appropriate ... note that this requires all inputs to + # have the same batch_size + inputs = model.graph.input + for input in inputs: + # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. + # Add checks as needed. 
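+        # dim[0] is assumed to be the batch dimension of every graph input; it is
+        # rewritten in place below (dim_param for a symbolic batch, dim_value for a
+        # fixed one) and the updated model is written out by onnx.save at the bottom.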
+ dim1 = input.type.tensor_type.shape.dim[0] + # update dim to be a symbolic value + if isinstance(batch_size, str): + # set dynamic batch size + dim1.dim_param = batch_size + elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): + # set given batch size + dim1.dim_value = int(batch_size) + else: + # set batch size of 1 + dim1.dim_value = 1 + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--batch_size", type=int) + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +model = onnx.load(args.origin_model) +change_input_dim(model, args.batch_size) +onnx.save(model, args.output_model) \ No newline at end of file diff --git a/models/cv/object_detection/yolov7_sample/igie/quant.py b/models/cv/object_detection/yolov7_sample/igie/quant.py new file mode 100644 index 00000000..d73212ca --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/quant.py @@ -0,0 +1,55 @@ +import os +import random +import argparse +import numpy as np +from tensorrt.deploy import static_quantize + +import torch +import sys +sys.path.append("/home/haoyuan.chen/temp/inferencesamples/benchmarks/cv/detection/yolov3/tensorrt") +print(sys.path) +from calibration_dataset import create_dataloaders + +def setseed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str) + parser.add_argument("--model", type=str, default="yolov5s_with_decoder.onnx") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--dataset_dir", type=str, default="./coco2017/val2017") + parser.add_argument("--ann_file", type=str, default="./coco2017/annotations/instances_val2017.json") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") + parser.add_argument("--disable_quant_names", nargs='*', type=str) + parser.add_argument("--save_dir", type=str, help="save path", default=None) + parser.add_argument("--bsz", type=int, default=32) + parser.add_argument("--step", type=int, default=20) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--imgsz", type=int, default=640) + args = parser.parse_args() + return args + +args = parse_args() +setseed(args.seed) +model_name = args.model_name + +out_dir = args.save_dir +dataloader = create_dataloaders( + data_path=args.dataset_dir, + annFile=args.ann_file, + img_sz=args.imgsz, + batch_size=args.bsz, + step=args.step, + data_process_type=args.data_process_type +) +# print("disable_quant_names : ", args.disable_quant_names) +static_quantize(args.model, + calibration_dataloader=dataloader, + save_quant_onnx_path=os.path.join(out_dir, f"quantized_{model_name}.onnx"), + observer=args.observer, + data_preprocess=lambda x: x[0].to("cuda"), + quant_format="qdq", + disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/object_detection/yolov7_sample/igie/scripts/infer_yolov7_sample_int8_accuracy.sh b/models/cv/object_detection/yolov7_sample/igie/scripts/infer_yolov7_sample_int8_accuracy.sh new file mode 100644 index 00000000..958c8b02 --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/scripts/infer_yolov7_sample_int8_accuracy.sh @@ -0,0 +1,208 @@ +#!/bin/bash + +EXIT_STATUS=0 +check_status() +{ + 
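+    # PIPESTATUS[0] is the exit code of the python3 command at the head of the pipeline.
+    # inference.py exits with 10 when the accuracy/FPS target is missed; that code is only
+    # tolerated when TEST_PERF=0, any other non-zero code marks the whole run as failed.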
ret_code=${PIPESTATUS[0]} + if [ ${ret_code} != 0 ]; then + [[ ${ret_code} -eq 10 && "${TEST_PERF:-1}" -eq 0 ]] || EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=-1 +TGT=0.68 +LOOP_COUNT=-1 +RUN_MODE=MAP +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=./ +DATASETS_DIR="${PROJ_DIR}/coco" +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR="${PROJ_DIR}/checkpoints" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${RUN_DIR}/config/YOLOV7M_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +CHECKPOINTS_DIR=${CHECKPOINTS_DIR}/tmp +mkdir -p ${CHECKPOINTS_DIR} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_fusion.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloV5Decoder \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --decoder8_anchor ${DECODER_8_ANCHOR[@]} \ + --decoder16_anchor ${DECODER_16_ANCHOR[@]} \ + --decoder32_anchor 
${DECODER_32_ANCHOR[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_bs${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --bsz ${BSZ} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --map_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/object_detection/yolov7_sample/igie/scripts/infer_yolov7_sample_int8_performance.sh b/models/cv/object_detection/yolov7_sample/igie/scripts/infer_yolov7_sample_int8_performance.sh new file mode 100644 index 00000000..980e788a --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/scripts/infer_yolov7_sample_int8_performance.sh @@ -0,0 +1,209 @@ +#!/bin/bash + +EXIT_STATUS=0 +check_status() +{ + ret_code=${PIPESTATUS[0]} + if [ ${ret_code} != 0 ]; then + [[ ${ret_code} -eq 10 && "${TEST_PERF:-1}" -eq 0 ]] || EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=3 +TGT=425 +LOOP_COUNT=100 +RUN_MODE=FPS +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=./ +DATASETS_DIR="${PROJ_DIR}/coco" +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR="${PROJ_DIR}/checkpoints" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${RUN_DIR}/config/YOLOV7M_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +CHECKPOINTS_DIR=${CHECKPOINTS_DIR}/tmp +mkdir 
-p ${CHECKPOINTS_DIR} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_fusion.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloV5Decoder \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --decoder8_anchor ${DECODER_8_ANCHOR[@]} \ + --decoder16_anchor ${DECODER_16_ANCHOR[@]} \ + --decoder32_anchor ${DECODER_32_ANCHOR[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_bs${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --bsz ${BSZ} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS 
Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --fps_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} diff --git a/models/cv/object_detection/yolov7_sample/igie/simplify_model.py b/models/cv/object_detection/yolov7_sample/igie/simplify_model.py new file mode 100644 index 00000000..b4254b6f --- /dev/null +++ b/models/cv/object_detection/yolov7_sample/igie/simplify_model.py @@ -0,0 +1,21 @@ +import onnx +import argparse +from onnxsim import simplify + +# Simplify +def simplify_model(args): + onnx_model = onnx.load(args.origin_model) + model_simp, check = simplify(onnx_model) + model_simp = onnx.shape_inference.infer_shapes(model_simp) + onnx.save(model_simp, args.output_model) + print(" Simplify onnx Done.") + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +simplify_model(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/igie/build_engine.py b/models/cv/object_detection/yolox_sample/igie/build_engine.py new file mode 100644 index 00000000..5db1f6a1 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/build_engine.py @@ -0,0 +1,39 @@ +import os +import cv2 +import argparse +import numpy as np + +import tvm +from tvm import relay +from tvm.relay.import_model import import_model_to_igie + + +def main(config): + target = tvm.target.iluvatar(model="MR", options="-libs=cudnn,cublas,ixinfer") + device = tvm.device(target.kind.name, 0) + precision = config.precision + if config.precision == "float16": + precision = "fp16" + + inputs_info = {"images": ([config.bsz, 3, 640, 640], "float32")} + mod, params = import_model_to_igie(config.model, inputs_info, outputs_info=None, precision=precision, backend="tensorrt") + lib = relay.build(mod, target=target, params=params, precision=precision, device=device) + lib.export_library(config.engine) + print("Build engine done!") + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--bsz", type=int) + # engine args + parser.add_argument("--engine", type=str, default=None) + + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/igie/build_nms_engine.py b/models/cv/object_detection/yolox_sample/igie/build_nms_engine.py new file mode 100644 index 00000000..51d70747 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/build_nms_engine.py @@ 
-0,0 +1,82 @@ +import os +import argparse +import torch +import onnx +from onnx import helper +from onnx import TensorProto, numpy_helper +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +def create_onnx(args): + nms = helper.make_node( + "DetectionNMS_IxRT", + name="NMS", + inputs=["nms_input"], + outputs=["nms_output0", "nms_output1"], + nMaxKeep=args.max_box_pre_img, + fIoUThresh=args.iou_thresh, + fScoreThresh=args.score_thresh + ) + graph = helper.make_graph( + nodes=[nms], + name="gpu_nms", + inputs=[ + helper.make_tensor_value_info( + "nms_input", onnx.TensorProto.FLOAT, (args.bsz, args.all_box_num, 6) + ) + ], + outputs=[ + helper.make_tensor_value_info( + "nms_output0", onnx.TensorProto.FLOAT, (args.bsz, args.max_box_pre_img, 6) + ), + helper.make_tensor_value_info( + "nms_output1", onnx.TensorProto.INT32, (args.bsz,) + ) + ], + initializer=[] + ) + + op = onnx.OperatorSetIdProto() + op.version = 13 + model = onnx.helper.make_model(graph) + + model = onnx.helper.make_model(graph, opset_imports=[op]) + onnx_path = args.path + "/nms.onnx" + onnx.save(model, onnx_path) + +def build_engine(args): + onnx_path = args.path + "/nms.onnx" + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(onnx_path) + plan = builder.build_serialized_network(network, build_config) + + engine_path = args.path + "/nms.engine" + with open(engine_path, "wb") as f: + f.write(plan) + +def main(args): + create_onnx(args) + build_engine(args) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--bsz", type=int, default=1, help="batch size") + parser.add_argument("--path", type=str) + parser.add_argument("--all_box_num", type=int, default=25200) + parser.add_argument("--max_box_pre_img", type=int, default=1000) + parser.add_argument("--iou_thresh", type=float, default=0.6) + parser.add_argument("--score_thresh", type=float, default=0.001) + + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/igie/calibration_dataset.py b/models/cv/object_detection/yolox_sample/igie/calibration_dataset.py new file mode 100644 index 00000000..0f39a87a --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/calibration_dataset.py @@ -0,0 +1,29 @@ +import os +import torch +import torchvision.datasets +from torch.utils.data import DataLoader + +from datasets.coco import CocoDetection + +def create_dataloaders(data_path, annFile, img_sz=640, batch_size=32, step=32, workers=2, data_process_type="yolov5"): + dataset = CocoDetection( + root=data_path, + annFile=annFile, + img_size=img_sz, + data_process_type=data_process_type + ) + calibration_dataset = dataset + num_samples = min(5000, batch_size * step) + if num_samples > 0: + calibration_dataset = torch.utils.data.Subset( + dataset, indices=range(num_samples) + ) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + return calibration_dataloader \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/igie/ci/prepare.sh 
b/models/cv/object_detection/yolox_sample/igie/ci/prepare.sh new file mode 100644 index 00000000..c26ef6af --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/ci/prepare.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y numactl +elif [[ ${ID} == "centos" ]]; then + yum install -y numactl +else + echo "Not Support Os" +fi + +pip3 install pycocotools +pip3 install loguru +pip3 install tabulate +pip3 install tqdm +pip3 install opencv-python==4.6.0.66 +pip3 install simplejson + +mkdir -p checkpoints +ln -s /mnt/deepspark/data/datasets/corex-inference-data-4.0.0/checkpoints/yolox/yolox_m_export.onnx ./checkpoints/ \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/igie/coco_labels.py b/models/cv/object_detection/yolox_sample/igie/coco_labels.py new file mode 100644 index 00000000..69d38878 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/coco_labels.py @@ -0,0 +1,89 @@ +labels = [ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", +] +def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) + return [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + +__all__ = ["labels"] diff --git a/models/cv/object_detection/yolox_sample/igie/common.py b/models/cv/object_detection/yolox_sample/igie/common.py new file mode 100644 index 00000000..677051f2 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/common.py @@ -0,0 +1,89 @@ +import numpy as np +from tqdm import tqdm + +import tensorrt +import cuda.cuda as cuda +import cuda.cudart as cudart + +# input : 
[bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)] +# output : [bsz, box_num, 6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)] +def box_class85to6(input): + center_x_y = input[:, :2] + side = input[:, 2:4] + conf = input[:, 4:5] + class_id = np.argmax(input[:, 5:], axis = -1) + class_id = class_id.astype(np.float32).reshape(-1, 1) + 1 + max_prob = np.max(input[:, 5:], axis = -1).reshape(-1, 1) + x1_y1 = center_x_y - 0.5 * side + x2_y2 = center_x_y + 0.5 * side + nms_input = np.concatenate([x1_y1, x2_y2, class_id, max_prob*conf], axis = -1) + return nms_input + +def save2json(batch_img_id, pred_boxes, json_result, class_trans): + for i, boxes in enumerate(pred_boxes): + if boxes is not None: + image_id = int(batch_img_id[i]) + # have no target + if image_id == -1: + continue + for x, y, w, h, c, p in boxes: + x, y, w, h, p = float(x), float(y), float(w), float(h), float(p) + c = int(c) + if c<1 or c>80: + print("error class: ", c) + continue + json_result.append( + { + "image_id": image_id, + "category_id": class_trans[c - 1], + "bbox": [x, y, w, h], + "score": p, + } + ) + +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + err, allocation = cudart.cudaMalloc(size) + assert(err == cuda.CUresult.CUDA_SUCCESS) + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + "nbytes": size, + } + print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations diff --git a/models/cv/object_detection/yolox_sample/igie/config/YOLOXM_CONFIG b/models/cv/object_detection/yolox_sample/igie/config/YOLOXM_CONFIG new file mode 100644 index 00000000..6da9d04e --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/config/YOLOXM_CONFIG @@ -0,0 +1,56 @@ +# BSZ : 构建engine以及推理时的batchsize +# IMGSIZE : 模型输入hw大小 +# RUN_MODE : [FPS, MAP] +# PRECISION : [float16, int8] +# MODEL_NAME : 生成onnx/engine的basename +# ORIGINE_MODEL : 原始onnx文件 +# COCO_GT : COCOEVAL标签文件 +# DATASET_DIR : 量化/推理数据集路径 +# CHECKPOINTS_DIR : 存放生成的onnx/engine路径 +# LAYER_FUSION : decoder部分走融合算子实现 0不融合 1融合 +# DECODER_FASTER : 有两种融合实现,faster版本速度快且可以直接对接gpu nms;另一种实现的输出和onnx保持一致. 
1:faster
+IMGSIZE=640
+MODEL_NAME=yolox
+ORIGINE_MODEL=yolox_m_export.onnx
+DATA_PROCESS_TYPE=yolox
+MODEL_INPUT_NAMES=(images)
+
+LAYER_FUSION=1
+DECODER_FASTER=1
+DECODER_NUM_CLASS=80
+# nx4x80x80 nx1x80x80 nx80x80x80
+DECODER0_INPUT_NAMES=(1041 1042 1032)
+# nx4x40x40 nx1x40x40 nx80x40x40
+DECODER1_INPUT_NAMES=(1067 1068 1058)
+# nx4x20x20 nx1x20x20 nx80x20x20
+DECODER2_INPUT_NAMES=(1093 1094 1084)
+
+# Fuse Focus
+FOCUS_INPUT_EDGE=images
+FOCUS_OUTPUT_EDGE=input
+FOCUS_LAST_NODE=Concat_40
+
+# NMS CONFIG
+    # IOU_THRESH : IoU threshold
+    # SCORE_THRESH : bbox confidence threshold
+    # MAX_BOX_PRE_IMG : upper limit on predicted bboxes per image
+    # ALL_BOX_NUM : number of boxes per image fed into NMS
+    # NMS_TYPE : GPU/CPU(TODO)
+IOU_THRESH=0.65
+SCORE_THRESH=0.01
+MAX_BOX_PRE_IMG=1000
+ALL_BOX_NUM=8400
+NMS_TYPE=GPU
+
+# QUANT CONFIG (only takes effect when PRECISION is int8)
+    # QUANT_OBSERVER : quantization observer, one of [hist_percentile, percentile, minmax, entropy, ema]
+    # QUANT_BATCHSIZE : dataloader batch size used during calibration; keep it equal to the batch size in the onnx, otherwise some ops (e.g. Reshape) may infer wrong shapes
+    # QUANT_STEP : number of calibration steps
+    # QUANT_SEED : random seed, keeps the quantization result reproducible
+    # QUANT_EXIST_ONNX : set this when a quantized model from another source should be used
+QUANT_OBSERVER=hist_percentile
+QUANT_BATCHSIZE=1
+QUANT_STEP=32
+QUANT_SEED=42
+DISABLE_QUANT_LIST=()
+QUANT_EXIST_ONNX=
\ No newline at end of file
diff --git a/models/cv/object_detection/yolox_sample/igie/cut_model.py b/models/cv/object_detection/yolox_sample/igie/cut_model.py
new file mode 100644
index 00000000..af0a3a4f
--- /dev/null
+++ b/models/cv/object_detection/yolox_sample/igie/cut_model.py
@@ -0,0 +1,16 @@
+import onnx
+import argparse
+from onnxsim import simplify
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--input_model", type=str)
+    parser.add_argument("--output_model", type=str)
+    parser.add_argument("--input_names", nargs='+', type=str)
+    parser.add_argument("--output_names", nargs='+', type=str)
+    args = parser.parse_args()
+    return args
+
+args = parse_args()
+onnx.utils.extract_model(args.input_model, args.output_model, args.input_names, args.output_names)
+print(" Cut Model Done.")
\ No newline at end of file
diff --git a/models/cv/object_detection/yolox_sample/igie/datasets/__init__.py b/models/cv/object_detection/yolox_sample/igie/datasets/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/models/cv/object_detection/yolox_sample/igie/datasets/coco.py b/models/cv/object_detection/yolox_sample/igie/datasets/coco.py
new file mode 100644
index 00000000..7f355b84
--- /dev/null
+++ b/models/cv/object_detection/yolox_sample/igie/datasets/coco.py
@@ -0,0 +1,116 @@
+import os.path
+from typing import Any, Callable, List, Optional, Tuple
+
+import cv2
+
+from .vision import VisionDataset
+from .pre_process import get_post_process
+class CocoDetection(VisionDataset):
+    """`MS Coco Detection `_ Dataset.
+
+    It requires the `COCO API to be installed `_.
+
+    Args:
+        root (string): Root directory where images are downloaded to.
+        annFile (string): Path to json annotation file.
+        transform (callable, optional): A function/transform that takes in an PIL image
+            and returns a transformed version. E.g, ``transforms.PILToTensor``
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        transforms (callable, optional): A function/transform that takes input sample and its target as entry
+            and returns a transformed version.
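+
+    Example (paths below are placeholders for a local COCO val2017 layout)::
+
+        dataset = CocoDetection(root="coco/images/val2017",
+                                annFile="coco/annotations/instances_val2017.json",
+                                img_size=640, data_process_type="yolox")
+        image, origin_shape, image_id = dataset[0]  # preprocessed CHW float32 array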
+ """ + + def __init__( + self, + root: str, + annFile: str, + img_size: int, + data_process_type: str, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + transforms: Optional[Callable] = None, + + ) -> None: + super().__init__(root, transforms, transform, target_transform) + from pycocotools.coco import COCO + + self.coco = COCO(annFile) + self.ids = list(sorted(self.coco.imgs.keys())) + self.img_size = img_size + + self.transforms = get_post_process(data_process_type) + + def _load_image(self, id: int): + path = self.coco.loadImgs(id)[0]["file_name"] + data = cv2.imread(os.path.join(self.root, path)) + return data + + def _load_target(self, id: int) -> List[Any]: + return self.coco.loadAnns(self.coco.getAnnIds(id)) + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + id = self.ids[index] + image = self._load_image(id) + target = self._load_target(id) + origin_shape = image.shape[:2] + + if self.transforms is not None: + image = self.transforms(image, self.img_size) + + if len(target) > 0: + image_id = target[0]["image_id"] + else: + # have no target + image_id = -1 + return image, origin_shape, image_id + + def __len__(self) -> int: + return len(self.ids) + + +class CocoCaptions(CocoDetection): + """`MS Coco Captions `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + + Example: + + .. 
code:: python + + import torchvision.datasets as dset + import torchvision.transforms as transforms + cap = dset.CocoCaptions(root = 'dir where images are', + annFile = 'json annotation file', + transform=transforms.PILToTensor()) + + print('Number of samples: ', len(cap)) + img, target = cap[3] # load 4th sample + + print("Image Size: ", img.size()) + print(target) + + Output: :: + + Number of samples: 82783 + Image Size: (3L, 427L, 640L) + [u'A plane emitting smoke stream flying over a mountain.', + u'A plane darts across a bright blue sky behind a mountain covered in snow', + u'A plane leaves a contrail above the snowy mountain top.', + u'A mountain that has a plane flying overheard in the distance.', + u'A mountain view with a plume of smoke in the background'] + + """ + + def _load_target(self, id: int) -> List[str]: + return [ann["caption"] for ann in super()._load_target(id)] diff --git a/models/cv/object_detection/yolox_sample/igie/datasets/common.py b/models/cv/object_detection/yolox_sample/igie/datasets/common.py new file mode 100644 index 00000000..e120e00f --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/datasets/common.py @@ -0,0 +1,66 @@ +import cv2 +import math +import numpy as np + +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + +def scale_boxes(net_shape, boxes, ori_shape, use_letterbox=False): + # Rescale boxes (xyxy) from net_shape to ori_shape + + if use_letterbox: + + gain = min( + net_shape[0] / ori_shape[0], net_shape[1] / ori_shape[1] + ) # gain = new / old + pad = (net_shape[1] - ori_shape[1] * gain) / 2, ( + net_shape[0] - ori_shape[0] * gain + ) / 2.0 + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + else: + x_scale, y_scale = net_shape[1] / ori_shape[1], net_shape[0] / ori_shape[0] + + boxes[:, 0] /= x_scale + boxes[:, 1] /= y_scale + boxes[:, 2] /= x_scale + boxes[:, 3] /= y_scale + + clip_boxes(boxes, ori_shape) + return boxes + +def clip_boxes(boxes, shape): + + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 \ No newline at end of file diff --git 
a/models/cv/object_detection/yolox_sample/igie/datasets/post_process.py b/models/cv/object_detection/yolox_sample/igie/datasets/post_process.py new file mode 100644 index 00000000..a58c02f8 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/datasets/post_process.py @@ -0,0 +1,115 @@ +import cv2 +import math +import numpy as np + +from .common import letterbox, scale_boxes, clip_boxes + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Postprocess + elif data_process_type == "yolov3": + return Yolov3Postprocess + elif data_process_type == "yolox": + return YoloxPostprocess + return None + +def Yolov3Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=False + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def Yolov5Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=True + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def YoloxPostprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + boxes = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + r = min(imgsz[0]/ori_img_shape[0][i], imgsz[1]/ori_img_shape[1][i]) + boxes[:, :4] /= r + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + clip_boxes(boxes, (ori_img_shape[0][i], ori_img_shape[1][i])) + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/igie/datasets/pre_process.py b/models/cv/object_detection/yolox_sample/igie/datasets/pre_process.py new file mode 100644 index 00000000..8cc643a8 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/datasets/pre_process.py @@ -0,0 +1,56 @@ +import cv2 +import math +import numpy as np + +from .common import letterbox + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Preprocess + elif data_process_type == "yolov3": + return Yolov3Preprocess + elif data_process_type == "yolox": + return YoloxPreprocess + return None + +def Yolov3Preprocess(image, img_size): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / max(h0, w0) # ratio + + image = cv2.resize(image, (img_size, img_size)) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB 
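+    # (unlike the v3/v5 paths, YoloxPreprocess below deliberately skips the /255 step;
+    # the exported YOLOX model is assumed to take raw 0-255 pixel values as input)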
+ image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def Yolov5Preprocess(image, img_size, augment=False): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / max(h0, w0) # ratio + + if r != 1: # if sizes are not equal + interp = cv2.INTER_LINEAR if (augment or r > 1) else cv2.INTER_AREA + image = cv2.resize(image, (math.ceil(w0 * r), math.ceil(h0 * r)), interpolation=interp) + + # shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size rect == True + + image, ratio, dwdh = letterbox(image, new_shape=img_size, auto=False, scaleup=False) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def YoloxPreprocess(img, img_size, swap=(2,0,1)): + + padded_img = np.ones((img_size, img_size, 3), dtype=np.uint8) * 114 + r = min(img_size / img.shape[0], img_size / img.shape[1]) + resized_img = cv2.resize( + img, + (int(img.shape[1] * r), int(img.shape[0] * r)), + interpolation=cv2.INTER_LINEAR, + ).astype(np.uint8) + + padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img + padded_img = padded_img.transpose(swap) + padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) + + return padded_img \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/igie/datasets/vision.py b/models/cv/object_detection/yolox_sample/igie/datasets/vision.py new file mode 100644 index 00000000..32da4a78 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/datasets/vision.py @@ -0,0 +1,136 @@ +import os +from typing import Any, Callable, List, Optional, Tuple + +import torch +import torch.utils.data as data + +from types import FunctionType + +def _log_api_usage_once(obj: Any) -> None: + + """ + Logs API usage(module and name) within an organization. + In a large ecosystem, it's often useful to track the PyTorch and + TorchVision APIs usage. This API provides the similar functionality to the + logging module in the Python stdlib. It can be used for debugging purpose + to log which methods are used and by default it is inactive, unless the user + manually subscribes a logger via the `SetAPIUsageLogger method `_. + Please note it is triggered only once for the same API call within a process. + It does not collect any data from open-source users since it is no-op by default. + For more information, please refer to + * PyTorch note: https://pytorch.org/docs/stable/notes/large_scale_deployments.html#api-usage-logging; + * Logging policy: https://github.com/pytorch/vision/issues/5052; + + Args: + obj (class instance or method): an object to extract info from. + """ + module = obj.__module__ + if not module.startswith("torchvision"): + module = f"torchvision.internal.{module}" + name = obj.__class__.__name__ + if isinstance(obj, FunctionType): + name = obj.__name__ + torch._C._log_api_usage_once(f"{module}.{name}") + +class VisionDataset(data.Dataset): + """ + Base Class For making datasets which are compatible with torchvision. + It is necessary to override the ``__getitem__`` and ``__len__`` method. + + Args: + root (string): Root directory of dataset. + transforms (callable, optional): A function/transforms that takes in + an image and a label and returns the transformed versions of both. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. 
E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + + .. note:: + + :attr:`transforms` and the combination of :attr:`transform` and :attr:`target_transform` are mutually exclusive. + """ + + _repr_indent = 4 + + def __init__( + self, + root: str, + transforms: Optional[Callable] = None, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + ) -> None: + _log_api_usage_once(self) + if isinstance(root, str): + root = os.path.expanduser(root) + self.root = root + + has_transforms = transforms is not None + has_separate_transform = transform is not None or target_transform is not None + if has_transforms and has_separate_transform: + raise ValueError("Only transforms or transform/target_transform can be passed as argument") + + # for backwards-compatibility + self.transform = transform + self.target_transform = target_transform + + if has_separate_transform: + transforms = StandardTransform(transform, target_transform) + self.transforms = transforms + + def __getitem__(self, index: int) -> Any: + """ + Args: + index (int): Index + + Returns: + (Any): Sample and meta data, optionally transformed by the respective transforms. + """ + raise NotImplementedError + + def __len__(self) -> int: + raise NotImplementedError + + def __repr__(self) -> str: + head = "Dataset " + self.__class__.__name__ + body = [f"Number of datapoints: {self.__len__()}"] + if self.root is not None: + body.append(f"Root location: {self.root}") + body += self.extra_repr().splitlines() + if hasattr(self, "transforms") and self.transforms is not None: + body += [repr(self.transforms)] + lines = [head] + [" " * self._repr_indent + line for line in body] + return "\n".join(lines) + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def extra_repr(self) -> str: + return "" + + +class StandardTransform: + def __init__(self, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None) -> None: + self.transform = transform + self.target_transform = target_transform + + def __call__(self, input: Any, target: Any) -> Tuple[Any, Any]: + if self.transform is not None: + input = self.transform(input) + if self.target_transform is not None: + target = self.target_transform(target) + return input, target + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def __repr__(self) -> str: + body = [self.__class__.__name__] + if self.transform is not None: + body += self._format_transform_repr(self.transform, "Transform: ") + if self.target_transform is not None: + body += self._format_transform_repr(self.target_transform, "Target transform: ") + + return "\n".join(body) diff --git a/models/cv/object_detection/yolox_sample/igie/deploy.py b/models/cv/object_detection/yolox_sample/igie/deploy.py new file mode 100644 index 00000000..668b3420 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/deploy.py @@ -0,0 +1,135 @@ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +import argparse +from tensorrt.deploy.api import GraphTransform, create_source, create_target + +class Transform: + def __init__(self, graph): + self.t = GraphTransform(graph) + self.graph = 
graph + + def ReplaceFocus(self, input_edge, outputs, to_op): + input_var = self.graph.get_variable(input_edge) + op = self.graph.get_operator(to_op) + self.t.delete_operators_between_var_op( + from_var=input_var, to_op=op + ) + self.t.make_operator( + "Focus", inputs=input_edge, outputs=outputs + ) + return self.graph + + def AddYoloDecoderOp(self, inputs: list, outputs: list, op_type, **attributes): + if attributes["anchor"] is None: + del attributes["anchor"] + print("AddYoloDecoderOp:", ) + self.t.make_operator( + op_type, inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + + def AddConcatOp(self, inputs: list, outputs, **attributes): + self.t.make_operator( + "Concat", inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + +def customize_ops(graph, args): + t = Transform(graph) + fuse_focus = args.focus_input is not None and args.focus_output is not None and args.focus_last_node is not None + if fuse_focus: + graph = t.ReplaceFocus( + input_edge=args.focus_input, + outputs=args.focus_output, + to_op=args.focus_last_node + ) + decoder_input = args.decoder_input_names + num = len(decoder_input) // 3 + graph = t.AddYoloDecoderOp( + inputs=decoder_input[:num], + outputs=["decoder_8"], + op_type=args.decoder_type, + anchor=args.decoder8_anchor, + num_class=args.num_class, + stride=8, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num:num*2], + outputs=["decoder_16"], + op_type=args.decoder_type, + anchor=args.decoder16_anchor, + num_class=args.num_class, + stride=16, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2:], + outputs=["decoder_32"], + op_type=args.decoder_type, + anchor=args.decoder32_anchor, + num_class=args.num_class, + stride=32, + faster_impl=args.faster + ) + if args.decoder64_anchor is not None: + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2+1:], + outputs=["decoder_64"], + op_type=args.decoder_type, + anchor=args.decoder64_anchor, + num_class=args.num_class, + stride=64, + faster_impl=args.faster + ) + graph = t.AddConcatOp( + inputs=["decoder_8", "decoder_16", "decoder_32", "decoder_64"], + outputs=["output"], + axis=1 + ) + elif args.with_nms: + graph = t.AddConcatOp( + inputs=["decoder_32", "decoder_16", "decoder_8"], + outputs=["output"], + axis=1 + ) + + graph.outputs.clear() + graph.add_output("output") + graph.outputs["output"].dtype = "FLOAT" + else: + graph.outputs.clear() + graph.add_output("decoder_8") + graph.outputs["decoder_8"].dtype = "FLOAT" + graph.add_output("decoder_16") + graph.outputs["decoder_16"].dtype = "FLOAT" + graph.add_output("decoder_32") + graph.outputs["decoder_32"].dtype = "FLOAT" + return graph + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--src", type=str) + parser.add_argument("--dst", type=str) + parser.add_argument("--decoder_type", type=str, choices=["YoloV3Decoder", "YoloV5Decoder", "YoloV7Decoder", "YoloxDecoder"]) + parser.add_argument("--with_nms", type=bool, default=False, help="engine with nms") + parser.add_argument("--decoder_input_names", nargs='+', type=str) + parser.add_argument("--decoder8_anchor", nargs='*', type=int) + parser.add_argument("--decoder16_anchor", nargs='*', type=int) + parser.add_argument("--decoder32_anchor", nargs='*', type=int) + parser.add_argument("--decoder64_anchor", nargs='*', type=int, default=None) + parser.add_argument("--num_class", type=int, default=80) + parser.add_argument("--faster", type=int, default=1) + 
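+    # The three --focus_* arguments describe the Focus slice subgraph in the
+    # exported YOLOX ONNX: the graph edge feeding it, the output edge it should
+    # produce, and the last node of the subgraph. customize_ops() only calls
+    # ReplaceFocus when all three are provided, deleting the original slice
+    # operators and inserting the fused Focus operator in their place.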
parser.add_argument("--focus_input", type=str, default=None) + parser.add_argument("--focus_output", type=str, default=None) + parser.add_argument("--focus_last_node", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + + args = parse_args() + graph = create_source(args.src)() + graph = customize_ops(graph, args) + create_target(saved_path=args.dst).export(graph) + print("Surged onnx lies on", args.dst) \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/igie/inference.py b/models/cv/object_detection/yolox_sample/igie/inference.py new file mode 100644 index 00000000..7fbb611d --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/inference.py @@ -0,0 +1,269 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +import glob +import json +import os +import time +import sys + +import torch +import numpy as np +import cuda.cuda as cuda +import cuda.cudart as cudart + +from coco_labels import coco80_to_coco91_class, labels +from common import save2json, box_class85to6 +from common import create_engine_context, get_io_bindings +from calibration_dataset import create_dataloaders +from datasets.post_process import get_post_process + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from tqdm import tqdm +from tqdm.contrib import tzip + +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + + +import tvm +from tvm.contrib import graph_executor + +def init_by_igie(engine_path): + device = tvm.device("iluvatar", 0) + lib = tvm.runtime.load_module(engine_path) + module = graph_executor.GraphModule(lib["default"](device)) + # engine, context = module.engine, module.context + # inputs, outputs, allocations = module.inputs, module.outputs, module.allocations + return module + +def igie_infer(module, batch_data): + # set input + module.set_input(module.inputs[0]["name"], batch_data) + ### infer model + module.run() + # get output data + output = module.get_output(0) + return output + + +def main(config): + + # Load dataloader + dataloader = create_dataloaders( + data_path=config.eval_dir, + annFile=config.coco_gt, + img_sz=config.imgsz, + batch_size=config.bsz, + step=config.loop_count, + data_process_type=config.data_process_type + ) + + # Load post process func + if config.test_mode == "MAP": + post_process_func = get_post_process(config.data_process_type) + + bsz = config.bsz + num_samples = 5000 + if config.loop_count > 0: + num_samples = bsz * config.loop_count + num_batch = len(dataloader) + print("=" * 30) + print(f"Test Mode : {'Asynchronous' if config.use_async else 'Synchronous'}") + print(f"Total sample : {num_samples}\nBatch_size : {bsz}\nRun Batch : {num_batch}") + print("=" * 30) + + json_result = [] + forward_time = 0.0 + class_map = coco80_to_coco91_class() + + # Load Engine + module = init_by_igie(config.model_engine) + + # Load nms_engine + if config.test_mode == "MAP" and config.nms_type == "GPU": + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + nms_engine, nms_context = create_engine_context(config.nms_engine, logger) + nms_inputs, nms_outputs, nms_allocations = get_io_bindings(nms_engine) + nms_output0 = np.zeros(nms_outputs[0]["shape"], nms_outputs[0]["dtype"]) + nms_output1 = np.zeros(nms_outputs[1]["shape"], nms_outputs[1]["dtype"]) + print(f"nms_output0 shape : {nms_output0.shape} nms_output0 type : {nms_output0.dtype}") + print(f"nms_output1 shape : {nms_output1.shape} nms_output1 type : {nms_output1.dtype}") + + # 
Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + module.run() + print("Warm Done.") + + + for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader): + batch_data = batch_data.numpy() + batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()] + # batch_img_id = batch_img_id.numpy() + cur_bsz_sample = batch_data.shape[0] + + err, = cuda.cuMemcpyHtoD(module.inputs[0]["allocation"], batch_data, batch_data.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + module.run() + + if config.test_mode == "MAP": + # Fetch output + output = igie_infer(module, batch_data) + + # Step 1 : prepare data to nms + _, box_num, box_unit = output.shape + if config.debug: + print(f"[Debug] box_num(25200) : {box_num}, box_unit(6) : {box_unit}") + + if config.decoder_faster == 0: + nms_input = box_class85to6(output.reshape(-1, box_unit)) + else: + nms_input = output + + # Step 2 : nms + # cpu nms(TODO) + + # gpu nms + if config.nms_type == "GPU": + + # Set nms input + err, = cuda.cuMemcpyHtoD(nms_inputs[0]["allocation"], nms_input, nms_input.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + nms_context.execute_v2(nms_allocations) + err, = cuda.cuMemcpyDtoH(nms_output0, nms_outputs[0]["allocation"], nms_outputs[0]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + err, = cuda.cuMemcpyDtoH(nms_output1, nms_outputs[1]["allocation"], nms_outputs[1]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + # Step 3 : post process + save + pred_boxes = post_process_func( + ori_img_shape=batch_img_shape, + imgsz=(config.imgsz, config.imgsz), + box_datas=nms_output0, + box_nums=nms_output1, + sample_num=cur_bsz_sample, + max_det=config.max_det + ) + save2json(batch_img_id, pred_boxes, json_result, class_map) + + # fps = num_samples / forward_time + + if config.test_mode == "FPS": + start_time = time.time() + for i in range(config.loop_count): + module.run() + end_time = time.time() + forward_time = end_time - start_time + fps = (config.loop_count*config.bsz) / forward_time + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(10) + + if config.test_mode == "MAP": + if len(json_result) == 0: + print("Predict zero box!") + exit(10) + + if not os.path.exists(config.pred_dir): + os.makedirs(config.pred_dir) + + pred_json = os.path.join( + config.pred_dir, f"{config.model_name}_{config.precision}_preds.json" + ) + with open(pred_json, "w") as f: + json.dump(json_result, f) + + anno_json = config.coco_gt + anno = COCO(anno_json) # init annotations api + pred = anno.loadRes(pred_json) # init predictions api + eval = COCOeval(anno, pred, "bbox") + + eval.evaluate() + eval.accumulate() + print( + f"==============================eval {config.model_name} {config.precision} coco map ==============================" + ) + eval.summarize() + + map, map50 = eval.stats[:2] + print("MAP@0.5 : ", map50) + print(f"Accuracy Check : Test {map50} >= target {config.map_target}") + if map50 >= config.map_target: + print("pass!") + exit() + else: + print("failed!") + exit(10) + +def parse_config(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_name", type=str, default="YOLOV5s", help="YOLOV3 YOLOV5 YOLOV7 YOLOX" + ) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--test_mode", type=str, 
default="FPS", help="FPS MAP") + parser.add_argument( + "--model_engine", + type=str, + default="", + help="model engine path", + ) + parser.add_argument( + "--nms_engine", + type=str, + default="", + help="nms engine path", + ) + parser.add_argument( + "--coco_gt", + type=str, + default="data/datasets/cv/coco2017/annotations/instances_val2017.json", + help="coco instances_val2017.json", + ) + parser.add_argument("--warm_up", type=int, default=3, help="warm_up count") + parser.add_argument("--loop_count", type=int, default=-1, help="loop count") + parser.add_argument( + "--eval_dir", + type=str, + default="data/datasets/cv/coco2017/val2017", + help="coco image dir", + ) + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=640, + help="inference size h,w", + ) + parser.add_argument("--max_det", type=int, default=1000, help="maximum detections per image") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--use_async", action="store_true") + parser.add_argument("--debug", action="store_true") + parser.add_argument("--pred_dir", type=str, default=".", help="pred save json dirs") + parser.add_argument("--map_target", type=float, default=0.56, help="target mAP") + parser.add_argument("--fps_target", type=float, default=-1.0, help="target fps") + parser.add_argument("--decoder_faster", type=int, default=0, help="decoder faster can use gpu nms directly") + parser.add_argument("--nms_type", type=str, default="GPU", help="GPU/CPU") + + config = parser.parse_args() + print("config:", config) + return config + +if __name__ == "__main__": + config = parse_config() + main(config) diff --git a/models/cv/object_detection/yolox_sample/igie/load_ixrt_plugin.py b/models/cv/object_detection/yolox_sample/igie/load_ixrt_plugin.py new file mode 100644 index 00000000..932efbdf --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/load_ixrt_plugin.py @@ -0,0 +1,12 @@ +import ctypes +import tensorrt +from os.path import join, dirname, exists +def load_ixrt_plugin(logger=tensorrt.Logger(tensorrt.Logger.INFO), namespace="", dynamic_path=""): + if not dynamic_path: + dynamic_path = join(dirname(tensorrt.__file__), "lib", "libixrt_plugin.so") + if not exists(dynamic_path): + raise FileNotFoundError( + f"The ixrt_plugin lib {dynamic_path} is not existed, please provided effective plugin path!") + ctypes.CDLL(dynamic_path) + tensorrt.init_libnvinfer_plugins(logger, namespace) + print(f"Loaded plugin from {dynamic_path}") \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/igie/modify_batchsize.py b/models/cv/object_detection/yolox_sample/igie/modify_batchsize.py new file mode 100644 index 00000000..00ed65dd --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/modify_batchsize.py @@ -0,0 +1,37 @@ +import onnx +import argparse + +def change_input_dim(model, bsz): + batch_size = bsz + + # The following code changes the first dimension of every input to be batch_size + # Modify as appropriate ... note that this requires all inputs to + # have the same batch_size + inputs = model.graph.input + for input in inputs: + # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. + # Add checks as needed. 
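+        # A defensive check (sketch) would be to skip inputs that carry no
+        # shape information at all, e.g.:
+        #     if not input.type.tensor_type.shape.dim:
+        #         continue
+        # In this script --batch_size is parsed as an int, so the dim_value
+        # branch below is the one that actually runs.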
+ dim1 = input.type.tensor_type.shape.dim[0] + # update dim to be a symbolic value + if isinstance(batch_size, str): + # set dynamic batch size + dim1.dim_param = batch_size + elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): + # set given batch size + dim1.dim_value = int(batch_size) + else: + # set batch size of 1 + dim1.dim_value = 1 + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--batch_size", type=int) + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +model = onnx.load(args.origin_model) +change_input_dim(model, args.batch_size) +onnx.save(model, args.output_model) \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/igie/quant.py b/models/cv/object_detection/yolox_sample/igie/quant.py new file mode 100644 index 00000000..d73212ca --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/quant.py @@ -0,0 +1,55 @@ +import os +import random +import argparse +import numpy as np +from tensorrt.deploy import static_quantize + +import torch +import sys +sys.path.append("/home/haoyuan.chen/temp/inferencesamples/benchmarks/cv/detection/yolov3/tensorrt") +print(sys.path) +from calibration_dataset import create_dataloaders + +def setseed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str) + parser.add_argument("--model", type=str, default="yolov5s_with_decoder.onnx") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--dataset_dir", type=str, default="./coco2017/val2017") + parser.add_argument("--ann_file", type=str, default="./coco2017/annotations/instances_val2017.json") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") + parser.add_argument("--disable_quant_names", nargs='*', type=str) + parser.add_argument("--save_dir", type=str, help="save path", default=None) + parser.add_argument("--bsz", type=int, default=32) + parser.add_argument("--step", type=int, default=20) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--imgsz", type=int, default=640) + args = parser.parse_args() + return args + +args = parse_args() +setseed(args.seed) +model_name = args.model_name + +out_dir = args.save_dir +dataloader = create_dataloaders( + data_path=args.dataset_dir, + annFile=args.ann_file, + img_sz=args.imgsz, + batch_size=args.bsz, + step=args.step, + data_process_type=args.data_process_type +) +# print("disable_quant_names : ", args.disable_quant_names) +static_quantize(args.model, + calibration_dataloader=dataloader, + save_quant_onnx_path=os.path.join(out_dir, f"quantized_{model_name}.onnx"), + observer=args.observer, + data_preprocess=lambda x: x[0].to("cuda"), + quant_format="qdq", + disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/igie/scripts/infer_yolox_sample_int8_accuracy.sh b/models/cv/object_detection/yolox_sample/igie/scripts/infer_yolox_sample_int8_accuracy.sh new file mode 100644 index 00000000..50e862a9 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/scripts/infer_yolox_sample_int8_accuracy.sh @@ -0,0 +1,210 @@ +#!/bin/bash + +EXIT_STATUS=0 +check_status() +{ + 
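+    # PIPESTATUS[0] holds the exit code of the python3 inference command that
+    # check_status is chained after. Exit code 10 (accuracy/perf target missed)
+    # is tolerated only when TEST_PERF=0; any other non-zero code marks the
+    # whole script as failed via EXIT_STATUS.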
ret_code=${PIPESTATUS[0]} + if [ ${ret_code} != 0 ]; then + [[ ${ret_code} -eq 10 && "${TEST_PERF:-1}" -eq 0 ]] || EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=-1 +TGT=0.645 +LOOP_COUNT=-1 +RUN_MODE=MAP +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=./ +DATASETS_DIR="${PROJ_DIR}/data/datasets/coco" +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR="${PROJ_DIR}/checkpoints" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${RUN_DIR}/config/YOLOXM_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +CHECKPOINTS_DIR=${CHECKPOINTS_DIR}/tmp +mkdir -p ${CHECKPOINTS_DIR} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +DECODER_INPUT_NAMES=("${DECODER0_INPUT_NAMES[@]}" "${DECODER1_INPUT_NAMES[@]}" "${DECODER2_INPUT_NAMES[@]}") +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}}_quant_fusion_cancat.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloxDecoder \ + --with_nms True \ + 
--decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} \ + --focus_input images_DequantizeLinear_Output \ + --focus_output ${FOCUS_OUTPUT_EDGE} \ + --focus_last_node ${FOCUS_LAST_NODE} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_bs${BSZ}_with_nms.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}_with_nms.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --bsz ${BSZ} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --map_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/igie/scripts/infer_yolox_sample_int8_performance.sh b/models/cv/object_detection/yolox_sample/igie/scripts/infer_yolox_sample_int8_performance.sh new file mode 100644 index 00000000..6c47e695 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/scripts/infer_yolox_sample_int8_performance.sh @@ -0,0 +1,211 @@ +#!/bin/bash + +EXIT_STATUS=0 +check_status() +{ + ret_code=${PIPESTATUS[0]} + if [ ${ret_code} != 0 ]; then + [[ ${ret_code} -eq 10 && "${TEST_PERF:-1}" -eq 0 ]] || EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=3 +TGT=540 +CPU_AFFINITY=$(ixsmi topo -m|grep "^GPU0" |awk '{print $(NF-1)}') +LOOP_COUNT=100 +RUN_MODE=FPS +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=./ +DATASETS_DIR="${PROJ_DIR}/data/datasets/coco" +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR="${PROJ_DIR}/checkpoints" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${RUN_DIR}/config/YOLOXM_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : 
${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +CHECKPOINTS_DIR=${CHECKPOINTS_DIR}/tmp +mkdir -p ${CHECKPOINTS_DIR} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +DECODER_INPUT_NAMES=("${DECODER0_INPUT_NAMES[@]}" "${DECODER1_INPUT_NAMES[@]}" "${DECODER2_INPUT_NAMES[@]}") +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_fusion_no_cancat.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloxDecoder \ + --with_nms False \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} \ + --focus_input images_DequantizeLinear_Output \ + --focus_output ${FOCUS_OUTPUT_EDGE} \ + --focus_last_node ${FOCUS_LAST_NODE} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_bs${BSZ}_without_nms.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine 
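+# The engine file name encodes precision and batch size; if a matching engine
+# already exists under the checkpoints tmp directory the build step is skipped,
+# so repeated runs of this script reuse the cached engine.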
+ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}_without_nms.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --bsz ${BSZ} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +numactl --physcpubind=${CPU_AFFINITY} python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --fps_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/igie/simplify_model.py b/models/cv/object_detection/yolox_sample/igie/simplify_model.py new file mode 100644 index 00000000..b4254b6f --- /dev/null +++ b/models/cv/object_detection/yolox_sample/igie/simplify_model.py @@ -0,0 +1,21 @@ +import onnx +import argparse +from onnxsim import simplify + +# Simplify +def simplify_model(args): + onnx_model = onnx.load(args.origin_model) + model_simp, check = simplify(onnx_model) + model_simp = onnx.shape_inference.infer_shapes(model_simp) + onnx.save(model_simp, args.output_model) + print(" Simplify onnx Done.") + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +simplify_model(args) \ No newline at end of file diff --git a/tests/model_info.json b/tests/model_info.json index eeeebc92..d03ee48d 100644 --- a/tests/model_info.json +++ b/tests/model_info.json @@ -9065,6 +9065,138 @@ "type": "inference", "hasDemo": false, "demoType": "" + }, + { + "display_name": "YOLOv3_Sample", + "model_name": "yolov3_sample", + "framework": "igie", + "release_version": "25.12", + "release_sdk": "4.3.0", + "release_gpgpu": "MR-V100", + "latest_sdk": "4.3.0", + "latest_gpgpu": "", + "category": "cv/object_detection", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "models/cv/object_detection/yolov3_sample/igie/", + "readme_file": "models/cv/object_detection/yolov3_sample/igie/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + "bitbucket_path": "", + "develop_owner": "", + "github_repo": "", + "github_branch": "", + "github_path": "", + "datasets": "local/coco", + "download_url": "https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov3.pt", + "need_third_part": false, + "precisions": [ + "int8" + ], + "type": "inference", + "hasDemo": false, + "demoType": "" + }, + { + "display_name": "YOLOv5_Sample", + "model_name": "yolov5_sample", + "framework": "igie", + "release_version": "25.12", + "release_sdk": 
"4.3.0", + "release_gpgpu": "MR-V100", + "latest_sdk": "4.3.0", + "latest_gpgpu": "", + "category": "cv/object_detection", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "models/cv/object_detection/yolov5_sample/igie/", + "readme_file": "models/cv/object_detection/yolov5_sample/igie/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + "bitbucket_path": "", + "develop_owner": "", + "github_repo": "", + "github_branch": "", + "github_path": "", + "datasets": "local/coco", + "download_url": "https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5m.pt", + "need_third_part": false, + "precisions": [ + "int8" + ], + "type": "inference", + "hasDemo": false, + "demoType": "" + }, + { + "display_name": "YOLOv7_Sample", + "model_name": "yolov7_sample", + "framework": "igie", + "release_version": "25.12", + "release_sdk": "4.3.0", + "release_gpgpu": "MR-V100", + "latest_sdk": "4.3.0", + "latest_gpgpu": "", + "category": "cv/object_detection", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "models/cv/object_detection/yolov7_sample/igie/", + "readme_file": "models/cv/object_detection/yolov7_sample/igie/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + "bitbucket_path": "", + "develop_owner": "", + "github_repo": "", + "github_branch": "", + "github_path": "", + "datasets": "local/coco", + "download_url": "https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt", + "need_third_part": true, + "precisions": [ + "int8" + ], + "type": "inference", + "hasDemo": false, + "demoType": "" + }, + { + "display_name": "YOLOX_Sample", + "model_name": "yolox_sample", + "framework": "igie", + "release_version": "25.12", + "release_sdk": "4.3.0", + "release_gpgpu": "MR-V100", + "latest_sdk": "4.3.0", + "latest_gpgpu": "", + "category": "cv/object_detection", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "models/cv/object_detection/yolox_sample/igie/", + "readme_file": "models/cv/object_detection/yolox_sample/igie/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + "bitbucket_path": "", + "develop_owner": "", + "github_repo": "", + "github_branch": "", + "github_path": "", + "datasets": "local/coco", + "download_url": "https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_m.pth", + "need_third_part": true, + "precisions": [ + "int8" + ], + "type": "inference", + "hasDemo": false, + "demoType": "" } ] } \ No newline at end of file -- Gitee From cf84ac72a75484ef3464ef6faf4f40944c68bd7a Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Mon, 29 Dec 2025 17:37:41 +0800 Subject: [PATCH 13/19] update ixrt bert large sample code --- .../nlp/plm/bert_large_squad/ixrt/builder.py | 527 ++++-------------- .../ixrt/helpers/data_processing.py | 16 +- models/nlp/plm/bert_large_squad/ixrt/perf.py | 172 ------ 3 files changed, 122 insertions(+), 593 deletions(-) delete mode 100644 models/nlp/plm/bert_large_squad/ixrt/perf.py diff --git a/models/nlp/plm/bert_large_squad/ixrt/builder.py b/models/nlp/plm/bert_large_squad/ixrt/builder.py index 970f91bc..f52be8b7 100644 --- a/models/nlp/plm/bert_large_squad/ixrt/builder.py +++ b/models/nlp/plm/bert_large_squad/ixrt/builder.py @@ -1,109 +1,36 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# - -#!/usr/bin/env python3 -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# +import os import argparse -import ctypes import json -import os -import sys +import tensorrt as trt import time - +import sys +import ctypes +import os import numpy as np -import ixrt -from builder_utils import ( # Attention Keys; Transformer Keys; SQuAD Output Keys - B_AOUT, - B_LOUT, - B_MID, - BQKV, - SQD_B, - SQD_W, - W_AOUT, - W_LOUT, - W_MID, - WQKV, - load_onnx_weights_and_quant, - load_pytorch_weights_and_quant, -) - -plugin_lib_name = ( - "libnvinfer_plugin.so" if os.getenv("USE_TRT") == "True" else "libixrt_plugin.so" -) +from builder_utils import load_onnx_weights_and_quant, load_pytorch_weights_and_quant +from builder_utils import WQKV, BQKV # Attention Keys +from builder_utils import W_AOUT, B_AOUT, W_MID, B_MID, W_LOUT, B_LOUT # Transformer Keys +from builder_utils import SQD_W, SQD_B # SQuAD Output Keys + +trt_version = [int(n) for n in trt.__version__.split('.')] +plugin_lib_name = "libnvinfer_plugin.so" if os.getenv('USE_TRT') == 'True' else "libixrt_plugin.so" print(plugin_lib_name) -TRT_LOGGER = ixrt.Logger(ixrt.Logger.WARNING) +TRT_LOGGER = trt.Logger(trt.Logger.WARNING) from load_ixrt_plugin import load_ixrt_plugin - load_ixrt_plugin(TRT_LOGGER) -plg_registry = ixrt.get_plugin_registry() +plg_registry = trt.get_plugin_registry() registry_list = plg_registry.plugin_creator_list -print( - "registry_list: ", - [registry.name + "/" + registry.plugin_version for registry in registry_list], -) -emln_plg_creator = plg_registry.get_plugin_creator( - "CustomEmbLayerNormPluginDynamic_IxRT", "1", "" -) -qkv2_plg_creator = plg_registry.get_plugin_creator( - "CustomQKVToContextPluginDynamic_IxRT", "1", "" -) -skln_plg_creator = plg_registry.get_plugin_creator( - "CustomSkipLayerNormPluginDynamic_IxRT", "1", "" -) -ffn_plg_creator = plg_registry.get_plugin_creator( - "CustomFFNPluginDynamic_IxRT", "1", "" -) -gelu_plg_creator = plg_registry.get_plugin_creator( - "CustomGeluPluginDynamic_IxRT", "1", "" -) +print("registry_list: ", [registry.name + '/' + registry.plugin_version for registry in registry_list]) +emln_plg_creator = plg_registry.get_plugin_creator("CustomEmbLayerNormPluginDynamic_IxRT", "1", "") +qkv2_plg_creator = plg_registry.get_plugin_creator("CustomQKVToContextPluginDynamic_IxRT", "1", "") +skln_plg_creator = plg_registry.get_plugin_creator("CustomSkipLayerNormPluginDynamic_IxRT", "1", "") +ffn_plg_creator = plg_registry.get_plugin_creator("CustomFFNPluginDynamic_IxRT", "1", "") +gelu_plg_creator = plg_registry.get_plugin_creator("CustomGeluPluginDynamic_IxRT", "1", "") fc_plg_creator = plg_registry.get_plugin_creator("CustomFCPluginDynamic_IxRT", "1", "") - class BertConfig: def __init__(self, bert_config_path, use_fp16, use_trt): with open(bert_config_path, "r") as f: @@ -116,46 +43,36 @@ class BertConfig: self.use_fp16 = use_fp16 self.use_trt = use_trt - def set_tensor_name(tensor, prefix, name): tensor.name = prefix + name - -def set_output_name(layer, prefix, name, out_idx=0): +def set_output_name(layer, prefix, name, out_idx = 0): set_tensor_name(layer.get_output(out_idx), prefix, name) - -def set_output_range(layer, maxval, out_idx=0): +def set_output_range(layer, maxval, out_idx = 0): layer.get_output(out_idx).set_dynamic_range(-maxval, maxval) - def get_mha_dtype(config): - dtype = ixrt.float32 + dtype = trt.float32 if config.use_fp16: - dtype = ixrt.float16 + dtype = trt.float16 return int(dtype) - def custom_fc(network, input_tensor, out_dims, W, B): - pf_out_dims = ixrt.PluginField( - "out_dims", np.array(out_dims, dtype=np.int32), 
ixrt.PluginFieldType.INT32 - ) - pf_type = ixrt.PluginField( - "type_id", np.array(int(ixrt.float16), dtype=np.int32), ixrt.PluginFieldType.INT32 - ) - pf_W = ixrt.PluginField("W", W, ixrt.PluginFieldType.FLOAT32) + pf_out_dims = trt.PluginField("out_dims", np.array(out_dims, dtype=np.int32), trt.PluginFieldType.INT32) + pf_type = trt.PluginField("type_id", np.array(int(trt.float16), dtype=np.int32), trt.PluginFieldType.INT32) + pf_W = trt.PluginField("W", W, trt.PluginFieldType.FLOAT32) fields = [pf_out_dims, pf_type, pf_W] if B is not None: - pf_B = ixrt.PluginField("B", B, ixrt.PluginFieldType.FLOAT32) + pf_B = trt.PluginField("B", B, trt.PluginFieldType.FLOAT32) fields.append(pf_B) - pfc = ixrt.PluginFieldCollection(fields) + pfc = trt.PluginFieldCollection(fields) fc_plugin = fc_plg_creator.create_plugin("fcplugin", pfc) plug_inputs = [input_tensor] out_dense = network.add_plugin_v2(plug_inputs, fc_plugin) return out_dense - def attention_layer_opt(prefix, config, init_dict, network, input_tensor, imask): """ Add the attention layer @@ -172,23 +89,11 @@ def attention_layer_opt(prefix, config, init_dict, network, input_tensor, imask) has_mask = imask is not None # QKV2CTX - pf_type = ixrt.PluginField( - "type_id", - np.array([get_mha_dtype(config)], np.int32), - ixrt.PluginFieldType.INT32, - ) - pf_hidden_size = ixrt.PluginField( - "hidden_size", np.array([hidden_size], np.int32), ixrt.PluginFieldType.INT32 - ) - pf_num_heads = ixrt.PluginField( - "num_heads", np.array([num_heads], np.int32), ixrt.PluginFieldType.INT32 - ) - pf_has_mask = ixrt.PluginField( - "has_mask", np.array([has_mask], np.int32), ixrt.PluginFieldType.INT32 - ) - pfc = ixrt.PluginFieldCollection( - [pf_hidden_size, pf_num_heads, pf_has_mask, pf_type] - ) + pf_type = trt.PluginField("type_id", np.array([get_mha_dtype(config)], np.int32), trt.PluginFieldType.INT32) + pf_hidden_size = trt.PluginField("hidden_size", np.array([hidden_size], np.int32), trt.PluginFieldType.INT32) + pf_num_heads = trt.PluginField("num_heads", np.array([num_heads], np.int32), trt.PluginFieldType.INT32) + pf_has_mask = trt.PluginField("has_mask", np.array([has_mask], np.int32), trt.PluginFieldType.INT32) + pfc = trt.PluginFieldCollection([pf_hidden_size, pf_num_heads, pf_has_mask, pf_type]) qkv2ctx_plug = qkv2_plg_creator.create_plugin("qkv2ctx", pfc) qkv_in = [mult_all.get_output(0)] @@ -205,54 +110,43 @@ def skipln(prefix, config, init_dict, network, input_tensor, skip, bias=None): idims = input_tensor.shape hidden_size = idims[2] - dtype = ixrt.float32 + dtype = trt.float32 if config.use_fp16: - dtype = ixrt.float16 + dtype = trt.float16 - pf_ld = ixrt.PluginField( - "ld", np.array([hidden_size], np.int32), ixrt.PluginFieldType.INT32 - ) + pf_ld = trt.PluginField("ld", np.array([hidden_size], np.int32), trt.PluginFieldType.INT32) wbeta = init_dict[prefix + "beta"] - pf_beta = ixrt.PluginField("beta", wbeta, ixrt.PluginFieldType.FLOAT32) + pf_beta = trt.PluginField("beta", wbeta, trt.PluginFieldType.FLOAT32) wgamma = init_dict[prefix + "gamma"] - pf_gamma = ixrt.PluginField("gamma", wgamma, ixrt.PluginFieldType.FLOAT32) - pf_type = ixrt.PluginField( - "type_id", np.array([int(dtype)], np.int32), ixrt.PluginFieldType.INT32 - ) + pf_gamma = trt.PluginField("gamma", wgamma, trt.PluginFieldType.FLOAT32) + pf_type = trt.PluginField("type_id", np.array([int(dtype)], np.int32), trt.PluginFieldType.INT32) - fields = [pf_ld, pf_beta, pf_gamma, pf_type] + fields = [pf_ld, pf_beta, pf_gamma, pf_type ] if bias is not None: - pf_bias = 
ixrt.PluginField("bias", bias, ixrt.PluginFieldType.FLOAT32) + pf_bias = trt.PluginField("bias", bias, trt.PluginFieldType.FLOAT32) fields.append(pf_bias) - pfc = ixrt.PluginFieldCollection(fields) + pfc = trt.PluginFieldCollection(fields) skipln_plug = skln_plg_creator.create_plugin("skipln", pfc) skipln_inputs = [input_tensor, skip] layer = network.add_plugin_v2(skipln_inputs, skipln_plug) return layer - def ffn_trt(prefix, config, init_dict, network, input_tensor): - # FC1 + GELU + # FC1 + GELU B_mid = init_dict[prefix + B_MID] W_mid = init_dict[prefix + W_MID] - mid_dense = network.add_fully_connected( - input_tensor, config.intermediate_size, W_mid, B_mid - ) + mid_dense = network.add_fully_connected(input_tensor, config.intermediate_size, W_mid, B_mid) - dtype = ixrt.float32 + dtype = trt.float32 if config.use_fp16: - dtype = ixrt.float16 - pf_type = ixrt.PluginField( - "type_id", np.array([int(dtype)], np.int32), ixrt.PluginFieldType.INT32 - ) - pf_ld = ixrt.PluginField( - "ld", np.array([config.hidden_size], np.int32), ixrt.PluginFieldType.INT32 - ) - - pfc = ixrt.PluginFieldCollection([pf_type, pf_ld]) + dtype = trt.float16 + pf_type = trt.PluginField("type_id", np.array([int(dtype)], np.int32), trt.PluginFieldType.INT32) + pf_ld = trt.PluginField("ld", np.array([config.hidden_size], np.int32), trt.PluginFieldType.INT32) + + pfc = trt.PluginFieldCollection([pf_type, pf_ld]) gelu_plug = gelu_plg_creator.create_plugin("gelu", pfc) gelu_inputs = [mid_dense.get_output(0)] @@ -264,61 +158,33 @@ def ffn_trt(prefix, config, init_dict, network, input_tensor): # Dense to hidden size B_lout = init_dict[prefix + B_LOUT] W_lout = init_dict[prefix + W_LOUT] - out_dense = network.add_fully_connected( - intermediate_act, config.hidden_size, W_lout, B_lout - ) + out_dense = network.add_fully_connected(intermediate_act, config.hidden_size, W_lout, B_lout) B_lout = None - out_layer = skipln( - prefix + "output_layernorm_", - config, - init_dict, - network, - out_dense.get_output(0), - input_tensor, - B_lout, - ) + out_layer = skipln(prefix + "output_layernorm_", config, init_dict, network, out_dense.get_output(0), input_tensor, B_lout) return out_layer - def ffn(prefix, config, init_dict, network, input_tensor): # FC1 + GELU B_mid = init_dict[prefix + B_MID] W_mid = init_dict[prefix + W_MID] B_lout = init_dict[prefix + B_LOUT] W_lout = init_dict[prefix + W_LOUT] - pf_out_dim = ixrt.PluginField( - "out_dims", np.array(config.hidden_size, np.int32), ixrt.PluginFieldType.INT32 - ) - pf_type = ixrt.PluginField( - "type_id", np.array(int(ixrt.float16), np.int32), ixrt.PluginFieldType.INT32 - ) - pf_W1 = ixrt.PluginField("W1", W_mid, ixrt.PluginFieldType.FLOAT32) - pf_W2 = ixrt.PluginField("W2", W_lout, ixrt.PluginFieldType.FLOAT32) - pf_B1 = ixrt.PluginField("B1", B_mid, ixrt.PluginFieldType.FLOAT32) - pf_act_type = ixrt.PluginField( - "act_type", np.array(int(3), np.int32), ixrt.PluginFieldType.INT32 - ) - pfc = ixrt.PluginFieldCollection( - [pf_out_dim, pf_type, pf_W1, pf_W2, pf_B1, pf_act_type] - ) + pf_out_dim = trt.PluginField("out_dims", np.array(config.hidden_size, np.int32), trt.PluginFieldType.INT32) + pf_type = trt.PluginField("type_id", np.array(int(trt.float16), np.int32), trt.PluginFieldType.INT32) + pf_W1 = trt.PluginField("W1", W_mid, trt.PluginFieldType.FLOAT32) + pf_W2 = trt.PluginField("W2", W_lout, trt.PluginFieldType.FLOAT32) + pf_B1 = trt.PluginField("B1", B_mid, trt.PluginFieldType.FLOAT32) + pf_act_type = trt.PluginField("act_type", np.array(int(3), np.int32), 
trt.PluginFieldType.INT32) + pfc = trt.PluginFieldCollection([pf_out_dim, pf_type, pf_W1, pf_W2, pf_B1, pf_act_type]) ffn_plug = ffn_plg_creator.create_plugin("ffn", pfc) ffn_inputs = [input_tensor] ffn_layer = network.add_plugin_v2(ffn_inputs, ffn_plug) - out_layer = skipln( - prefix + "output_layernorm_", - config, - init_dict, - network, - ffn_layer.get_output(0), - input_tensor, - B_lout, - ) + out_layer = skipln(prefix + "output_layernorm_", config, init_dict, network, ffn_layer.get_output(0), input_tensor, B_lout) return out_layer - def transformer_layer_opt(prefix, config, init_dict, network, input_tensor, imask): """ Add the transformer layer @@ -326,26 +192,16 @@ def transformer_layer_opt(prefix, config, init_dict, network, input_tensor, imas idims = input_tensor.shape hidden_size = idims[2] - context_transposed = attention_layer_opt( - prefix + "attention_", config, init_dict, network, input_tensor, imask - ) + context_transposed = attention_layer_opt(prefix + "attention_", config, init_dict, network, input_tensor, imask) attention_heads = context_transposed.get_output(0) - + # FC0 B_aout = init_dict[prefix + B_AOUT] W_aout = init_dict[prefix + W_AOUT] attention_out_fc = custom_fc(network, attention_heads, hidden_size, W_aout, B_aout) - B_aout = None - - skiplayer = skipln( - prefix + "attention_output_layernorm_", - config, - init_dict, - network, - attention_out_fc.get_output(0), - input_tensor, - B_aout, - ) + B_aout = None + + skiplayer = skipln(prefix + "attention_output_layernorm_",config, init_dict, network, attention_out_fc.get_output(0), input_tensor, B_aout) attention_ln = skiplayer.get_output(0) if config.use_trt: @@ -354,21 +210,17 @@ def transformer_layer_opt(prefix, config, init_dict, network, input_tensor, imas ffn_layer = ffn(prefix, config, init_dict, network, attention_ln) return ffn_layer - def bert_model(config, init_dict, network, input_tensor, input_mask): """ Create the bert model """ prev_input = input_tensor for layer in range(0, config.num_hidden_layers): - ss = "l{}_".format(layer) - out_layer = transformer_layer_opt( - ss, config, init_dict, network, prev_input, input_mask - ) + ss = "l{}_".format(layer) + out_layer = transformer_layer_opt(ss, config, init_dict, network, prev_input, input_mask) prev_input = out_layer.get_output(0) return prev_input - def squad_output(prefix, config, init_dict, network, input_tensor): """ Create the squad output @@ -388,98 +240,34 @@ def squad_output(prefix, config, init_dict, network, input_tensor): return OUT return dense - -def emb_layernorm( - builder, - network, - config, - weights_dict, - builder_config, - sequence_lengths, - batch_sizes, -): - input_ids = network.add_input( - name="input_ids", - dtype=ixrt.int32, - shape=( - -1 if len(batch_sizes) > 1 else batch_sizes[0], - -1 if len(sequence_lengths) > 1 else sequence_lengths[0], - ), - ) - segment_ids = network.add_input( - name="segment_ids", - dtype=ixrt.int32, - shape=( - -1 if len(batch_sizes) > 1 else batch_sizes[0], - -1 if len(sequence_lengths) > 1 else sequence_lengths[0], - ), - ) - input_mask = network.add_input( - name="input_mask", - dtype=ixrt.int32, - shape=( - -1 if len(batch_sizes) > 1 else batch_sizes[0], - -1 if len(sequence_lengths) > 1 else sequence_lengths[0], - ), - ) +def emb_layernorm(builder, network, config, weights_dict, builder_config, sequence_lengths, batch_sizes): + input_ids = network.add_input(name="input_ids", dtype=trt.int32, shape=(-1 if len(batch_sizes) > 1 else batch_sizes[0], -1 if len(sequence_lengths) > 1 else 
sequence_lengths[0])) + segment_ids = network.add_input(name="segment_ids", dtype=trt.int32, shape=(-1 if len(batch_sizes) > 1 else batch_sizes[0], -1 if len(sequence_lengths) > 1 else sequence_lengths[0])) + input_mask = network.add_input(name="input_mask", dtype=trt.int32, shape=(-1 if len(batch_sizes) > 1 else batch_sizes[0], -1 if len(sequence_lengths) > 1 else sequence_lengths[0])) if len(sequence_lengths) > 1: profile = builder.create_optimization_profile() min_shape = (batch_sizes[0], sequence_lengths[0]) opt_shape = (batch_sizes[1], sequence_lengths[1]) max_shape = (batch_sizes[2], sequence_lengths[2]) - assert ( - sequence_lengths[0] <= sequence_lengths[1] - and sequence_lengths[1] <= sequence_lengths[2] - ) - - print("set dynamic shape -> ", min_shape, opt_shape, max_shape) + assert(sequence_lengths[0] <= sequence_lengths[1] and sequence_lengths[1] <= sequence_lengths[2]) + + print('set dynamic shape -> ', min_shape, opt_shape, max_shape) profile.set_shape("input_ids", min_shape, opt_shape, max_shape) profile.set_shape("segment_ids", min_shape, opt_shape, max_shape) profile.set_shape("input_mask", min_shape, opt_shape, max_shape) builder_config.add_optimization_profile(profile) - wbeta = ixrt.PluginField( - "bert_embeddings_layernorm_beta", - weights_dict["bert_embeddings_layernorm_beta"], - ixrt.PluginFieldType.FLOAT32, - ) - - wgamma = ixrt.PluginField( - "bert_embeddings_layernorm_gamma", - weights_dict["bert_embeddings_layernorm_gamma"], - ixrt.PluginFieldType.FLOAT32, - ) - wwordemb = ixrt.PluginField( - "bert_embeddings_word_embeddings", - weights_dict["bert_embeddings_word_embeddings"], - ixrt.PluginFieldType.FLOAT32, - ) - wtokemb = ixrt.PluginField( - "bert_embeddings_token_type_embeddings", - weights_dict["bert_embeddings_token_type_embeddings"], - ixrt.PluginFieldType.FLOAT32, - ) - wposemb = ixrt.PluginField( - "bert_embeddings_position_embeddings", - weights_dict["bert_embeddings_position_embeddings"], - ixrt.PluginFieldType.FLOAT32, - ) - - output_fp16 = ixrt.PluginField( - "output_fp16", - np.array([1 if config.use_fp16 else 0]).astype(np.int32), - ixrt.PluginFieldType.INT32, - ) - mha_type = ixrt.PluginField( - "mha_type_id", - np.array([get_mha_dtype(config)], np.int32), - ixrt.PluginFieldType.INT32, - ) - - pfc = ixrt.PluginFieldCollection( - [wbeta, wgamma, wwordemb, wtokemb, wposemb, output_fp16, mha_type] - ) + wbeta = trt.PluginField("bert_embeddings_layernorm_beta", weights_dict["bert_embeddings_layernorm_beta"], trt.PluginFieldType.FLOAT32) + wgamma = trt.PluginField("bert_embeddings_layernorm_gamma", weights_dict["bert_embeddings_layernorm_gamma"], trt.PluginFieldType.FLOAT32) + wwordemb = trt.PluginField("bert_embeddings_word_embeddings", weights_dict["bert_embeddings_word_embeddings"], trt.PluginFieldType.FLOAT32) + wtokemb = trt.PluginField("bert_embeddings_token_type_embeddings", weights_dict["bert_embeddings_token_type_embeddings"], trt.PluginFieldType.FLOAT32) + wposemb = trt.PluginField("bert_embeddings_position_embeddings", weights_dict["bert_embeddings_position_embeddings"], trt.PluginFieldType.FLOAT32) + + output_fp16 = trt.PluginField("output_fp16", np.array([1 if config.use_fp16 else 0]).astype(np.int32), trt.PluginFieldType.INT32) + mha_type = trt.PluginField("mha_type_id", np.array([get_mha_dtype(config)], np.int32), trt.PluginFieldType.INT32) + + pfc = trt.PluginFieldCollection([wbeta, wgamma, wwordemb, wtokemb, wposemb, output_fp16, mha_type]) fn = emln_plg_creator.create_plugin("embeddings", pfc) if config.use_trt: @@ -489,142 +277,62 @@ 
def emb_layernorm( segment_ids.second_transpose = (1, 0) input_mask = network.add_shuffle(input_mask) input_mask.second_transpose = (1, 0) - inputs = [ - input_ids.get_output(0), - segment_ids.get_output(0), - input_mask.get_output(0), - ] + inputs = [input_ids.get_output(0), segment_ids.get_output(0), input_mask.get_output(0)] else: inputs = [input_ids, segment_ids, input_mask] emb_layer = network.add_plugin_v2(inputs, fn) return emb_layer - def build_engine(batch_sizes, sequence_lengths, config, weights_dict): - explicit_batch_flag = 1 << int(ixrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - builder = ixrt.Builder(TRT_LOGGER) - with builder.create_network( - explicit_batch_flag - ) as network, builder.create_builder_config() as builder_config: + builder = trt.Builder(TRT_LOGGER) + with builder.create_network(explicit_batch_flag) as network, builder.create_builder_config() as builder_config: if config.use_fp16: - builder_config.set_flag(ixrt.BuilderFlag.FP16) + builder_config.set_flag(trt.BuilderFlag.FP16) # Create the network - emb_layer = emb_layernorm( - builder, - network, - config, - weights_dict, - builder_config, - sequence_lengths, - batch_sizes, - ) + emb_layer = emb_layernorm(builder, network, config, weights_dict, builder_config, sequence_lengths, batch_sizes) embeddings = emb_layer.get_output(0) mask_idx = emb_layer.get_output(1) - + bert_out = bert_model(config, weights_dict, network, embeddings, mask_idx) squad_logits = squad_output("cls_", config, weights_dict, network, bert_out) squad_logits_out = squad_logits.get_output(0) - squad_logits.set_output_type(0, ixrt.float32) network.mark_output(squad_logits_out) build_start_time = time.time() - serialized_engine = builder.build_serialized_network(network, builder_config) - build_time_elapsed = time.time() - build_start_time - TRT_LOGGER.log( - TRT_LOGGER.INFO, "build serialized_engine in {:.3f} Sec".format(build_time_elapsed) - ) - return serialized_engine - + plan = builder.build_serialized_network(network, builder_config) + build_time_elapsed = (time.time() - build_start_time) + TRT_LOGGER.log(TRT_LOGGER.INFO, "build engine in {:.3f} Sec".format(build_time_elapsed)) + return plan def str2bool(v): - return v.lower() in ("yes", "true") - + return v.lower() in ('yes', 'true') def main(): - parser = argparse.ArgumentParser( - description="IxRT BERT Sample", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - parser.add_argument( - "-z", - "--use_trt", - type=str2bool, - default=False, - help="Whether to use ixrt or IxRT", - ) - parser.add_argument( - "-x", "--onnx", required=False, help="The ONNX model file path." - ) - parser.add_argument( - "-pt", "--pytorch", required=False, help="The PyTorch checkpoint file path." - ) - parser.add_argument( - "-o", - "--output", - required=True, - default="bert_base_384.engine", - help="The bert engine file, ex bert.engine", - ) - parser.add_argument( - "-b", - "--batch-size", - nargs="+", - help="Batch size(s) to optimize for. The engine will be usable with any batch size below this, but may not be optimal for smaller sizes. Can be specified multiple times to optimize for more than one batch size.", - type=int, - ) - parser.add_argument( - "-s", - "--sequence-length", - nargs="+", - help="Sequence length of the BERT model", - type=int, - ) - parser.add_argument( - "-c", - "--config-dir", - required=True, - help="The folder containing the bert_config.json, which can be downloaded e.g. 
from https://github.com/google-research/bert#pre-trained-models or by running download_models.py in dle/TensorFlow/LanguageModeling/BERT/data/pretrained_models_google", - ) - parser.add_argument( - "-f", - "--fp16", - action="store_true", - help="Indicates that inference should be run in FP16 precision", - required=False, - ) - parser.add_argument( - "-j", - "--squad-json", - default="squad/dev-v1.1.json", - help="squad json dataset used for int8 calibration", - required=False, - ) - parser.add_argument( - "-v", - "--vocab-file", - default="./pre-trained_model/uncased_L-24_H-1024_A-16/vocab.txt", - help="Path to file containing entire understandable vocab", - required=False, - ) - parser.add_argument( - "--verbose", - action="store_true", - help="Turn on verbose logger and set profiling verbosity to DETAILED", - required=False, - ) + parser = argparse.ArgumentParser(description="TensorRT BERT Sample", formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument("-z", "--use_trt", type=str2bool, default=False, help = "Whether to use tensorRT or IxRT") + parser.add_argument("-x", "--onnx", required=False, help="The ONNX model file path.") + parser.add_argument("-pt", "--pytorch", required=False, help="The PyTorch checkpoint file path.") + parser.add_argument("-o", "--output", required=True, default="bert_base_384.engine", help="The bert engine file, ex bert.engine") + parser.add_argument("-b", "--batch-size", nargs='+', help="Batch size(s) to optimize for. The engine will be usable with any batch size below this, but may not be optimal for smaller sizes. Can be specified multiple times to optimize for more than one batch size.", type=int) + parser.add_argument("-s", "--sequence-length", nargs='+', help="Sequence length of the BERT model", type=int) + parser.add_argument("-c", "--config-dir", required=True, + help="The folder containing the bert_config.json, which can be downloaded e.g. from https://github.com/google-research/bert#pre-trained-models or by running download_models.py in dle/TensorFlow/LanguageModeling/BERT/data/pretrained_models_google") + parser.add_argument("-f", "--fp16", action="store_true", help="Indicates that inference should be run in FP16 precision", required=False) + parser.add_argument("-j", "--squad-json", default="squad/dev-v1.1.json", help="squad json dataset used for int8 calibration", required=False) + parser.add_argument("-v", "--vocab-file", default="./pre-trained_model/uncased_L-24_H-1024_A-16/vocab.txt", help="Path to file containing entire understandable vocab", required=False) + parser.add_argument("--verbose", action="store_true", help="Turn on verbose logger and set profiling verbosity to DETAILED", required=False) args, _ = parser.parse_known_args() args.batch_size = args.batch_size or [1] args.sequence_length = args.sequence_length or [128] if len(args.sequence_length) not in [1, 3]: - print( - "Error: You must provide either one or three integers." 
- ) + print("Error: You must provide either one or three integers.") sys.exit(1) if len(args.batch_size) not in [1, 3]: @@ -635,9 +343,7 @@ def main(): TRT_LOGGER.min_severity = TRT_LOGGER.VERBOSE bert_config_path = args.config_dir - TRT_LOGGER.log( - TRT_LOGGER.INFO, "Using configuration file: {:}".format(bert_config_path) - ) + TRT_LOGGER.log(TRT_LOGGER.INFO, "Using configuration file: {:}".format(bert_config_path)) config = BertConfig(bert_config_path, args.fp16, args.use_trt) @@ -646,18 +352,13 @@ def main(): elif args.pytorch != None: weights_dict = load_pytorch_weights_and_quant(args.pytorch, config) else: - raise RuntimeError( - "You need either specify TF checkpoint using option --ckpt or ONNX using option --onnx to build TRT BERT model." - ) + raise RuntimeError("You need either specify TF checkpoint using option --ckpt or ONNX using option --onnx to build TRT BERT model.") - with build_engine( - args.batch_size, args.sequence_length, config, weights_dict - ) as serialized_engine: + with build_engine(args.batch_size, args.sequence_length, config, weights_dict) as serialized_engine: TRT_LOGGER.log(TRT_LOGGER.INFO, "Saving Engine to {:}".format(args.output)) with open(args.output, "wb") as fout: fout.write(serialized_engine) TRT_LOGGER.log(TRT_LOGGER.INFO, "Done.") - if __name__ == "__main__": main() diff --git a/models/nlp/plm/bert_large_squad/ixrt/helpers/data_processing.py b/models/nlp/plm/bert_large_squad/ixrt/helpers/data_processing.py index 88459ebf..712e1a61 100644 --- a/models/nlp/plm/bert_large_squad/ixrt/helpers/data_processing.py +++ b/models/nlp/plm/bert_large_squad/ixrt/helpers/data_processing.py @@ -159,14 +159,14 @@ def convert_example_to_features(doc_tokens, question_text, tokenizer, max_seq_le input_mask = [1] * len(input_ids) # Zero-pad up to the sequence length. - while len(input_ids) < max_seq_length: - input_ids.append(0) - input_mask.append(0) - segment_ids.append(0) - - assert len(input_ids) == max_seq_length - assert len(input_mask) == max_seq_length - assert len(segment_ids) == max_seq_length + # while len(input_ids) < max_seq_length: + # input_ids.append(0) + # input_mask.append(0) + # segment_ids.append(0) + + # assert len(input_ids) == max_seq_length + # assert len(input_mask) == max_seq_length + # assert len(segment_ids) == max_seq_length def create_int_feature(values): feature = np.asarray(values, dtype=np.int32, order=None) diff --git a/models/nlp/plm/bert_large_squad/ixrt/perf.py b/models/nlp/plm/bert_large_squad/ixrt/perf.py deleted file mode 100644 index 8343c95d..00000000 --- a/models/nlp/plm/bert_large_squad/ixrt/perf.py +++ /dev/null @@ -1,172 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
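The rewritten main() above accepts either one or three values for -b/--batch-size and -s/--sequence-length; when three are given they become the min/opt/max shapes of the dynamic-shape optimization profile registered for input_ids, segment_ids and input_mask, and with a single value the network is built with static shapes and no profile. A rough, standalone sketch of that mapping (the helper name and example values are illustrative, not taken from this patch):

```python
# Illustrative only: how one or three -b/-s values map to the (min, opt, max)
# shapes used for the optimization profile in the builder script above.
def profile_shapes(batch_sizes, sequence_lengths):
    """Expand 1- or 3-element lists into (min, opt, max) (batch, seq) shapes."""
    def expand(values):
        if len(values) == 1:
            # the real script skips the profile in this case and builds static shapes
            return [values[0]] * 3
        if len(values) == 3:
            assert values[0] <= values[1] <= values[2], "expected ascending min/opt/max"
            return list(values)
        raise ValueError("provide either one or three integers")

    b_min, b_opt, b_max = expand(batch_sizes)
    s_min, s_opt, s_max = expand(sequence_lengths)
    return (b_min, s_min), (b_opt, s_opt), (b_max, s_max)

if __name__ == "__main__":
    # e.g. -b 1 16 32 -s 128 256 384
    print(profile_shapes([1, 16, 32], [128, 256, 384]))
```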
- - -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import argparse -import ctypes -import time -import numpy as np -import tensorrt as trt -import pycuda.driver as cuda -import pycuda.autoinit - -import numpy as np - -TRT_LOGGER = trt.Logger(trt.Logger.ERROR) -from load_ixrt_plugin import load_ixrt_plugin - -class DeviceBuffer(object): - def __init__(self, shape, dtype=trt.int32): - self.buf = cuda.mem_alloc(trt.volume(shape) * 4) - - def binding(self): - return int(self.buf) - - def free(self): - self.buf.free() - - -def main(): - parser = argparse.ArgumentParser(description='BERT Inference Benchmark') - parser.add_argument("-z", "--use_trt", action="store_false", help="Whether to use tensorRT or IxRT") - parser.add_argument("-e", "--engine", help='Path to BERT TensorRT engine') - parser.add_argument('-b', '--batch-size', default=[], action="append", help='Batch size(s) to benchmark. Can be specified multiple times for more than one batch size. This script assumes that the engine has been built with one optimization profile for each batch size, and that these profiles are in order of increasing batch size.', type=int) - parser.add_argument('-s', '--sequence-length', default=128, help='Sequence length of the BERT model', type=int) - parser.add_argument('-i', '--iterations', default=200, help='Number of iterations to run when benchmarking each batch size.', type=int) - parser.add_argument('-w', '--warm-up-runs', default=10, help='Number of iterations to run prior to benchmarking.', type=int) - parser.add_argument('-d', '--duration', default=0.0, help='Minimal number of seconds to run when benchmarking each batch size.', type=float) - parser.add_argument('-r', '--random-seed', required=False, default=12345, help='Random seed.', type=int) - args, _ = parser.parse_known_args() - args.batch_size = args.batch_size or [1] - - # Import necessary plugins for BERT TensorRT - load_ixrt_plugin(TRT_LOGGER) - - with open(args.engine, 'rb') as f: - runtime = trt.Runtime(TRT_LOGGER) - engine = runtime.deserialize_cuda_engine(f.read()) - context = engine.create_execution_context() - - # Allocate buffers large enough to store the largest batch size - max_input_shape = (max(args.batch_size), args.sequence_length) - max_output_shape = (max(args.batch_size), args.sequence_length, 2, 1, 1) - buffers = [ - DeviceBuffer(max_input_shape), - DeviceBuffer(max_input_shape), - DeviceBuffer(max_input_shape), - DeviceBuffer(max_output_shape) - ] - - # Prepare random input - pseudo_vocab_size = 30522 - pseudo_type_vocab_size = 2 - np.random.seed(args.random_seed) - test_word_ids = np.random.randint(0, pseudo_vocab_size, (max(args.batch_size), args.sequence_length), dtype=np.int32) - test_segment_ids = np.random.randint(0, pseudo_type_vocab_size, (max(args.batch_size), args.sequence_length), dtype=np.int32) - test_input_mask = np.ones((max(args.batch_size), args.sequence_length), dtype=np.int32) - - # Copy input h2d - cuda.memcpy_htod(buffers[0].buf, test_word_ids.ravel()) - 
cuda.memcpy_htod(buffers[1].buf, test_segment_ids.ravel()) - cuda.memcpy_htod(buffers[2].buf, test_input_mask.ravel()) - - num_binding_per_profile = engine.num_bindings // engine.num_optimization_profiles - - bench_times = {} - - stream = cuda.Stream() - for batch_size in sorted(args.batch_size): - # # Select engine profile - selected_profile = -1 - for idx in range(engine.num_optimization_profiles): - profile_shape = engine.get_profile_shape(idx, idx * num_binding_per_profile) - if profile_shape[0][0] <= batch_size and profile_shape[2][0] >= batch_size and profile_shape[0][1] <= args.sequence_length and profile_shape[2][1] >= args.sequence_length: - selected_profile = idx - break - if selected_profile == -1: - raise RuntimeError("None of the dynamic shape profiles meets the requirement batch = {} and sequence = {}.".format(batch_size, args.sequence_length)) - context.set_optimization_profile_async(selected_profile, stream.handle) - - # Each profile has unique bindings - binding_idx_offset = selected_profile * num_binding_per_profile - bindings = [0] * binding_idx_offset + [buf.binding() for buf in buffers] - - shapes = { - 0 : (batch_size, args.sequence_length), - 1 : (batch_size, args.sequence_length), - 2 : (batch_size, args.sequence_length), - } - - for binding, shape in shapes.items(): - context.set_binding_shape(binding, shape) - assert context.all_binding_shapes_specified - - # Inference - total_time = 0 - start = cuda.Event() - end = cuda.Event() - - # Warmup - for _ in range(args.warm_up_runs): - context.execute_async_v2(bindings=bindings, stream_handle=stream.handle) - stream.synchronize() - - # Timing loop - times = [] - actual_iterations = 0 - start_time = time.time() - while actual_iterations < args.iterations or (time.time() - start_time) < args.duration: - start.record(stream) - context.execute_async_v2(bindings=bindings, stream_handle=stream.handle) - end.record(stream) - stream.synchronize() - times.append(end.time_since(start)) - actual_iterations += 1 - - # Compute average time, 95th percentile time and 99th percentile time. 
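The perf.py being removed above selects an optimization profile by checking whether the requested batch size and sequence length fall inside a profile's min/max bounds. A minimal, TensorRT-free sketch of that selection rule (the profiles structure and the example values below are invented for illustration):

```python
# Illustrative stand-in for the profile-selection loop in the deleted perf.py.
# `profiles` mimics engine.get_profile_shape(...): (min, opt, max) (batch, seq) tuples.
def select_profile(profiles, batch_size, seq_len):
    for idx, (min_shape, _opt_shape, max_shape) in enumerate(profiles):
        if (min_shape[0] <= batch_size <= max_shape[0]
                and min_shape[1] <= seq_len <= max_shape[1]):
            return idx
    raise RuntimeError(
        "no profile covers batch={} and sequence={}".format(batch_size, seq_len))

if __name__ == "__main__":
    profiles = [((1, 128), (8, 128), (16, 128)), ((1, 384), (16, 384), (32, 384))]
    print(select_profile(profiles, 32, 384))  # -> 1
```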
- bench_times[batch_size] = times - - [b.free() for b in buffers] - - for batch_size, times in bench_times.items(): - total_time = sum(times) - avg_time = total_time / float(actual_iterations) - times.sort() - percentile95 = times[int(actual_iterations * 0.95)] - percentile99 = times[int(actual_iterations * 0.99)] - print("Running {:} iterations with Batch Size: {:}\n\tTotal Time: {:} ms \tAverage Time: {:} ms\t95th Percentile Time: {:} ms\t99th Percentile Time: {:}".format(actual_iterations, batch_size, total_time, avg_time, percentile95, percentile99)) - - del context - del engine - -if __name__ == '__main__': - main() -- Gitee From 3bf5e336f67e49a9b889993aff1739665933af4c Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Mon, 29 Dec 2025 17:46:15 +0800 Subject: [PATCH 14/19] update igie dependent --- models/cv/object_detection/yolov3_sample/igie/ci/prepare.sh | 2 +- models/cv/object_detection/yolov5_sample/igie/ci/prepare.sh | 2 +- models/cv/object_detection/yolov7_sample/igie/ci/prepare.sh | 2 +- models/cv/object_detection/yolox_sample/igie/ci/prepare.sh | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/models/cv/object_detection/yolov3_sample/igie/ci/prepare.sh b/models/cv/object_detection/yolov3_sample/igie/ci/prepare.sh index 4cfeb6e4..2034e9e8 100644 --- a/models/cv/object_detection/yolov3_sample/igie/ci/prepare.sh +++ b/models/cv/object_detection/yolov3_sample/igie/ci/prepare.sh @@ -16,7 +16,7 @@ set -x -pip3 install tqdm +pip3 install tqdm onnxsim pycuda pip3 install pycocotools opencv_python==4.6.0.66 mkdir -p checkpoints ln -s /mnt/deepspark/data/datasets/corex-inference-data-4.0.0/checkpoints/yolov3/yolov3.onnx ./checkpoints/ \ No newline at end of file diff --git a/models/cv/object_detection/yolov5_sample/igie/ci/prepare.sh b/models/cv/object_detection/yolov5_sample/igie/ci/prepare.sh index fb897d45..2662da09 100644 --- a/models/cv/object_detection/yolov5_sample/igie/ci/prepare.sh +++ b/models/cv/object_detection/yolov5_sample/igie/ci/prepare.sh @@ -16,7 +16,7 @@ set -x -pip3 install pycocotools +pip3 install pycocotools onnxsim pycuda pip3 install tqdm pip3 install opencv-python==4.6.0.66 diff --git a/models/cv/object_detection/yolov7_sample/igie/ci/prepare.sh b/models/cv/object_detection/yolov7_sample/igie/ci/prepare.sh index 79816c74..543e7e36 100644 --- a/models/cv/object_detection/yolov7_sample/igie/ci/prepare.sh +++ b/models/cv/object_detection/yolov7_sample/igie/ci/prepare.sh @@ -16,7 +16,7 @@ set -x -pip3 install pycocotools +pip3 install pycocotools onnxsim pycuda pip3 install tqdm pip3 install opencv-python==4.6.0.66 diff --git a/models/cv/object_detection/yolox_sample/igie/ci/prepare.sh b/models/cv/object_detection/yolox_sample/igie/ci/prepare.sh index c26ef6af..ca7bb945 100644 --- a/models/cv/object_detection/yolox_sample/igie/ci/prepare.sh +++ b/models/cv/object_detection/yolox_sample/igie/ci/prepare.sh @@ -25,7 +25,7 @@ else echo "Not Support Os" fi -pip3 install pycocotools +pip3 install pycocotools onnxsim pycuda pip3 install loguru pip3 install tabulate pip3 install tqdm -- Gitee From 9c3d623faa48c9306d6b4ec86d74bba0cf3f0147 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Tue, 30 Dec 2025 09:42:09 +0800 Subject: [PATCH 15/19] fix yolox dataset path --- .../igie/scripts/infer_yolox_sample_int8_accuracy.sh | 2 +- .../igie/scripts/infer_yolox_sample_int8_performance.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/models/cv/object_detection/yolox_sample/igie/scripts/infer_yolox_sample_int8_accuracy.sh 
b/models/cv/object_detection/yolox_sample/igie/scripts/infer_yolox_sample_int8_accuracy.sh index 50e862a9..7c23bc05 100644 --- a/models/cv/object_detection/yolox_sample/igie/scripts/infer_yolox_sample_int8_accuracy.sh +++ b/models/cv/object_detection/yolox_sample/igie/scripts/infer_yolox_sample_int8_accuracy.sh @@ -31,7 +31,7 @@ do done PROJ_DIR=./ -DATASETS_DIR="${PROJ_DIR}/data/datasets/coco" +DATASETS_DIR="${PROJ_DIR}/coco" COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json EVAL_DIR=${DATASETS_DIR}/images/val2017 CHECKPOINTS_DIR="${PROJ_DIR}/checkpoints" diff --git a/models/cv/object_detection/yolox_sample/igie/scripts/infer_yolox_sample_int8_performance.sh b/models/cv/object_detection/yolox_sample/igie/scripts/infer_yolox_sample_int8_performance.sh index 6c47e695..42ea520d 100644 --- a/models/cv/object_detection/yolox_sample/igie/scripts/infer_yolox_sample_int8_performance.sh +++ b/models/cv/object_detection/yolox_sample/igie/scripts/infer_yolox_sample_int8_performance.sh @@ -32,7 +32,7 @@ do done PROJ_DIR=./ -DATASETS_DIR="${PROJ_DIR}/data/datasets/coco" +DATASETS_DIR="${PROJ_DIR}/coco" COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json EVAL_DIR=${DATASETS_DIR}/images/val2017 CHECKPOINTS_DIR="${PROJ_DIR}/checkpoints" -- Gitee From 98301afc1dcb5304c63590afc799d60bd913075d Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Tue, 30 Dec 2025 11:08:19 +0800 Subject: [PATCH 16/19] update ixrt yolox yolov5s and igie yolov5s --- .../object_detection/yolov5s/igie/README.md | 93 ------- .../igie/build_engine.py | 0 .../igie/build_nms_engine.py | 0 .../igie/calibration_dataset.py | 0 .../igie/ci/prepare.sh | 19 +- .../igie/coco_labels.py | 0 .../igie/common.py | 0 .../igie/config/YOLOV5S_CONFIG | 2 +- .../igie/cut_model.py | 0 .../igie/datasets/__init__.py | 0 .../igie/datasets/coco.py | 0 .../igie/datasets/common.py | 0 .../igie/datasets/post_process.py | 0 .../igie/datasets/pre_process.py | 0 .../igie/datasets/vision.py | 0 .../igie/deploy.py | 0 .../igie/inference.py | 0 .../igie/load_ixrt_plugin.py | 0 .../igie/modify_batchsize.py | 0 .../{yolov5s => yolov5s_sample}/igie/quant.py | 3 + .../infer_yolov5s_sample_fp16_accuracy.sh} | 0 .../infer_yolov5s_sample_fp16_performance.sh} | 2 +- .../igie/simplify_model.py | 0 .../yolov5s_sample/ixrt/README.md | 25 ++ .../yolov5s_sample/ixrt/build_engine.py | 43 +++ .../yolov5s_sample/ixrt/build_nms_engine.py | 81 ++++++ .../ixrt/calibration_dataset.py | 31 +++ .../yolov5s_sample/ixrt/ci/prepare.sh | 24 ++ .../yolov5s_sample/ixrt/coco_labels.py | 89 ++++++ .../yolov5s_sample/ixrt/common.py | 86 ++++++ .../yolov5s_sample/ixrt/config/YOLOV5S_CONFIG | 49 ++++ .../yolov5s_sample/ixrt/cut_model.py | 16 ++ .../yolov5s_sample/ixrt/datasets/__init__.py | 0 .../yolov5s_sample/ixrt/datasets/coco.py | 116 ++++++++ .../yolov5s_sample/ixrt/datasets/common.py | 66 +++++ .../ixrt/datasets/post_process.py | 115 ++++++++ .../ixrt/datasets/pre_process.py | 56 ++++ .../yolov5s_sample/ixrt/datasets/vision.py | 136 +++++++++ .../yolov5s_sample/ixrt/deploy.py | 134 +++++++++ .../yolov5s_sample/ixrt/inference.py | 260 +++++++++++++++++ .../yolov5s_sample/ixrt/load_ixrt_plugin.py | 12 + .../yolov5s_sample/ixrt/modify_batchsize.py | 37 +++ .../yolov5s_sample/ixrt/quant.py | 55 ++++ .../infer_yolov5s_sample_fp16_accuracy.sh | 208 ++++++++++++++ .../infer_yolov5s_sample_fp16_performance.sh | 208 ++++++++++++++ .../yolov5s_sample/ixrt/simplify_model.py | 21 ++ .../yolox_sample/ixrt/build_engine.py | 43 +++ .../yolox_sample/ixrt/build_nms_engine.py | 81 ++++++ 
.../yolox_sample/ixrt/calibration_dataset.py | 29 ++ .../yolox_sample/ixrt/ci/prepare.sh | 36 +++ .../yolox_sample/ixrt/coco_labels.py | 89 ++++++ .../yolox_sample/ixrt/common.py | 86 ++++++ .../yolox_sample/ixrt/config/YOLOXM_CONFIG | 56 ++++ .../yolox_sample/ixrt/cut_model.py | 16 ++ .../yolox_sample/ixrt/datasets/__init__.py | 0 .../yolox_sample/ixrt/datasets/coco.py | 116 ++++++++ .../yolox_sample/ixrt/datasets/common.py | 66 +++++ .../ixrt/datasets/post_process.py | 115 ++++++++ .../yolox_sample/ixrt/datasets/pre_process.py | 56 ++++ .../yolox_sample/ixrt/datasets/vision.py | 136 +++++++++ .../yolox_sample/ixrt/deploy.py | 135 +++++++++ .../yolox_sample/ixrt/inference.py | 261 ++++++++++++++++++ .../yolox_sample/ixrt/load_ixrt_plugin.py | 12 + .../yolox_sample/ixrt/modify_batchsize.py | 37 +++ .../yolox_sample/ixrt/quant.py | 55 ++++ .../infer_yolox_sample_int8_accuracy.sh | 209 ++++++++++++++ .../infer_yolox_sample_int8_performance.sh | 210 ++++++++++++++ .../yolox_sample/ixrt/simplify_model.py | 21 ++ tests/model_info.json | 74 ++++- 69 files changed, 3812 insertions(+), 114 deletions(-) delete mode 100644 models/cv/object_detection/yolov5s/igie/README.md rename models/cv/object_detection/{yolov5s => yolov5s_sample}/igie/build_engine.py (100%) rename models/cv/object_detection/{yolov5s => yolov5s_sample}/igie/build_nms_engine.py (100%) rename models/cv/object_detection/{yolov5s => yolov5s_sample}/igie/calibration_dataset.py (100%) rename models/cv/object_detection/{yolov5s => yolov5s_sample}/igie/ci/prepare.sh (55%) rename models/cv/object_detection/{yolov5s => yolov5s_sample}/igie/coco_labels.py (100%) rename models/cv/object_detection/{yolov5s => yolov5s_sample}/igie/common.py (100%) rename models/cv/object_detection/{yolov5s => yolov5s_sample}/igie/config/YOLOV5S_CONFIG (94%) rename models/cv/object_detection/{yolov5s => yolov5s_sample}/igie/cut_model.py (100%) rename models/cv/object_detection/{yolov5s => yolov5s_sample}/igie/datasets/__init__.py (100%) rename models/cv/object_detection/{yolov5s => yolov5s_sample}/igie/datasets/coco.py (100%) rename models/cv/object_detection/{yolov5s => yolov5s_sample}/igie/datasets/common.py (100%) rename models/cv/object_detection/{yolov5s => yolov5s_sample}/igie/datasets/post_process.py (100%) rename models/cv/object_detection/{yolov5s => yolov5s_sample}/igie/datasets/pre_process.py (100%) rename models/cv/object_detection/{yolov5s => yolov5s_sample}/igie/datasets/vision.py (100%) rename models/cv/object_detection/{yolov5s => yolov5s_sample}/igie/deploy.py (100%) rename models/cv/object_detection/{yolov5s => yolov5s_sample}/igie/inference.py (100%) rename models/cv/object_detection/{yolov5s => yolov5s_sample}/igie/load_ixrt_plugin.py (100%) rename models/cv/object_detection/{yolov5s => yolov5s_sample}/igie/modify_batchsize.py (100%) rename models/cv/object_detection/{yolov5s => yolov5s_sample}/igie/quant.py (93%) rename models/cv/object_detection/{yolov5s/igie/scripts/infer_yolov5s_fp16_accuracy.sh => yolov5s_sample/igie/scripts/infer_yolov5s_sample_fp16_accuracy.sh} (100%) rename models/cv/object_detection/{yolov5s/igie/scripts/infer_yolov5s_fp16_performance.sh => yolov5s_sample/igie/scripts/infer_yolov5s_sample_fp16_performance.sh} (99%) rename models/cv/object_detection/{yolov5s => yolov5s_sample}/igie/simplify_model.py (100%) create mode 100644 models/cv/object_detection/yolov5s_sample/ixrt/README.md create mode 100644 models/cv/object_detection/yolov5s_sample/ixrt/build_engine.py create mode 100644 
models/cv/object_detection/yolov5s_sample/ixrt/build_nms_engine.py create mode 100644 models/cv/object_detection/yolov5s_sample/ixrt/calibration_dataset.py create mode 100644 models/cv/object_detection/yolov5s_sample/ixrt/ci/prepare.sh create mode 100644 models/cv/object_detection/yolov5s_sample/ixrt/coco_labels.py create mode 100644 models/cv/object_detection/yolov5s_sample/ixrt/common.py create mode 100644 models/cv/object_detection/yolov5s_sample/ixrt/config/YOLOV5S_CONFIG create mode 100644 models/cv/object_detection/yolov5s_sample/ixrt/cut_model.py create mode 100644 models/cv/object_detection/yolov5s_sample/ixrt/datasets/__init__.py create mode 100644 models/cv/object_detection/yolov5s_sample/ixrt/datasets/coco.py create mode 100644 models/cv/object_detection/yolov5s_sample/ixrt/datasets/common.py create mode 100644 models/cv/object_detection/yolov5s_sample/ixrt/datasets/post_process.py create mode 100644 models/cv/object_detection/yolov5s_sample/ixrt/datasets/pre_process.py create mode 100644 models/cv/object_detection/yolov5s_sample/ixrt/datasets/vision.py create mode 100644 models/cv/object_detection/yolov5s_sample/ixrt/deploy.py create mode 100644 models/cv/object_detection/yolov5s_sample/ixrt/inference.py create mode 100644 models/cv/object_detection/yolov5s_sample/ixrt/load_ixrt_plugin.py create mode 100644 models/cv/object_detection/yolov5s_sample/ixrt/modify_batchsize.py create mode 100644 models/cv/object_detection/yolov5s_sample/ixrt/quant.py create mode 100644 models/cv/object_detection/yolov5s_sample/ixrt/scripts/infer_yolov5s_sample_fp16_accuracy.sh create mode 100644 models/cv/object_detection/yolov5s_sample/ixrt/scripts/infer_yolov5s_sample_fp16_performance.sh create mode 100644 models/cv/object_detection/yolov5s_sample/ixrt/simplify_model.py create mode 100644 models/cv/object_detection/yolox_sample/ixrt/build_engine.py create mode 100644 models/cv/object_detection/yolox_sample/ixrt/build_nms_engine.py create mode 100644 models/cv/object_detection/yolox_sample/ixrt/calibration_dataset.py create mode 100644 models/cv/object_detection/yolox_sample/ixrt/ci/prepare.sh create mode 100644 models/cv/object_detection/yolox_sample/ixrt/coco_labels.py create mode 100644 models/cv/object_detection/yolox_sample/ixrt/common.py create mode 100644 models/cv/object_detection/yolox_sample/ixrt/config/YOLOXM_CONFIG create mode 100644 models/cv/object_detection/yolox_sample/ixrt/cut_model.py create mode 100644 models/cv/object_detection/yolox_sample/ixrt/datasets/__init__.py create mode 100644 models/cv/object_detection/yolox_sample/ixrt/datasets/coco.py create mode 100644 models/cv/object_detection/yolox_sample/ixrt/datasets/common.py create mode 100644 models/cv/object_detection/yolox_sample/ixrt/datasets/post_process.py create mode 100644 models/cv/object_detection/yolox_sample/ixrt/datasets/pre_process.py create mode 100644 models/cv/object_detection/yolox_sample/ixrt/datasets/vision.py create mode 100644 models/cv/object_detection/yolox_sample/ixrt/deploy.py create mode 100644 models/cv/object_detection/yolox_sample/ixrt/inference.py create mode 100644 models/cv/object_detection/yolox_sample/ixrt/load_ixrt_plugin.py create mode 100644 models/cv/object_detection/yolox_sample/ixrt/modify_batchsize.py create mode 100644 models/cv/object_detection/yolox_sample/ixrt/quant.py create mode 100644 models/cv/object_detection/yolox_sample/ixrt/scripts/infer_yolox_sample_int8_accuracy.sh create mode 100644 
models/cv/object_detection/yolox_sample/ixrt/scripts/infer_yolox_sample_int8_performance.sh create mode 100644 models/cv/object_detection/yolox_sample/ixrt/simplify_model.py diff --git a/models/cv/object_detection/yolov5s/igie/README.md b/models/cv/object_detection/yolov5s/igie/README.md deleted file mode 100644 index 1c97e8f5..00000000 --- a/models/cv/object_detection/yolov5s/igie/README.md +++ /dev/null @@ -1,93 +0,0 @@ -# YOLOv5s (IGIE) - -## Model Description - -The YOLOv5 architecture is designed for efficient and accurate object detection tasks in real-time scenarios. It employs a single convolutional neural network to simultaneously predict bounding boxes and class probabilities for multiple objects within an image. The YOLOV5s is a tiny model. - -## Supported Environments - -| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | -| :----: | :----: | :----: | -| MR-V100 | 4.3.0 | 25.12 | - -## Model Preparation - -### Prepare Resources - -Pretrained model: - -Dataset: - -- to download the labels dataset. -- to download the validation dataset. -- to download the train dataset. - -```bash -unzip -q -d ./ coco2017labels.zip -unzip -q -d ./coco/images/ train2017.zip -unzip -q -d ./coco/images/ val2017.zip - -coco -├── annotations -│   └── instances_val2017.json -├── images -│   ├── train2017 -│   └── val2017 -├── labels -│   ├── train2017 -│   └── val2017 -├── LICENSE -├── README.txt -├── test-dev2017.txt -├── train2017.cache -├── train2017.txt -├── val2017.cache -└── val2017.txt -``` - -### Install Dependencies - -```bash -pip3 install -r ../../ixrt_common/requirements.txt -``` - -### Model Conversion - -```bash -mkdir checkpoints -git clone -b v6.1 --depth 1 https://github.com/ultralytics/yolov5 - -# 有一些环境需要安装 -wget https://ultralytics.com/assets/Arial.ttf -cp Arial.ttf /root/.config/Ultralytics/Arial.ttf - -# 转换为onnx (具体实现可以参考 export.py 中的 export_onnx 函数) -pushd ./yolov5 -# set weights_only=False to be comaptible with pytorch 2.7 -sed -i '96 s/map_location)/map_location, weights_only=False)/' ./models/experimental.py - -python3 export.py --weights yolov5s.pt --include onnx --opset 11 --batch-size 32 -mv yolov5s.onnx ../checkpoints -popd -``` - -## Model Inference - -```bash -export DATASETS_DIR=./coco/ -``` - -### FP16 - -```bash -# Accuracy -bash scripts/infer_yolov5s_fp16_accuracy.sh -# Performance -bash scripts/infer_yolov5s_fp16_performance.sh -``` - -## Model Results - -| Model | BatchSize | Precision | FPS | MAP@0.5 | MAP@0.5:0.95 | -| :----: | :----: | :----: | :----: | :----: | :----: | -| YOLOv5s | 32 | FP16 | 1112.66 | 0.565 | 0.370 | diff --git a/models/cv/object_detection/yolov5s/igie/build_engine.py b/models/cv/object_detection/yolov5s_sample/igie/build_engine.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/build_engine.py rename to models/cv/object_detection/yolov5s_sample/igie/build_engine.py diff --git a/models/cv/object_detection/yolov5s/igie/build_nms_engine.py b/models/cv/object_detection/yolov5s_sample/igie/build_nms_engine.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/build_nms_engine.py rename to models/cv/object_detection/yolov5s_sample/igie/build_nms_engine.py diff --git a/models/cv/object_detection/yolov5s/igie/calibration_dataset.py b/models/cv/object_detection/yolov5s_sample/igie/calibration_dataset.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/calibration_dataset.py rename to 
models/cv/object_detection/yolov5s_sample/igie/calibration_dataset.py diff --git a/models/cv/object_detection/yolov5s/igie/ci/prepare.sh b/models/cv/object_detection/yolov5s_sample/igie/ci/prepare.sh similarity index 55% rename from models/cv/object_detection/yolov5s/igie/ci/prepare.sh rename to models/cv/object_detection/yolov5s_sample/igie/ci/prepare.sh index b53ca6d1..34ec0222 100644 --- a/models/cv/object_detection/yolov5s/igie/ci/prepare.sh +++ b/models/cv/object_detection/yolov5s_sample/igie/ci/prepare.sh @@ -16,20 +16,9 @@ set -x -pip3 install -r ../../ixrt_common/requirements.txt +pip3 install pycocotools onnxsim pycuda +pip3 install tqdm +pip3 install opencv-python==4.6.0.66 mkdir -p checkpoints -cp -r /mnt/deepspark/data/3rd_party/yolov5 ./ - -cd yolov5/ - -# 有一些环境需要安装 -# wget https://ultralytics.com/assets/Arial.ttf -mkdir -p /root/.config/Ultralytics -cp /mnt/deepspark/data/3rd_party/Arial.ttf /root/.config/Ultralytics/Arial.ttf - -ln -s /mnt/deepspark/data/checkpoints/yolov5s.pt ./ -# 转换为onnx (具体实现可以参考 export.py 中的 export_onnx 函数) -python3 export.py --weights yolov5s.pt --include onnx --opset 11 --batch-size 32 -mv yolov5s.onnx ../checkpoints -cd .. +ln -s /mnt/deepspark/data/datasets/corex-inference-data-4.0.0/checkpoints/yolov5s/yolov5s.onnx ./checkpoints/ \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s/igie/coco_labels.py b/models/cv/object_detection/yolov5s_sample/igie/coco_labels.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/coco_labels.py rename to models/cv/object_detection/yolov5s_sample/igie/coco_labels.py diff --git a/models/cv/object_detection/yolov5s/igie/common.py b/models/cv/object_detection/yolov5s_sample/igie/common.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/common.py rename to models/cv/object_detection/yolov5s_sample/igie/common.py diff --git a/models/cv/object_detection/yolov5s/igie/config/YOLOV5S_CONFIG b/models/cv/object_detection/yolov5s_sample/igie/config/YOLOV5S_CONFIG similarity index 94% rename from models/cv/object_detection/yolov5s/igie/config/YOLOV5S_CONFIG rename to models/cv/object_detection/yolov5s_sample/igie/config/YOLOV5S_CONFIG index c3f46cf8..1330489a 100644 --- a/models/cv/object_detection/yolov5s/igie/config/YOLOV5S_CONFIG +++ b/models/cv/object_detection/yolov5s_sample/igie/config/YOLOV5S_CONFIG @@ -18,7 +18,7 @@ MODEL_INPUT_NAMES=(images) LAYER_FUSION=1 DECODER_FASTER=1 DECODER_NUM_CLASS=80 -DECODER_INPUT_NAMES=(/model.24/m.0/Conv_output_0 /model.24/m.1/Conv_output_0 /model.24/m.2/Conv_output_0) +DECODER_INPUT_NAMES=(326 364 402) DECODER_8_ANCHOR=(10 13 16 30 33 23) DECODER_16_ANCHOR=(30 61 62 45 59 119) DECODER_32_ANCHOR=(116 90 156 198 373 326) diff --git a/models/cv/object_detection/yolov5s/igie/cut_model.py b/models/cv/object_detection/yolov5s_sample/igie/cut_model.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/cut_model.py rename to models/cv/object_detection/yolov5s_sample/igie/cut_model.py diff --git a/models/cv/object_detection/yolov5s/igie/datasets/__init__.py b/models/cv/object_detection/yolov5s_sample/igie/datasets/__init__.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/datasets/__init__.py rename to models/cv/object_detection/yolov5s_sample/igie/datasets/__init__.py diff --git a/models/cv/object_detection/yolov5s/igie/datasets/coco.py b/models/cv/object_detection/yolov5s_sample/igie/datasets/coco.py similarity index 100% rename from 
models/cv/object_detection/yolov5s/igie/datasets/coco.py rename to models/cv/object_detection/yolov5s_sample/igie/datasets/coco.py diff --git a/models/cv/object_detection/yolov5s/igie/datasets/common.py b/models/cv/object_detection/yolov5s_sample/igie/datasets/common.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/datasets/common.py rename to models/cv/object_detection/yolov5s_sample/igie/datasets/common.py diff --git a/models/cv/object_detection/yolov5s/igie/datasets/post_process.py b/models/cv/object_detection/yolov5s_sample/igie/datasets/post_process.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/datasets/post_process.py rename to models/cv/object_detection/yolov5s_sample/igie/datasets/post_process.py diff --git a/models/cv/object_detection/yolov5s/igie/datasets/pre_process.py b/models/cv/object_detection/yolov5s_sample/igie/datasets/pre_process.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/datasets/pre_process.py rename to models/cv/object_detection/yolov5s_sample/igie/datasets/pre_process.py diff --git a/models/cv/object_detection/yolov5s/igie/datasets/vision.py b/models/cv/object_detection/yolov5s_sample/igie/datasets/vision.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/datasets/vision.py rename to models/cv/object_detection/yolov5s_sample/igie/datasets/vision.py diff --git a/models/cv/object_detection/yolov5s/igie/deploy.py b/models/cv/object_detection/yolov5s_sample/igie/deploy.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/deploy.py rename to models/cv/object_detection/yolov5s_sample/igie/deploy.py diff --git a/models/cv/object_detection/yolov5s/igie/inference.py b/models/cv/object_detection/yolov5s_sample/igie/inference.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/inference.py rename to models/cv/object_detection/yolov5s_sample/igie/inference.py diff --git a/models/cv/object_detection/yolov5s/igie/load_ixrt_plugin.py b/models/cv/object_detection/yolov5s_sample/igie/load_ixrt_plugin.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/load_ixrt_plugin.py rename to models/cv/object_detection/yolov5s_sample/igie/load_ixrt_plugin.py diff --git a/models/cv/object_detection/yolov5s/igie/modify_batchsize.py b/models/cv/object_detection/yolov5s_sample/igie/modify_batchsize.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/modify_batchsize.py rename to models/cv/object_detection/yolov5s_sample/igie/modify_batchsize.py diff --git a/models/cv/object_detection/yolov5s/igie/quant.py b/models/cv/object_detection/yolov5s_sample/igie/quant.py similarity index 93% rename from models/cv/object_detection/yolov5s/igie/quant.py rename to models/cv/object_detection/yolov5s_sample/igie/quant.py index bcf5d9b6..d73212ca 100644 --- a/models/cv/object_detection/yolov5s/igie/quant.py +++ b/models/cv/object_detection/yolov5s_sample/igie/quant.py @@ -5,6 +5,9 @@ import numpy as np from tensorrt.deploy import static_quantize import torch +import sys +sys.path.append("/home/haoyuan.chen/temp/inferencesamples/benchmarks/cv/detection/yolov3/tensorrt") +print(sys.path) from calibration_dataset import create_dataloaders def setseed(seed=42): diff --git a/models/cv/object_detection/yolov5s/igie/scripts/infer_yolov5s_fp16_accuracy.sh b/models/cv/object_detection/yolov5s_sample/igie/scripts/infer_yolov5s_sample_fp16_accuracy.sh similarity index 100% rename from 
models/cv/object_detection/yolov5s/igie/scripts/infer_yolov5s_fp16_accuracy.sh rename to models/cv/object_detection/yolov5s_sample/igie/scripts/infer_yolov5s_sample_fp16_accuracy.sh diff --git a/models/cv/object_detection/yolov5s/igie/scripts/infer_yolov5s_fp16_performance.sh b/models/cv/object_detection/yolov5s_sample/igie/scripts/infer_yolov5s_sample_fp16_performance.sh similarity index 99% rename from models/cv/object_detection/yolov5s/igie/scripts/infer_yolov5s_fp16_performance.sh rename to models/cv/object_detection/yolov5s_sample/igie/scripts/infer_yolov5s_sample_fp16_performance.sh index c1252f16..35cc5785 100644 --- a/models/cv/object_detection/yolov5s/igie/scripts/infer_yolov5s_fp16_performance.sh +++ b/models/cv/object_detection/yolov5s_sample/igie/scripts/infer_yolov5s_sample_fp16_performance.sh @@ -12,7 +12,7 @@ check_status() # Run paraments BSZ=32 WARM_UP=3 -TGT=-1 +TGT=840 LOOP_COUNT=100 RUN_MODE=FPS PRECISION=float16 diff --git a/models/cv/object_detection/yolov5s/igie/simplify_model.py b/models/cv/object_detection/yolov5s_sample/igie/simplify_model.py similarity index 100% rename from models/cv/object_detection/yolov5s/igie/simplify_model.py rename to models/cv/object_detection/yolov5s_sample/igie/simplify_model.py diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/README.md b/models/cv/object_detection/yolov5s_sample/ixrt/README.md new file mode 100644 index 00000000..05187af8 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/README.md @@ -0,0 +1,25 @@ +# This code is a generic implementation of detection networks on the COCO dataset + + +## Inference workflow (using Yolov5s as the example) +Work under the ixrt-modelzoo/executables/yolov5s directory + +1. Download the onnx file and dataset && install the dependencies +``` +bash init.sh +``` + +2. Run the scripts (the required quantization, engine-build and other steps are all included) + +``` +bash infer_yolov5s_int8_accuracy.sh --bs 32 --tgt 0.55 +bash infer_yolov5s_int8_performance.sh --bs 32 --tgt 2000 +``` + + +## How to add a new model +1. Add the model configuration +ixrt-modelzoo/benchmarks/cv/detection/general_impl/trt/config/{MODELNAME_CONFIG} +2.
添加执行脚本 ++ ixrt-modelzoo/executables/{model_name}/init.sh ++ ixrt-modelzoo/executables/{model_name}/infer_{model_name}_{precision}_{task}.sh \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/build_engine.py b/models/cv/object_detection/yolov5s_sample/ixrt/build_engine.py new file mode 100644 index 00000000..d47e45e5 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/build_engine.py @@ -0,0 +1,43 @@ +import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +def main(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + # print("precision : ", precision) + build_config.set_flag(precision) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + # engine args + parser.add_argument("--engine", type=str, default=None) + + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/build_nms_engine.py b/models/cv/object_detection/yolov5s_sample/ixrt/build_nms_engine.py new file mode 100644 index 00000000..25f0ab8a --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/build_nms_engine.py @@ -0,0 +1,81 @@ +import os +import argparse +import torch +import onnx +from onnx import helper +from onnx import TensorProto, numpy_helper +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() +def create_onnx(args): + nms = helper.make_node( + "DetectionNMS_IxRT", + name="NMS", + inputs=["nms_input"], + outputs=["nms_output0", "nms_output1"], + nMaxKeep=args.max_box_pre_img, + fIoUThresh=args.iou_thresh, + fScoreThresh=args.score_thresh + ) + graph = helper.make_graph( + nodes=[nms], + name="gpu_nms", + inputs=[ + helper.make_tensor_value_info( + "nms_input", onnx.TensorProto.FLOAT, (args.bsz, args.all_box_num, 6) + ) + ], + outputs=[ + helper.make_tensor_value_info( + "nms_output0", onnx.TensorProto.FLOAT, (args.bsz, args.max_box_pre_img, 6) + ), + helper.make_tensor_value_info( + "nms_output1", onnx.TensorProto.INT32, (args.bsz,) + ) + ], + initializer=[] + ) + + op = onnx.OperatorSetIdProto() + op.version = 13 + model = onnx.helper.make_model(graph) + + model = onnx.helper.make_model(graph, opset_imports=[op]) + onnx_path = args.path + "/nms.onnx" + onnx.save(model, onnx_path) + +def build_engine(args): + onnx_path = args.path + "/nms.onnx" + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = 
builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(onnx_path) + plan = builder.build_serialized_network(network, build_config) + + engine_path = args.path + "/nms.engine" + with open(engine_path, "wb") as f: + f.write(plan) + +def main(args): + create_onnx(args) + build_engine(args) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--bsz", type=int, default=1, help="batch size") + parser.add_argument("--path", type=str) + parser.add_argument("--all_box_num", type=int, default=25200) + parser.add_argument("--max_box_pre_img", type=int, default=1000) + parser.add_argument("--iou_thresh", type=float, default=0.6) + parser.add_argument("--score_thresh", type=float, default=0.001) + + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/calibration_dataset.py b/models/cv/object_detection/yolov5s_sample/ixrt/calibration_dataset.py new file mode 100644 index 00000000..578e013d --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/calibration_dataset.py @@ -0,0 +1,31 @@ +import os +import torch +import torchvision.datasets +from torch.utils.data import DataLoader + + + +from datasets.coco import CocoDetection + +def create_dataloaders(data_path, annFile, img_sz=640, batch_size=32, step=32, workers=2, data_process_type="yolov5"): + dataset = CocoDetection( + root=data_path, + annFile=annFile, + img_size=img_sz, + data_process_type=data_process_type + ) + calibration_dataset = dataset + num_samples = min(5000, batch_size * step) + if num_samples > 0: + calibration_dataset = torch.utils.data.Subset( + dataset, indices=range(num_samples) + ) + + calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + return calibration_dataloader \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/ci/prepare.sh b/models/cv/object_detection/yolov5s_sample/ixrt/ci/prepare.sh new file mode 100644 index 00000000..78e9936d --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/ci/prepare.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
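The calibration_dataset.py added above exposes create_dataloaders, which wraps the COCO validation images in a CocoDetection dataset and limits calibration to min(5000, batch_size * step) samples. A hypothetical invocation, with placeholder dataset paths and the QUANT_BATCHSIZE/QUANT_STEP defaults that appear later in this patch, could look like:

```python
# Hypothetical usage of the create_dataloaders helper defined above; the COCO
# paths are placeholders and must point at a real val2017 layout.
from calibration_dataset import create_dataloaders

dataloader = create_dataloaders(
    data_path="coco/images/val2017",                    # image directory (placeholder)
    annFile="coco/annotations/instances_val2017.json",  # COCO annotations (placeholder)
    img_sz=640,            # network input size (IMGSIZE in the config)
    batch_size=1,          # QUANT_BATCHSIZE
    step=32,               # QUANT_STEP: at most batch_size * step samples are used
    data_process_type="yolov5",
)

# each item mirrors CocoDetection.__getitem__: (image, origin_shape, image_id)
for images, origin_shapes, image_ids in dataloader:
    print(images.shape)    # e.g. a [1, 3, 640, 640] batch after preprocessing
    break
```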
+ +set -x + +pip3 install pycocotools onnxsim pycuda +pip3 install tqdm +pip3 install opencv-python==4.6.0.66 + +mkdir -p checkpoints +ln -s /root/data/datasets/corex-inference-data-4.0.0/checkpoints/yolov5s/yolov5s.onnx ./checkpoints/ \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/coco_labels.py b/models/cv/object_detection/yolov5s_sample/ixrt/coco_labels.py new file mode 100644 index 00000000..69d38878 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/coco_labels.py @@ -0,0 +1,89 @@ +labels = [ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", +] +def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) + return [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + +__all__ = ["labels"] diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/common.py b/models/cv/object_detection/yolov5s_sample/ixrt/common.py new file mode 100644 index 00000000..5f543555 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/common.py @@ -0,0 +1,86 @@ +import numpy as np +from tqdm import tqdm + +import tensorrt +import cuda.cuda as cuda +import cuda.cudart as cudart + +# input : [bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)] +# output : [bsz, box_num, 6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)] +def box_class85to6(input): + center_x_y = input[:, :2] + side = input[:, 2:4] + conf = input[:, 4:5] + class_id = np.argmax(input[:, 5:], axis = -1) + class_id = class_id.astype(np.float32).reshape(-1, 1) + 1 + max_prob = np.max(input[:, 5:], axis = -1).reshape(-1, 1) + x1_y1 = center_x_y - 0.5 * side + x2_y2 = center_x_y + 0.5 * side + nms_input = np.concatenate([x1_y1, x2_y2, class_id, max_prob*conf], axis = -1) + return nms_input + +def save2json(batch_img_id, pred_boxes, json_result, class_trans): + for i, boxes in enumerate(pred_boxes): + if boxes is not None: + image_id = int(batch_img_id[i]) + # have no target + if image_id == -1: + continue + for x, y, w, h, c, p in boxes: + x, y, w, h, p = float(x), float(y), float(w), float(h), float(p) + c = int(c) + json_result.append( + { + "image_id": image_id, + "category_id": class_trans[c - 1], + "bbox": [x, y, w, h], + "score": p, + } + ) + +def create_engine_context(engine_path, 
logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + err, allocation = cudart.cudaMalloc(size) + assert(err == cuda.CUresult.CUDA_SUCCESS) + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + "nbytes": size, + } + print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/config/YOLOV5S_CONFIG b/models/cv/object_detection/yolov5s_sample/ixrt/config/YOLOV5S_CONFIG new file mode 100644 index 00000000..1330489a --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/config/YOLOV5S_CONFIG @@ -0,0 +1,49 @@ +# BSZ : 构建engine以及推理时的batchsize +# IMGSIZE : 模型输入hw大小 +# RUN_MODE : [FPS, MAP] +# PRECISION : [float16, int8] +# MODEL_NAME : 生成onnx/engine的basename +# ORIGINE_MODEL : 原始onnx文件 +# COCO_GT : COCOEVAL标签文件 +# DATASET_DIR : 量化/推理数据集路径 +# CHECKPOINTS_DIR : 存放生成的onnx/engine路径 +# LAYER_FUSION : decoder部分走融合算子实现 0不融合 1融合 +# DECODER_FASTER : 有两种融合实现,faster版本速度快且可以直接对接gpu nms;另一种实现的输出和onnx保持一致. 
1:faster +IMGSIZE=640 +MODEL_NAME=yolov5s +ORIGINE_MODEL=yolov5s.onnx +DATA_PROCESS_TYPE=yolov5 +MODEL_INPUT_NAMES=(images) + +LAYER_FUSION=1 +DECODER_FASTER=1 +DECODER_NUM_CLASS=80 +DECODER_INPUT_NAMES=(326 364 402) +DECODER_8_ANCHOR=(10 13 16 30 33 23) +DECODER_16_ANCHOR=(30 61 62 45 59 119) +DECODER_32_ANCHOR=(116 90 156 198 373 326) + +# NMS CONFIG + # IOU_THRESH : iou阈值 + # SCORE_THRESH : bbox置信度阈值 + # MAX_BOX_PRE_IMG : 每张图片预测bbox的数量上限 + # ALL_BOX_NUM : nms接收每张图片的box数量 + # NMS_TYPE : GPU/CPU(TODO) +IOU_THRESH=0.6 +SCORE_THRESH=0.001 +MAX_BOX_PRE_IMG=1000 +ALL_BOX_NUM=25200 +NMS_TYPE=GPU + +# QUANT CONFIG (仅PRECISION为int8时生效) + # QUANT_OBSERVER : 量化策略,可选 [hist_percentile, percentile, minmax, entropy, ema] + # QUANT_BATCHSIZE : 量化时组dataloader的batchsize, 最好和onnx中的batchsize保持一致,有些op可能推导shape错误(比如Reshape) + # QUANT_STEP : 量化步数 + # QUANT_SEED : 随机种子 保证量化结果可复现 + # QUANT_EXIST_ONNX : 如果有其他来源的量化模型则填写 +QUANT_OBSERVER=hist_percentile +QUANT_BATCHSIZE=1 +QUANT_STEP=32 +QUANT_SEED=42 +DISABLE_QUANT_LIST=() +QUANT_EXIST_ONNX= \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/cut_model.py b/models/cv/object_detection/yolov5s_sample/ixrt/cut_model.py new file mode 100644 index 00000000..af0a3a4f --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/cut_model.py @@ -0,0 +1,16 @@ +import onnx +import argparse +from onnxsim import simplify + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--input_model", type=str) + parser.add_argument("--output_model", type=str) + parser.add_argument("--input_names", nargs='+', type=str) + parser.add_argument("--output_names", nargs='+', type=str) + args = parser.parse_args() + return args + +args = parse_args() +onnx.utils.extract_model(args.input_model, args.output_model, args.input_names, args.output_names) +print(" Cut Model Done.") \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/datasets/__init__.py b/models/cv/object_detection/yolov5s_sample/ixrt/datasets/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/datasets/coco.py b/models/cv/object_detection/yolov5s_sample/ixrt/datasets/coco.py new file mode 100644 index 00000000..7f355b84 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/datasets/coco.py @@ -0,0 +1,116 @@ +import os.path +from typing import Any, Callable, List, Optional, Tuple + +import cv2 + +from .vision import VisionDataset +from .pre_process import get_post_process +class CocoDetection(VisionDataset): + """`MS Coco Detection `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. 
+ """ + + def __init__( + self, + root: str, + annFile: str, + img_size: int, + data_process_type: str, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + transforms: Optional[Callable] = None, + + ) -> None: + super().__init__(root, transforms, transform, target_transform) + from pycocotools.coco import COCO + + self.coco = COCO(annFile) + self.ids = list(sorted(self.coco.imgs.keys())) + self.img_size = img_size + + self.transforms = get_post_process(data_process_type) + + def _load_image(self, id: int): + path = self.coco.loadImgs(id)[0]["file_name"] + data = cv2.imread(os.path.join(self.root, path)) + return data + + def _load_target(self, id: int) -> List[Any]: + return self.coco.loadAnns(self.coco.getAnnIds(id)) + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + id = self.ids[index] + image = self._load_image(id) + target = self._load_target(id) + origin_shape = image.shape[:2] + + if self.transforms is not None: + image = self.transforms(image, self.img_size) + + if len(target) > 0: + image_id = target[0]["image_id"] + else: + # have no target + image_id = -1 + return image, origin_shape, image_id + + def __len__(self) -> int: + return len(self.ids) + + +class CocoCaptions(CocoDetection): + """`MS Coco Captions `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + + Example: + + .. 
code:: python + + import torchvision.datasets as dset + import torchvision.transforms as transforms + cap = dset.CocoCaptions(root = 'dir where images are', + annFile = 'json annotation file', + transform=transforms.PILToTensor()) + + print('Number of samples: ', len(cap)) + img, target = cap[3] # load 4th sample + + print("Image Size: ", img.size()) + print(target) + + Output: :: + + Number of samples: 82783 + Image Size: (3L, 427L, 640L) + [u'A plane emitting smoke stream flying over a mountain.', + u'A plane darts across a bright blue sky behind a mountain covered in snow', + u'A plane leaves a contrail above the snowy mountain top.', + u'A mountain that has a plane flying overheard in the distance.', + u'A mountain view with a plume of smoke in the background'] + + """ + + def _load_target(self, id: int) -> List[str]: + return [ann["caption"] for ann in super()._load_target(id)] diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/datasets/common.py b/models/cv/object_detection/yolov5s_sample/ixrt/datasets/common.py new file mode 100644 index 00000000..e120e00f --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/datasets/common.py @@ -0,0 +1,66 @@ +import cv2 +import math +import numpy as np + +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + +def scale_boxes(net_shape, boxes, ori_shape, use_letterbox=False): + # Rescale boxes (xyxy) from net_shape to ori_shape + + if use_letterbox: + + gain = min( + net_shape[0] / ori_shape[0], net_shape[1] / ori_shape[1] + ) # gain = new / old + pad = (net_shape[1] - ori_shape[1] * gain) / 2, ( + net_shape[0] - ori_shape[0] * gain + ) / 2.0 + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + else: + x_scale, y_scale = net_shape[1] / ori_shape[1], net_shape[0] / ori_shape[0] + + boxes[:, 0] /= x_scale + boxes[:, 1] /= y_scale + boxes[:, 2] /= x_scale + boxes[:, 3] /= y_scale + + clip_boxes(boxes, ori_shape) + return boxes + +def clip_boxes(boxes, shape): + + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 \ No newline at end of file diff --git 
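The letterbox/scale_boxes pair in datasets/common.py above is what keeps box coordinates consistent between the padded 640x640 network input and the original image. A minimal round-trip sketch (illustrative only; it assumes it is run from the sample's ixrt directory so the datasets package is importable):

    import numpy as np
    from datasets.common import letterbox, scale_boxes

    img = np.zeros((480, 640, 3), dtype=np.uint8)        # dummy BGR image, h=480, w=640
    padded, ratio, (dw, dh) = letterbox(img, new_shape=(640, 640), auto=False, scaleup=False)
    print(padded.shape)                                   # (640, 640, 3), gray bars added top and bottom

    # boxes predicted on the 640x640 input (xyxy) are mapped back to original-image pixels
    boxes = np.array([[100.0, 160.0, 300.0, 400.0]])
    boxes = scale_boxes((640, 640), boxes, (480, 640), use_letterbox=True)
    print(boxes)                                          # padding subtracted, scaled by 1/gain, clipped to (480, 640)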
a/models/cv/object_detection/yolov5s_sample/ixrt/datasets/post_process.py b/models/cv/object_detection/yolov5s_sample/ixrt/datasets/post_process.py new file mode 100644 index 00000000..a58c02f8 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/datasets/post_process.py @@ -0,0 +1,115 @@ +import cv2 +import math +import numpy as np + +from .common import letterbox, scale_boxes, clip_boxes + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Postprocess + elif data_process_type == "yolov3": + return Yolov3Postprocess + elif data_process_type == "yolox": + return YoloxPostprocess + return None + +def Yolov3Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=False + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def Yolov5Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=True + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def YoloxPostprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + boxes = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + r = min(imgsz[0]/ori_img_shape[0][i], imgsz[1]/ori_img_shape[1][i]) + boxes[:, :4] /= r + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + clip_boxes(boxes, (ori_img_shape[0][i], ori_img_shape[1][i])) + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/datasets/pre_process.py b/models/cv/object_detection/yolov5s_sample/ixrt/datasets/pre_process.py new file mode 100644 index 00000000..8cc643a8 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/datasets/pre_process.py @@ -0,0 +1,56 @@ +import cv2 +import math +import numpy as np + +from .common import letterbox + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Preprocess + elif data_process_type == "yolov3": + return Yolov3Preprocess + elif data_process_type == "yolox": + return YoloxPreprocess + return None + +def Yolov3Preprocess(image, img_size): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / max(h0, w0) # ratio + + image = cv2.resize(image, (img_size, img_size)) + image = image.transpose((2, 0, 1))[::-1] # HWC to 
CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def Yolov5Preprocess(image, img_size, augment=False): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / max(h0, w0) # ratio + + if r != 1: # if sizes are not equal + interp = cv2.INTER_LINEAR if (augment or r > 1) else cv2.INTER_AREA + image = cv2.resize(image, (math.ceil(w0 * r), math.ceil(h0 * r)), interpolation=interp) + + # shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size rect == True + + image, ratio, dwdh = letterbox(image, new_shape=img_size, auto=False, scaleup=False) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def YoloxPreprocess(img, img_size, swap=(2,0,1)): + + padded_img = np.ones((img_size, img_size, 3), dtype=np.uint8) * 114 + r = min(img_size / img.shape[0], img_size / img.shape[1]) + resized_img = cv2.resize( + img, + (int(img.shape[1] * r), int(img.shape[0] * r)), + interpolation=cv2.INTER_LINEAR, + ).astype(np.uint8) + + padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img + padded_img = padded_img.transpose(swap) + padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) + + return padded_img \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/datasets/vision.py b/models/cv/object_detection/yolov5s_sample/ixrt/datasets/vision.py new file mode 100644 index 00000000..32da4a78 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/datasets/vision.py @@ -0,0 +1,136 @@ +import os +from typing import Any, Callable, List, Optional, Tuple + +import torch +import torch.utils.data as data + +from types import FunctionType + +def _log_api_usage_once(obj: Any) -> None: + + """ + Logs API usage(module and name) within an organization. + In a large ecosystem, it's often useful to track the PyTorch and + TorchVision APIs usage. This API provides the similar functionality to the + logging module in the Python stdlib. It can be used for debugging purpose + to log which methods are used and by default it is inactive, unless the user + manually subscribes a logger via the `SetAPIUsageLogger method `_. + Please note it is triggered only once for the same API call within a process. + It does not collect any data from open-source users since it is no-op by default. + For more information, please refer to + * PyTorch note: https://pytorch.org/docs/stable/notes/large_scale_deployments.html#api-usage-logging; + * Logging policy: https://github.com/pytorch/vision/issues/5052; + + Args: + obj (class instance or method): an object to extract info from. + """ + module = obj.__module__ + if not module.startswith("torchvision"): + module = f"torchvision.internal.{module}" + name = obj.__class__.__name__ + if isinstance(obj, FunctionType): + name = obj.__name__ + torch._C._log_api_usage_once(f"{module}.{name}") + +class VisionDataset(data.Dataset): + """ + Base Class For making datasets which are compatible with torchvision. + It is necessary to override the ``__getitem__`` and ``__len__`` method. + + Args: + root (string): Root directory of dataset. + transforms (callable, optional): A function/transforms that takes in + an image and a label and returns the transformed versions of both. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. 
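Note that datasets/pre_process.py above also names its factory get_post_process, even though it returns the per-model preprocess function. A short sketch of what the returned callables produce (illustrative; run from the sample's ixrt directory):

    import numpy as np
    from datasets.pre_process import get_post_process    # returns a *pre*-process function despite the name

    img = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)

    yolov5_tensor = get_post_process("yolov5")(img, 640)
    print(yolov5_tensor.shape, yolov5_tensor.dtype)       # (3, 640, 640) float32, letterboxed and scaled to [0, 1]

    yolox_tensor = get_post_process("yolox")(img, 640)    # padded with 114, kept in the raw 0-255 range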
E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + + .. note:: + + :attr:`transforms` and the combination of :attr:`transform` and :attr:`target_transform` are mutually exclusive. + """ + + _repr_indent = 4 + + def __init__( + self, + root: str, + transforms: Optional[Callable] = None, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + ) -> None: + _log_api_usage_once(self) + if isinstance(root, str): + root = os.path.expanduser(root) + self.root = root + + has_transforms = transforms is not None + has_separate_transform = transform is not None or target_transform is not None + if has_transforms and has_separate_transform: + raise ValueError("Only transforms or transform/target_transform can be passed as argument") + + # for backwards-compatibility + self.transform = transform + self.target_transform = target_transform + + if has_separate_transform: + transforms = StandardTransform(transform, target_transform) + self.transforms = transforms + + def __getitem__(self, index: int) -> Any: + """ + Args: + index (int): Index + + Returns: + (Any): Sample and meta data, optionally transformed by the respective transforms. + """ + raise NotImplementedError + + def __len__(self) -> int: + raise NotImplementedError + + def __repr__(self) -> str: + head = "Dataset " + self.__class__.__name__ + body = [f"Number of datapoints: {self.__len__()}"] + if self.root is not None: + body.append(f"Root location: {self.root}") + body += self.extra_repr().splitlines() + if hasattr(self, "transforms") and self.transforms is not None: + body += [repr(self.transforms)] + lines = [head] + [" " * self._repr_indent + line for line in body] + return "\n".join(lines) + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def extra_repr(self) -> str: + return "" + + +class StandardTransform: + def __init__(self, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None) -> None: + self.transform = transform + self.target_transform = target_transform + + def __call__(self, input: Any, target: Any) -> Tuple[Any, Any]: + if self.transform is not None: + input = self.transform(input) + if self.target_transform is not None: + target = self.target_transform(target) + return input, target + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def __repr__(self) -> str: + body = [self.__class__.__name__] + if self.transform is not None: + body += self._format_transform_repr(self.transform, "Transform: ") + if self.target_transform is not None: + body += self._format_transform_repr(self.target_transform, "Target transform: ") + + return "\n".join(body) diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/deploy.py b/models/cv/object_detection/yolov5s_sample/ixrt/deploy.py new file mode 100644 index 00000000..ec56b7ab --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/deploy.py @@ -0,0 +1,134 @@ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +import argparse +from tensorrt.deploy.api import GraphTransform, create_source, create_target + +class Transform: + def __init__(self, graph): + self.t = GraphTransform(graph) + 
self.graph = graph + + def ReplaceFocus(self, input_edge, outputs, to_op): + input_var = self.graph.get_variable(input_edge) + op = self.graph.get_operator(to_op) + self.t.delete_operators_between_var_op( + from_var=input_var, to_op=op + ) + self.t.make_operator( + "Focus", inputs=input_edge, outputs=outputs + ) + return self.graph + + def AddYoloDecoderOp(self, inputs: list, outputs: list, op_type, **attributes): + if attributes["anchor"] is None: + del attributes["anchor"] + self.t.make_operator( + op_type, inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + + def AddConcatOp(self, inputs: list, outputs, **attributes): + self.t.make_operator( + "Concat", inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + +def customize_ops(graph, args): + t = Transform(graph) + fuse_focus = args.focus_input is not None and args.focus_output is not None and args.focus_last_node is not None + if fuse_focus: + graph = t.ReplaceFocus( + input_edge=args.focus_input, + outputs=args.focus_output, + to_op=args.focus_last_node + ) + decoder_input = args.decoder_input_names + num = len(decoder_input) // 3 + graph = t.AddYoloDecoderOp( + inputs=decoder_input[:num], + outputs=["decoder_8"], + op_type=args.decoder_type, + anchor=args.decoder8_anchor, + num_class=args.num_class, + stride=8, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num:num*2], + outputs=["decoder_16"], + op_type=args.decoder_type, + anchor=args.decoder16_anchor, + num_class=args.num_class, + stride=16, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2:num*2+1], + outputs=["decoder_32"], + op_type=args.decoder_type, + anchor=args.decoder32_anchor, + num_class=args.num_class, + stride=32, + faster_impl=args.faster + ) + if args.decoder64_anchor is not None: + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2+1:], + outputs=["decoder_64"], + op_type=args.decoder_type, + anchor=args.decoder64_anchor, + num_class=args.num_class, + stride=64, + faster_impl=args.faster + ) + graph = t.AddConcatOp( + inputs=["decoder_8", "decoder_16", "decoder_32", "decoder_64"], + outputs=["output"], + axis=1 + ) + elif args.with_nms: + graph = t.AddConcatOp( + inputs=["decoder_32", "decoder_16", "decoder_8"], + outputs=["output"], + axis=1 + ) + + graph.outputs.clear() + graph.add_output("output") + graph.outputs["output"].dtype = "FLOAT" + else: + graph.outputs.clear() + graph.add_output("decoder_8") + graph.outputs["decoder_8"].dtype = "FLOAT" + graph.add_output("decoder_16") + graph.outputs["decoder_16"].dtype = "FLOAT" + graph.add_output("decoder_32") + graph.outputs["decoder_32"].dtype = "FLOAT" + return graph + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--src", type=str) + parser.add_argument("--dst", type=str) + parser.add_argument("--decoder_type", type=str, choices=["YoloV3Decoder", "YoloV5Decoder", "YoloV7Decoder", "YoloxDecoder"]) + parser.add_argument("--with_nms", type=bool, default=False, help="engine with nms") + parser.add_argument("--decoder_input_names", nargs='+', type=str) + parser.add_argument("--decoder8_anchor", nargs='*', type=int) + parser.add_argument("--decoder16_anchor", nargs='*', type=int) + parser.add_argument("--decoder32_anchor", nargs='*', type=int) + parser.add_argument("--decoder64_anchor", nargs='*', type=int, default=None) + parser.add_argument("--num_class", type=int, default=80) + parser.add_argument("--faster", type=int, default=1) + parser.add_argument("--focus_input", 
type=str, default=None) + parser.add_argument("--focus_output", type=str, default=None) + parser.add_argument("--focus_last_node", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + + args = parse_args() + graph = create_source(args.src)() + graph = customize_ops(graph, args) + create_target(saved_path=args.dst).export(graph) + print("Surged onnx lies on", args.dst) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/inference.py b/models/cv/object_detection/yolov5s_sample/ixrt/inference.py new file mode 100644 index 00000000..5f5452d5 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/inference.py @@ -0,0 +1,260 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +import glob +import json +import os +import time +import sys + +import torch +import numpy as np +import cuda.cuda as cuda +import cuda.cudart as cudart + +from coco_labels import coco80_to_coco91_class, labels +from common import save2json, box_class85to6 +from common import create_engine_context, get_io_bindings +from calibration_dataset import create_dataloaders +from datasets.post_process import get_post_process + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from tqdm import tqdm +from tqdm.contrib import tzip + +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +def main(config): + + # Load dataloader + dataloader = create_dataloaders( + data_path=config.eval_dir, + annFile=config.coco_gt, + img_sz=config.imgsz, + batch_size=config.bsz, + step=config.loop_count, + data_process_type=config.data_process_type + ) + + # Load post process func + if config.test_mode == "MAP": + post_process_func = get_post_process(config.data_process_type) + + bsz = config.bsz + num_samples = 5000 + if config.loop_count > 0: + num_samples = bsz * config.loop_count + num_batch = len(dataloader) + print("=" * 30) + print(f"Test Mode : {'Asynchronous' if config.use_async else 'Synchronous'}") + print(f"Total sample : {num_samples}\nBatch_size : {bsz}\nRun Batch : {num_batch}") + print("=" * 30) + + json_result = [] + forward_time = 0.0 + class_map = coco80_to_coco91_class() + + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + + # Load Engine + engine, context = create_engine_context(config.model_engine, logger) + inputs, outputs, allocations = get_io_bindings(engine) + + # Load nms_engine + if config.test_mode == "MAP" and config.nms_type == "GPU": + nms_engine, nms_context = create_engine_context(config.nms_engine, logger) + nms_inputs, nms_outputs, nms_allocations = get_io_bindings(nms_engine) + nms_output0 = np.zeros(nms_outputs[0]["shape"], nms_outputs[0]["dtype"]) + nms_output1 = np.zeros(nms_outputs[1]["shape"], nms_outputs[1]["dtype"]) + print(f"nms_output0 shape : {nms_output0.shape} nms_output0 type : {nms_output0.dtype}") + print(f"nms_output1 shape : {nms_output1.shape} nms_output1 type : {nms_output1.dtype}") + + # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + context.execute_v2(allocations) + print("Warm Done.") + + # Prepare the output data + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + print(f"output shape : {output.shape} output type : {output.dtype}") + + for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader): + batch_data = batch_data.numpy() + batch_img_shape = [batch_img_shape[0].numpy(), batch_img_shape[1].numpy()] + # batch_img_id = 
batch_img_id.numpy() + + cur_bsz_sample = batch_data.shape[0] + + # Set input + err, = cuda.cuMemcpyHtoD(inputs[0]["allocation"], batch_data, batch_data.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + # Forward + # start_time = time.time() + context.execute_v2(allocations) + # end_time = time.time() + # forward_time += end_time - start_time + + if config.test_mode == "MAP": + # Fetch output + err, = cuda.cuMemcpyDtoH(output, outputs[0]["allocation"], outputs[0]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + # Step 1 : prepare data to nms + _, box_num, box_unit = output.shape + if config.debug: + print(f"[Debug] box_num(25200) : {box_num}, box_unit(6) : {box_unit}") + + if config.decoder_faster == 0: + nms_input = box_class85to6(output.reshape(-1, box_unit)) + else: + nms_input = output + + # Step 2 : nms + # cpu nms(TODO) + + # gpu nms + if config.nms_type == "GPU": + + # Set nms input + err, = cuda.cuMemcpyHtoD(nms_inputs[0]["allocation"], nms_input, nms_input.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + nms_context.execute_v2(nms_allocations) + err, = cuda.cuMemcpyDtoH(nms_output0, nms_outputs[0]["allocation"], nms_outputs[0]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + err, = cuda.cuMemcpyDtoH(nms_output1, nms_outputs[1]["allocation"], nms_outputs[1]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + # Step 3 : post process + save + pred_boxes = post_process_func( + ori_img_shape=batch_img_shape, + imgsz=(config.imgsz, config.imgsz), + box_datas=nms_output0, + box_nums=nms_output1, + sample_num=cur_bsz_sample, + max_det=config.max_det + ) + save2json(batch_img_id, pred_boxes, json_result, class_map) + + # fps = num_samples / forward_time + + if config.test_mode == "FPS": + start_time = time.time() + for i in range(config.loop_count): + context.execute_v2(allocations) + end_time = time.time() + forward_time = end_time - start_time + fps = (config.loop_count*config.bsz) / forward_time + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(10) + + if config.test_mode == "MAP": + if len(json_result) == 0: + print("Predict zero box!") + exit(10) + + if not os.path.exists(config.pred_dir): + os.makedirs(config.pred_dir) + + pred_json = os.path.join( + config.pred_dir, f"{config.model_name}_{config.precision}_preds.json" + ) + with open(pred_json, "w") as f: + json.dump(json_result, f) + + anno_json = config.coco_gt + anno = COCO(anno_json) # init annotations api + pred = anno.loadRes(pred_json) # init predictions api + eval = COCOeval(anno, pred, "bbox") + + eval.evaluate() + eval.accumulate() + print( + f"==============================eval {config.model_name} {config.precision} coco map ==============================" + ) + eval.summarize() + + map, map50 = eval.stats[:2] + print("MAP@0.5 : ", map50) + print(f"Accuracy Check : Test {map50} >= target {config.map_target}") + if map50 >= config.map_target: + print("pass!") + exit() + else: + print("failed!") + exit(10) + +def parse_config(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_name", type=str, default="YOLOV5s", help="YOLOV3 YOLOV5 YOLOV7 YOLOX" + ) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") + parser.add_argument( + "--model_engine", + type=str, + default="", + 
help="model engine path", + ) + parser.add_argument( + "--nms_engine", + type=str, + default="", + help="nms engine path", + ) + parser.add_argument( + "--coco_gt", + type=str, + default="data/datasets/cv/coco2017/annotations/instances_val2017.json", + help="coco instances_val2017.json", + ) + parser.add_argument("--warm_up", type=int, default=3, help="warm_up count") + parser.add_argument("--loop_count", type=int, default=-1, help="loop count") + parser.add_argument( + "--eval_dir", + type=str, + default="data/datasets/cv/coco2017/val2017", + help="coco image dir", + ) + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=640, + help="inference size h,w", + ) + parser.add_argument("--max_det", type=int, default=1000, help="maximum detections per image") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--use_async", action="store_true") + parser.add_argument("--debug", action="store_true") + parser.add_argument("--pred_dir", type=str, default=".", help="pred save json dirs") + parser.add_argument("--map_target", type=float, default=0.56, help="target mAP") + parser.add_argument("--fps_target", type=float, default=-1.0, help="target fps") + parser.add_argument("--decoder_faster", type=int, default=0, help="decoder faster can use gpu nms directly") + parser.add_argument("--nms_type", type=str, default="GPU", help="GPU/CPU") + + config = parser.parse_args() + print("config:", config) + return config + +if __name__ == "__main__": + config = parse_config() + main(config) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/load_ixrt_plugin.py b/models/cv/object_detection/yolov5s_sample/ixrt/load_ixrt_plugin.py new file mode 100644 index 00000000..932efbdf --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/load_ixrt_plugin.py @@ -0,0 +1,12 @@ +import ctypes +import tensorrt +from os.path import join, dirname, exists +def load_ixrt_plugin(logger=tensorrt.Logger(tensorrt.Logger.INFO), namespace="", dynamic_path=""): + if not dynamic_path: + dynamic_path = join(dirname(tensorrt.__file__), "lib", "libixrt_plugin.so") + if not exists(dynamic_path): + raise FileNotFoundError( + f"The ixrt_plugin lib {dynamic_path} is not existed, please provided effective plugin path!") + ctypes.CDLL(dynamic_path) + tensorrt.init_libnvinfer_plugins(logger, namespace) + print(f"Loaded plugin from {dynamic_path}") \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/modify_batchsize.py b/models/cv/object_detection/yolov5s_sample/ixrt/modify_batchsize.py new file mode 100644 index 00000000..00ed65dd --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/modify_batchsize.py @@ -0,0 +1,37 @@ +import onnx +import argparse + +def change_input_dim(model, bsz): + batch_size = bsz + + # The following code changes the first dimension of every input to be batch_size + # Modify as appropriate ... note that this requires all inputs to + # have the same batch_size + inputs = model.graph.input + for input in inputs: + # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. + # Add checks as needed. 
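    # dim[0] is the batch dimension. ONNX stores it either as a fixed integer
    # (dim_value) or as a named symbolic dimension (dim_param); passing e.g.
    # --batch_size 16 rewrites every graph input to a fixed batch of 16, which
    # matches the static-shape engine built later by build_engine.py.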
+ dim1 = input.type.tensor_type.shape.dim[0] + # update dim to be a symbolic value + if isinstance(batch_size, str): + # set dynamic batch size + dim1.dim_param = batch_size + elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): + # set given batch size + dim1.dim_value = int(batch_size) + else: + # set batch size of 1 + dim1.dim_value = 1 + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--batch_size", type=int) + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +model = onnx.load(args.origin_model) +change_input_dim(model, args.batch_size) +onnx.save(model, args.output_model) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/quant.py b/models/cv/object_detection/yolov5s_sample/ixrt/quant.py new file mode 100644 index 00000000..d73212ca --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/quant.py @@ -0,0 +1,55 @@ +import os +import random +import argparse +import numpy as np +from tensorrt.deploy import static_quantize + +import torch +import sys +sys.path.append("/home/haoyuan.chen/temp/inferencesamples/benchmarks/cv/detection/yolov3/tensorrt") +print(sys.path) +from calibration_dataset import create_dataloaders + +def setseed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str) + parser.add_argument("--model", type=str, default="yolov5s_with_decoder.onnx") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--dataset_dir", type=str, default="./coco2017/val2017") + parser.add_argument("--ann_file", type=str, default="./coco2017/annotations/instances_val2017.json") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") + parser.add_argument("--disable_quant_names", nargs='*', type=str) + parser.add_argument("--save_dir", type=str, help="save path", default=None) + parser.add_argument("--bsz", type=int, default=32) + parser.add_argument("--step", type=int, default=20) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--imgsz", type=int, default=640) + args = parser.parse_args() + return args + +args = parse_args() +setseed(args.seed) +model_name = args.model_name + +out_dir = args.save_dir +dataloader = create_dataloaders( + data_path=args.dataset_dir, + annFile=args.ann_file, + img_sz=args.imgsz, + batch_size=args.bsz, + step=args.step, + data_process_type=args.data_process_type +) +# print("disable_quant_names : ", args.disable_quant_names) +static_quantize(args.model, + calibration_dataloader=dataloader, + save_quant_onnx_path=os.path.join(out_dir, f"quantized_{model_name}.onnx"), + observer=args.observer, + data_preprocess=lambda x: x[0].to("cuda"), + quant_format="qdq", + disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/scripts/infer_yolov5s_sample_fp16_accuracy.sh b/models/cv/object_detection/yolov5s_sample/ixrt/scripts/infer_yolov5s_sample_fp16_accuracy.sh new file mode 100644 index 00000000..95dbf1d0 --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/scripts/infer_yolov5s_sample_fp16_accuracy.sh @@ -0,0 +1,208 @@ +#!/bin/bash + +EXIT_STATUS=0 +check_status() +{ + 
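    # inference.py exits with code 10 when the accuracy/performance target is
    # missed (mAP below --map_target or FPS below --fps_target); any other
    # non-zero code marks the run as failed. With TEST_PERF=0 a code-10 miss
    # is tolerated instead of setting EXIT_STATUS.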
ret_code=${PIPESTATUS[0]} + if [ ${ret_code} != 0 ]; then + [[ ${ret_code} -eq 10 && "${TEST_PERF:-1}" -eq 0 ]] || EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=-1 +TGT=0.56 +LOOP_COUNT=-1 +RUN_MODE=MAP +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=./ +DATASETS_DIR="${PROJ_DIR}/coco" +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR="${PROJ_DIR}/checkpoints" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${RUN_DIR}/config/YOLOV5S_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +CHECKPOINTS_DIR=${CHECKPOINTS_DIR}/tmp +mkdir -p ${CHECKPOINTS_DIR} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_fusion_cancat.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloV5Decoder \ + --with_nms True \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --decoder8_anchor ${DECODER_8_ANCHOR[@]} \ + --decoder16_anchor ${DECODER_16_ANCHOR[@]} \ + 
--decoder32_anchor ${DECODER_32_ANCHOR[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_bs${BSZ}_with_nms.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}_with_nms.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --map_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/scripts/infer_yolov5s_sample_fp16_performance.sh b/models/cv/object_detection/yolov5s_sample/ixrt/scripts/infer_yolov5s_sample_fp16_performance.sh new file mode 100644 index 00000000..556e529c --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/scripts/infer_yolov5s_sample_fp16_performance.sh @@ -0,0 +1,208 @@ +#!/bin/bash + +EXIT_STATUS=0 +check_status() +{ + ret_code=${PIPESTATUS[0]} + if [ ${ret_code} != 0 ]; then + [[ ${ret_code} -eq 10 && "${TEST_PERF:-1}" -eq 0 ]] || EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=3 +TGT=840 +LOOP_COUNT=100 +RUN_MODE=FPS +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=./ +DATASETS_DIR="${PROJ_DIR}/coco" +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR="${PROJ_DIR}/checkpoints" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${RUN_DIR}/config/YOLOV5S_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + 
+CHECKPOINTS_DIR=${CHECKPOINTS_DIR}/tmp +mkdir -p ${CHECKPOINTS_DIR} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_fusion_no_cancat.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloV5Decoder \ + --with_nms False \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --decoder8_anchor ${DECODER_8_ANCHOR[@]} \ + --decoder16_anchor ${DECODER_16_ANCHOR[@]} \ + --decoder32_anchor ${DECODER_32_ANCHOR[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_bs${BSZ}_without_nms.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}_without_nms.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == 
"GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --fps_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/object_detection/yolov5s_sample/ixrt/simplify_model.py b/models/cv/object_detection/yolov5s_sample/ixrt/simplify_model.py new file mode 100644 index 00000000..b4254b6f --- /dev/null +++ b/models/cv/object_detection/yolov5s_sample/ixrt/simplify_model.py @@ -0,0 +1,21 @@ +import onnx +import argparse +from onnxsim import simplify + +# Simplify +def simplify_model(args): + onnx_model = onnx.load(args.origin_model) + model_simp, check = simplify(onnx_model) + model_simp = onnx.shape_inference.infer_shapes(model_simp) + onnx.save(model_simp, args.output_model) + print(" Simplify onnx Done.") + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +simplify_model(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/build_engine.py b/models/cv/object_detection/yolox_sample/ixrt/build_engine.py new file mode 100644 index 00000000..d47e45e5 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/build_engine.py @@ -0,0 +1,43 @@ +import os +import cv2 +import argparse +import numpy as np + +import torch +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +def main(config): + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(config.model) + + precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 + # print("precision : ", precision) + build_config.set_flag(precision) + + plan = builder.build_serialized_network(network, build_config) + engine_file_path = config.engine + with open(engine_file_path, "wb") as f: + f.write(plan) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + # engine args + parser.add_argument("--engine", type=str, default=None) + + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git 
a/models/cv/object_detection/yolox_sample/ixrt/build_nms_engine.py b/models/cv/object_detection/yolox_sample/ixrt/build_nms_engine.py new file mode 100644 index 00000000..25f0ab8a --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/build_nms_engine.py @@ -0,0 +1,81 @@ +import os +import argparse +import torch +import onnx +from onnx import helper +from onnx import TensorProto, numpy_helper +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() +def create_onnx(args): + nms = helper.make_node( + "DetectionNMS_IxRT", + name="NMS", + inputs=["nms_input"], + outputs=["nms_output0", "nms_output1"], + nMaxKeep=args.max_box_pre_img, + fIoUThresh=args.iou_thresh, + fScoreThresh=args.score_thresh + ) + graph = helper.make_graph( + nodes=[nms], + name="gpu_nms", + inputs=[ + helper.make_tensor_value_info( + "nms_input", onnx.TensorProto.FLOAT, (args.bsz, args.all_box_num, 6) + ) + ], + outputs=[ + helper.make_tensor_value_info( + "nms_output0", onnx.TensorProto.FLOAT, (args.bsz, args.max_box_pre_img, 6) + ), + helper.make_tensor_value_info( + "nms_output1", onnx.TensorProto.INT32, (args.bsz,) + ) + ], + initializer=[] + ) + + op = onnx.OperatorSetIdProto() + op.version = 13 + model = onnx.helper.make_model(graph) + + model = onnx.helper.make_model(graph, opset_imports=[op]) + onnx_path = args.path + "/nms.onnx" + onnx.save(model, onnx_path) + +def build_engine(args): + onnx_path = args.path + "/nms.onnx" + IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + builder = tensorrt.Builder(IXRT_LOGGER) + EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + network = builder.create_network(EXPLICIT_BATCH) + build_config = builder.create_builder_config() + parser = tensorrt.OnnxParser(network, IXRT_LOGGER) + parser.parse_from_file(onnx_path) + plan = builder.build_serialized_network(network, build_config) + + engine_path = args.path + "/nms.engine" + with open(engine_path, "wb") as f: + f.write(plan) + +def main(args): + create_onnx(args) + build_engine(args) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--bsz", type=int, default=1, help="batch size") + parser.add_argument("--path", type=str) + parser.add_argument("--all_box_num", type=int, default=25200) + parser.add_argument("--max_box_pre_img", type=int, default=1000) + parser.add_argument("--iou_thresh", type=float, default=0.6) + parser.add_argument("--score_thresh", type=float, default=0.001) + + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/calibration_dataset.py b/models/cv/object_detection/yolox_sample/ixrt/calibration_dataset.py new file mode 100644 index 00000000..0f39a87a --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/calibration_dataset.py @@ -0,0 +1,29 @@ +import os +import torch +import torchvision.datasets +from torch.utils.data import DataLoader + +from datasets.coco import CocoDetection + +def create_dataloaders(data_path, annFile, img_sz=640, batch_size=32, step=32, workers=2, data_process_type="yolov5"): + dataset = CocoDetection( + root=data_path, + annFile=annFile, + img_size=img_sz, + data_process_type=data_process_type + ) + calibration_dataset = dataset + num_samples = min(5000, batch_size * step) + if num_samples > 0: + calibration_dataset = torch.utils.data.Subset( + dataset, indices=range(num_samples) + ) + + calibration_dataloader = DataLoader( + 
calibration_dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + return calibration_dataloader \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/ci/prepare.sh b/models/cv/object_detection/yolox_sample/ixrt/ci/prepare.sh new file mode 100644 index 00000000..f56a16d2 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/ci/prepare.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y numactl +elif [[ ${ID} == "centos" ]]; then + yum install -y numactl +else + echo "Not Support Os" +fi + +pip3 install pycocotools onnxsim pycuda +pip3 install loguru +pip3 install tabulate +pip3 install tqdm +pip3 install opencv-python==4.6.0.66 +pip3 install simplejson + +mkdir -p checkpoints +ln -s /root/data/datasets/corex-inference-data-4.0.0/checkpoints/yolox/yolox_m_export.onnx ./checkpoints/ \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/coco_labels.py b/models/cv/object_detection/yolox_sample/ixrt/coco_labels.py new file mode 100644 index 00000000..69d38878 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/coco_labels.py @@ -0,0 +1,89 @@ +labels = [ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", +] +def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) + return [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + +__all__ = ["labels"] diff --git a/models/cv/object_detection/yolox_sample/ixrt/common.py b/models/cv/object_detection/yolox_sample/ixrt/common.py new file mode 100644 
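The class-id bookkeeping in these samples is easy to trip over: box_class85to6() stores class_id + 1 in each box, save2json() then indexes class_trans[c - 1], and coco80_to_coco91_class() maps the contiguous 80-class index to the sparse official COCO category ids. A small sketch (assumes it runs next to coco_labels.py; the image id and box values are illustrative):

    from coco_labels import labels, coco80_to_coco91_class

    class_map = coco80_to_coco91_class()
    print(labels[0], class_map[0])        # person 1
    print(labels[79], class_map[79])      # toothbrush 90

    # one json_result entry in the form written by save2json() and scored by COCOeval
    example = {
        "image_id": 397133,                    # illustrative image id
        "category_id": class_map[0],           # 1 == person
        "bbox": [100.0, 50.0, 80.0, 120.0],    # [x, y, w, h] in original-image pixels
        "score": 0.87,
    }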
index 00000000..5f543555 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/common.py @@ -0,0 +1,86 @@ +import numpy as np +from tqdm import tqdm + +import tensorrt +import cuda.cuda as cuda +import cuda.cudart as cudart + +# input : [bsz, box_num, 5(cx, cy, w, h, conf) + class_num(prob[0], prob[1], ...)] +# output : [bsz, box_num, 6(left_top_x, left_top_y, right_bottom_x, right_bottom_y, class_id, max_prob*conf)] +def box_class85to6(input): + center_x_y = input[:, :2] + side = input[:, 2:4] + conf = input[:, 4:5] + class_id = np.argmax(input[:, 5:], axis = -1) + class_id = class_id.astype(np.float32).reshape(-1, 1) + 1 + max_prob = np.max(input[:, 5:], axis = -1).reshape(-1, 1) + x1_y1 = center_x_y - 0.5 * side + x2_y2 = center_x_y + 0.5 * side + nms_input = np.concatenate([x1_y1, x2_y2, class_id, max_prob*conf], axis = -1) + return nms_input + +def save2json(batch_img_id, pred_boxes, json_result, class_trans): + for i, boxes in enumerate(pred_boxes): + if boxes is not None: + image_id = int(batch_img_id[i]) + # have no target + if image_id == -1: + continue + for x, y, w, h, c, p in boxes: + x, y, w, h, p = float(x), float(y), float(w), float(h), float(p) + c = int(c) + json_result.append( + { + "image_id": image_id, + "category_id": class_trans[c - 1], + "bbox": [x, y, w, h], + "score": p, + } + ) + +def create_engine_context(engine_path, logger): + with open(engine_path, "rb") as f: + runtime = tensorrt.Runtime(logger) + assert runtime + engine = runtime.deserialize_cuda_engine(f.read()) + assert engine + context = engine.create_execution_context() + assert context + + return engine, context + +def get_io_bindings(engine): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + for i in range(engine.num_bindings): + is_input = False + if engine.binding_is_input(i): + is_input = True + name = engine.get_binding_name(i) + dtype = engine.get_binding_dtype(i) + shape = engine.get_binding_shape(i) + if is_input: + batch_size = shape[0] + size = np.dtype(tensorrt.nptype(dtype)).itemsize + for s in shape: + size *= s + err, allocation = cudart.cudaMalloc(size) + assert(err == cuda.CUresult.CUDA_SUCCESS) + binding = { + "index": i, + "name": name, + "dtype": np.dtype(tensorrt.nptype(dtype)), + "shape": list(shape), + "allocation": allocation, + "nbytes": size, + } + print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") + allocations.append(allocation) + if engine.binding_is_input(i): + inputs.append(binding) + else: + outputs.append(binding) + return inputs, outputs, allocations \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/config/YOLOXM_CONFIG b/models/cv/object_detection/yolox_sample/ixrt/config/YOLOXM_CONFIG new file mode 100644 index 00000000..6da9d04e --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/config/YOLOXM_CONFIG @@ -0,0 +1,56 @@ +# BSZ : 构建engine以及推理时的batchsize +# IMGSIZE : 模型输入hw大小 +# RUN_MODE : [FPS, MAP] +# PRECISION : [float16, int8] +# MODEL_NAME : 生成onnx/engine的basename +# ORIGINE_MODEL : 原始onnx文件 +# COCO_GT : COCOEVAL标签文件 +# DATASET_DIR : 量化/推理数据集路径 +# CHECKPOINTS_DIR : 存放生成的onnx/engine路径 +# LAYER_FUSION : decoder部分走融合算子实现 0不融合 1融合 +# DECODER_FASTER : 有两种融合实现,faster版本速度快且可以直接对接gpu nms;另一种实现的输出和onnx保持一致. 
1:faster +IMGSIZE=640 +MODEL_NAME=yolox +ORIGINE_MODEL=yolox_m_export.onnx +DATA_PROCESS_TYPE=yolox +MODEL_INPUT_NAMES=(images) + +LAYER_FUSION=1 +DECODER_FASTER=1 +DECODER_NUM_CLASS=80 +# nx4x80x80 nx1x80x80 nx80x80x80 +DECODER0_INPUT_NAMES=(1041 1042 1032) +# nx4x40x40 nx1x40x40 nx80x40x40 +DECODER1_INPUT_NAMES=(1067 1068 1058) +# nx4x20x20 nx1x20x20 nx80x20x20 +DECODER2_INPUT_NAMES=(1093 1094 1084) + +# Fuse Focus +FOCUS_INPUT_EDGE=images +FOCUS_OUTPUT_EDGE=input +FOCUS_LAST_NODE=Concat_40 + +# NMS CONFIG + # IOU_THRESH : iou阈值 + # SCORE_THRESH : bbox置信度阈值 + # MAX_BOX_PRE_IMG : 每张图片预测bbox的数量上限 + # ALL_BOX_NUM : nms接收每张图片的box数量 + # NMS_TYPE : GPU/CPU(TODO) +IOU_THRESH=0.65 +SCORE_THRESH=0.01 +MAX_BOX_PRE_IMG=1000 +ALL_BOX_NUM=8400 +NMS_TYPE=GPU + +# QUANT CONFIG (仅PRECISION为int8时生效) + # QUANT_OBSERVER : 量化策略,可选 [hist_percentile, percentile, minmax, entropy, ema] + # QUANT_BATCHSIZE : 量化时组dataloader的batchsize, 最好和onnx中的batchsize保持一致,有些op可能推导shape错误(比如Reshape) + # QUANT_STEP : 量化步数 + # QUANT_SEED : 随机种子 保证量化结果可复现 + # QUANT_EXIST_ONNX : 如果有其他来源的量化模型则填写 +QUANT_OBSERVER=hist_percentile +QUANT_BATCHSIZE=1 +QUANT_STEP=32 +QUANT_SEED=42 +DISABLE_QUANT_LIST=() +QUANT_EXIST_ONNX= \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/cut_model.py b/models/cv/object_detection/yolox_sample/ixrt/cut_model.py new file mode 100644 index 00000000..af0a3a4f --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/cut_model.py @@ -0,0 +1,16 @@ +import onnx +import argparse +from onnxsim import simplify + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--input_model", type=str) + parser.add_argument("--output_model", type=str) + parser.add_argument("--input_names", nargs='+', type=str) + parser.add_argument("--output_names", nargs='+', type=str) + args = parser.parse_args() + return args + +args = parse_args() +onnx.utils.extract_model(args.input_model, args.output_model, args.input_names, args.output_names) +print(" Cut Model Done.") \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/datasets/__init__.py b/models/cv/object_detection/yolox_sample/ixrt/datasets/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/models/cv/object_detection/yolox_sample/ixrt/datasets/coco.py b/models/cv/object_detection/yolox_sample/ixrt/datasets/coco.py new file mode 100644 index 00000000..7f355b84 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/datasets/coco.py @@ -0,0 +1,116 @@ +import os.path +from typing import Any, Callable, List, Optional, Tuple + +import cv2 + +from .vision import VisionDataset +from .pre_process import get_post_process +class CocoDetection(VisionDataset): + """`MS Coco Detection `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. 
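+        Note (added clarification): in this sample the ``transform``/``target_transform``/``transforms``
+        arguments are effectively unused; ``__init__`` overrides ``self.transforms`` with the
+        preprocess function returned by ``get_post_process(data_process_type)``.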
+ """ + + def __init__( + self, + root: str, + annFile: str, + img_size: int, + data_process_type: str, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + transforms: Optional[Callable] = None, + + ) -> None: + super().__init__(root, transforms, transform, target_transform) + from pycocotools.coco import COCO + + self.coco = COCO(annFile) + self.ids = list(sorted(self.coco.imgs.keys())) + self.img_size = img_size + + self.transforms = get_post_process(data_process_type) + + def _load_image(self, id: int): + path = self.coco.loadImgs(id)[0]["file_name"] + data = cv2.imread(os.path.join(self.root, path)) + return data + + def _load_target(self, id: int) -> List[Any]: + return self.coco.loadAnns(self.coco.getAnnIds(id)) + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + id = self.ids[index] + image = self._load_image(id) + target = self._load_target(id) + origin_shape = image.shape[:2] + + if self.transforms is not None: + image = self.transforms(image, self.img_size) + + if len(target) > 0: + image_id = target[0]["image_id"] + else: + # have no target + image_id = -1 + return image, origin_shape, image_id + + def __len__(self) -> int: + return len(self.ids) + + +class CocoCaptions(CocoDetection): + """`MS Coco Captions `_ Dataset. + + It requires the `COCO API to be installed `_. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.PILToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + + Example: + + .. 
code:: python + + import torchvision.datasets as dset + import torchvision.transforms as transforms + cap = dset.CocoCaptions(root = 'dir where images are', + annFile = 'json annotation file', + transform=transforms.PILToTensor()) + + print('Number of samples: ', len(cap)) + img, target = cap[3] # load 4th sample + + print("Image Size: ", img.size()) + print(target) + + Output: :: + + Number of samples: 82783 + Image Size: (3L, 427L, 640L) + [u'A plane emitting smoke stream flying over a mountain.', + u'A plane darts across a bright blue sky behind a mountain covered in snow', + u'A plane leaves a contrail above the snowy mountain top.', + u'A mountain that has a plane flying overheard in the distance.', + u'A mountain view with a plume of smoke in the background'] + + """ + + def _load_target(self, id: int) -> List[str]: + return [ann["caption"] for ann in super()._load_target(id)] diff --git a/models/cv/object_detection/yolox_sample/ixrt/datasets/common.py b/models/cv/object_detection/yolox_sample/ixrt/datasets/common.py new file mode 100644 index 00000000..e120e00f --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/datasets/common.py @@ -0,0 +1,66 @@ +import cv2 +import math +import numpy as np + +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + +def scale_boxes(net_shape, boxes, ori_shape, use_letterbox=False): + # Rescale boxes (xyxy) from net_shape to ori_shape + + if use_letterbox: + + gain = min( + net_shape[0] / ori_shape[0], net_shape[1] / ori_shape[1] + ) # gain = new / old + pad = (net_shape[1] - ori_shape[1] * gain) / 2, ( + net_shape[0] - ori_shape[0] * gain + ) / 2.0 + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + else: + x_scale, y_scale = net_shape[1] / ori_shape[1], net_shape[0] / ori_shape[0] + + boxes[:, 0] /= x_scale + boxes[:, 1] /= y_scale + boxes[:, 2] /= x_scale + boxes[:, 3] /= y_scale + + clip_boxes(boxes, ori_shape) + return boxes + +def clip_boxes(boxes, shape): + + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 \ No newline at end of file diff --git 
a/models/cv/object_detection/yolox_sample/ixrt/datasets/post_process.py b/models/cv/object_detection/yolox_sample/ixrt/datasets/post_process.py new file mode 100644 index 00000000..a58c02f8 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/datasets/post_process.py @@ -0,0 +1,115 @@ +import cv2 +import math +import numpy as np + +from .common import letterbox, scale_boxes, clip_boxes + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Postprocess + elif data_process_type == "yolov3": + return Yolov3Postprocess + elif data_process_type == "yolox": + return YoloxPostprocess + return None + +def Yolov3Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=False + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def Yolov5Postprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + cur_box = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + boxes = scale_boxes( + (imgsz[0], imgsz[1]), + cur_box, + (ori_img_shape[0][i], ori_img_shape[1][i]), + use_letterbox=True + ) + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box + +def YoloxPostprocess( + ori_img_shape, + imgsz, + box_datas, + box_nums, + sample_num, + max_det=1000, +): + all_box = [] + data_offset = 0 + box_datas = box_datas.flatten() + box_nums = box_nums.flatten() + + for i in range(sample_num): + box_num = box_nums[i] + if box_num == 0: + boxes = None + else: + boxes = box_datas[data_offset : data_offset + box_num * 6].reshape(-1, 6) + r = min(imgsz[0]/ori_img_shape[0][i], imgsz[1]/ori_img_shape[1][i]) + boxes[:, :4] /= r + # xyxy2xywh + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + clip_boxes(boxes, (ori_img_shape[0][i], ori_img_shape[1][i])) + + all_box.append(boxes) + data_offset += max_det * 6 + + return all_box \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/datasets/pre_process.py b/models/cv/object_detection/yolox_sample/ixrt/datasets/pre_process.py new file mode 100644 index 00000000..8cc643a8 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/datasets/pre_process.py @@ -0,0 +1,56 @@ +import cv2 +import math +import numpy as np + +from .common import letterbox + +def get_post_process(data_process_type): + if data_process_type == "yolov5": + return Yolov5Preprocess + elif data_process_type == "yolov3": + return Yolov3Preprocess + elif data_process_type == "yolox": + return YoloxPreprocess + return None + +def Yolov3Preprocess(image, img_size): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / max(h0, w0) # ratio + + image = cv2.resize(image, (img_size, img_size)) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB 
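+    # Note: this path rescales pixels to [0, 1]; YoloxPreprocess below instead keeps the
+    # raw 0-255 range and only resizes and pads with the value 114, so the two model
+    # families receive differently normalized inputs.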
+ image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def Yolov5Preprocess(image, img_size, augment=False): + + h0, w0 = image.shape[:2] # orig hw + r = img_size / max(h0, w0) # ratio + + if r != 1: # if sizes are not equal + interp = cv2.INTER_LINEAR if (augment or r > 1) else cv2.INTER_AREA + image = cv2.resize(image, (math.ceil(w0 * r), math.ceil(h0 * r)), interpolation=interp) + + # shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size rect == True + + image, ratio, dwdh = letterbox(image, new_shape=img_size, auto=False, scaleup=False) + image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + image = np.ascontiguousarray(image).astype(np.float32) / 255.0 # 0~1 np array + return image + +def YoloxPreprocess(img, img_size, swap=(2,0,1)): + + padded_img = np.ones((img_size, img_size, 3), dtype=np.uint8) * 114 + r = min(img_size / img.shape[0], img_size / img.shape[1]) + resized_img = cv2.resize( + img, + (int(img.shape[1] * r), int(img.shape[0] * r)), + interpolation=cv2.INTER_LINEAR, + ).astype(np.uint8) + + padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img + padded_img = padded_img.transpose(swap) + padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) + + return padded_img \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/datasets/vision.py b/models/cv/object_detection/yolox_sample/ixrt/datasets/vision.py new file mode 100644 index 00000000..32da4a78 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/datasets/vision.py @@ -0,0 +1,136 @@ +import os +from typing import Any, Callable, List, Optional, Tuple + +import torch +import torch.utils.data as data + +from types import FunctionType + +def _log_api_usage_once(obj: Any) -> None: + + """ + Logs API usage(module and name) within an organization. + In a large ecosystem, it's often useful to track the PyTorch and + TorchVision APIs usage. This API provides the similar functionality to the + logging module in the Python stdlib. It can be used for debugging purpose + to log which methods are used and by default it is inactive, unless the user + manually subscribes a logger via the `SetAPIUsageLogger method `_. + Please note it is triggered only once for the same API call within a process. + It does not collect any data from open-source users since it is no-op by default. + For more information, please refer to + * PyTorch note: https://pytorch.org/docs/stable/notes/large_scale_deployments.html#api-usage-logging; + * Logging policy: https://github.com/pytorch/vision/issues/5052; + + Args: + obj (class instance or method): an object to extract info from. + """ + module = obj.__module__ + if not module.startswith("torchvision"): + module = f"torchvision.internal.{module}" + name = obj.__class__.__name__ + if isinstance(obj, FunctionType): + name = obj.__name__ + torch._C._log_api_usage_once(f"{module}.{name}") + +class VisionDataset(data.Dataset): + """ + Base Class For making datasets which are compatible with torchvision. + It is necessary to override the ``__getitem__`` and ``__len__`` method. + + Args: + root (string): Root directory of dataset. + transforms (callable, optional): A function/transforms that takes in + an image and a label and returns the transformed versions of both. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. 
E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + + .. note:: + + :attr:`transforms` and the combination of :attr:`transform` and :attr:`target_transform` are mutually exclusive. + """ + + _repr_indent = 4 + + def __init__( + self, + root: str, + transforms: Optional[Callable] = None, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + ) -> None: + _log_api_usage_once(self) + if isinstance(root, str): + root = os.path.expanduser(root) + self.root = root + + has_transforms = transforms is not None + has_separate_transform = transform is not None or target_transform is not None + if has_transforms and has_separate_transform: + raise ValueError("Only transforms or transform/target_transform can be passed as argument") + + # for backwards-compatibility + self.transform = transform + self.target_transform = target_transform + + if has_separate_transform: + transforms = StandardTransform(transform, target_transform) + self.transforms = transforms + + def __getitem__(self, index: int) -> Any: + """ + Args: + index (int): Index + + Returns: + (Any): Sample and meta data, optionally transformed by the respective transforms. + """ + raise NotImplementedError + + def __len__(self) -> int: + raise NotImplementedError + + def __repr__(self) -> str: + head = "Dataset " + self.__class__.__name__ + body = [f"Number of datapoints: {self.__len__()}"] + if self.root is not None: + body.append(f"Root location: {self.root}") + body += self.extra_repr().splitlines() + if hasattr(self, "transforms") and self.transforms is not None: + body += [repr(self.transforms)] + lines = [head] + [" " * self._repr_indent + line for line in body] + return "\n".join(lines) + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def extra_repr(self) -> str: + return "" + + +class StandardTransform: + def __init__(self, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None) -> None: + self.transform = transform + self.target_transform = target_transform + + def __call__(self, input: Any, target: Any) -> Tuple[Any, Any]: + if self.transform is not None: + input = self.transform(input) + if self.target_transform is not None: + target = self.target_transform(target) + return input, target + + def _format_transform_repr(self, transform: Callable, head: str) -> List[str]: + lines = transform.__repr__().splitlines() + return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]] + + def __repr__(self) -> str: + body = [self.__class__.__name__] + if self.transform is not None: + body += self._format_transform_repr(self.transform, "Transform: ") + if self.target_transform is not None: + body += self._format_transform_repr(self.target_transform, "Target transform: ") + + return "\n".join(body) diff --git a/models/cv/object_detection/yolox_sample/ixrt/deploy.py b/models/cv/object_detection/yolox_sample/ixrt/deploy.py new file mode 100644 index 00000000..668b3420 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/deploy.py @@ -0,0 +1,135 @@ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +import argparse +from tensorrt.deploy.api import GraphTransform, create_source, create_target + +class Transform: + def __init__(self, graph): + self.t = GraphTransform(graph) + self.graph = 
graph + + def ReplaceFocus(self, input_edge, outputs, to_op): + input_var = self.graph.get_variable(input_edge) + op = self.graph.get_operator(to_op) + self.t.delete_operators_between_var_op( + from_var=input_var, to_op=op + ) + self.t.make_operator( + "Focus", inputs=input_edge, outputs=outputs + ) + return self.graph + + def AddYoloDecoderOp(self, inputs: list, outputs: list, op_type, **attributes): + if attributes["anchor"] is None: + del attributes["anchor"] + print("AddYoloDecoderOp:", ) + self.t.make_operator( + op_type, inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + + def AddConcatOp(self, inputs: list, outputs, **attributes): + self.t.make_operator( + "Concat", inputs=inputs, outputs=outputs, **attributes + ) + return self.graph + +def customize_ops(graph, args): + t = Transform(graph) + fuse_focus = args.focus_input is not None and args.focus_output is not None and args.focus_last_node is not None + if fuse_focus: + graph = t.ReplaceFocus( + input_edge=args.focus_input, + outputs=args.focus_output, + to_op=args.focus_last_node + ) + decoder_input = args.decoder_input_names + num = len(decoder_input) // 3 + graph = t.AddYoloDecoderOp( + inputs=decoder_input[:num], + outputs=["decoder_8"], + op_type=args.decoder_type, + anchor=args.decoder8_anchor, + num_class=args.num_class, + stride=8, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num:num*2], + outputs=["decoder_16"], + op_type=args.decoder_type, + anchor=args.decoder16_anchor, + num_class=args.num_class, + stride=16, + faster_impl=args.faster + ) + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2:], + outputs=["decoder_32"], + op_type=args.decoder_type, + anchor=args.decoder32_anchor, + num_class=args.num_class, + stride=32, + faster_impl=args.faster + ) + if args.decoder64_anchor is not None: + graph = t.AddYoloDecoderOp( + inputs=decoder_input[num*2+1:], + outputs=["decoder_64"], + op_type=args.decoder_type, + anchor=args.decoder64_anchor, + num_class=args.num_class, + stride=64, + faster_impl=args.faster + ) + graph = t.AddConcatOp( + inputs=["decoder_8", "decoder_16", "decoder_32", "decoder_64"], + outputs=["output"], + axis=1 + ) + elif args.with_nms: + graph = t.AddConcatOp( + inputs=["decoder_32", "decoder_16", "decoder_8"], + outputs=["output"], + axis=1 + ) + + graph.outputs.clear() + graph.add_output("output") + graph.outputs["output"].dtype = "FLOAT" + else: + graph.outputs.clear() + graph.add_output("decoder_8") + graph.outputs["decoder_8"].dtype = "FLOAT" + graph.add_output("decoder_16") + graph.outputs["decoder_16"].dtype = "FLOAT" + graph.add_output("decoder_32") + graph.outputs["decoder_32"].dtype = "FLOAT" + return graph + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--src", type=str) + parser.add_argument("--dst", type=str) + parser.add_argument("--decoder_type", type=str, choices=["YoloV3Decoder", "YoloV5Decoder", "YoloV7Decoder", "YoloxDecoder"]) + parser.add_argument("--with_nms", type=bool, default=False, help="engine with nms") + parser.add_argument("--decoder_input_names", nargs='+', type=str) + parser.add_argument("--decoder8_anchor", nargs='*', type=int) + parser.add_argument("--decoder16_anchor", nargs='*', type=int) + parser.add_argument("--decoder32_anchor", nargs='*', type=int) + parser.add_argument("--decoder64_anchor", nargs='*', type=int, default=None) + parser.add_argument("--num_class", type=int, default=80) + parser.add_argument("--faster", type=int, default=1) + 
parser.add_argument("--focus_input", type=str, default=None) + parser.add_argument("--focus_output", type=str, default=None) + parser.add_argument("--focus_last_node", type=str, default=None) + args = parser.parse_args() + return args + +if __name__ == "__main__": + + args = parse_args() + graph = create_source(args.src)() + graph = customize_ops(graph, args) + create_target(saved_path=args.dst).export(graph) + print("Surged onnx lies on", args.dst) \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/inference.py b/models/cv/object_detection/yolox_sample/ixrt/inference.py new file mode 100644 index 00000000..1860545a --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/inference.py @@ -0,0 +1,261 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse +import glob +import json +import os +import time +import sys + +import torch +import numpy as np +import cuda.cuda as cuda +import cuda.cudart as cudart + +from coco_labels import coco80_to_coco91_class, labels +from common import save2json, box_class85to6 +from common import create_engine_context, get_io_bindings +from calibration_dataset import create_dataloaders +from datasets.post_process import get_post_process + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from tqdm import tqdm +from tqdm.contrib import tzip + +import tensorrt + +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin() + +def main(config): + + # Load dataloader + dataloader = create_dataloaders( + data_path=config.eval_dir, + annFile=config.coco_gt, + img_sz=config.imgsz, + batch_size=config.bsz, + step=config.loop_count, + data_process_type=config.data_process_type + ) + + # Load post process func + if config.test_mode == "MAP": + post_process_func = get_post_process(config.data_process_type) + + bsz = config.bsz + num_samples = 5000 + if config.loop_count > 0: + num_samples = bsz * config.loop_count + num_batch = len(dataloader) + print("=" * 30) + print(f"Test Mode : {'Asynchronous' if config.use_async else 'Synchronous'}") + print(f"Total sample : {num_samples}\nBatch_size : {bsz}\nRun Batch : {num_batch}") + print("=" * 30) + + json_result = [] + forward_time = 0.0 + class_map = coco80_to_coco91_class() + + host_mem = tensorrt.IHostMemory + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + + # Load Engine + engine, context = create_engine_context(config.model_engine, logger) + inputs, outputs, allocations = get_io_bindings(engine) + + # Load nms_engine + if config.test_mode == "MAP" and config.nms_type == "GPU": + nms_engine, nms_context = create_engine_context(config.nms_engine, logger) + nms_inputs, nms_outputs, nms_allocations = get_io_bindings(nms_engine) + nms_output0 = np.zeros(nms_outputs[0]["shape"], nms_outputs[0]["dtype"]) + nms_output1 = np.zeros(nms_outputs[1]["shape"], nms_outputs[1]["dtype"]) + print(f"nms_output0 shape : {nms_output0.shape} nms_output0 type : {nms_output0.dtype}") + print(f"nms_output1 shape : {nms_output1.shape} nms_output1 type : {nms_output1.dtype}") + + # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + context.execute_v2(allocations) + print("Warm Done.") + + # Prepare the output data + output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) + print(f"output shape : {output.shape} output type : {output.dtype}") + + for batch_data, batch_img_shape, batch_img_id in tqdm(dataloader): + batch_data = batch_data.numpy() + batch_img_shape = [batch_img_shape[0].numpy(), 
batch_img_shape[1].numpy()] + # batch_img_id = batch_img_id.numpy() + + cur_bsz_sample = batch_data.shape[0] + + # Set input + err, = cuda.cuMemcpyHtoD(inputs[0]["allocation"], batch_data, batch_data.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + # Forward + # start_time = time.time() + context.execute_v2(allocations) + # end_time = time.time() + # forward_time += end_time - start_time + + if config.test_mode == "MAP": + # Fetch output + err, = cuda.cuMemcpyDtoH(output, outputs[0]["allocation"], outputs[0]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + + # Step 1 : prepare data to nms + _, box_num, box_unit = output.shape + if config.debug: + print(f"[Debug] box_num(25200) : {box_num}, box_unit(6) : {box_unit}") + + if config.decoder_faster == 0: + nms_input = box_class85to6(output.reshape(-1, box_unit)) + else: + nms_input = output + + # Step 2 : nms + # cpu nms(TODO) + + # gpu nms + if config.nms_type == "GPU": + + # Set nms input + err, = cuda.cuMemcpyHtoD(nms_inputs[0]["allocation"], nms_input, nms_input.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + nms_context.execute_v2(nms_allocations) + err, = cuda.cuMemcpyDtoH(nms_output0, nms_outputs[0]["allocation"], nms_outputs[0]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + err, = cuda.cuMemcpyDtoH(nms_output1, nms_outputs[1]["allocation"], nms_outputs[1]["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + # Step 3 : post process + save + pred_boxes = post_process_func( + ori_img_shape=batch_img_shape, + imgsz=(config.imgsz, config.imgsz), + box_datas=nms_output0, + box_nums=nms_output1, + sample_num=cur_bsz_sample, + max_det=config.max_det + ) + save2json(batch_img_id, pred_boxes, json_result, class_map) + + # fps = num_samples / forward_time + + if config.test_mode == "FPS": + start_time = time.time() + for i in range(config.loop_count): + context.execute_v2(allocations) + end_time = time.time() + forward_time = end_time - start_time + fps = (config.loop_count*config.bsz) / forward_time + print("FPS : ", fps) + print(f"Performance Check : Test {fps} >= target {config.fps_target}") + if fps >= config.fps_target: + print("pass!") + exit() + else: + print("failed!") + exit(10) + + if config.test_mode == "MAP": + if len(json_result) == 0: + print("Predict zero box!") + exit(10) + + if not os.path.exists(config.pred_dir): + os.makedirs(config.pred_dir) + + pred_json = os.path.join( + config.pred_dir, f"{config.model_name}_{config.precision}_preds.json" + ) + with open(pred_json, "w") as f: + json.dump(json_result, f) + + anno_json = config.coco_gt + anno = COCO(anno_json) # init annotations api + pred = anno.loadRes(pred_json) # init predictions api + eval = COCOeval(anno, pred, "bbox") + + eval.evaluate() + eval.accumulate() + print( + f"==============================eval {config.model_name} {config.precision} coco map ==============================" + ) + eval.summarize() + + map, map50 = eval.stats[:2] + print("MAP@0.5 : ", map50) + print(f"Accuracy Check : Test {map50} >= target {config.map_target}") + if map50 >= config.map_target: + print("pass!") + exit() + else: + print("failed!") + exit(10) + +def parse_config(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_name", type=str, default="YOLOV5s", help="YOLOV3 YOLOV5 YOLOV7 YOLOX" + ) + parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", + help="The precision of datatype") + parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") + parser.add_argument( 
+ "--model_engine", + type=str, + default="", + help="model engine path", + ) + parser.add_argument( + "--nms_engine", + type=str, + default="", + help="nms engine path", + ) + parser.add_argument( + "--coco_gt", + type=str, + default="data/datasets/cv/coco2017/annotations/instances_val2017.json", + help="coco instances_val2017.json", + ) + parser.add_argument("--warm_up", type=int, default=3, help="warm_up count") + parser.add_argument("--loop_count", type=int, default=-1, help="loop count") + parser.add_argument( + "--eval_dir", + type=str, + default="data/datasets/cv/coco2017/val2017", + help="coco image dir", + ) + parser.add_argument("--bsz", type=int, default=32, help="test batch size") + parser.add_argument( + "--imgsz", + "--img", + "--img-size", + type=int, + default=640, + help="inference size h,w", + ) + parser.add_argument("--max_det", type=int, default=1000, help="maximum detections per image") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--use_async", action="store_true") + parser.add_argument("--debug", action="store_true") + parser.add_argument("--pred_dir", type=str, default=".", help="pred save json dirs") + parser.add_argument("--map_target", type=float, default=0.56, help="target mAP") + parser.add_argument("--fps_target", type=float, default=-1.0, help="target fps") + parser.add_argument("--decoder_faster", type=int, default=0, help="decoder faster can use gpu nms directly") + parser.add_argument("--nms_type", type=str, default="GPU", help="GPU/CPU") + + config = parser.parse_args() + print("config:", config) + return config + +if __name__ == "__main__": + config = parse_config() + main(config) \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/load_ixrt_plugin.py b/models/cv/object_detection/yolox_sample/ixrt/load_ixrt_plugin.py new file mode 100644 index 00000000..932efbdf --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/load_ixrt_plugin.py @@ -0,0 +1,12 @@ +import ctypes +import tensorrt +from os.path import join, dirname, exists +def load_ixrt_plugin(logger=tensorrt.Logger(tensorrt.Logger.INFO), namespace="", dynamic_path=""): + if not dynamic_path: + dynamic_path = join(dirname(tensorrt.__file__), "lib", "libixrt_plugin.so") + if not exists(dynamic_path): + raise FileNotFoundError( + f"The ixrt_plugin lib {dynamic_path} is not existed, please provided effective plugin path!") + ctypes.CDLL(dynamic_path) + tensorrt.init_libnvinfer_plugins(logger, namespace) + print(f"Loaded plugin from {dynamic_path}") \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/modify_batchsize.py b/models/cv/object_detection/yolox_sample/ixrt/modify_batchsize.py new file mode 100644 index 00000000..00ed65dd --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/modify_batchsize.py @@ -0,0 +1,37 @@ +import onnx +import argparse + +def change_input_dim(model, bsz): + batch_size = bsz + + # The following code changes the first dimension of every input to be batch_size + # Modify as appropriate ... note that this requires all inputs to + # have the same batch_size + inputs = model.graph.input + for input in inputs: + # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. + # Add checks as needed. 
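+        # Since --batch_size is parsed with type=int in this sample, the string branches
+        # below are effectively never taken; dim_value is set from the integer value.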
+ dim1 = input.type.tensor_type.shape.dim[0] + # update dim to be a symbolic value + if isinstance(batch_size, str): + # set dynamic batch size + dim1.dim_param = batch_size + elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): + # set given batch size + dim1.dim_value = int(batch_size) + else: + # set batch size of 1 + dim1.dim_value = 1 + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--batch_size", type=int) + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +model = onnx.load(args.origin_model) +change_input_dim(model, args.batch_size) +onnx.save(model, args.output_model) \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/quant.py b/models/cv/object_detection/yolox_sample/ixrt/quant.py new file mode 100644 index 00000000..d73212ca --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/quant.py @@ -0,0 +1,55 @@ +import os +import random +import argparse +import numpy as np +from tensorrt.deploy import static_quantize + +import torch +import sys +sys.path.append("/home/haoyuan.chen/temp/inferencesamples/benchmarks/cv/detection/yolov3/tensorrt") +print(sys.path) +from calibration_dataset import create_dataloaders + +def setseed(seed=42): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--model_name", type=str) + parser.add_argument("--model", type=str, default="yolov5s_with_decoder.onnx") + parser.add_argument("--data_process_type", type=str, default="none") + parser.add_argument("--dataset_dir", type=str, default="./coco2017/val2017") + parser.add_argument("--ann_file", type=str, default="./coco2017/annotations/instances_val2017.json") + parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") + parser.add_argument("--disable_quant_names", nargs='*', type=str) + parser.add_argument("--save_dir", type=str, help="save path", default=None) + parser.add_argument("--bsz", type=int, default=32) + parser.add_argument("--step", type=int, default=20) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--imgsz", type=int, default=640) + args = parser.parse_args() + return args + +args = parse_args() +setseed(args.seed) +model_name = args.model_name + +out_dir = args.save_dir +dataloader = create_dataloaders( + data_path=args.dataset_dir, + annFile=args.ann_file, + img_sz=args.imgsz, + batch_size=args.bsz, + step=args.step, + data_process_type=args.data_process_type +) +# print("disable_quant_names : ", args.disable_quant_names) +static_quantize(args.model, + calibration_dataloader=dataloader, + save_quant_onnx_path=os.path.join(out_dir, f"quantized_{model_name}.onnx"), + observer=args.observer, + data_preprocess=lambda x: x[0].to("cuda"), + quant_format="qdq", + disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/scripts/infer_yolox_sample_int8_accuracy.sh b/models/cv/object_detection/yolox_sample/ixrt/scripts/infer_yolox_sample_int8_accuracy.sh new file mode 100644 index 00000000..162f5ab8 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/scripts/infer_yolox_sample_int8_accuracy.sh @@ -0,0 +1,209 @@ +#!/bin/bash + +EXIT_STATUS=0 +check_status() +{ + 
ret_code=${PIPESTATUS[0]} + if [ ${ret_code} != 0 ]; then + [[ ${ret_code} -eq 10 && "${TEST_PERF:-1}" -eq 0 ]] || EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=-1 +TGT=0.645 +LOOP_COUNT=-1 +RUN_MODE=MAP +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=./ +DATASETS_DIR="${PROJ_DIR}/coco" +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR="${PROJ_DIR}/checkpoints" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${RUN_DIR}/config/YOLOXM_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +CHECKPOINTS_DIR=${CHECKPOINTS_DIR}/tmp +mkdir -p ${CHECKPOINTS_DIR} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +DECODER_INPUT_NAMES=("${DECODER0_INPUT_NAMES[@]}" "${DECODER1_INPUT_NAMES[@]}" "${DECODER2_INPUT_NAMES[@]}") +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}}_quant_fusion_cancat.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloxDecoder \ + --with_nms True \ + --decoder_input_names 
${DECODER_INPUT_NAMES[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} \ + --focus_input images_DequantizeLinear_Output \ + --focus_output ${FOCUS_OUTPUT_EDGE} \ + --focus_last_node ${FOCUS_LAST_NODE} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_bs${BSZ}_with_nms.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}_with_nms.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --map_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/scripts/infer_yolox_sample_int8_performance.sh b/models/cv/object_detection/yolox_sample/ixrt/scripts/infer_yolox_sample_int8_performance.sh new file mode 100644 index 00000000..f31f12b7 --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/scripts/infer_yolox_sample_int8_performance.sh @@ -0,0 +1,210 @@ +#!/bin/bash + +EXIT_STATUS=0 +check_status() +{ + ret_code=${PIPESTATUS[0]} + if [ ${ret_code} != 0 ]; then + [[ ${ret_code} -eq 10 && "${TEST_PERF:-1}" -eq 0 ]] || EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +WARM_UP=3 +TGT=540 +CPU_AFFINITY=$(ixsmi topo -m|grep "^GPU0" |awk '{print $(NF-1)}') +LOOP_COUNT=100 +RUN_MODE=FPS +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +PROJ_DIR=./ +DATASETS_DIR="${PROJ_DIR}/coco" +COCO_GT=${DATASETS_DIR}/annotations/instances_val2017.json +EVAL_DIR=${DATASETS_DIR}/images/val2017 +CHECKPOINTS_DIR="${PROJ_DIR}/checkpoints" +RUN_DIR="${PROJ_DIR}" +CONFIG_DIR="${RUN_DIR}/config/YOLOXM_CONFIG" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : 
${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +CHECKPOINTS_DIR=${CHECKPOINTS_DIR}/tmp +mkdir -p ${CHECKPOINTS_DIR} + +step=0 +faster=0 +CURRENT_MODEL=${ORIGINE_MODEL} +if [[ ${LAYER_FUSION} == 1 && ${DECODER_FASTER} == 1 ]];then + faster=1 +fi + +# Simplify Model +let step++ +echo [STEP ${step}] : Simplify Model +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model skip, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi +CURRENT_MODEL=${SIM_MODEL} + +# Cut Decoder +let step++ +echo [STEP ${step}] : Cut Decoder +NO_DECODER_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_without_decoder.onnx +DECODER_INPUT_NAMES=("${DECODER0_INPUT_NAMES[@]}" "${DECODER1_INPUT_NAMES[@]}" "${DECODER2_INPUT_NAMES[@]}") +if [ -f ${NO_DECODER_MODEL} ];then + echo " "Cut Decoder skip, ${SIM_MNO_DECODER_MODELODEL} has been existed +else + python3 ${RUN_DIR}/cut_model.py \ + --input_model ${CURRENT_MODEL} \ + --output_model ${NO_DECODER_MODEL} \ + --input_names ${MODEL_INPUT_NAMES[@]} \ + --output_names ${DECODER_INPUT_NAMES[@]} +fi +CURRENT_MODEL=${NO_DECODER_MODEL} + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + CURRENT_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${CURRENT_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${EVAL_DIR} \ + --ann_file ${COCO_GT} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + echo " "Generate ${QUANT_EXIST_ONNX} + fi + CURRENT_MODEL=${QUANT_EXIST_ONNX} +fi + +# Add Decoder +if [ $LAYER_FUSION == "1" ]; then + let step++ + echo; + echo [STEP ${step}] : Add Decoder + FUSION_ONNX=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_fusion_no_cancat.onnx + if [ -f $FUSION_ONNX ];then + echo " "Add Decoder Skip, $FUSION_ONNX has been existed + else + python3 ${RUN_DIR}/deploy.py \ + --src ${CURRENT_MODEL} \ + --dst ${FUSION_ONNX} \ + --decoder_type YoloxDecoder \ + --with_nms False \ + --decoder_input_names ${DECODER_INPUT_NAMES[@]} \ + --num_class ${DECODER_NUM_CLASS} \ + --faster ${faster} \ + --focus_input images_DequantizeLinear_Output \ + --focus_output ${FOCUS_OUTPUT_EDGE} \ + --focus_last_node ${FOCUS_LAST_NODE} + fi + CURRENT_MODEL=${FUSION_ONNX} +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_bs${BSZ}_without_nms.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py \ + --batch_size ${BSZ} \ + --origin_model ${CURRENT_MODEL} \ + --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi +CURRENT_MODEL=${FINAL_MODEL} + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}_without_nms.engine +if [ -f $ENGINE_FILE ];then + echo " "Build 
Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${CURRENT_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi +if [[ ${RUN_MODE} == "MAP" && ${NMS_TYPE} == "GPU" ]];then + NMS_ENGINE=${CHECKPOINTS_DIR}/nms.engine + # Build NMS Engine + python3 ${RUN_DIR}/build_nms_engine.py \ + --bsz ${BSZ} \ + --path ${CHECKPOINTS_DIR} \ + --all_box_num ${ALL_BOX_NUM} \ + --max_box_pre_img ${MAX_BOX_PRE_IMG} \ + --iou_thresh ${IOU_THRESH} \ + --score_thresh ${SCORE_THRESH} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +numactl --physcpubind=${CPU_AFFINITY} python3 ${RUN_DIR}/inference.py \ + --model_engine=${ENGINE_FILE} \ + --nms_engine=${NMS_ENGINE} \ + --coco_gt=${COCO_GT} \ + --eval_dir=${EVAL_DIR} \ + --data_process_type ${DATA_PROCESS_TYPE} \ + --decoder_faster=${faster} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --model_name ${MODEL_NAME} \ + --precision ${PRECISION} \ + --pred_dir ${CHECKPOINTS_DIR} \ + --fps_target ${TGT} \ + --max_det ${MAX_BOX_PRE_IMG} \ + --nms_type ${NMS_TYPE} \ + --bsz ${BSZ}; check_status +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/object_detection/yolox_sample/ixrt/simplify_model.py b/models/cv/object_detection/yolox_sample/ixrt/simplify_model.py new file mode 100644 index 00000000..b4254b6f --- /dev/null +++ b/models/cv/object_detection/yolox_sample/ixrt/simplify_model.py @@ -0,0 +1,21 @@ +import onnx +import argparse +from onnxsim import simplify + +# Simplify +def simplify_model(args): + onnx_model = onnx.load(args.origin_model) + model_simp, check = simplify(onnx_model) + model_simp = onnx.shape_inference.infer_shapes(model_simp) + onnx.save(model_simp, args.output_model) + print(" Simplify onnx Done.") + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--origin_model", type=str) + parser.add_argument("--output_model", type=str) + args = parser.parse_args() + return args + +args = parse_args() +simplify_model(args) \ No newline at end of file diff --git a/tests/model_info.json b/tests/model_info.json index d03ee48d..7f8d4165 100644 --- a/tests/model_info.json +++ b/tests/model_info.json @@ -8834,8 +8834,8 @@ "demoType": "" }, { - "display_name": "YOLOv5s", - "model_name": "yolov5s", + "display_name": "YOLOv5s_Sample", + "model_name": "yolov5s_sample", "framework": "igie", "release_version": "25.12", "release_sdk": "4.3.0", @@ -8847,8 +8847,8 @@ "mdims": "", "dataset": "", "license": "", - "model_path": "models/cv/object_detection/yolov5s/igie/", - "readme_file": "models/cv/object_detection/yolov5s/igie/README.md", + "model_path": "models/cv/object_detection/yolov5s_sample/igie/", + "readme_file": "models/cv/object_detection/yolov5s_sample/igie/README.md", "bitbucket_repo": "", "bitbucket_branch": "", "bitbucket_path": "", @@ -9197,6 +9197,72 @@ "type": "inference", "hasDemo": false, "demoType": "" + }, + { + "display_name": "YOLOX_Sample", + "model_name": "yolox_sample", + "framework": "ixrt", + "release_version": "25.12", + "release_sdk": "4.3.0", + "release_gpgpu": "MR-V100", + "latest_sdk": "4.3.0", + "latest_gpgpu": "", + "category": "cv/object_detection", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "models/cv/object_detection/yolox_sample/ixrt/", + "readme_file": "models/cv/object_detection/yolox_sample/ixrt/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + 
"bitbucket_path": "", + "develop_owner": "", + "github_repo": "", + "github_branch": "", + "github_path": "", + "datasets": "local/coco", + "download_url": "https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_m.pth", + "need_third_part": true, + "precisions": [ + "int8" + ], + "type": "inference", + "hasDemo": false, + "demoType": "" + }, + { + "display_name": "YOLOv5s_Sample", + "model_name": "yolov5s_sample", + "framework": "ixrt", + "release_version": "25.12", + "release_sdk": "4.3.0", + "release_gpgpu": "MR-V100", + "latest_sdk": "4.3.0", + "latest_gpgpu": "", + "category": "cv/object_detection", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "models/cv/object_detection/yolov5s_sample/ixrt/", + "readme_file": "models/cv/object_detection/yolov5s_sample/ixrt/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + "bitbucket_path": "", + "develop_owner": "", + "github_repo": "", + "github_branch": "", + "github_path": "", + "datasets": "local/coco", + "download_url": "https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5s.pt", + "need_third_part": true, + "precisions": [ + "fp16" + ], + "type": "inference", + "hasDemo": false, + "demoType": "" } ] } \ No newline at end of file -- Gitee From 5979414e08417936992c4d9b28fb1b3885283678 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Tue, 30 Dec 2025 11:35:38 +0800 Subject: [PATCH 17/19] update ixrt yolox int8 precision --- models/cv/object_detection/yolox_sample/ixrt/build_engine.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/models/cv/object_detection/yolox_sample/ixrt/build_engine.py b/models/cv/object_detection/yolox_sample/ixrt/build_engine.py index d47e45e5..63936d60 100644 --- a/models/cv/object_detection/yolox_sample/ixrt/build_engine.py +++ b/models/cv/object_detection/yolox_sample/ixrt/build_engine.py @@ -22,6 +22,9 @@ def main(config): # print("precision : ", precision) build_config.set_flag(precision) + if config.precision == "int8": + build_config.set_flag(tensorrt.BuilderFlag.FP16) + plan = builder.build_serialized_network(network, build_config) engine_file_path = config.engine with open(engine_file_path, "wb") as f: -- Gitee From 4d3a5194fc8ab7fba1f752512422b690ee560ac8 Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Tue, 30 Dec 2025 14:28:19 +0800 Subject: [PATCH 18/19] fix igie yolov8n ultralytics compatible --- models/cv/object_detection/yolov8n/igie/inference.py | 11 +++++++++-- models/cv/object_detection/yolov8n/igie/quantize.py | 6 ++++++ .../cv/object_detection/yolov8n/igie/requirements.txt | 3 +-- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/models/cv/object_detection/yolov8n/igie/inference.py b/models/cv/object_detection/yolov8n/igie/inference.py index be549137..4ca50b00 100644 --- a/models/cv/object_detection/yolov8n/igie/inference.py +++ b/models/cv/object_detection/yolov8n/igie/inference.py @@ -150,6 +150,12 @@ class IGIEValidator(DetectionValidator): return stats + def preprocess(self, batch): + """Preprocess without PyTorch device transfer (for TVM).""" + if 'img' in batch: + batch['img'] = batch['img'].float() / 255.0 + return batch + def init_metrics(self): """Initialize evaluation metrics for YOLO.""" val = self.data.get(self.args.split, '') # validation path @@ -159,10 +165,11 @@ class IGIEValidator(DetectionValidator): self.names = self.data['names'] self.nc = len(self.names) self.metrics.names = self.names - self.confusion_matrix = ConfusionMatrix(nc=80) + self.confusion_matrix = 
ConfusionMatrix(names=self.names) self.seen = 0 self.jdict = [] - self.stats = dict(tp=[], conf=[], pred_cls=[], target_cls=[]) + self.end2end = False + self.is_lvis = isinstance(val, str) and "lvis" in val and not self.is_coco # is LVIS def main(): args = parse_args() diff --git a/models/cv/object_detection/yolov8n/igie/quantize.py b/models/cv/object_detection/yolov8n/igie/quantize.py index 5a874788..8a4c42f7 100644 --- a/models/cv/object_detection/yolov8n/igie/quantize.py +++ b/models/cv/object_detection/yolov8n/igie/quantize.py @@ -105,6 +105,12 @@ class PreProcessDatasets(DetectionValidator): return datasets + def preprocess(self, batch): + """Preprocess without PyTorch device transfer (for TVM).""" + if 'img' in batch: + batch['img'] = batch['img'].float() / 255.0 + return batch + class CalibrationDataset(torch.utils.data.Dataset): def __init__(self, datasets): self.datasets = datasets diff --git a/models/cv/object_detection/yolov8n/igie/requirements.txt b/models/cv/object_detection/yolov8n/igie/requirements.txt index d69fe4dc..5cc8ec9c 100644 --- a/models/cv/object_detection/yolov8n/igie/requirements.txt +++ b/models/cv/object_detection/yolov8n/igie/requirements.txt @@ -1,6 +1,5 @@ tqdm onnx pycocotools -# FAILed in 8.2.51 -ultralytics==8.1.34 +ultralytics opencv-python -- Gitee From 6f20669b37d944ae3b5e79aebeb9d79fea96080d Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Tue, 30 Dec 2025 17:34:09 +0800 Subject: [PATCH 19/19] update ixrt yolov8 ultralytics version --- models/cv/object_detection/yolov8n/ixrt/inference.py | 11 +++++++++-- .../cv/object_detection/yolov8n/ixrt/requirements.txt | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/models/cv/object_detection/yolov8n/ixrt/inference.py b/models/cv/object_detection/yolov8n/ixrt/inference.py index 81725723..df501749 100644 --- a/models/cv/object_detection/yolov8n/ixrt/inference.py +++ b/models/cv/object_detection/yolov8n/ixrt/inference.py @@ -205,6 +205,12 @@ class IxRT_Validator(DetectionValidator): return stats + def preprocess(self, batch): + """Preprocess without PyTorch device transfer.""" + if 'img' in batch: + batch['img'] = batch['img'].float() / 255.0 + return batch + def init_metrics(self): """Initialize evaluation metrics for YOLO.""" val = self.data.get(self.args.split, '') # validation path @@ -214,10 +220,11 @@ class IxRT_Validator(DetectionValidator): self.names = self.data['names'] self.nc = len(self.names) self.metrics.names = self.names - self.confusion_matrix = ConfusionMatrix(nc=80) + self.confusion_matrix = ConfusionMatrix(names=self.names) self.seen = 0 self.jdict = [] - self.stats = dict(tp=[], conf=[], pred_cls=[], target_cls=[], target_img=[]) + self.end2end = False + self.is_lvis = isinstance(val, str) and "lvis" in val and not self.is_coco # is LVIS def main(): config = parse_args() diff --git a/models/cv/object_detection/yolov8n/ixrt/requirements.txt b/models/cv/object_detection/yolov8n/ixrt/requirements.txt index 346fe1c7..eb001fe6 100644 --- a/models/cv/object_detection/yolov8n/ixrt/requirements.txt +++ b/models/cv/object_detection/yolov8n/ixrt/requirements.txt @@ -3,5 +3,5 @@ onnx onnxsim pycocotools opencv-python==4.8.0.74 -ultralytics==8.2.51 +ultralytics cuda-python \ No newline at end of file -- Gitee