From 3a84da1d2cd3683e4fa66dfc8a7546d4cd8221ee Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Mon, 28 Apr 2025 13:05:17 +0800 Subject: [PATCH 1/2] refine ixrt common --- .../cv/classification/alexnet/ixrt/README.md | 8 +- .../alexnet/ixrt/build_engine.py | 52 ---- .../alexnet/ixrt/calibration_dataset.py | 112 ------- .../classification/alexnet/ixrt/ci/prepare.sh | 4 +- .../cv/classification/alexnet/ixrt/common.py | 78 ----- .../alexnet/ixrt/export_onnx.py | 45 --- .../classification/alexnet/ixrt/inference.py | 160 ---------- .../alexnet/ixrt/modify_batchsize.py | 56 ---- .../cv/classification/alexnet/ixrt/quant.py | 58 ---- .../alexnet/ixrt/refine_model.py | 290 ----------------- .../alexnet/ixrt/refine_utils/common.py | 36 --- .../alexnet/ixrt/refine_utils/linear_pass.py | 113 ------- .../alexnet/ixrt/requirements.txt | 5 - .../alexnet/ixrt/simplify_model.py | 40 --- .../convnext_base/ixrt/README.md | 11 +- .../convnext_base/ixrt/build_engine.py | 52 ---- .../convnext_base/ixrt/calibration_dataset.py | 112 ------- .../convnext_base/ixrt/ci/prepare.sh | 30 ++ .../convnext_base/ixrt/common.py | 81 ----- .../convnext_base/ixrt/export.py | 61 ---- .../convnext_base/ixrt/inference.py | 171 ---------- .../convnext_base/ixrt/modify_batchsize.py | 56 ---- .../convnext_base/ixrt/requirements.txt | 7 - .../infer_convnext_base_fp16_accuracy.sh | 5 - .../infer_convnext_base_fp16_performance.sh | 5 - .../convnext_small/ixrt/README.md | 11 +- .../convnext_small/ixrt/build_engine.py | 52 ---- .../ixrt/calibration_dataset.py | 112 ------- .../convnext_small/ixrt/ci/prepare.sh | 5 +- .../convnext_small/ixrt/common.py | 81 ----- .../convnext_small/ixrt/export.py | 61 ---- .../convnext_small/ixrt/modify_batchsize.py | 56 ---- .../convnext_small/ixrt/requirements.txt | 7 - .../infer_convnext_small_fp16_accuracy.sh | 5 - .../infer_convnext_small_fp16_performance.sh | 5 - .../convnext_small/ixrt/simplify_model.py | 40 --- .../cspdarknet53/ixrt/README.md | 11 +- 
.../cspdarknet53/ixrt/build_engine.py | 52 ---- .../cspdarknet53/ixrt/build_i8_engine.py | 113 ------- .../cspdarknet53/ixrt/calibration_dataset.py | 112 ------- .../cspdarknet53/ixrt/ci/prepare.sh | 3 +- .../cspdarknet53/ixrt/common.py | 80 ----- .../cspdarknet53/ixrt/inference.py | 162 ---------- .../classification/cspdarknet53/ixrt/quant.py | 167 ---------- .../ixrt/refine_utils/__init__.py | 0 .../ixrt/refine_utils/linear_pass.py | 113 ------- .../ixrt/refine_utils/matmul_to_gemm_pass.py | 54 ---- .../cspdarknet53/ixrt/requirements.txt | 7 - .../infer_cspdarknet53_fp16_accuracy.sh | 67 +++- .../infer_cspdarknet53_fp16_performance.sh | 67 +++- .../infer_cspdarknet53_int8_accuracy.sh | 46 ++- .../infer_cspdarknet53_int8_performance.sh | 47 ++- .../classification/cspresnet50/ixrt/README.md | 10 +- .../cspresnet50/ixrt/build_engine.py | 53 ---- .../cspresnet50/ixrt/calibration_dataset.py | 112 ------- .../cspresnet50/ixrt/ci/prepare.sh | 3 +- .../classification/cspresnet50/ixrt/common.py | 79 ----- .../cspresnet50/ixrt/inference.py | 160 ---------- .../cspresnet50/ixrt/modify_batchsize.py | 57 ---- .../classification/cspresnet50/ixrt/quant.py | 59 ---- .../cspresnet50/ixrt/refine_model.py | 291 ------------------ .../cspresnet50/ixrt/requirements.txt | 8 - .../cspresnet50/ixrt/simplify_model.py | 41 --- .../classification/deit_tiny/ixrt/README.md | 18 +- .../deit_tiny/ixrt/build_engine.py | 52 ---- .../deit_tiny/ixrt/calibration_dataset.py | 112 ------- .../deit_tiny/ixrt/ci/prepare.sh | 33 ++ .../classification/deit_tiny/ixrt/common.py | 81 ----- .../classification/deit_tiny/ixrt/export.py | 2 +- .../deit_tiny/ixrt/inference.py | 171 ---------- .../deit_tiny/ixrt/modify_batchsize.py | 56 ---- .../deit_tiny/ixrt/requirements.txt | 7 - .../scripts/infer_deit_tiny_fp16_accuracy.sh | 5 - .../infer_deit_tiny_fp16_performance.sh | 5 - .../deit_tiny/ixrt/simplify_model.py | 40 --- .../classification/densenet121/ixrt/README.md | 11 +- .../densenet121/ixrt/build_engine.py 
| 109 ------- .../densenet121/ixrt/calibration_dataset.py | 112 ------- .../densenet121/ixrt/ci/prepare.sh | 6 +- .../classification/densenet121/ixrt/common.py | 78 ----- .../densenet121/ixrt/export_onnx.py | 43 --- .../densenet121/ixrt/inference.py | 160 ---------- .../densenet121/ixrt/modify_batchsize.py | 56 ---- .../classification/densenet121/ixrt/quant.py | 166 ---------- .../densenet121/ixrt/requirements.txt | 6 - .../densenet121/ixrt/simplify_model.py | 40 --- .../classification/densenet161/ixrt/README.md | 11 +- .../densenet161/ixrt/build_engine.py | 61 ---- .../densenet161/ixrt/calibration_dataset.py | 112 ------- .../densenet161/ixrt/ci/prepare.sh | 6 +- .../classification/densenet161/ixrt/common.py | 80 ----- .../densenet161/ixrt/inference.py | 175 ----------- .../densenet161/ixrt/modify_batchsize.py | 56 ---- .../densenet161/ixrt/requirements.txt | 5 - .../infer_densenet161_fp16_accuracy.sh | 5 - .../infer_densenet161_fp16_performance.sh | 5 - .../densenet161/ixrt/simplify_model.py | 40 --- .../classification/densenet169/ixrt/README.md | 11 +- .../densenet169/ixrt/build_engine.py | 61 ---- .../densenet169/ixrt/calibration_dataset.py | 112 ------- .../densenet169/ixrt/ci/prepare.sh | 6 +- .../classification/densenet169/ixrt/common.py | 80 ----- .../densenet169/ixrt/inference.py | 175 ----------- .../densenet169/ixrt/modify_batchsize.py | 56 ---- .../densenet169/ixrt/requirements.txt | 5 - .../infer_densenet169_fp16_accuracy.sh | 5 - .../infer_densenet169_fp16_performance.sh | 5 - .../densenet169/ixrt/simplify_model.py | 40 --- .../classification/densenet201/ixrt/README.md | 11 +- .../densenet201/ixrt/build_engine.py | 52 ---- .../densenet201/ixrt/calibration_dataset.py | 112 ------- .../densenet201/ixrt/ci/prepare.sh | 30 ++ .../classification/densenet201/ixrt/common.py | 81 ----- .../densenet201/ixrt/inference.py | 171 ---------- .../densenet201/ixrt/modify_batchsize.py | 56 ---- .../densenet201/ixrt/requirements.txt | 7 - 
.../infer_densenet201_fp16_accuracy.sh | 5 - .../infer_densenet201_fp16_performance.sh | 5 - .../densenet201/ixrt/simplify_model.py | 40 --- .../efficientnet_b0/ixrt/README.md | 9 +- .../efficientnet_b0/ixrt/build_engine.py | 113 ------- .../ixrt/build_engine_by_write_qparams.py | 116 ------- .../ixrt/calibration_dataset.py | 115 ------- .../efficientnet_b0/ixrt/ci/prepare.sh | 5 +- .../efficientnet_b0/ixrt/common.py | 82 ----- .../efficientnet_b0/ixrt/export_onnx.py | 48 --- .../efficientnet_b0/ixrt/inference.py | 164 ---------- .../efficientnet_b0/ixrt/quant.py | 147 --------- .../efficientnet_b0/ixrt/requirements.txt | 7 - .../infer_efficientnet_b0_fp16_accuracy.sh | 150 +++++++-- .../infer_efficientnet_b0_fp16_performance.sh | 152 +++++++-- .../infer_efficientnet_b0_int8_accuracy.sh | 160 ++++++++-- .../infer_efficientnet_b0_int8_performance.sh | 160 ++++++++-- .../efficientnet_b1/ixrt/README.md | 10 +- .../efficientnet_b1/ixrt/build_engine.py | 53 ---- .../ixrt/calibration_dataset.py | 113 ------- .../efficientnet_b1/ixrt/ci/prepare.sh | 6 +- .../efficientnet_b1/ixrt/common.py | 79 ----- .../efficientnet_b1/ixrt/export_onnx.py | 43 --- .../efficientnet_b1/ixrt/inference.py | 160 ---------- .../efficientnet_b1/ixrt/modify_batchsize.py | 57 ---- .../efficientnet_b1/ixrt/quant.py | 59 ---- .../efficientnet_b1/ixrt/refine_model.py | 291 ------------------ .../ixrt/refine_utils/__init__.py | 0 .../ixrt/refine_utils/common.py | 37 --- .../ixrt/refine_utils/linear_pass.py | 114 ------- .../ixrt/refine_utils/matmul_to_gemm_pass.py | 55 ---- .../efficientnet_b1/ixrt/requirements.txt | 5 - .../efficientnet_b1/ixrt/simplify_model.py | 41 --- .../efficientnet_b2/ixrt/README.md | 11 +- .../efficientnet_b2/ixrt/build_engine.py | 52 ---- .../ixrt/calibration_dataset.py | 113 ------- .../efficientnet_b2/ixrt/ci/prepare.sh | 6 +- .../efficientnet_b2/ixrt/common.py | 81 ----- .../efficientnet_b2/ixrt/export.py | 61 ---- .../efficientnet_b2/ixrt/inference.py | 176 ----------- 
.../efficientnet_b2/ixrt/modify_batchsize.py | 57 ---- .../efficientnet_b2/ixrt/requirements.txt | 4 - .../infer_efficientnet_b2_fp16_accuracy.sh | 5 - .../infer_efficientnet_b2_fp16_performance.sh | 5 - .../efficientnet_b2/ixrt/simplify_model.py | 41 --- .../efficientnet_b3/ixrt/README.md | 11 +- .../efficientnet_b3/ixrt/build_engine.py | 52 ---- .../ixrt/calibration_dataset.py | 113 ------- .../efficientnet_b3/ixrt/ci/prepare.sh | 30 ++ .../efficientnet_b3/ixrt/common.py | 81 ----- .../efficientnet_b3/ixrt/export.py | 61 ---- .../efficientnet_b3/ixrt/inference.py | 172 ----------- .../efficientnet_b3/ixrt/modify_batchsize.py | 57 ---- .../efficientnet_b3/ixrt/requirements.txt | 4 - .../infer_efficientnet_b3_fp16_accuracy.sh | 5 - .../infer_efficientnet_b3_fp16_performance.sh | 5 - .../efficientnet_b3/ixrt/simplify_model.py | 41 --- .../efficientnet_v2/ixrt/README.md | 14 +- .../efficientnet_v2/ixrt/_builder.py | 0 .../efficientnet_v2/ixrt/build_engine.py | 106 ------- .../efficientnet_v2/ixrt/build_i8_engine.py | 113 ------- .../ixrt/calibration_dataset.py | 113 ------- .../efficientnet_v2/ixrt/ci/prepare.sh | 17 +- .../efficientnet_v2/ixrt/common.py | 79 ----- .../efficientnet_v2/ixrt/export_onnx.py | 0 .../efficientnet_v2/ixrt/inference.py | 162 ---------- .../efficientnet_v2/ixrt/modify_batchsize.py | 57 ---- .../efficientnet_v2/ixrt/quant.py | 167 ---------- .../efficientnet_v2/ixrt/refine_model.py | 291 ------------------ .../efficientnet_v2/ixrt/requirements.txt | 8 - .../infer_efficientnet_v2_int8_accuracy.sh | 74 ++--- .../infer_efficientnet_v2_int8_performance.sh | 76 ++--- .../efficientnet_v2/ixrt/simplify_model.py | 41 --- .../efficientnet_v2_s/ixrt/README.md | 11 +- .../efficientnet_v2_s/ixrt/build_engine.py | 52 ---- .../ixrt/calibration_dataset.py | 113 ------- .../efficientnet_v2_s/ixrt/ci/prepare.sh | 30 ++ .../efficientnet_v2_s/ixrt/common.py | 81 ----- .../efficientnet_v2_s/ixrt/export.py | 61 ---- .../efficientnet_v2_s/ixrt/inference.py | 172 
----------- .../ixrt/modify_batchsize.py | 57 ---- .../efficientnet_v2_s/ixrt/requirements.txt | 2 - .../infer_efficientnet_v2_s_fp16_accuracy.sh | 5 - ...fer_efficientnet_v2_s_fp16_performance.sh} | 5 - .../efficientnet_v2_s/ixrt/simplify_model.py | 41 --- .../efficientnetv2_rw_t/ixrt/README.md | 12 +- .../efficientnetv2_rw_t/ixrt/build_engine.py | 52 ---- .../ixrt/calibration_dataset.py | 112 ------- .../efficientnetv2_rw_t/ixrt/ci/prepare.sh | 31 ++ .../efficientnetv2_rw_t/ixrt/common.py | 81 ----- .../efficientnetv2_rw_t/ixrt/inference.py | 187 ----------- .../ixrt/modify_batchsize.py | 56 ---- .../efficientnetv2_rw_t/ixrt/requirements.txt | 8 - ...infer_efficientnetv2_rw_t_fp16_accuracy.sh | 5 - ...er_efficientnetv2_rw_t_fp16_performance.sh | 5 - .../ixrt/simplify_model.py | 40 --- .../classification/googlenet/ixrt/README.md | 8 +- .../googlenet/ixrt/build_engine.py | 52 ---- .../googlenet/ixrt/calibration_dataset.py | 112 ------- .../googlenet/ixrt/ci/prepare.sh | 4 +- .../classification/googlenet/ixrt/common.py | 78 ----- .../googlenet/ixrt/export_onnx.py | 45 --- .../googlenet/ixrt/inference.py | 160 ---------- .../googlenet/ixrt/modify_batchsize.py | 56 ---- .../cv/classification/googlenet/ixrt/quant.py | 58 ---- .../googlenet/ixrt/refine_utils/__init__.py | 0 .../googlenet/ixrt/refine_utils/common.py | 36 --- .../ixrt/refine_utils/linear_pass.py | 113 ------- .../ixrt/refine_utils/matmul_to_gemm_pass.py | 54 ---- .../googlenet/ixrt/requirements.txt | 5 - .../googlenet/ixrt/simplify_model.py | 40 --- .../classification/hrnet_w18/ixrt/README.md | 12 +- .../hrnet_w18/ixrt/build_engine.py | 109 ------- .../hrnet_w18/ixrt/build_i8_engine.py | 38 --- .../hrnet_w18/ixrt/calibration_dataset.py | 111 ------- .../hrnet_w18/ixrt/ci/prepare.sh | 5 +- .../classification/hrnet_w18/ixrt/common.py | 78 ----- .../hrnet_w18/ixrt/config/HRNET_W18_CONFIG | 33 -- .../hrnet_w18/ixrt/inference.py | 159 ---------- .../hrnet_w18/ixrt/modify_batchsize.py | 56 ---- 
.../hrnet_w18/ixrt/quant_qdq.py | 59 ---- .../hrnet_w18/ixrt/requirements.txt | 9 - .../scripts/infer_hrnet_w18_fp16_accuracy.sh | 32 +- .../infer_hrnet_w18_fp16_performance.sh | 32 +- .../scripts/infer_hrnet_w18_int8_accuracy.sh | 73 +++-- .../infer_hrnet_w18_int8_performance.sh | 72 ++--- .../hrnet_w18/ixrt/simplify_model.py | 40 --- .../inception_resnet_v2/ixrt/README.md | 18 +- .../inception_resnet_v2/ixrt/build_engine.py | 106 ------- .../ixrt/build_i8_engine.py | 112 ------- .../ixrt/calibration_dataset.py | 164 ---------- .../inception_resnet_v2/ixrt/ci/prepare.sh | 5 +- .../inception_resnet_v2/ixrt/common.py | 79 ----- .../inception_resnet_v2/ixrt/inference.py | 163 ---------- .../ixrt/modify_batchsize.py | 57 ---- .../inception_resnet_v2/ixrt/quant.py | 170 ---------- .../inception_resnet_v2/ixrt/requirements.txt | 11 - ...nfer_inception_resnet_v2_fp16_accuracy.sh} | 14 - ...r_inception_resnet_v2_fp16_performance.sh} | 14 - ...nfer_inception_resnet_v2_int8_accuracy.sh} | 18 +- ...r_inception_resnet_v2_int8_performance.sh} | 18 +- .../infer_inceptionresnetv2_int8_accuracy.sh | 145 --------- ...nfer_inceptionresnetv2_int8_performance.sh | 144 --------- .../ixrt/simplify_model.py | 41 --- .../inception_v3/ixrt/README.md | 9 +- .../inception_v3/ixrt/build_engine.py | 106 ------- .../inception_v3/ixrt/build_i8_engine.py | 113 ------- .../inception_v3/ixrt/calibration_dataset.py | 113 ------- .../inception_v3/ixrt/ci/prepare.sh | 4 +- .../inception_v3/ixrt/common.py | 79 ----- .../inception_v3/ixrt/inference.py | 162 ---------- .../inception_v3/ixrt/modify_batchsize.py | 57 ---- .../classification/inception_v3/ixrt/quant.py | 167 ---------- .../inception_v3/ixrt/refine_model.py | 291 ------------------ .../inception_v3/ixrt/requirements.txt | 7 - .../infer_inception_v3_int8_accuracy.sh | 28 +- .../infer_inception_v3_int8_performance.sh | 75 ++--- .../inception_v3/ixrt/simplify_model.py | 41 --- .../cv/classification/ixrt_common/README.md | 1 + .../ixrt => 
ixrt_common}/build_engine.py | 64 ++-- .../calibration_dataset.py | 110 ++++++- .../ixrt => ixrt_common}/common.py | 72 ++++- .../config/ALEXNET_CONFIG | 14 - .../config/CONVNEXT_BASE_CONFIG} | 0 .../config/CONVNEXT_SMALL_CONFIG} | 4 +- .../config/CSPDARKNET53_CONFIG | 0 .../config/CSPRESNET50_CONFIG | 15 - .../config/DEIT_TINY_CONFIG | 0 .../config/DENSENET121_CONFIG | 0 .../config/DENSENET161_CONFIG} | 0 .../config/DENSENET169_CONFIG} | 2 +- .../config/DENSENET201_CONFIG | 0 .../config/EFFICIENTNETV2_RW_T_CONFIG} | 0 .../config/EFFICIENTNET_B0_CONFIG | 0 .../config/EFFICIENTNET_B1_CONFIG | 0 .../config/EFFICIENTNET_B2_CONFIG | 0 .../config/EFFICIENTNET_B3_CONFIG | 0 .../config/EFFICIENTNET_V2_CONFIG | 0 .../config/EFFICIENTNET_V2_S_CONFIG} | 0 .../config/GOOGLENET_CONFIG | 0 .../config/HRNET_W18_CONFIG} | 20 +- .../config/INCEPTION_RESNET_V2_CONFIG} | 19 +- .../config/INCEPTION_V3_CONFIG} | 5 +- .../ixrt_common/config/MOBILENET_V2_CONFIG | 19 ++ .../config/MOBILENET_V3_CONFIG | 0 .../ixrt => ixrt_common}/config/REPVGG_CONFIG | 0 .../ixrt_common/config/REPVIT_CONFIG | 19 ++ .../config/RES2NET50_CONFIG | 0 .../config/RESNET101_CONFIG | 0 .../config/RESNET18_CONFIG | 0 .../config/RESNET34_CONFIG | 0 .../config/RESNET50_CONFIG | 0 .../config/RESNETV1D50_CONFIG | 2 +- .../config/RESNEXT101_32X8D_CONFIG | 0 .../config/RESNEXT101_64X4D_CONFIG | 0 .../config/RESNEXT50_32X4D_CONFIG} | 0 .../config/SHUFFLENETV2_X0_5_CONFIG} | 0 .../config/SHUFFLENETV2_X1_0_CONFIG | 0 .../config/SHUFFLENETV2_X1_5_CONFIG | 0 .../config/SHUFFLENETV2_X2_0_CONFIG | 0 .../config/SHUFFLENET_V1_CONFIG | 0 .../config/SHUFFLENET_V1_CONFIG copy | 19 ++ .../config/SQUEEZENET_V1_0_CONFIG | 0 .../config/SQUEEZENET_V1_1_CONFIG | 0 .../ixrt => ixrt_common}/config/VGG16_CONFIG | 0 .../config/WIDE_RESNET50_CONFIG | 0 .../ixrt => ixrt_common}/export.py | 14 +- .../ixrt => ixrt_common}/inference.py | 161 ++++++---- .../ixrt_common/load_ixrt_plugin.py | 13 + .../ixrt => ixrt_common}/modify_batchsize.py | 27 +- 
.../{res2net50/ixrt => ixrt_common}/quant.py | 14 - .../ixrt => ixrt_common}/refine_model.py | 22 +- .../refine_utils/__init__.py | 0 .../refine_utils/common.py | 14 - .../refine_utils/linear_pass.py | 14 - .../refine_utils/matmul_to_gemm_pass.py | 14 - .../ixrt => ixrt_common}/requirements.txt | 5 +- .../ixrt => ixrt_common}/simplify_model.py | 14 - .../mobilenet_v2/ixrt/README.md | 13 +- .../mobilenet_v2/ixrt/build_engine.py | 113 ------- .../ixrt/build_engine_by_write_qparams.py | 116 ------- .../mobilenet_v2/ixrt/ci/prepare.sh | 4 +- .../mobilenet_v2/ixrt/common.py | 82 ----- .../mobilenet_v2/ixrt/export_onnx.py | 47 --- .../mobilenet_v2/ixrt/inference.py | 164 ---------- .../mobilenet_v2/ixrt/modify_batchsize.py | 55 ---- .../classification/mobilenet_v2/ixrt/quant.py | 149 --------- .../mobilenet_v2/ixrt/requirements.txt | 6 - .../infer_mobilenet_v2_fp16_accuracy.sh | 70 +++-- .../infer_mobilenet_v2_fp16_performance.sh | 75 +++-- .../infer_mobilenet_v2_int8_accuracy.sh | 96 +++--- .../infer_mobilenet_v2_int8_performance.sh | 97 +++--- .../mobilenet_v2/ixrt/simplify_model.py | 38 --- .../mobilenet_v3/ixrt/README.md | 8 +- .../mobilenet_v3/ixrt/build_engine.py | 52 ---- .../mobilenet_v3/ixrt/calibration_dataset.py | 112 ------- .../mobilenet_v3/ixrt/ci/prepare.sh | 4 +- .../mobilenet_v3/ixrt/common.py | 78 ----- .../mobilenet_v3/ixrt/export_onnx.py | 47 --- .../mobilenet_v3/ixrt/inference.py | 160 ---------- .../mobilenet_v3/ixrt/modify_batchsize.py | 56 ---- .../mobilenet_v3/ixrt/requirements.txt | 5 - .../mobilenet_v3/ixrt/simplify_model.py | 40 --- .../cv/classification/repvgg/ixrt/README.md | 8 +- .../repvgg/ixrt/calibration_dataset.py | 112 ------- .../classification/repvgg/ixrt/ci/prepare.sh | 3 +- .../cv/classification/repvgg/ixrt/common.py | 79 ----- .../classification/repvgg/ixrt/inference.py | 160 ---------- .../repvgg/ixrt/modify_batchsize.py | 57 ---- models/cv/classification/repvgg/ixrt/quant.py | 59 ---- .../repvgg/ixrt/refine_model.py | 291 
------------------ .../repvgg/ixrt/requirements.txt | 8 - .../repvgg/ixrt/simplify_model.py | 41 --- .../classification/res2net50/ixrt/README.md | 6 +- .../res2net50/ixrt/build_engine.py | 52 ---- .../res2net50/ixrt/calibration_dataset.py | 112 ------- .../res2net50/ixrt/ci/prepare.sh | 2 +- .../classification/res2net50/ixrt/common.py | 78 ----- .../res2net50/ixrt/inference.py | 160 ---------- .../res2net50/ixrt/modify_batchsize.py | 56 ---- .../res2net50/ixrt/refine_model.py | 290 ----------------- .../res2net50/ixrt/refine_utils/__init__.py | 0 .../res2net50/ixrt/refine_utils/common.py | 36 --- .../ixrt/refine_utils/matmul_to_gemm_pass.py | 54 ---- .../res2net50/ixrt/requirements.txt | 5 - .../res2net50/ixrt/simplify_model.py | 40 --- .../classification/resnet101/ixrt/README.md | 10 +- .../resnet101/ixrt/build_engine.py | 53 ---- .../resnet101/ixrt/calibration_dataset.py | 113 ------- .../resnet101/ixrt/ci/prepare.sh | 6 +- .../classification/resnet101/ixrt/common.py | 79 ----- .../resnet101/ixrt/export_onnx.py | 43 --- .../resnet101/ixrt/inference.py | 160 ---------- .../resnet101/ixrt/modify_batchsize.py | 57 ---- .../cv/classification/resnet101/ixrt/quant.py | 59 ---- .../resnet101/ixrt/refine_model.py | 291 ------------------ .../resnet101/ixrt/refine_utils/__init__.py | 0 .../resnet101/ixrt/refine_utils/common.py | 37 --- .../ixrt/refine_utils/linear_pass.py | 114 ------- .../ixrt/refine_utils/matmul_to_gemm_pass.py | 55 ---- .../resnet101/ixrt/requirements.txt | 5 - .../resnet101/ixrt/simplify_model.py | 41 --- .../cv/classification/resnet18/ixrt/README.md | 8 +- .../resnet18/ixrt/build_engine.py | 53 ---- .../resnet18/ixrt/calibration_dataset.py | 113 ------- .../resnet18/ixrt/ci/prepare.sh | 4 +- .../cv/classification/resnet18/ixrt/common.py | 79 ----- .../resnet18/ixrt/export_onnx.py | 45 --- .../classification/resnet18/ixrt/inference.py | 161 ---------- .../resnet18/ixrt/modify_batchsize.py | 57 ---- .../cv/classification/resnet18/ixrt/quant.py | 59 
---- .../resnet18/ixrt/refine_model.py | 291 ------------------ .../resnet18/ixrt/refine_utils/__init__.py | 0 .../resnet18/ixrt/refine_utils/common.py | 37 --- .../resnet18/ixrt/refine_utils/linear_pass.py | 114 ------- .../ixrt/refine_utils/matmul_to_gemm_pass.py | 55 ---- .../resnet18/ixrt/requirements.txt | 5 - .../resnet18/ixrt/simplify_model.py | 41 --- .../cv/classification/resnet34/ixrt/README.md | 10 +- .../resnet34/ixrt/build_engine.py | 52 ---- .../resnet34/ixrt/calibration_dataset.py | 112 ------- .../resnet34/ixrt/ci/prepare.sh | 6 +- .../cv/classification/resnet34/ixrt/common.py | 78 ----- .../resnet34/ixrt/export_onnx.py | 43 --- .../classification/resnet34/ixrt/inference.py | 160 ---------- .../resnet34/ixrt/modify_batchsize.py | 56 ---- .../cv/classification/resnet34/ixrt/quant.py | 58 ---- .../resnet34/ixrt/refine_model.py | 290 ----------------- .../resnet34/ixrt/refine_utils/__init__.py | 0 .../resnet34/ixrt/refine_utils/common.py | 36 --- .../resnet34/ixrt/refine_utils/linear_pass.py | 113 ------- .../ixrt/refine_utils/matmul_to_gemm_pass.py | 54 ---- .../resnet34/ixrt/requirements.txt | 5 - .../resnet34/ixrt/simplify_model.py | 40 --- .../cv/classification/resnet50/ixrt/README.md | 9 +- .../resnet50/ixrt/build_engine.py | 109 ------- .../resnet50/ixrt/build_i8_engine.py | 112 ------- .../resnet50/ixrt/calibration_dataset.py | 112 ------- .../resnet50/ixrt/ci/prepare.sh | 4 +- .../cv/classification/resnet50/ixrt/common.py | 78 ----- .../resnet50/ixrt/export_onnx.py | 45 --- .../classification/resnet50/ixrt/inference.py | 160 ---------- .../resnet50/ixrt/modify_batchsize.py | 56 ---- .../cv/classification/resnet50/ixrt/quant.py | 166 ---------- .../resnet50/ixrt/refine_model.py | 290 ----------------- .../resnet50/ixrt/refine_utils/__init__.py | 0 .../resnet50/ixrt/refine_utils/common.py | 36 --- .../resnet50/ixrt/refine_utils/linear_pass.py | 113 ------- .../ixrt/refine_utils/matmul_to_gemm_pass.py | 54 ---- .../resnet50/ixrt/requirements.txt | 
6 - .../scripts/infer_resnet50_fp16_accuracy.sh | 32 +- .../infer_resnet50_fp16_performance.sh | 30 +- .../scripts/infer_resnet50_int8_accuracy.sh | 80 ++--- .../infer_resnet50_int8_performance.sh | 80 +++-- .../resnet50/ixrt/simplify_model.py | 40 --- .../classification/resnetv1d50/ixrt/README.md | 10 +- .../resnetv1d50/ixrt/build_engine.py | 109 ------- .../resnetv1d50/ixrt/build_i8_engine.py | 112 ------- .../resnetv1d50/ixrt/calibration_dataset.py | 112 ------- .../resnetv1d50/ixrt/ci/prepare.sh | 3 +- .../classification/resnetv1d50/ixrt/common.py | 78 ----- .../resnetv1d50/ixrt/inference.py | 159 ---------- .../resnetv1d50/ixrt/modify_batchsize.py | 57 ---- .../classification/resnetv1d50/ixrt/quant.py | 166 ---------- .../resnetv1d50/ixrt/refine_model.py | 290 ----------------- .../resnetv1d50/ixrt/refine_utils/__init__.py | 0 .../resnetv1d50/ixrt/refine_utils/common.py | 36 --- .../ixrt/refine_utils/linear_pass.py | 113 ------- .../ixrt/refine_utils/matmul_to_gemm_pass.py | 54 ---- .../resnetv1d50/ixrt/requirements.txt | 9 - .../infer_resnetv1d50_fp16_accuracy.sh | 32 +- .../infer_resnetv1d50_fp16_performance.sh | 32 +- .../infer_resnetv1d50_int8_accuracy.sh | 79 +++-- .../infer_resnetv1d50_int8_performance.sh | 78 ++--- .../resnetv1d50/ixrt/simplify_model.py | 40 --- .../resnext101_32x8d/ixrt/README.md | 17 +- .../resnext101_32x8d/ixrt/build_engine.py | 52 ---- .../ixrt/calibration_dataset.py | 113 ------- .../resnext101_32x8d/ixrt/ci/prepare.sh | 5 +- .../resnext101_32x8d/ixrt/export.py | 61 ---- .../resnext101_32x8d/ixrt/inference.py | 176 ----------- .../resnext101_32x8d/ixrt/modify_batchsize.py | 57 ---- .../resnext101_32x8d/ixrt/requirements.txt | 8 - .../infer_resnext101_32x8d_fp16_accuracy.sh | 5 - ...infer_resnext101_32x8d_fp16_performance.sh | 5 - .../resnext101_32x8d/ixrt/simplify_model.py | 41 --- .../resnext101_64x4d/ixrt/README.md | 17 +- .../resnext101_64x4d/ixrt/build_engine.py | 52 ---- .../ixrt/calibration_dataset.py | 113 ------- 
.../resnext101_64x4d/ixrt/ci/prepare.sh | 5 +- .../resnext101_64x4d/ixrt/common.py | 81 ----- .../resnext101_64x4d/ixrt/export.py | 61 ---- .../resnext101_64x4d/ixrt/inference.py | 176 ----------- .../resnext101_64x4d/ixrt/modify_batchsize.py | 57 ---- .../resnext101_64x4d/ixrt/requirements.txt | 8 - .../infer_resnext101_64x4d_fp16_accuracy.sh | 5 - ...infer_resnext101_64x4d_fp16_performance.sh | 5 - .../resnext101_64x4d/ixrt/simplify_model.py | 41 --- .../resnext50_32x4d/ixrt/README.md | 17 +- .../resnext50_32x4d/ixrt/build_engine.py | 52 ---- .../ixrt/calibration_dataset.py | 113 ------- .../resnext50_32x4d/ixrt/ci/prepare.sh | 5 +- .../resnext50_32x4d/ixrt/common.py | 81 ----- .../resnext50_32x4d/ixrt/export.py | 61 ---- .../resnext50_32x4d/ixrt/inference.py | 176 ----------- .../resnext50_32x4d/ixrt/modify_batchsize.py | 57 ---- .../resnext50_32x4d/ixrt/requirements.txt | 8 - .../infer_resnext50_32x4d_fp16_accuracy.sh | 5 - .../infer_resnext50_32x4d_fp16_performance.sh | 5 - .../resnext50_32x4d/ixrt/simplify_model.py | 41 --- .../shufflenet_v1/ixrt/README.md | 8 +- .../shufflenet_v1/ixrt/build_engine.py | 53 ---- .../shufflenet_v1/ixrt/calibration_dataset.py | 112 ------- .../shufflenet_v1/ixrt/ci/prepare.sh | 3 +- .../shufflenet_v1/ixrt/common.py | 79 ----- .../shufflenet_v1/ixrt/inference.py | 161 ---------- .../shufflenet_v1/ixrt/modify_batchsize.py | 57 ---- .../shufflenet_v1/ixrt/quant.py | 59 ---- .../shufflenet_v1/ixrt/refine_model.py | 291 ------------------ .../shufflenet_v1/ixrt/requirements.txt | 8 - .../shufflenet_v1/ixrt/simplify_model.py | 41 --- .../shufflenetv2_x0_5/ixrt/README.md | 11 +- .../shufflenetv2_x0_5/ixrt/build_engine.py | 53 ---- .../ixrt/calibration_dataset.py | 112 ------- .../shufflenetv2_x0_5/ixrt/ci/prepare.sh | 4 +- .../shufflenetv2_x0_5/ixrt/common.py | 79 ----- .../shufflenetv2_x0_5/ixrt/export.py | 61 ---- .../shufflenetv2_x0_5/ixrt/inference.py | 161 ---------- .../ixrt/modify_batchsize.py | 57 ---- 
.../shufflenetv2_x0_5/ixrt/quant.py | 59 ---- .../shufflenetv2_x0_5/ixrt/refine_model.py | 291 ------------------ .../shufflenetv2_x0_5/ixrt/requirements.txt | 8 - .../shufflenetv2_x0_5/ixrt/simplify_model.py | 41 --- .../shufflenetv2_x1_0/ixrt/README.md | 8 +- .../shufflenetv2_x1_0/ixrt/build_engine.py | 53 ---- .../ixrt/calibration_dataset.py | 112 ------- .../shufflenetv2_x1_0/ixrt/ci/prepare.sh | 4 +- .../shufflenetv2_x1_0/ixrt/common.py | 79 ----- .../shufflenetv2_x1_0/ixrt/export.py | 61 ---- .../shufflenetv2_x1_0/ixrt/inference.py | 161 ---------- .../ixrt/modify_batchsize.py | 57 ---- .../shufflenetv2_x1_0/ixrt/quant.py | 59 ---- .../shufflenetv2_x1_0/ixrt/requirements.txt | 6 - .../shufflenetv2_x1_0/ixrt/simplify_model.py | 41 --- .../shufflenetv2_x1_5/ixrt/README.md | 8 +- .../shufflenetv2_x1_5/ixrt/build_engine.py | 53 ---- .../ixrt/calibration_dataset.py | 112 ------- .../shufflenetv2_x1_5/ixrt/ci/prepare.sh | 4 +- .../shufflenetv2_x1_5/ixrt/common.py | 79 ----- .../shufflenetv2_x1_5/ixrt/export.py | 61 ---- .../shufflenetv2_x1_5/ixrt/inference.py | 161 ---------- .../ixrt/modify_batchsize.py | 57 ---- .../shufflenetv2_x1_5/ixrt/quant.py | 59 ---- .../shufflenetv2_x1_5/ixrt/requirements.txt | 6 - .../shufflenetv2_x1_5/ixrt/simplify_model.py | 41 --- .../shufflenetv2_x2_0/ixrt/README.md | 8 +- .../shufflenetv2_x2_0/ixrt/build_engine.py | 53 ---- .../ixrt/calibration_dataset.py | 112 ------- .../shufflenetv2_x2_0/ixrt/ci/prepare.sh | 4 +- .../shufflenetv2_x2_0/ixrt/common.py | 79 ----- .../shufflenetv2_x2_0/ixrt/inference.py | 161 ---------- .../ixrt/modify_batchsize.py | 57 ---- .../shufflenetv2_x2_0/ixrt/quant.py | 59 ---- .../shufflenetv2_x2_0/ixrt/requirements.txt | 6 - .../shufflenetv2_x2_0/ixrt/simplify_model.py | 41 --- .../squeezenet_v1_0/ixrt/README.md | 10 +- .../squeezenet_v1_0/ixrt/build_engine.py | 53 ---- .../ixrt/calibration_dataset.py | 112 ------- .../squeezenet_v1_0/ixrt/ci/prepare.sh | 4 +- .../squeezenet_v1_0/ixrt/common.py | 79 ----- 
.../squeezenet_v1_0/ixrt/export_onnx.py | 45 --- .../squeezenet_v1_0/ixrt/inference.py | 160 ---------- .../squeezenet_v1_0/ixrt/modify_batchsize.py | 57 ---- .../squeezenet_v1_0/ixrt/quant.py | 59 ---- .../squeezenet_v1_0/ixrt/refine_model.py | 291 ------------------ .../squeezenet_v1_0/ixrt/requirements.txt | 4 - .../infer_squeezenet_v10_fp16_accuracy.sh | 143 --------- .../infer_squeezenet_v10_fp16_performance.sh | 143 --------- .../squeezenet_v1_0/ixrt/simplify_model.py | 41 --- .../squeezenet_v1_1/ixrt/README.md | 10 +- .../squeezenet_v1_1/ixrt/build_engine.py | 53 ---- .../squeezenet_v1_1/ixrt/build_i8_engine.py | 112 ------- .../ixrt/calibration_dataset.py | 112 ------- .../squeezenet_v1_1/ixrt/ci/prepare.sh | 4 +- .../squeezenet_v1_1/ixrt/common.py | 79 ----- .../squeezenet_v1_1/ixrt/export_onnx.py | 45 --- .../squeezenet_v1_1/ixrt/inference.py | 161 ---------- .../squeezenet_v1_1/ixrt/modify_batchsize.py | 57 ---- .../squeezenet_v1_1/ixrt/quant.py | 166 ---------- .../squeezenet_v1_1/ixrt/refine_model.py | 291 ------------------ .../infer_squeezenet_v1_1_fp16_accuracy.sh | 2 + .../infer_squeezenet_v1_1_fp16_performance.sh | 2 + .../infer_squeezenet_v1_1_int8_accuracy.sh | 80 ++--- .../infer_squeezenet_v1_1_int8_performance.sh | 79 ++--- .../squeezenet_v1_1/ixrt/simplify_model.py | 41 --- models/cv/classification/vgg16/ixrt/README.md | 8 +- .../classification/vgg16/ixrt/build_engine.py | 53 ---- .../vgg16/ixrt/calibration_dataset.py | 112 ------- .../classification/vgg16/ixrt/ci/prepare.sh | 4 +- models/cv/classification/vgg16/ixrt/common.py | 79 ----- .../classification/vgg16/ixrt/export_onnx.py | 45 --- .../cv/classification/vgg16/ixrt/inference.py | 161 ---------- .../vgg16/ixrt/modify_batchsize.py | 57 ---- models/cv/classification/vgg16/ixrt/quant.py | 59 ---- .../classification/vgg16/ixrt/refine_model.py | 291 ------------------ .../vgg16/ixrt/requirements.txt | 4 - .../vgg16/ixrt/simplify_model.py | 41 --- .../wide_resnet50/ixrt/README.md | 17 +- 
.../wide_resnet50/ixrt/build_engine.py | 109 ------- .../wide_resnet50/ixrt/build_i8_engine.py | 112 ------- .../wide_resnet50/ixrt/calibration_dataset.py | 112 ------- .../wide_resnet50/ixrt/ci/prepare.sh | 4 +- .../wide_resnet50/ixrt/common.py | 78 ----- .../wide_resnet50/ixrt/inference.py | 160 ---------- .../wide_resnet50/ixrt/modify_batchsize.py | 56 ---- .../wide_resnet50/ixrt/quant.py | 166 ---------- .../wide_resnet50/ixrt/refine_model.py | 290 ----------------- .../ixrt/refine_utils/__init__.py | 0 .../wide_resnet50/ixrt/refine_utils/common.py | 36 --- .../ixrt/refine_utils/linear_pass.py | 113 ------- .../ixrt/refine_utils/matmul_to_gemm_pass.py | 54 ---- .../wide_resnet50/ixrt/requirements.txt | 4 - .../infer_wide_resnet50_fp16_accuracy.sh | 30 +- .../infer_wide_resnet50_fp16_performance.sh | 30 +- .../infer_wide_resnet50_int8_accuracy.sh | 80 +++-- .../infer_wide_resnet50_int8_performance.sh | 79 +++-- .../wide_resnet50/ixrt/simplify_model.py | 40 --- tests/model_info.json | 91 ++++-- tests/run_ixrt.py | 7 +- 625 files changed, 2687 insertions(+), 36185 deletions(-) delete mode 100644 models/cv/classification/alexnet/ixrt/build_engine.py delete mode 100644 models/cv/classification/alexnet/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/alexnet/ixrt/common.py delete mode 100644 models/cv/classification/alexnet/ixrt/export_onnx.py delete mode 100644 models/cv/classification/alexnet/ixrt/inference.py delete mode 100644 models/cv/classification/alexnet/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/alexnet/ixrt/quant.py delete mode 100644 models/cv/classification/alexnet/ixrt/refine_model.py delete mode 100644 models/cv/classification/alexnet/ixrt/refine_utils/common.py delete mode 100644 models/cv/classification/alexnet/ixrt/refine_utils/linear_pass.py delete mode 100644 models/cv/classification/alexnet/ixrt/requirements.txt delete mode 100644 models/cv/classification/alexnet/ixrt/simplify_model.py delete mode 
100644 models/cv/classification/convnext_base/ixrt/build_engine.py delete mode 100644 models/cv/classification/convnext_base/ixrt/calibration_dataset.py create mode 100644 models/cv/classification/convnext_base/ixrt/ci/prepare.sh delete mode 100644 models/cv/classification/convnext_base/ixrt/common.py delete mode 100644 models/cv/classification/convnext_base/ixrt/export.py delete mode 100644 models/cv/classification/convnext_base/ixrt/inference.py delete mode 100644 models/cv/classification/convnext_base/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/convnext_base/ixrt/requirements.txt delete mode 100644 models/cv/classification/convnext_small/ixrt/build_engine.py delete mode 100644 models/cv/classification/convnext_small/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/convnext_small/ixrt/common.py delete mode 100644 models/cv/classification/convnext_small/ixrt/export.py delete mode 100644 models/cv/classification/convnext_small/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/convnext_small/ixrt/requirements.txt delete mode 100644 models/cv/classification/convnext_small/ixrt/simplify_model.py delete mode 100644 models/cv/classification/cspdarknet53/ixrt/build_engine.py delete mode 100644 models/cv/classification/cspdarknet53/ixrt/build_i8_engine.py delete mode 100644 models/cv/classification/cspdarknet53/ixrt/calibration_dataset.py delete mode 100755 models/cv/classification/cspdarknet53/ixrt/common.py delete mode 100755 models/cv/classification/cspdarknet53/ixrt/inference.py delete mode 100644 models/cv/classification/cspdarknet53/ixrt/quant.py delete mode 100644 models/cv/classification/cspdarknet53/ixrt/refine_utils/__init__.py delete mode 100644 models/cv/classification/cspdarknet53/ixrt/refine_utils/linear_pass.py delete mode 100644 models/cv/classification/cspdarknet53/ixrt/refine_utils/matmul_to_gemm_pass.py delete mode 100644 models/cv/classification/cspdarknet53/ixrt/requirements.txt delete 
mode 100644 models/cv/classification/cspresnet50/ixrt/build_engine.py delete mode 100644 models/cv/classification/cspresnet50/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/cspresnet50/ixrt/common.py delete mode 100644 models/cv/classification/cspresnet50/ixrt/inference.py delete mode 100644 models/cv/classification/cspresnet50/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/cspresnet50/ixrt/quant.py delete mode 100644 models/cv/classification/cspresnet50/ixrt/refine_model.py delete mode 100644 models/cv/classification/cspresnet50/ixrt/requirements.txt delete mode 100644 models/cv/classification/cspresnet50/ixrt/simplify_model.py delete mode 100644 models/cv/classification/deit_tiny/ixrt/build_engine.py delete mode 100644 models/cv/classification/deit_tiny/ixrt/calibration_dataset.py create mode 100644 models/cv/classification/deit_tiny/ixrt/ci/prepare.sh delete mode 100644 models/cv/classification/deit_tiny/ixrt/common.py delete mode 100644 models/cv/classification/deit_tiny/ixrt/inference.py delete mode 100644 models/cv/classification/deit_tiny/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/deit_tiny/ixrt/requirements.txt delete mode 100644 models/cv/classification/deit_tiny/ixrt/simplify_model.py delete mode 100644 models/cv/classification/densenet121/ixrt/build_engine.py delete mode 100644 models/cv/classification/densenet121/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/densenet121/ixrt/common.py delete mode 100644 models/cv/classification/densenet121/ixrt/export_onnx.py delete mode 100644 models/cv/classification/densenet121/ixrt/inference.py delete mode 100644 models/cv/classification/densenet121/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/densenet121/ixrt/quant.py delete mode 100644 models/cv/classification/densenet121/ixrt/requirements.txt delete mode 100644 models/cv/classification/densenet121/ixrt/simplify_model.py delete mode 100644 
models/cv/classification/densenet161/ixrt/build_engine.py delete mode 100644 models/cv/classification/densenet161/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/densenet161/ixrt/common.py delete mode 100644 models/cv/classification/densenet161/ixrt/inference.py delete mode 100644 models/cv/classification/densenet161/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/densenet161/ixrt/requirements.txt delete mode 100644 models/cv/classification/densenet161/ixrt/simplify_model.py delete mode 100644 models/cv/classification/densenet169/ixrt/build_engine.py delete mode 100644 models/cv/classification/densenet169/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/densenet169/ixrt/common.py delete mode 100644 models/cv/classification/densenet169/ixrt/inference.py delete mode 100644 models/cv/classification/densenet169/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/densenet169/ixrt/requirements.txt delete mode 100644 models/cv/classification/densenet169/ixrt/simplify_model.py delete mode 100644 models/cv/classification/densenet201/ixrt/build_engine.py delete mode 100644 models/cv/classification/densenet201/ixrt/calibration_dataset.py create mode 100644 models/cv/classification/densenet201/ixrt/ci/prepare.sh delete mode 100644 models/cv/classification/densenet201/ixrt/common.py delete mode 100644 models/cv/classification/densenet201/ixrt/inference.py delete mode 100644 models/cv/classification/densenet201/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/densenet201/ixrt/requirements.txt delete mode 100644 models/cv/classification/densenet201/ixrt/simplify_model.py delete mode 100644 models/cv/classification/efficientnet_b0/ixrt/build_engine.py delete mode 100644 models/cv/classification/efficientnet_b0/ixrt/build_engine_by_write_qparams.py delete mode 100644 models/cv/classification/efficientnet_b0/ixrt/calibration_dataset.py delete mode 100644 
models/cv/classification/efficientnet_b0/ixrt/common.py delete mode 100644 models/cv/classification/efficientnet_b0/ixrt/export_onnx.py delete mode 100644 models/cv/classification/efficientnet_b0/ixrt/inference.py delete mode 100644 models/cv/classification/efficientnet_b0/ixrt/quant.py delete mode 100644 models/cv/classification/efficientnet_b0/ixrt/requirements.txt delete mode 100644 models/cv/classification/efficientnet_b1/ixrt/build_engine.py delete mode 100644 models/cv/classification/efficientnet_b1/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/efficientnet_b1/ixrt/common.py delete mode 100644 models/cv/classification/efficientnet_b1/ixrt/export_onnx.py delete mode 100644 models/cv/classification/efficientnet_b1/ixrt/inference.py delete mode 100644 models/cv/classification/efficientnet_b1/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/efficientnet_b1/ixrt/quant.py delete mode 100644 models/cv/classification/efficientnet_b1/ixrt/refine_model.py delete mode 100644 models/cv/classification/efficientnet_b1/ixrt/refine_utils/__init__.py delete mode 100644 models/cv/classification/efficientnet_b1/ixrt/refine_utils/common.py delete mode 100644 models/cv/classification/efficientnet_b1/ixrt/refine_utils/linear_pass.py delete mode 100644 models/cv/classification/efficientnet_b1/ixrt/refine_utils/matmul_to_gemm_pass.py delete mode 100644 models/cv/classification/efficientnet_b1/ixrt/requirements.txt delete mode 100644 models/cv/classification/efficientnet_b1/ixrt/simplify_model.py delete mode 100644 models/cv/classification/efficientnet_b2/ixrt/build_engine.py delete mode 100644 models/cv/classification/efficientnet_b2/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/efficientnet_b2/ixrt/common.py delete mode 100644 models/cv/classification/efficientnet_b2/ixrt/export.py delete mode 100644 models/cv/classification/efficientnet_b2/ixrt/inference.py delete mode 100644 
models/cv/classification/efficientnet_b2/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/efficientnet_b2/ixrt/requirements.txt delete mode 100644 models/cv/classification/efficientnet_b2/ixrt/simplify_model.py delete mode 100644 models/cv/classification/efficientnet_b3/ixrt/build_engine.py delete mode 100644 models/cv/classification/efficientnet_b3/ixrt/calibration_dataset.py create mode 100644 models/cv/classification/efficientnet_b3/ixrt/ci/prepare.sh delete mode 100644 models/cv/classification/efficientnet_b3/ixrt/common.py delete mode 100644 models/cv/classification/efficientnet_b3/ixrt/export.py delete mode 100644 models/cv/classification/efficientnet_b3/ixrt/inference.py delete mode 100644 models/cv/classification/efficientnet_b3/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/efficientnet_b3/ixrt/requirements.txt delete mode 100644 models/cv/classification/efficientnet_b3/ixrt/simplify_model.py mode change 100755 => 100644 models/cv/classification/efficientnet_v2/ixrt/_builder.py delete mode 100755 models/cv/classification/efficientnet_v2/ixrt/build_engine.py delete mode 100644 models/cv/classification/efficientnet_v2/ixrt/build_i8_engine.py delete mode 100644 models/cv/classification/efficientnet_v2/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/efficientnet_v2/ixrt/common.py mode change 100755 => 100644 models/cv/classification/efficientnet_v2/ixrt/export_onnx.py delete mode 100644 models/cv/classification/efficientnet_v2/ixrt/inference.py delete mode 100644 models/cv/classification/efficientnet_v2/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/efficientnet_v2/ixrt/quant.py delete mode 100644 models/cv/classification/efficientnet_v2/ixrt/refine_model.py delete mode 100644 models/cv/classification/efficientnet_v2/ixrt/requirements.txt delete mode 100644 models/cv/classification/efficientnet_v2/ixrt/simplify_model.py delete mode 100644 
models/cv/classification/efficientnet_v2_s/ixrt/build_engine.py delete mode 100644 models/cv/classification/efficientnet_v2_s/ixrt/calibration_dataset.py create mode 100644 models/cv/classification/efficientnet_v2_s/ixrt/ci/prepare.sh delete mode 100644 models/cv/classification/efficientnet_v2_s/ixrt/common.py delete mode 100644 models/cv/classification/efficientnet_v2_s/ixrt/export.py delete mode 100644 models/cv/classification/efficientnet_v2_s/ixrt/inference.py delete mode 100644 models/cv/classification/efficientnet_v2_s/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/efficientnet_v2_s/ixrt/requirements.txt rename models/cv/classification/efficientnet_v2_s/ixrt/scripts/{infer_efficientnet_v2_s_performance.sh => infer_efficientnet_v2_s_fp16_performance.sh} (94%) delete mode 100644 models/cv/classification/efficientnet_v2_s/ixrt/simplify_model.py delete mode 100644 models/cv/classification/efficientnetv2_rw_t/ixrt/build_engine.py delete mode 100644 models/cv/classification/efficientnetv2_rw_t/ixrt/calibration_dataset.py create mode 100644 models/cv/classification/efficientnetv2_rw_t/ixrt/ci/prepare.sh delete mode 100644 models/cv/classification/efficientnetv2_rw_t/ixrt/common.py delete mode 100644 models/cv/classification/efficientnetv2_rw_t/ixrt/inference.py delete mode 100644 models/cv/classification/efficientnetv2_rw_t/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/efficientnetv2_rw_t/ixrt/requirements.txt delete mode 100644 models/cv/classification/efficientnetv2_rw_t/ixrt/simplify_model.py delete mode 100644 models/cv/classification/googlenet/ixrt/build_engine.py delete mode 100644 models/cv/classification/googlenet/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/googlenet/ixrt/common.py delete mode 100644 models/cv/classification/googlenet/ixrt/export_onnx.py delete mode 100644 models/cv/classification/googlenet/ixrt/inference.py delete mode 100644 
models/cv/classification/googlenet/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/googlenet/ixrt/quant.py delete mode 100644 models/cv/classification/googlenet/ixrt/refine_utils/__init__.py delete mode 100644 models/cv/classification/googlenet/ixrt/refine_utils/common.py delete mode 100644 models/cv/classification/googlenet/ixrt/refine_utils/linear_pass.py delete mode 100644 models/cv/classification/googlenet/ixrt/refine_utils/matmul_to_gemm_pass.py delete mode 100644 models/cv/classification/googlenet/ixrt/requirements.txt delete mode 100644 models/cv/classification/googlenet/ixrt/simplify_model.py delete mode 100644 models/cv/classification/hrnet_w18/ixrt/build_engine.py delete mode 100644 models/cv/classification/hrnet_w18/ixrt/build_i8_engine.py delete mode 100644 models/cv/classification/hrnet_w18/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/hrnet_w18/ixrt/common.py delete mode 100644 models/cv/classification/hrnet_w18/ixrt/config/HRNET_W18_CONFIG delete mode 100644 models/cv/classification/hrnet_w18/ixrt/inference.py delete mode 100644 models/cv/classification/hrnet_w18/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/hrnet_w18/ixrt/quant_qdq.py delete mode 100644 models/cv/classification/hrnet_w18/ixrt/requirements.txt delete mode 100644 models/cv/classification/hrnet_w18/ixrt/simplify_model.py delete mode 100755 models/cv/classification/inception_resnet_v2/ixrt/build_engine.py delete mode 100644 models/cv/classification/inception_resnet_v2/ixrt/build_i8_engine.py delete mode 100644 models/cv/classification/inception_resnet_v2/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/inception_resnet_v2/ixrt/common.py delete mode 100644 models/cv/classification/inception_resnet_v2/ixrt/inference.py delete mode 100644 models/cv/classification/inception_resnet_v2/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/inception_resnet_v2/ixrt/quant.py delete mode 
100644 models/cv/classification/inception_resnet_v2/ixrt/requirements.txt rename models/cv/classification/inception_resnet_v2/ixrt/scripts/{infer_inceptionresnetv2_fp16_accuracy.sh => infer_inception_resnet_v2_fp16_accuracy.sh} (84%) mode change 100755 => 100644 rename models/cv/classification/inception_resnet_v2/ixrt/scripts/{infer_inceptionresnetv2_fp16_performance.sh => infer_inception_resnet_v2_fp16_performance.sh} (84%) mode change 100755 => 100644 rename models/cv/classification/{squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v10_int8_accuracy.sh => inception_resnet_v2/ixrt/scripts/infer_inception_resnet_v2_int8_accuracy.sh} (83%) rename models/cv/classification/{squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v10_int8_performance.sh => inception_resnet_v2/ixrt/scripts/infer_inception_resnet_v2_int8_performance.sh} (83%) delete mode 100755 models/cv/classification/inception_resnet_v2/ixrt/scripts/infer_inceptionresnetv2_int8_accuracy.sh delete mode 100755 models/cv/classification/inception_resnet_v2/ixrt/scripts/infer_inceptionresnetv2_int8_performance.sh delete mode 100644 models/cv/classification/inception_resnet_v2/ixrt/simplify_model.py delete mode 100755 models/cv/classification/inception_v3/ixrt/build_engine.py delete mode 100644 models/cv/classification/inception_v3/ixrt/build_i8_engine.py delete mode 100644 models/cv/classification/inception_v3/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/inception_v3/ixrt/common.py delete mode 100644 models/cv/classification/inception_v3/ixrt/inference.py delete mode 100644 models/cv/classification/inception_v3/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/inception_v3/ixrt/quant.py delete mode 100644 models/cv/classification/inception_v3/ixrt/refine_model.py delete mode 100644 models/cv/classification/inception_v3/ixrt/requirements.txt delete mode 100644 models/cv/classification/inception_v3/ixrt/simplify_model.py create mode 100644 
models/cv/classification/ixrt_common/README.md rename models/cv/classification/{repvgg/ixrt => ixrt_common}/build_engine.py (31%) rename models/cv/classification/{mobilenet_v2/ixrt => ixrt_common}/calibration_dataset.py (49%) rename models/cv/classification/{resnext101_32x8d/ixrt => ixrt_common}/common.py (55%) rename models/cv/classification/{alexnet/ixrt => ixrt_common}/config/ALEXNET_CONFIG (52%) rename models/cv/classification/{convnext_base/ixrt/config/CONVNEXT_CONFIG => ixrt_common/config/CONVNEXT_BASE_CONFIG} (100%) rename models/cv/classification/{convnext_small/ixrt/config/CONVNEXT_CONFIG => ixrt_common/config/CONVNEXT_SMALL_CONFIG} (96%) rename models/cv/classification/{cspdarknet53/ixrt => ixrt_common}/config/CSPDARKNET53_CONFIG (100%) rename models/cv/classification/{cspresnet50/ixrt => ixrt_common}/config/CSPRESNET50_CONFIG (53%) rename models/cv/classification/{deit_tiny/ixrt => ixrt_common}/config/DEIT_TINY_CONFIG (100%) rename models/cv/classification/{densenet121/ixrt => ixrt_common}/config/DENSENET121_CONFIG (100%) rename models/cv/classification/{densenet161/ixrt/config/DENSENET_CONFIG => ixrt_common/config/DENSENET161_CONFIG} (100%) rename models/cv/classification/{densenet169/ixrt/config/DENSENET_CONFIG => ixrt_common/config/DENSENET169_CONFIG} (98%) rename models/cv/classification/{densenet201/ixrt => ixrt_common}/config/DENSENET201_CONFIG (100%) rename models/cv/classification/{efficientnetv2_rw_t/ixrt/config/EFFICIENTNETV2_CONFIG => ixrt_common/config/EFFICIENTNETV2_RW_T_CONFIG} (100%) rename models/cv/classification/{efficientnet_b0/ixrt => ixrt_common}/config/EFFICIENTNET_B0_CONFIG (100%) rename models/cv/classification/{efficientnet_b1/ixrt => ixrt_common}/config/EFFICIENTNET_B1_CONFIG (100%) rename models/cv/classification/{efficientnet_b2/ixrt => ixrt_common}/config/EFFICIENTNET_B2_CONFIG (100%) rename models/cv/classification/{efficientnet_b3/ixrt => ixrt_common}/config/EFFICIENTNET_B3_CONFIG (100%) rename 
models/cv/classification/{efficientnet_v2/ixrt => ixrt_common}/config/EFFICIENTNET_V2_CONFIG (100%) rename models/cv/classification/{efficientnet_v2_s/ixrt/config/EFFICIENTNETV2_S_CONFIG => ixrt_common/config/EFFICIENTNET_V2_S_CONFIG} (100%) rename models/cv/classification/{googlenet/ixrt => ixrt_common}/config/GOOGLENET_CONFIG (100%) rename models/cv/classification/{inception_v3/ixrt/config/INCEPTION_V3_CONFIG => ixrt_common/config/HRNET_W18_CONFIG} (49%) rename models/cv/classification/{inception_resnet_v2/ixrt/config/INCEPTIONRESNETV2_CONFIG => ixrt_common/config/INCEPTION_RESNET_V2_CONFIG} (50%) rename models/cv/classification/{squeezenet_v1_0/ixrt/config/SQUEEZENET_V10_CONFIG => ixrt_common/config/INCEPTION_V3_CONFIG} (92%) create mode 100644 models/cv/classification/ixrt_common/config/MOBILENET_V2_CONFIG rename models/cv/classification/{mobilenet_v3/ixrt => ixrt_common}/config/MOBILENET_V3_CONFIG (100%) rename models/cv/classification/{repvgg/ixrt => ixrt_common}/config/REPVGG_CONFIG (100%) create mode 100644 models/cv/classification/ixrt_common/config/REPVIT_CONFIG rename models/cv/classification/{res2net50/ixrt => ixrt_common}/config/RES2NET50_CONFIG (100%) rename models/cv/classification/{resnet101/ixrt => ixrt_common}/config/RESNET101_CONFIG (100%) rename models/cv/classification/{resnet18/ixrt => ixrt_common}/config/RESNET18_CONFIG (100%) rename models/cv/classification/{resnet34/ixrt => ixrt_common}/config/RESNET34_CONFIG (100%) rename models/cv/classification/{resnet50/ixrt => ixrt_common}/config/RESNET50_CONFIG (100%) rename models/cv/classification/{resnetv1d50/ixrt => ixrt_common}/config/RESNETV1D50_CONFIG (98%) rename models/cv/classification/{resnext101_32x8d/ixrt => ixrt_common}/config/RESNEXT101_32X8D_CONFIG (100%) rename models/cv/classification/{resnext101_64x4d/ixrt => ixrt_common}/config/RESNEXT101_64X4D_CONFIG (100%) rename models/cv/classification/{resnext50_32x4d/ixrt/config/RESNEXT50_CONFIG => ixrt_common/config/RESNEXT50_32X4D_CONFIG} 
(100%) rename models/cv/classification/{shufflenetv2_x0_5/ixrt/config/SHUFFLENET_V2_X0_5_CONFIG => ixrt_common/config/SHUFFLENETV2_X0_5_CONFIG} (100%) rename models/cv/classification/{shufflenetv2_x1_0/ixrt => ixrt_common}/config/SHUFFLENETV2_X1_0_CONFIG (100%) rename models/cv/classification/{shufflenetv2_x1_5/ixrt => ixrt_common}/config/SHUFFLENETV2_X1_5_CONFIG (100%) rename models/cv/classification/{shufflenetv2_x2_0/ixrt => ixrt_common}/config/SHUFFLENETV2_X2_0_CONFIG (100%) rename models/cv/classification/{shufflenet_v1/ixrt => ixrt_common}/config/SHUFFLENET_V1_CONFIG (100%) create mode 100644 models/cv/classification/ixrt_common/config/SHUFFLENET_V1_CONFIG copy rename models/cv/classification/{squeezenet_v1_0/ixrt => ixrt_common}/config/SQUEEZENET_V1_0_CONFIG (100%) rename models/cv/classification/{squeezenet_v1_1/ixrt => ixrt_common}/config/SQUEEZENET_V1_1_CONFIG (100%) rename models/cv/classification/{vgg16/ixrt => ixrt_common}/config/VGG16_CONFIG (100%) rename models/cv/classification/{wide_resnet50/ixrt => ixrt_common}/config/WIDE_RESNET50_CONFIG (100%) rename models/cv/classification/{wide_resnet50/ixrt => ixrt_common}/export.py (78%) rename models/cv/classification/{convnext_small/ixrt => ixrt_common}/inference.py (48%) create mode 100644 models/cv/classification/ixrt_common/load_ixrt_plugin.py rename models/cv/classification/{efficientnet_b0/ixrt => ixrt_common}/modify_batchsize.py (59%) rename models/cv/classification/{res2net50/ixrt => ixrt_common}/quant.py (71%) rename models/cv/classification/{googlenet/ixrt => ixrt_common}/refine_model.py (91%) rename models/cv/classification/{alexnet/ixrt => ixrt_common}/refine_utils/__init__.py (100%) rename models/cv/classification/{cspdarknet53/ixrt => ixrt_common}/refine_utils/common.py (63%) rename models/cv/classification/{res2net50/ixrt => ixrt_common}/refine_utils/linear_pass.py (81%) rename models/cv/classification/{alexnet/ixrt => ixrt_common}/refine_utils/matmul_to_gemm_pass.py (62%) rename 
models/cv/classification/{squeezenet_v1_1/ixrt => ixrt_common}/requirements.txt (93%) rename models/cv/classification/{convnext_base/ixrt => ixrt_common}/simplify_model.py (49%) delete mode 100644 models/cv/classification/mobilenet_v2/ixrt/build_engine.py delete mode 100644 models/cv/classification/mobilenet_v2/ixrt/build_engine_by_write_qparams.py delete mode 100644 models/cv/classification/mobilenet_v2/ixrt/common.py delete mode 100644 models/cv/classification/mobilenet_v2/ixrt/export_onnx.py delete mode 100644 models/cv/classification/mobilenet_v2/ixrt/inference.py delete mode 100644 models/cv/classification/mobilenet_v2/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/mobilenet_v2/ixrt/quant.py delete mode 100644 models/cv/classification/mobilenet_v2/ixrt/requirements.txt delete mode 100644 models/cv/classification/mobilenet_v2/ixrt/simplify_model.py delete mode 100644 models/cv/classification/mobilenet_v3/ixrt/build_engine.py delete mode 100644 models/cv/classification/mobilenet_v3/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/mobilenet_v3/ixrt/common.py delete mode 100644 models/cv/classification/mobilenet_v3/ixrt/export_onnx.py delete mode 100644 models/cv/classification/mobilenet_v3/ixrt/inference.py delete mode 100644 models/cv/classification/mobilenet_v3/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/mobilenet_v3/ixrt/requirements.txt delete mode 100644 models/cv/classification/mobilenet_v3/ixrt/simplify_model.py delete mode 100644 models/cv/classification/repvgg/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/repvgg/ixrt/common.py delete mode 100644 models/cv/classification/repvgg/ixrt/inference.py delete mode 100644 models/cv/classification/repvgg/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/repvgg/ixrt/quant.py delete mode 100644 models/cv/classification/repvgg/ixrt/refine_model.py delete mode 100644 
models/cv/classification/repvgg/ixrt/requirements.txt delete mode 100644 models/cv/classification/repvgg/ixrt/simplify_model.py delete mode 100644 models/cv/classification/res2net50/ixrt/build_engine.py delete mode 100644 models/cv/classification/res2net50/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/res2net50/ixrt/common.py delete mode 100644 models/cv/classification/res2net50/ixrt/inference.py delete mode 100644 models/cv/classification/res2net50/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/res2net50/ixrt/refine_model.py delete mode 100644 models/cv/classification/res2net50/ixrt/refine_utils/__init__.py delete mode 100644 models/cv/classification/res2net50/ixrt/refine_utils/common.py delete mode 100644 models/cv/classification/res2net50/ixrt/refine_utils/matmul_to_gemm_pass.py delete mode 100644 models/cv/classification/res2net50/ixrt/requirements.txt delete mode 100644 models/cv/classification/res2net50/ixrt/simplify_model.py delete mode 100644 models/cv/classification/resnet101/ixrt/build_engine.py delete mode 100644 models/cv/classification/resnet101/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/resnet101/ixrt/common.py delete mode 100644 models/cv/classification/resnet101/ixrt/export_onnx.py delete mode 100644 models/cv/classification/resnet101/ixrt/inference.py delete mode 100644 models/cv/classification/resnet101/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/resnet101/ixrt/quant.py delete mode 100644 models/cv/classification/resnet101/ixrt/refine_model.py delete mode 100644 models/cv/classification/resnet101/ixrt/refine_utils/__init__.py delete mode 100644 models/cv/classification/resnet101/ixrt/refine_utils/common.py delete mode 100644 models/cv/classification/resnet101/ixrt/refine_utils/linear_pass.py delete mode 100644 models/cv/classification/resnet101/ixrt/refine_utils/matmul_to_gemm_pass.py delete mode 100644 
models/cv/classification/resnet101/ixrt/requirements.txt delete mode 100644 models/cv/classification/resnet101/ixrt/simplify_model.py delete mode 100644 models/cv/classification/resnet18/ixrt/build_engine.py delete mode 100644 models/cv/classification/resnet18/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/resnet18/ixrt/common.py delete mode 100644 models/cv/classification/resnet18/ixrt/export_onnx.py delete mode 100644 models/cv/classification/resnet18/ixrt/inference.py delete mode 100644 models/cv/classification/resnet18/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/resnet18/ixrt/quant.py delete mode 100644 models/cv/classification/resnet18/ixrt/refine_model.py delete mode 100644 models/cv/classification/resnet18/ixrt/refine_utils/__init__.py delete mode 100644 models/cv/classification/resnet18/ixrt/refine_utils/common.py delete mode 100644 models/cv/classification/resnet18/ixrt/refine_utils/linear_pass.py delete mode 100644 models/cv/classification/resnet18/ixrt/refine_utils/matmul_to_gemm_pass.py delete mode 100644 models/cv/classification/resnet18/ixrt/requirements.txt delete mode 100644 models/cv/classification/resnet18/ixrt/simplify_model.py delete mode 100644 models/cv/classification/resnet34/ixrt/build_engine.py delete mode 100644 models/cv/classification/resnet34/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/resnet34/ixrt/common.py delete mode 100644 models/cv/classification/resnet34/ixrt/export_onnx.py delete mode 100644 models/cv/classification/resnet34/ixrt/inference.py delete mode 100644 models/cv/classification/resnet34/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/resnet34/ixrt/quant.py delete mode 100644 models/cv/classification/resnet34/ixrt/refine_model.py delete mode 100644 models/cv/classification/resnet34/ixrt/refine_utils/__init__.py delete mode 100644 models/cv/classification/resnet34/ixrt/refine_utils/common.py delete mode 100644 
models/cv/classification/resnet34/ixrt/refine_utils/linear_pass.py delete mode 100644 models/cv/classification/resnet34/ixrt/refine_utils/matmul_to_gemm_pass.py delete mode 100644 models/cv/classification/resnet34/ixrt/requirements.txt delete mode 100644 models/cv/classification/resnet34/ixrt/simplify_model.py delete mode 100644 models/cv/classification/resnet50/ixrt/build_engine.py delete mode 100644 models/cv/classification/resnet50/ixrt/build_i8_engine.py delete mode 100644 models/cv/classification/resnet50/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/resnet50/ixrt/common.py delete mode 100644 models/cv/classification/resnet50/ixrt/export_onnx.py delete mode 100644 models/cv/classification/resnet50/ixrt/inference.py delete mode 100644 models/cv/classification/resnet50/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/resnet50/ixrt/quant.py delete mode 100644 models/cv/classification/resnet50/ixrt/refine_model.py delete mode 100644 models/cv/classification/resnet50/ixrt/refine_utils/__init__.py delete mode 100644 models/cv/classification/resnet50/ixrt/refine_utils/common.py delete mode 100644 models/cv/classification/resnet50/ixrt/refine_utils/linear_pass.py delete mode 100644 models/cv/classification/resnet50/ixrt/refine_utils/matmul_to_gemm_pass.py delete mode 100644 models/cv/classification/resnet50/ixrt/requirements.txt delete mode 100644 models/cv/classification/resnet50/ixrt/simplify_model.py delete mode 100644 models/cv/classification/resnetv1d50/ixrt/build_engine.py delete mode 100644 models/cv/classification/resnetv1d50/ixrt/build_i8_engine.py delete mode 100644 models/cv/classification/resnetv1d50/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/resnetv1d50/ixrt/common.py delete mode 100644 models/cv/classification/resnetv1d50/ixrt/inference.py delete mode 100644 models/cv/classification/resnetv1d50/ixrt/modify_batchsize.py delete mode 100644 
models/cv/classification/resnetv1d50/ixrt/quant.py delete mode 100644 models/cv/classification/resnetv1d50/ixrt/refine_model.py delete mode 100644 models/cv/classification/resnetv1d50/ixrt/refine_utils/__init__.py delete mode 100644 models/cv/classification/resnetv1d50/ixrt/refine_utils/common.py delete mode 100644 models/cv/classification/resnetv1d50/ixrt/refine_utils/linear_pass.py delete mode 100644 models/cv/classification/resnetv1d50/ixrt/refine_utils/matmul_to_gemm_pass.py delete mode 100644 models/cv/classification/resnetv1d50/ixrt/requirements.txt delete mode 100644 models/cv/classification/resnetv1d50/ixrt/simplify_model.py delete mode 100644 models/cv/classification/resnext101_32x8d/ixrt/build_engine.py delete mode 100644 models/cv/classification/resnext101_32x8d/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/resnext101_32x8d/ixrt/export.py delete mode 100644 models/cv/classification/resnext101_32x8d/ixrt/inference.py delete mode 100644 models/cv/classification/resnext101_32x8d/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/resnext101_32x8d/ixrt/requirements.txt delete mode 100644 models/cv/classification/resnext101_32x8d/ixrt/simplify_model.py delete mode 100644 models/cv/classification/resnext101_64x4d/ixrt/build_engine.py delete mode 100644 models/cv/classification/resnext101_64x4d/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/resnext101_64x4d/ixrt/common.py delete mode 100644 models/cv/classification/resnext101_64x4d/ixrt/export.py delete mode 100644 models/cv/classification/resnext101_64x4d/ixrt/inference.py delete mode 100644 models/cv/classification/resnext101_64x4d/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/resnext101_64x4d/ixrt/requirements.txt delete mode 100644 models/cv/classification/resnext101_64x4d/ixrt/simplify_model.py delete mode 100644 models/cv/classification/resnext50_32x4d/ixrt/build_engine.py delete mode 100644 
models/cv/classification/resnext50_32x4d/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/resnext50_32x4d/ixrt/common.py delete mode 100644 models/cv/classification/resnext50_32x4d/ixrt/export.py delete mode 100644 models/cv/classification/resnext50_32x4d/ixrt/inference.py delete mode 100644 models/cv/classification/resnext50_32x4d/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/resnext50_32x4d/ixrt/requirements.txt delete mode 100644 models/cv/classification/resnext50_32x4d/ixrt/simplify_model.py delete mode 100644 models/cv/classification/shufflenet_v1/ixrt/build_engine.py delete mode 100644 models/cv/classification/shufflenet_v1/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/shufflenet_v1/ixrt/common.py delete mode 100644 models/cv/classification/shufflenet_v1/ixrt/inference.py delete mode 100644 models/cv/classification/shufflenet_v1/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/shufflenet_v1/ixrt/quant.py delete mode 100644 models/cv/classification/shufflenet_v1/ixrt/refine_model.py delete mode 100644 models/cv/classification/shufflenet_v1/ixrt/requirements.txt delete mode 100644 models/cv/classification/shufflenet_v1/ixrt/simplify_model.py delete mode 100644 models/cv/classification/shufflenetv2_x0_5/ixrt/build_engine.py delete mode 100644 models/cv/classification/shufflenetv2_x0_5/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/shufflenetv2_x0_5/ixrt/common.py delete mode 100644 models/cv/classification/shufflenetv2_x0_5/ixrt/export.py delete mode 100644 models/cv/classification/shufflenetv2_x0_5/ixrt/inference.py delete mode 100644 models/cv/classification/shufflenetv2_x0_5/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/shufflenetv2_x0_5/ixrt/quant.py delete mode 100644 models/cv/classification/shufflenetv2_x0_5/ixrt/refine_model.py delete mode 100644 models/cv/classification/shufflenetv2_x0_5/ixrt/requirements.txt delete 
mode 100644 models/cv/classification/shufflenetv2_x0_5/ixrt/simplify_model.py delete mode 100644 models/cv/classification/shufflenetv2_x1_0/ixrt/build_engine.py delete mode 100644 models/cv/classification/shufflenetv2_x1_0/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/shufflenetv2_x1_0/ixrt/common.py delete mode 100644 models/cv/classification/shufflenetv2_x1_0/ixrt/export.py delete mode 100644 models/cv/classification/shufflenetv2_x1_0/ixrt/inference.py delete mode 100644 models/cv/classification/shufflenetv2_x1_0/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/shufflenetv2_x1_0/ixrt/quant.py delete mode 100644 models/cv/classification/shufflenetv2_x1_0/ixrt/requirements.txt delete mode 100644 models/cv/classification/shufflenetv2_x1_0/ixrt/simplify_model.py delete mode 100644 models/cv/classification/shufflenetv2_x1_5/ixrt/build_engine.py delete mode 100644 models/cv/classification/shufflenetv2_x1_5/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/shufflenetv2_x1_5/ixrt/common.py delete mode 100644 models/cv/classification/shufflenetv2_x1_5/ixrt/export.py delete mode 100644 models/cv/classification/shufflenetv2_x1_5/ixrt/inference.py delete mode 100644 models/cv/classification/shufflenetv2_x1_5/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/shufflenetv2_x1_5/ixrt/quant.py delete mode 100644 models/cv/classification/shufflenetv2_x1_5/ixrt/requirements.txt delete mode 100644 models/cv/classification/shufflenetv2_x1_5/ixrt/simplify_model.py delete mode 100644 models/cv/classification/shufflenetv2_x2_0/ixrt/build_engine.py delete mode 100644 models/cv/classification/shufflenetv2_x2_0/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/shufflenetv2_x2_0/ixrt/common.py delete mode 100644 models/cv/classification/shufflenetv2_x2_0/ixrt/inference.py delete mode 100644 models/cv/classification/shufflenetv2_x2_0/ixrt/modify_batchsize.py delete mode 100644 
models/cv/classification/shufflenetv2_x2_0/ixrt/quant.py delete mode 100644 models/cv/classification/shufflenetv2_x2_0/ixrt/requirements.txt delete mode 100644 models/cv/classification/shufflenetv2_x2_0/ixrt/simplify_model.py delete mode 100644 models/cv/classification/squeezenet_v1_0/ixrt/build_engine.py delete mode 100644 models/cv/classification/squeezenet_v1_0/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/squeezenet_v1_0/ixrt/common.py delete mode 100644 models/cv/classification/squeezenet_v1_0/ixrt/export_onnx.py delete mode 100644 models/cv/classification/squeezenet_v1_0/ixrt/inference.py delete mode 100644 models/cv/classification/squeezenet_v1_0/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/squeezenet_v1_0/ixrt/quant.py delete mode 100644 models/cv/classification/squeezenet_v1_0/ixrt/refine_model.py delete mode 100644 models/cv/classification/squeezenet_v1_0/ixrt/requirements.txt delete mode 100644 models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v10_fp16_accuracy.sh delete mode 100644 models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v10_fp16_performance.sh delete mode 100644 models/cv/classification/squeezenet_v1_0/ixrt/simplify_model.py delete mode 100644 models/cv/classification/squeezenet_v1_1/ixrt/build_engine.py delete mode 100644 models/cv/classification/squeezenet_v1_1/ixrt/build_i8_engine.py delete mode 100644 models/cv/classification/squeezenet_v1_1/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/squeezenet_v1_1/ixrt/common.py delete mode 100644 models/cv/classification/squeezenet_v1_1/ixrt/export_onnx.py delete mode 100644 models/cv/classification/squeezenet_v1_1/ixrt/inference.py delete mode 100644 models/cv/classification/squeezenet_v1_1/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/squeezenet_v1_1/ixrt/quant.py delete mode 100644 models/cv/classification/squeezenet_v1_1/ixrt/refine_model.py delete mode 
100644 models/cv/classification/squeezenet_v1_1/ixrt/simplify_model.py delete mode 100644 models/cv/classification/vgg16/ixrt/build_engine.py delete mode 100644 models/cv/classification/vgg16/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/vgg16/ixrt/common.py delete mode 100644 models/cv/classification/vgg16/ixrt/export_onnx.py delete mode 100644 models/cv/classification/vgg16/ixrt/inference.py delete mode 100644 models/cv/classification/vgg16/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/vgg16/ixrt/quant.py delete mode 100644 models/cv/classification/vgg16/ixrt/refine_model.py delete mode 100644 models/cv/classification/vgg16/ixrt/requirements.txt delete mode 100644 models/cv/classification/vgg16/ixrt/simplify_model.py delete mode 100644 models/cv/classification/wide_resnet50/ixrt/build_engine.py delete mode 100644 models/cv/classification/wide_resnet50/ixrt/build_i8_engine.py delete mode 100644 models/cv/classification/wide_resnet50/ixrt/calibration_dataset.py delete mode 100644 models/cv/classification/wide_resnet50/ixrt/common.py delete mode 100644 models/cv/classification/wide_resnet50/ixrt/inference.py delete mode 100644 models/cv/classification/wide_resnet50/ixrt/modify_batchsize.py delete mode 100644 models/cv/classification/wide_resnet50/ixrt/quant.py delete mode 100644 models/cv/classification/wide_resnet50/ixrt/refine_model.py delete mode 100644 models/cv/classification/wide_resnet50/ixrt/refine_utils/__init__.py delete mode 100644 models/cv/classification/wide_resnet50/ixrt/refine_utils/common.py delete mode 100644 models/cv/classification/wide_resnet50/ixrt/refine_utils/linear_pass.py delete mode 100644 models/cv/classification/wide_resnet50/ixrt/refine_utils/matmul_to_gemm_pass.py delete mode 100644 models/cv/classification/wide_resnet50/ixrt/requirements.txt delete mode 100644 models/cv/classification/wide_resnet50/ixrt/simplify_model.py diff --git a/models/cv/classification/alexnet/ixrt/README.md 
b/models/cv/classification/alexnet/ixrt/README.md index 25b8a5c9..27a87a18 100644 --- a/models/cv/classification/alexnet/ixrt/README.md +++ b/models/cv/classification/alexnet/ixrt/README.md @@ -28,14 +28,14 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash mkdir checkpoints -python3 export_onnx.py --origin_model /path/to/alexnet-owt-7be5be79.pth --output_model checkpoints/alexnet.onnx +python3 ../../ixrt_common/export.py --model-name alexnet --weight alexnet-owt-7be5be79.pth --output checkpoints/alexnet.onnx ``` ## Model Inference @@ -44,8 +44,8 @@ python3 export_onnx.py --origin_model /path/to/alexnet-owt-7be5be79.pth --output export PROJ_DIR=./ export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints -export RUN_DIR=./ -export CONFIG_DIR=config/ALEXNET_CONFIG +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/ALEXNET_CONFIG ``` ### FP16 diff --git a/models/cv/classification/alexnet/ixrt/build_engine.py b/models/cv/classification/alexnet/ixrt/build_engine.py deleted file mode 100644 index 126da5e6..00000000 --- a/models/cv/classification/alexnet/ixrt/build_engine.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - # print("precision : ", precision) - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/alexnet/ixrt/calibration_dataset.py b/models/cv/classification/alexnet/ixrt/calibration_dataset.py deleted file mode 100644 index 442a5602..00000000 --- a/models/cv/classification/alexnet/ixrt/calibration_dataset.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 
0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/alexnet/ixrt/ci/prepare.sh b/models/cv/classification/alexnet/ixrt/ci/prepare.sh index 7795e04d..c5796a98 100644 --- a/models/cv/classification/alexnet/ixrt/ci/prepare.sh +++ b/models/cv/classification/alexnet/ixrt/ci/prepare.sh @@ -25,6 +25,6 @@ else echo "Not Support Os" fi -pip install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt mkdir checkpoints -python3 export_onnx.py --origin_model /root/data/checkpoints/alexnet-owt-7be5be79.pth --output_model checkpoints/alexnet.onnx \ No newline at end of file +python3 ../../ixrt_common/export.py --model-name alexnet --weight alexnet-owt-7be5be79.pth --output checkpoints/alexnet.onnx \ No newline at end of file diff --git a/models/cv/classification/alexnet/ixrt/common.py b/models/cv/classification/alexnet/ixrt/common.py deleted file mode 100644 index 0458195e..00000000 --- a/models/cv/classification/alexnet/ixrt/common.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. 
-# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -import pycuda.driver as cuda - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - allocation = cuda.mem_alloc(size) - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - } - print(f"binding {i}, name : {name} 
dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/alexnet/ixrt/export_onnx.py b/models/cv/classification/alexnet/ixrt/export_onnx.py deleted file mode 100644 index 3c72dd60..00000000 --- a/models/cv/classification/alexnet/ixrt/export_onnx.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import torch -import torchvision.models as models -import argparse - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = models.alexnet() -model.load_state_dict(torch.load(args.origin_model)) -model.cuda() -model.eval() -input = torch.randn(1, 3, 224, 224, device='cuda') -export_onnx_file = args.output_model - -torch.onnx.export(model, - input, - export_onnx_file, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names = ['input'], - output_names = ['output'],) -print(" ") -print('Model has been converted to ONNX') -print("exit") -exit() \ No newline at end of file diff --git a/models/cv/classification/alexnet/ixrt/inference.py b/models/cv/classification/alexnet/ixrt/inference.py deleted file mode 100644 index 50aafd4f..00000000 --- a/models/cv/classification/alexnet/ixrt/inference.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -import pycuda.autoinit -import pycuda.driver as cuda -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += 
batch_data.shape[0] - - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - end2end_time = end_time - start_time - - print(F"E2E time : {end2end_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 
0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/alexnet/ixrt/modify_batchsize.py b/models/cv/classification/alexnet/ixrt/modify_batchsize.py deleted file mode 100644 index 689b7a97..00000000 --- a/models/cv/classification/alexnet/ixrt/modify_batchsize.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. 
- dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/alexnet/ixrt/quant.py b/models/cv/classification/alexnet/ixrt/quant.py deleted file mode 100644 index 7c7860c9..00000000 --- a/models/cv/classification/alexnet/ixrt/quant.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import os -import cv2 -import random -import argparse -import numpy as np -from random import shuffle -from tensorrt.deploy import static_quantize - -import torch -import torchvision.datasets -from calibration_dataset import getdataloader - -def setseed(seed=42): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=20) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=224) - args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - -args = parse_args() -setseed(args.seed) -calibration_dataloader = getdataloader(args.dataset_dir, args.step, args.bsz, img_sz=args.imgsz) -static_quantize(args.model, - calibration_dataloader=calibration_dataloader, - save_quant_onnx_path=os.path.join(args.save_dir, f"quantized_{args.model_name}.onnx"), - observer=args.observer, - data_preprocess=lambda x: x[0].to("cuda"), - quant_format="qdq", - disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/classification/alexnet/ixrt/refine_model.py b/models/cv/classification/alexnet/ixrt/refine_model.py deleted file mode 100644 index 6f1e6c2f..00000000 --- a/models/cv/classification/alexnet/ixrt/refine_model.py +++ /dev/null @@ -1,290 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. 
-# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os -import argparse -import dataclasses - -import torch -import onnx - -from refine_utils.matmul_to_gemm_pass import FusedGemmPass -from refine_utils.linear_pass import FusedLinearPass - -from refine_utils.common import * - -def get_constant_input_name_of_operator(graph: Graph, operator: Operator): - const = None - for input in operator.inputs: - if not graph.containe_var(input): - continue - - if not graph.is_leaf_variable(input): - continue - - input_var = graph.get_variable(input) - if input_var.value is not None: - const = input - return const - -class FuseLayerNormPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - find_sequence_subgraph( - graph, - [OP.REDUCE_MEAN, OP.SUB, OP.POW, OP.REDUCE_MEAN, OP.ADD, OP.SQRT, OP.DIV, OP.MUL, OP.ADD], - self.fuse_layer_norm, - strict=False - ) - return graph - - def fuse_layer_norm(self, graph: Graph, pattern: PatternGraph): - # 检查 REDUCE_MEAN 的输入是否和 SUB 的输入是一致的 - if pattern.nodes[0].operator.inputs[0] != pattern.nodes[1].operator.inputs[0]: - return - - # 检查 POW 的输入是否和 DIV 的输入是一致的 - if pattern.nodes[2].operator.inputs[0] != pattern.nodes[6].operator.inputs[0]: - return - - # 检查部分算子的输出是否被多个算子使用 - nodes = pattern.nodes - for node in [nodes[0]] + nodes[2:-1]: - next_ops = graph.get_next_operators(node.operator) - if len(next_ops) > 1: - return - - eps = None - for input in nodes[4].operator.inputs: 
- input_var = graph.get_variable(input) - if input_var.value is not None and graph.is_leaf_variable(input): - eps = to_py_type(input_var.value) - - scale = get_constant_input_name_of_operator(graph, nodes[-2].operator) - bias = get_constant_input_name_of_operator(graph, nodes[-1].operator) - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - bias_var = graph.get_variable(bias) - print(bias_var) - - attributes = { - "axis": nodes[0].operator.attributes.axes, - "epsilon": eps, - } - - - layer_norm_op = self.transform.make_operator( - op_type="LayerNormalization", - inputs=[nodes[0].operator.inputs[0], scale, bias], - outputs=[nodes[-1].operator.outputs[0]], - **attributes - ) - - self.transform.add_operator(layer_norm_op) - -class FusedGeluPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True - ) - return graph - - def fuse_gelu(self, graph: Graph, pattern: PatternGraph): - nodes = pattern.nodes - prev_op = self.transform.get_previous_operators(nodes[0].operator)[0] - next_ops = self.transform.get_next_operators(prev_op) - if len(next_ops) != 2: - return - - if nodes[0].operator not in next_ops or nodes[3].operator not in next_ops: - return - - gelu_op_input = None - for input in nodes[3].operator.inputs: - if input in nodes[0].operator.inputs: - gelu_op_input = input - break - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - gelu_op = self.transform.make_operator( - op_type=OP.GELU, - inputs=[gelu_op_input], - outputs=[nodes[-1].operator.outputs[0]] - ) - self.transform.add_operator(gelu_op) - -@dataclasses.dataclass -class NormalizeAttr(BaseOperatorAttr): - p: float = 2.0 - epsilon: float = 1e-12 - axis: int = 1 - - -@registe_operator(OP.GELU) -class GeluOperator(BaseOperator): - - def call( - self, - executor, - 
operator: Operator, - inputs: List, - attr: NormalizeAttr, - ): - return F.gelu(inputs[0]) - - def convert_onnx_operator( - self, ir_graph: Graph, onnx_graph: onnx.GraphProto, node: onnx.NodeProto - ) -> Operator: - return default_converter(ir_graph, onnx_graph, node, attr_cls=attr.EmptyAttr) - - def quantize( - self, - graph: Graph, - op: Operator, - operator_observer_config: QuantOperatorObserverConfig, - quant_outputs: bool = False, - ): - return quant_single_input_operator(graph, op, operator_observer_config, quant_outputs=quant_outputs) - - - -class ClearUnsedVariables(BasePass): - - def process(self, graph: Graph) -> Graph: - vars = list(graph.variables) - - for var in vars: - if len(graph.get_dst_operators(var)) == 0 and graph.is_leaf_variable(var): - graph.delete_variable(var) - - quant_params = list(graph.quant_parameters.keys()) - for var in quant_params: - if not graph.containe_var(var): - graph.quant_parameters.pop(var) - - return graph - -class FormatLayerNorm(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if "LayerNorm" in op.op_type: - self.format_layer_norm(graph, op) - return graph - - def format_layer_norm(self, graph, operator): - if not hasattr(operator.attributes, "axis"): - return - if isinstance(operator.attributes.axis, (tuple, list)): - operator.attributes.axis = operator.attributes.axis[0] - -class FormatReshape(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if op.op_type == "Reshape": - self.format_reshape(graph, op) - - return graph - - def format_reshape(self, graph, operator): - shape = graph.get_variable(operator.inputs[1]) - shape.value = torch.tensor(shape.value, dtype=torch.int64) - -class FormatScalar(BasePass): - - def process(self, graph: Graph): - for var in graph.variables.values(): - var: Variable - use_ops = graph.get_dst_operators(var) - - if len(use_ops) == 0: - continue - - if use_ops[0].op_type not in [OP.MUL, OP.ADD, 
OP.GATHER]: - continue - - if var.value is not None and var.value.ndim == 0: - var.value = var.value.reshape(1) - print(f"Reshape scalar to tensor for {var.name}.") - - return graph - -class RenamePass(BasePass): - - def process(self, graph:Graph): - - names = [name for name in graph.operators.keys()] - for old_name in names: - new_name = old_name.replace("/", "#") - - graph.rename_operator(old_name, new_name) - - names = [name for name in graph.variables.keys()] - for name in names: - new_name = name.replace("/", ".").replace("Output", "out").replace("output", "out") - - graph.rename_vaiable(name, new_name, - with_variables=True, - with_operator_outputs=True) - - return graph - -def create_pipeline(example_inputs): - return PassSequence( - # FuseLayerNormPass(), - FusedGeluPass(), - - # ClearUnsedVariables(), - # FormatLayerNorm(), - # FormatReshape(), - # FormatScalar(), - # RenamePass() - ) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--onnx_path", type=str) - parser.add_argument("--dst_onnx_path", type=str) - - parser.add_argument("--bsz", type=int, default=8, - help="Batch size") - parser.add_argument("--imgsz", type=int, default=224, - help="Image size") - - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - - example_inputs = torch.randn(args.bsz, 3, args.imgsz, args.imgsz) - - refine_pipline = Pipeline( - create_source(f"{args.onnx_path}", example_inputs=example_inputs), - create_pipeline(example_inputs), - create_target( - f"{args.dst_onnx_path}", - example_inputs=example_inputs, - ) - ) - refine_pipline.run() - - print(f"refine the model, input shape={example_inputs.shape}") diff --git a/models/cv/classification/alexnet/ixrt/refine_utils/common.py b/models/cv/classification/alexnet/ixrt/refine_utils/common.py deleted file mode 100644 index 2af19a14..00000000 --- a/models/cv/classification/alexnet/ixrt/refine_utils/common.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) 2024, Shanghai 
Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -from typing import Union, Callable, List - -from tensorrt.deploy.api import * -from tensorrt.deploy.backend.onnx.converter import default_converter -from tensorrt.deploy.backend.torch.executor.operators._operators import to_py_type -from tensorrt.deploy.ir.operator_attr import BaseOperatorAttr, EmptyAttr -from tensorrt.deploy.ir.operator_type import OperatorType as OP -from tensorrt.deploy.ir import operator_attr as attr, Operator, generate_operator_name -from tensorrt.deploy.fusion import BasePass, PatternGraph, build_sequence_graph, GraphMatcher, PassSequence -from tensorrt.deploy.ir import Graph -from tensorrt.deploy.quantizer.quant_operator.base import quant_single_input_operator -from tensorrt.deploy.backend.onnx.converter import convert_onnx_operator - -def find_sequence_subgraph(graph, - pattern: Union[List[str], PatternGraph], - callback: Callable[[Graph, PatternGraph], None], - strict=True): - if isinstance(pattern, List): - pattern = build_sequence_graph(pattern) - - matcher = GraphMatcher(pattern, strict=strict) - return matcher.findall(graph, callback) \ No newline at end of file diff --git a/models/cv/classification/alexnet/ixrt/refine_utils/linear_pass.py b/models/cv/classification/alexnet/ixrt/refine_utils/linear_pass.py deleted file mode 100644 index 29b5e4a9..00000000 --- a/models/cv/classification/alexnet/ixrt/refine_utils/linear_pass.py +++ 
/dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import dataclasses - -from refine_utils.common import * - -# AXB=C, Only for B is initializer - -class FusedLinearPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.MATMUL, OP.ADD], callback=self.to_linear_with_bias, strict=True - ) - find_sequence_subgraph( - graph, pattern=[OP.MATMUL], callback=self.to_linear, strict=True - ) - return graph - - def to_linear_with_bias(self, graph, pattern: PatternGraph): - matmul = pattern.nodes[0] - add = pattern.nodes[1] - if len(add.operator.inputs) != 2: - return - - b_var = graph.get_variable(matmul.operator.inputs[1]) - if not graph.is_leaf_variable(b_var) or b_var.value is None: - return - - if b_var.value.ndim != 2: - return - - bias_var = None - for input in add.operator.inputs: - if input not in matmul.operator.outputs: - bias_var = input - - inputs = matmul.operator.inputs - inputs.append(bias_var) - outputs = add.operator.outputs - - b_var.value = b_var.value.transpose(1, 0) - b_var.shape[0],b_var.shape[1] = b_var.shape[1],b_var.shape[0] - - hidden_size = b_var.shape[1] - linear_dim = b_var.shape[0] - - attributes = { - "hidden_size": hidden_size, - "linear_dim": linear_dim, - "has_bias": 1, - "act_type":"none" - } - - self.transform.make_operator( - 
"LinearFP16", - inputs=inputs, - outputs=outputs, - **attributes - ) - - self.transform.delete_operator(add.operator) - self.transform.delete_operator(matmul.operator) - - def to_linear(self, graph, pattern: PatternGraph): - matmul = pattern.nodes[0] - if len(matmul.operator.inputs) != 2: - return - - b_var = graph.get_variable(matmul.operator.inputs[1]) - if not graph.is_leaf_variable(b_var) or b_var.value is None: - return - - if b_var.value.ndim != 2: - return - - attributes = { - "hidden_size": hidden_size, - "linear_dim": linear_dim, - "has_bias": 0, - "act_type": "none" - } - - b_var.value = b_var.value.transpose(1, 0) - b_var.shape[0],b_var.shape[1] = b_var.shape[1], b_var.shape[0] - - hidden_size = b_var.shape[1] - linear_dim = b_var.shape[0] - - op = self.transform.make_operator( - op_type = "LinearFP16", - inputs = pattern.nodes[0].operator.inputs, - outputs=[pattern.nodes[-1].operator.outputs[0]], - **attributes - ) - - self.transform.add_operator(op) - - self.transform.delete_operator(matmul.operator) \ No newline at end of file diff --git a/models/cv/classification/alexnet/ixrt/requirements.txt b/models/cv/classification/alexnet/ixrt/requirements.txt deleted file mode 100644 index bc645b22..00000000 --- a/models/cv/classification/alexnet/ixrt/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -tqdm -onnx -onnxsim -tabulate -pycuda \ No newline at end of file diff --git a/models/cv/classification/alexnet/ixrt/simplify_model.py b/models/cv/classification/alexnet/ixrt/simplify_model.py deleted file mode 100644 index 9948a9fa..00000000 --- a/models/cv/classification/alexnet/ixrt/simplify_model.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/convnext_base/ixrt/README.md b/models/cv/classification/convnext_base/ixrt/README.md index 38028a2b..a0d364bf 100644 --- a/models/cv/classification/convnext_base/ixrt/README.md +++ b/models/cv/classification/convnext_base/ixrt/README.md @@ -27,19 +27,24 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash -python3 export.py --weight convnext_base-6075fbad.pth --output convnext_base.onnx +mkdir checkpoints +python3 ../../ixrt_common/export.py --model-name convnext_base --weight convnext_base-6075fbad.pth --output checkpoints/convnext_base.onnx ``` ## Model Inference ```bash -export DATASETS_DIR=/Path/to/imagenet_val/ +export PROJ_DIR=./ +export DATASETS_DIR=/path/to/imagenet_val/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=../../ixrt_common/ +export 
CONFIG_DIR=../../ixrt_common/config/CONVNEXT_BASE_CONFIG ``` ### FP16 diff --git a/models/cv/classification/convnext_base/ixrt/build_engine.py b/models/cv/classification/convnext_base/ixrt/build_engine.py deleted file mode 100644 index 038c15d5..00000000 --- a/models/cv/classification/convnext_base/ixrt/build_engine.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - 
help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/convnext_base/ixrt/calibration_dataset.py b/models/cv/classification/convnext_base/ixrt/calibration_dataset.py deleted file mode 100644 index ec931c65..00000000 --- a/models/cv/classification/convnext_base/ixrt/calibration_dataset.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=True, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - 
batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader diff --git a/models/cv/classification/convnext_base/ixrt/ci/prepare.sh b/models/cv/classification/convnext_base/ixrt/ci/prepare.sh new file mode 100644 index 00000000..d94da0a6 --- /dev/null +++ b/models/cv/classification/convnext_base/ixrt/ci/prepare.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip3 install -r ../../ixrt_common/requirements.txt +mkdir checkpoints +python3 ../../ixrt_common/export.py --model-name convnext_base --weight convnext_base-6075fbad.pth --output checkpoints/convnext_base.onnx \ No newline at end of file diff --git a/models/cv/classification/convnext_base/ixrt/common.py b/models/cv/classification/convnext_base/ixrt/common.py deleted file mode 100644 index fd6a84d8..00000000 --- a/models/cv/classification/convnext_base/ixrt/common.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -from cuda import cuda, cudart - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - err, allocation = cudart.cudaMalloc(size) - assert err == cudart.cudaError_t.cudaSuccess - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - "nbytes": size, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations \ No newline at end of file diff --git a/models/cv/classification/convnext_base/ixrt/export.py b/models/cv/classification/convnext_base/ixrt/export.py deleted file mode 100644 index d9a2fe01..00000000 --- a/models/cv/classification/convnext_base/ixrt/export.py +++ /dev/null 
@@ -1,61 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import torch -import torchvision -import argparse - -def parse_args(): - parser = argparse.ArgumentParser() - - parser.add_argument("--weight", - type=str, - required=True, - help="pytorch model weight.") - - parser.add_argument("--output", - type=str, - required=True, - help="export onnx model path.") - - args = parser.parse_args() - return args - -def main(): - args = parse_args() - - model = torchvision.models.convnext_base() - model.load_state_dict(torch.load(args.weight)) - model.eval() - - input_names = ['input'] - output_names = ['output'] - dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} - dummy_input = torch.randn(1, 3, 224, 224) - - torch.onnx.export( - model, - dummy_input, - args.output, - input_names = input_names, - dynamic_axes = dynamic_axes, - output_names = output_names, - opset_version=13 - ) - - print("Export onnx model successfully! ") - -if __name__ == "__main__": - main() diff --git a/models/cv/classification/convnext_base/ixrt/inference.py b/models/cv/classification/convnext_base/ixrt/inference.py deleted file mode 100644 index 22f1644c..00000000 --- a/models/cv/classification/convnext_base/ixrt/inference.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. 
-# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -from cuda import cuda, cudart -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - 
print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += batch_data.shape[0] - (err,) = cudart.cudaMemcpy( - inputs[0]["allocation"], - batch_data, - batch_data.nbytes, - cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, - ) - assert err == cudart.cudaError_t.cudaSuccess - # cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - (err,) = cudart.cudaMemcpy( - output, - outputs[0]["allocation"], - outputs[0]["nbytes"], - cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, - ) - assert err == cudart.cudaError_t.cudaSuccess - # cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - err, = cudart.cudaFree(inputs[0]["allocation"]) - assert err == cudart.cudaError_t.cudaSuccess - err, = cudart.cudaFree(outputs[0]["allocation"]) - assert err == cudart.cudaError_t.cudaSuccess - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def 
parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/convnext_base/ixrt/modify_batchsize.py b/models/cv/classification/convnext_base/ixrt/modify_batchsize.py deleted file mode 100644 index 689b7a97..00000000 --- a/models/cv/classification/convnext_base/ixrt/modify_batchsize.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the -# License for the specific language governing permissions and limitations -# under the License. -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. - dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/convnext_base/ixrt/requirements.txt b/models/cv/classification/convnext_base/ixrt/requirements.txt deleted file mode 100644 index 520130b7..00000000 --- a/models/cv/classification/convnext_base/ixrt/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -tqdm -onnx -onnxsim -tabulate -ppq -tqdm -cuda-python \ No newline at end of file diff --git a/models/cv/classification/convnext_base/ixrt/scripts/infer_convnext_base_fp16_accuracy.sh b/models/cv/classification/convnext_base/ixrt/scripts/infer_convnext_base_fp16_accuracy.sh index a43c1a20..b743d708 100644 --- 
a/models/cv/classification/convnext_base/ixrt/scripts/infer_convnext_base_fp16_accuracy.sh +++ b/models/cv/classification/convnext_base/ixrt/scripts/infer_convnext_base_fp16_accuracy.sh @@ -42,11 +42,6 @@ do esac done -PROJ_DIR=${PROJ_DIR:-"."} -DATASETS_DIR="${DATASETS_DIR}" -CHECKPOINTS_DIR="${PROJ_DIR}" -RUN_DIR="${PROJ_DIR}" -CONFIG_DIR="${PROJ_DIR}/config/CONVNEXT_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} diff --git a/models/cv/classification/convnext_base/ixrt/scripts/infer_convnext_base_fp16_performance.sh b/models/cv/classification/convnext_base/ixrt/scripts/infer_convnext_base_fp16_performance.sh index 3e5bca55..e7a4f1a7 100644 --- a/models/cv/classification/convnext_base/ixrt/scripts/infer_convnext_base_fp16_performance.sh +++ b/models/cv/classification/convnext_base/ixrt/scripts/infer_convnext_base_fp16_performance.sh @@ -42,11 +42,6 @@ do esac done -PROJ_DIR=${PROJ_DIR:-"."} -DATASETS_DIR="${DATASETS_DIR}" -CHECKPOINTS_DIR="${PROJ_DIR}" -RUN_DIR="${PROJ_DIR}" -CONFIG_DIR="${PROJ_DIR}/config/CONVNEXT_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} diff --git a/models/cv/classification/convnext_small/ixrt/README.md b/models/cv/classification/convnext_small/ixrt/README.md index ab84dd31..a6a1e52c 100644 --- a/models/cv/classification/convnext_small/ixrt/README.md +++ b/models/cv/classification/convnext_small/ixrt/README.md @@ -27,19 +27,24 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash -python3 export.py --weight convnext_small-0c510722.pth --output convnext_small.onnx +mkdir checkpoints +python3 ../../ixrt_common/export.py --model-name convnext_small --weight convnext_small-0c510722.pth --output checkpoints/convnext_small.onnx ``` ## Model Inference ```bash -export DATASETS_DIR=/Path/to/imagenet_val/ +export PROJ_DIR=./ +export 
DATASETS_DIR=/path/to/imagenet_val/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/CONVNEXT_SMALL_CONFIG ``` ### FP16 diff --git a/models/cv/classification/convnext_small/ixrt/build_engine.py b/models/cv/classification/convnext_small/ixrt/build_engine.py deleted file mode 100644 index 038c15d5..00000000 --- a/models/cv/classification/convnext_small/ixrt/build_engine.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/convnext_small/ixrt/calibration_dataset.py b/models/cv/classification/convnext_small/ixrt/calibration_dataset.py deleted file mode 100644 index ec931c65..00000000 --- a/models/cv/classification/convnext_small/ixrt/calibration_dataset.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 
0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=True, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader diff --git a/models/cv/classification/convnext_small/ixrt/ci/prepare.sh b/models/cv/classification/convnext_small/ixrt/ci/prepare.sh index a6336683..fa4fa18a 100644 --- a/models/cv/classification/convnext_small/ixrt/ci/prepare.sh +++ b/models/cv/classification/convnext_small/ixrt/ci/prepare.sh @@ -25,5 +25,6 @@ else echo "Not Support Os" fi -pip install -r requirements.txt -python3 export.py --weight /root/data/checkpoints/convnext_small-0c510722.pth --output convnext_small.onnx \ No newline at end of file +pip install -r ../../ixrt_common/requirements.txt +mkdir checkpoints +python3 ../../ixrt_common/export.py --model-name convnext_small --weight /root/data/checkpoints/convnext_small-0c510722.pth --output checkpoints/convnext_small.onnx \ No newline at end of file diff --git a/models/cv/classification/convnext_small/ixrt/common.py b/models/cv/classification/convnext_small/ixrt/common.py deleted file mode 100644 index fd6a84d8..00000000 --- a/models/cv/classification/convnext_small/ixrt/common.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (c) 2024, Shanghai 
Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -from cuda import cuda, cudart - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - err, allocation = cudart.cudaMalloc(size) - assert err == cudart.cudaError_t.cudaSuccess - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), 
- "shape": list(shape), - "allocation": allocation, - "nbytes": size, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations \ No newline at end of file diff --git a/models/cv/classification/convnext_small/ixrt/export.py b/models/cv/classification/convnext_small/ixrt/export.py deleted file mode 100644 index a58e2d60..00000000 --- a/models/cv/classification/convnext_small/ixrt/export.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import torch -import torchvision -import argparse - -def parse_args(): - parser = argparse.ArgumentParser() - - parser.add_argument("--weight", - type=str, - required=True, - help="pytorch model weight.") - - parser.add_argument("--output", - type=str, - required=True, - help="export onnx model path.") - - args = parser.parse_args() - return args - -def main(): - args = parse_args() - - model = torchvision.models.convnext_small() - model.load_state_dict(torch.load(args.weight)) - model.eval() - - input_names = ['input'] - output_names = ['output'] - dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} - dummy_input = torch.randn(1, 3, 224, 224) - - torch.onnx.export( - model, - dummy_input, - args.output, - input_names = input_names, - dynamic_axes = None, - output_names = output_names, - opset_version=13 - ) - - print("Export onnx model successfully! ") - -if __name__ == "__main__": - main() diff --git a/models/cv/classification/convnext_small/ixrt/modify_batchsize.py b/models/cv/classification/convnext_small/ixrt/modify_batchsize.py deleted file mode 100644 index 689b7a97..00000000 --- a/models/cv/classification/convnext_small/ixrt/modify_batchsize.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. - dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/convnext_small/ixrt/requirements.txt b/models/cv/classification/convnext_small/ixrt/requirements.txt deleted file mode 100644 index 520130b7..00000000 --- a/models/cv/classification/convnext_small/ixrt/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -tqdm -onnx -onnxsim -tabulate -ppq -tqdm -cuda-python \ No newline at end of file diff --git a/models/cv/classification/convnext_small/ixrt/scripts/infer_convnext_small_fp16_accuracy.sh b/models/cv/classification/convnext_small/ixrt/scripts/infer_convnext_small_fp16_accuracy.sh index a43c1a20..b743d708 100644 --- a/models/cv/classification/convnext_small/ixrt/scripts/infer_convnext_small_fp16_accuracy.sh +++ 
b/models/cv/classification/convnext_small/ixrt/scripts/infer_convnext_small_fp16_accuracy.sh @@ -42,11 +42,6 @@ do esac done -PROJ_DIR=${PROJ_DIR:-"."} -DATASETS_DIR="${DATASETS_DIR}" -CHECKPOINTS_DIR="${PROJ_DIR}" -RUN_DIR="${PROJ_DIR}" -CONFIG_DIR="${PROJ_DIR}/config/CONVNEXT_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} diff --git a/models/cv/classification/convnext_small/ixrt/scripts/infer_convnext_small_fp16_performance.sh b/models/cv/classification/convnext_small/ixrt/scripts/infer_convnext_small_fp16_performance.sh index 3e5bca55..e7a4f1a7 100644 --- a/models/cv/classification/convnext_small/ixrt/scripts/infer_convnext_small_fp16_performance.sh +++ b/models/cv/classification/convnext_small/ixrt/scripts/infer_convnext_small_fp16_performance.sh @@ -42,11 +42,6 @@ do esac done -PROJ_DIR=${PROJ_DIR:-"."} -DATASETS_DIR="${DATASETS_DIR}" -CHECKPOINTS_DIR="${PROJ_DIR}" -RUN_DIR="${PROJ_DIR}" -CONFIG_DIR="${PROJ_DIR}/config/CONVNEXT_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} diff --git a/models/cv/classification/convnext_small/ixrt/simplify_model.py b/models/cv/classification/convnext_small/ixrt/simplify_model.py deleted file mode 100644 index 9948a9fa..00000000 --- a/models/cv/classification/convnext_small/ixrt/simplify_model.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/cspdarknet53/ixrt/README.md b/models/cv/classification/cspdarknet53/ixrt/README.md index 709d6c98..caf39486 100644 --- a/models/cv/classification/cspdarknet53/ixrt/README.md +++ b/models/cv/classification/cspdarknet53/ixrt/README.md @@ -27,7 +27,8 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt +pip3 install mmcv==1.5.3 mmcls==0.24.0 ``` ### Model Conversion @@ -51,9 +52,11 @@ onnxsim cspdarknet5.onnx checkpoints/cspdarknet53_sim.onnx ## Model Inference ```bash -export DATASETS_DIR=/Path/to/imagenet_val/ -export CHECKPOINTS_DIR=/Path/to/checkpoints/ -export CONFIG_DIR=./config/CSPDARKNET53_CONFIG +export PROJ_DIR=./ +export DATASETS_DIR=/path/to/imagenet_val/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/CSPDARKNET53_CONFIG ``` ### FP16 diff --git a/models/cv/classification/cspdarknet53/ixrt/build_engine.py b/models/cv/classification/cspdarknet53/ixrt/build_engine.py deleted file mode 100644 index 126da5e6..00000000 --- a/models/cv/classification/cspdarknet53/ixrt/build_engine.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - # print("precision : ", precision) - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/cspdarknet53/ixrt/build_i8_engine.py b/models/cv/classification/cspdarknet53/ixrt/build_i8_engine.py deleted file mode 100644 index 
6e356260..00000000 --- a/models/cv/classification/cspdarknet53/ixrt/build_i8_engine.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import argparse -import json -import os - -import tensorrt -import tensorrt as trt - -TRT_LOGGER = trt.Logger(tensorrt.Logger.VERBOSE) - -EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - - -def GiB(val): - return val * 1 << 30 - - -def json_load(filename): - with open(filename) as json_file: - data = json.load(json_file) - return data - - -def setDynamicRange(network, json_file): - """Sets ranges for network layers.""" - quant_param_json = json_load(json_file) - act_quant = quant_param_json["act_quant_info"] - - for i in range(network.num_inputs): - input_tensor = network.get_input(i) - if act_quant.__contains__(input_tensor.name): - print(input_tensor.name) - value = act_quant[input_tensor.name] - tensor_max = abs(value) - tensor_min = -abs(value) - input_tensor.dynamic_range = (tensor_min, tensor_max) - - for i in range(network.num_layers): - layer = network.get_layer(i) - - for output_index in range(layer.num_outputs): - tensor = layer.get_output(output_index) - - if act_quant.__contains__(tensor.name): - value = act_quant[tensor.name] - tensor_max = abs(value) - tensor_min = -abs(value) - tensor.dynamic_range = (tensor_min, tensor_max) - else: - print("\033[1;32m%s\033[0m" % 
tensor.name) - - -def build_engine(onnx_file, json_file, engine_file): - builder = trt.Builder(TRT_LOGGER) - network = builder.create_network(EXPLICIT_BATCH) - - config = builder.create_builder_config() - - # If it is a dynamic onnx model , you need to add the following. - # profile = builder.create_optimization_profile() - # profile.set_shape("input_name", (batch, channels, min_h, min_w), (batch, channels, opt_h, opt_w), (batch, channels, max_h, max_w)) - # config.add_optimization_profile(profile) - - parser = trt.OnnxParser(network, TRT_LOGGER) - # config.max_workspace_size = GiB(1) - if not os.path.exists(onnx_file): - quit("ONNX file {} not found".format(onnx_file)) - - with open(onnx_file, "rb") as model: - if not parser.parse(model.read()): - print("ERROR: Failed to parse the ONNX file.") - for error in range(parser.num_errors): - print(parser.get_error(error)) - return None - - config.set_flag(trt.BuilderFlag.INT8) - - setDynamicRange(network, json_file) - - engine = builder.build_engine(network, config) - - with open(engine_file, "wb") as f: - f.write(engine.serialize()) - - -if __name__ == "__main__": - # Add plugins if needed - # import ctypes - # ctypes.CDLL("libmmdeploy_tensorrt_ops.so") - parser = argparse.ArgumentParser( - description="Writing qparams to onnx to convert tensorrt engine." 
- ) - parser.add_argument("--onnx", type=str, default=None) - parser.add_argument("--qparam_json", type=str, default=None) - parser.add_argument("--engine", type=str, default=None) - arg = parser.parse_args() - - build_engine(arg.onnx, arg.qparam_json, arg.engine) - print("\033[1;32mgenerate %s\033[0m" % arg.engine) \ No newline at end of file diff --git a/models/cv/classification/cspdarknet53/ixrt/calibration_dataset.py b/models/cv/classification/cspdarknet53/ixrt/calibration_dataset.py deleted file mode 100644 index 442a5602..00000000 --- a/models/cv/classification/cspdarknet53/ixrt/calibration_dataset.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - 
batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/cspdarknet53/ixrt/ci/prepare.sh b/models/cv/classification/cspdarknet53/ixrt/ci/prepare.sh index 29a65f01..933dcae6 100644 --- a/models/cv/classification/cspdarknet53/ixrt/ci/prepare.sh +++ b/models/cv/classification/cspdarknet53/ixrt/ci/prepare.sh @@ -25,7 +25,8 @@ else echo "Not Support Os" fi -pip install -r requirements.txt +pip install -r ../../ixrt_common/requirements.txt +pip3 install mmcv==1.5.3 mmcls==0.24.0 unzip -q /root/data/repos/mmpretrain-0.24.0.zip -d ./ python3 export.py --cfg mmpretrain/configs/cspnet/cspdarknet50_8xb32_in1k.py --weight /root/data/checkpoints/cspdarknet53_3rdparty_8xb32_in1k_20220329-bd275287.pth --output cspdarknet53.onnx diff --git a/models/cv/classification/cspdarknet53/ixrt/common.py b/models/cv/classification/cspdarknet53/ixrt/common.py deleted file mode 100755 index 21c2b399..00000000 --- a/models/cv/classification/cspdarknet53/ixrt/common.py +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -from cuda import cuda, cudart - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - err, allocation = cudart.cudaMalloc(size) - assert err == cudart.cudaError_t.cudaSuccess - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - "nbytes": size, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - 
allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/cspdarknet53/ixrt/inference.py b/models/cv/classification/cspdarknet53/ixrt/inference.py deleted file mode 100755 index 360b0cf0..00000000 --- a/models/cv/classification/cspdarknet53/ixrt/inference.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -from cuda import cuda, cudart -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - 
total_sample += batch_data.shape[0] - - err, = cuda.cuMemcpyHtoD(inputs[0]["allocation"], batch_data, batch_data.nbytes) - assert(err == cuda.CUresult.CUDA_SUCCESS) - context.execute_v2(allocations) - err, = cuda.cuMemcpyDtoH(output, outputs[0]["allocation"], outputs[0]["nbytes"]) - assert(err == cuda.CUresult.CUDA_SUCCESS) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - - end_time = time.time() - e2e_time = end_time - start_time - print(F"E2E time : {e2e_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 
0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/cspdarknet53/ixrt/quant.py b/models/cv/classification/cspdarknet53/ixrt/quant.py deleted file mode 100644 index 53ee5cdf..00000000 --- a/models/cv/classification/cspdarknet53/ixrt/quant.py +++ /dev/null @@ -1,167 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -"""这是一个高度自动化的 PPQ 量化的入口脚本,将你的模型和数据按要求进行打包: - -在自动化 API 中,我们使用 QuantizationSetting 对象传递量化参数。 - -This file will show you how to quantize your network with PPQ - You should prepare your model and calibration dataset as follow: - - ~/working/model.onnx <-- your model - ~/working/data/*.npy or ~/working/data/*.bin <-- your dataset - -if you are using caffe model: - ~/working/model.caffemdoel <-- your model - ~/working/model.prototext <-- your model - -### MAKE SURE YOUR INPUT LAYOUT IS [N, C, H, W] or [C, H, W] ### - -quantized model will be generated at: ~/working/quantized.onnx -""" -from ppq import * -from ppq.api import * -import os -from calibration_dataset import getdataloader -import argparse -import random -import numpy as np -import torch - - -def setseed(seed=42): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], - default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=20) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=224) - args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - - -config = parse_args() - -# modify configuration below: -WORKING_DIRECTORY = 'checkpoints' # choose your working directory -TARGET_PLATFORM = TargetPlatform.TRT_INT8 # choose your target platform -MODEL_TYPE = NetworkFramework.ONNX # or NetworkFramework.CAFFE -INPUT_LAYOUT = 'chw' # input data layout, 
chw or hwc -NETWORK_INPUTSHAPE = [32, 3, 224, 224] # input shape of your network -EXECUTING_DEVICE = 'cuda' # 'cuda' or 'cpu'. -REQUIRE_ANALYSE = False -TRAINING_YOUR_NETWORK = False # 是否需要 Finetuning 一下你的网络 -# ------------------------------------------------------------------- -# 加载你的模型文件,PPQ 将会把 onnx 或者 caffe 模型文件解析成自己的格式 -# 如果你正使用 pytorch, tensorflow 等框架,你可以先将模型导出成 onnx -# 使用 torch.onnx.export 即可,如果你在导出 torch 模型时发生错误,欢迎与我们联系。 -# ------------------------------------------------------------------- -graph = None -if MODEL_TYPE == NetworkFramework.ONNX: - graph = load_onnx_graph(onnx_import_file=config.model) -if MODEL_TYPE == NetworkFramework.CAFFE: - graph = load_caffe_graph( - caffemodel_path=os.path.join(WORKING_DIRECTORY, 'model.caffemodel'), - prototxt_path=os.path.join(WORKING_DIRECTORY, 'model.prototxt')) -assert graph is not None, 'Graph Loading Error, Check your input again.' - -# ------------------------------------------------------------------- -# SETTING 对象用于控制 PPQ 的量化逻辑,主要描述了图融合逻辑、调度方案、量化细节策略等 -# 当你的网络量化误差过高时,你需要修改 SETTING 对象中的属性来进行特定的优化 -# ------------------------------------------------------------------- -QS = QuantizationSettingFactory.default_setting() - -# ------------------------------------------------------------------- -# 下面向你展示了如何使用 finetuning 过程提升量化精度 -# 在 PPQ 中我们提供了十余种算法用来帮助你恢复精度 -# 开启他们的方式都是 QS.xxxx = True -# 按需使用,不要全部打开,容易起飞 -# ------------------------------------------------------------------- -if TRAINING_YOUR_NETWORK: - QS.lsq_optimization = True # 启动网络再训练过程,降低量化误差 - QS.lsq_optimization_setting.steps = 500 # 再训练步数,影响训练时间,500 步大概几分钟 - QS.lsq_optimization_setting.collecting_device = 'cuda' # 缓存数据放在那,cuda 就是放在gpu,如果显存超了你就换成 'cpu' - - -dataloader = getdataloader(config.dataset_dir, config.step, batch_size=config.bsz, img_sz=config.imgsz) -# ENABLE CUDA KERNEL 会加速量化效率 3x ~ 10x,但是你如果没有装相应编译环境的话是编译不了的 -# 你可以尝试安装编译环境,或者在不启动 CUDA KERNEL 的情况下完成量化:移除 with ENABLE_CUDA_KERNEL(): 即可 -with ENABLE_CUDA_KERNEL(): - print('网络正量化中,根据你的量化配置,这将需要一段时间:') 
- quantized = quantize_native_model( - setting=QS, # setting 对象用来控制标准量化逻辑 - model=graph, - calib_dataloader=dataloader, - calib_steps=config.step, - input_shape=NETWORK_INPUTSHAPE, # 如果你的网络只有一个输入,使用这个参数传参 - inputs=None, - # 如果你的网络有多个输入,使用这个参数传参,就是 input_shape=None, inputs=[torch.zeros(1,3,224,224), torch.zeros(1,3,224,224)] - collate_fn=lambda x: x[0].to(EXECUTING_DEVICE), # collate_fn 跟 torch dataloader 的 collate fn 是一样的,用于数据预处理, - # 你当然也可以用 torch dataloader 的那个,然后设置这个为 None - platform=TARGET_PLATFORM, - device=EXECUTING_DEVICE, - do_quantize=True) - - # ------------------------------------------------------------------- - # 如果你需要执行量化后的神经网络并得到结果,则需要创建一个 executor - # 这个 executor 的行为和 torch.Module 是类似的,你可以利用这个东西来获取执行结果 - # 请注意,必须在 export 之前执行此操作。 - # ------------------------------------------------------------------- - executor = TorchExecutor(graph=quantized, device=EXECUTING_DEVICE) - # output = executor.forward(input) - - # ------------------------------------------------------------------- - # PPQ 计算量化误差时,使用信噪比的倒数作为指标,即噪声能量 / 信号能量 - # 量化误差 0.1 表示在整体信号中,量化噪声的能量约为 10% - # 你应当注意,在 graphwise_error_analyse 分析中,我们衡量的是累计误差 - # 网络的最后一层往往都具有较大的累计误差,这些误差是其前面的所有层所共同造成的 - # 你需要使用 layerwise_error_analyse 逐层分析误差的来源 - # ------------------------------------------------------------------- - print('正计算网络量化误差(SNR),最后一层的误差应小于 0.1 以保证量化精度:') - reports = graphwise_error_analyse( - graph=quantized, running_device=EXECUTING_DEVICE, steps=32, - dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE)) - for op, snr in reports.items(): - if snr > 0.1: ppq_warning(f'层 {op} 的累计量化误差显著,请考虑进行优化') - - if REQUIRE_ANALYSE: - print('正计算逐层量化误差(SNR),每一层的独立量化误差应小于 0.1 以保证量化精度:') - layerwise_error_analyse(graph=quantized, running_device=EXECUTING_DEVICE, - interested_outputs=None, - dataloader=dataloader, collate_fn=lambda x: x.to(EXECUTING_DEVICE)) - - # ------------------------------------------------------------------- - # 使用 export_ppq_graph 函数来导出量化后的模型 - # PPQ 会根据你所选择的导出平台来修改模型格式 - 
# ------------------------------------------------------------------- - print('网络量化结束,正在生成目标文件:') - export_ppq_graph( - graph=quantized, platform=TARGET_PLATFORM, - graph_save_to=os.path.join(config.save_dir, f"quantized_{config.model_name}.onnx"), - config_save_to=os.path.join(config.save_dir, 'quant_cfg.json')) diff --git a/models/cv/classification/cspdarknet53/ixrt/refine_utils/__init__.py b/models/cv/classification/cspdarknet53/ixrt/refine_utils/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/models/cv/classification/cspdarknet53/ixrt/refine_utils/linear_pass.py b/models/cv/classification/cspdarknet53/ixrt/refine_utils/linear_pass.py deleted file mode 100644 index 29b5e4a9..00000000 --- a/models/cv/classification/cspdarknet53/ixrt/refine_utils/linear_pass.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import dataclasses - -from refine_utils.common import * - -# AXB=C, Only for B is initializer - -class FusedLinearPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.MATMUL, OP.ADD], callback=self.to_linear_with_bias, strict=True - ) - find_sequence_subgraph( - graph, pattern=[OP.MATMUL], callback=self.to_linear, strict=True - ) - return graph - - def to_linear_with_bias(self, graph, pattern: PatternGraph): - matmul = pattern.nodes[0] - add = pattern.nodes[1] - if len(add.operator.inputs) != 2: - return - - b_var = graph.get_variable(matmul.operator.inputs[1]) - if not graph.is_leaf_variable(b_var) or b_var.value is None: - return - - if b_var.value.ndim != 2: - return - - bias_var = None - for input in add.operator.inputs: - if input not in matmul.operator.outputs: - bias_var = input - - inputs = matmul.operator.inputs - inputs.append(bias_var) - outputs = add.operator.outputs - - b_var.value = b_var.value.transpose(1, 0) - b_var.shape[0],b_var.shape[1] = b_var.shape[1],b_var.shape[0] - - hidden_size = b_var.shape[1] - linear_dim = b_var.shape[0] - - attributes = { - "hidden_size": hidden_size, - "linear_dim": linear_dim, - "has_bias": 1, - "act_type":"none" - } - - self.transform.make_operator( - "LinearFP16", - inputs=inputs, - outputs=outputs, - **attributes - ) - - self.transform.delete_operator(add.operator) - self.transform.delete_operator(matmul.operator) - - def to_linear(self, graph, pattern: PatternGraph): - matmul = pattern.nodes[0] - if len(matmul.operator.inputs) != 2: - return - - b_var = graph.get_variable(matmul.operator.inputs[1]) - if not graph.is_leaf_variable(b_var) or b_var.value is None: - return - - if b_var.value.ndim != 2: - return - - attributes = { - "hidden_size": hidden_size, - "linear_dim": linear_dim, - "has_bias": 0, - "act_type": "none" - } - - b_var.value = b_var.value.transpose(1, 0) - b_var.shape[0],b_var.shape[1] = 
b_var.shape[1], b_var.shape[0] - - hidden_size = b_var.shape[1] - linear_dim = b_var.shape[0] - - op = self.transform.make_operator( - op_type = "LinearFP16", - inputs = pattern.nodes[0].operator.inputs, - outputs=[pattern.nodes[-1].operator.outputs[0]], - **attributes - ) - - self.transform.add_operator(op) - - self.transform.delete_operator(matmul.operator) \ No newline at end of file diff --git a/models/cv/classification/cspdarknet53/ixrt/refine_utils/matmul_to_gemm_pass.py b/models/cv/classification/cspdarknet53/ixrt/refine_utils/matmul_to_gemm_pass.py deleted file mode 100644 index 4ebfac4d..00000000 --- a/models/cv/classification/cspdarknet53/ixrt/refine_utils/matmul_to_gemm_pass.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-from refine_utils.common import * - -# -# Common pattern Matmul to Gemm -# -class FusedGemmPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.MATMUL], callback=self.to_gemm, strict=True - ) - return graph - - def to_gemm(self, graph, pattern: PatternGraph): - matmul_op = pattern.nodes[0] - inputs = matmul_op.operator.inputs - outputs = matmul_op.operator.outputs - - if len(inputs)!=2 and len(outputs)!=1: - return - - for input in inputs: - if self.transform.is_leaf_variable(input): - return - - print(f"{self.transform.get_variable(inputs[0]).shape} {self.transform.get_variable(inputs[1]).shape}") - self.transform.delete_operator(matmul_op.operator) - - op = self.transform.make_operator( - op_type = "Gemm", - inputs = inputs, - outputs = outputs, - alpha = 1, - beta = 1, - transB = 1 - ) - - self.transform.add_operator(op) \ No newline at end of file diff --git a/models/cv/classification/cspdarknet53/ixrt/requirements.txt b/models/cv/classification/cspdarknet53/ixrt/requirements.txt deleted file mode 100644 index 972db4d8..00000000 --- a/models/cv/classification/cspdarknet53/ixrt/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -onnx -tqdm -onnxsim -ppq -mmcv==1.5.3 -mmcls -protobuf==3.20.0 \ No newline at end of file diff --git a/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_fp16_accuracy.sh b/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_fp16_accuracy.sh index 02f44d22..e62cc5d7 100644 --- a/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_fp16_accuracy.sh +++ b/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_fp16_accuracy.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. # All Rights Reserved. # @@ -12,7 +13,6 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the # License for the specific language governing permissions and limitations # under the License. -#!/bin/bash EXIT_STATUS=0 check_status() @@ -43,22 +43,75 @@ do esac done -DATASETS_DIR=${DATASETS_DIR} -CHECKPOINTS_DIR=${CHECKPOINTS_DIR} -CONFIG_DIR=${CONFIG_DIR} source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} echo CONFIG_DIR : ${CONFIG_DIR} echo ====================== Model Info ====================== echo Model Name : ${MODEL_NAME} +echo Model Input Name : ${MODEL_INPUT_NAME} +echo Model Output Name : ${MODEL_OUTPUT_NAME} echo Onnx Path : ${ORIGINE_MODEL} step=0 SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx -FINAL_MODEL=${SIM_MODEL} + +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize 
+FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi # Build Engine let step++ @@ -68,7 +121,7 @@ ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine if [ -f $ENGINE_FILE ];then echo " "Build Engine Skip, $ENGINE_FILE has been existed else - python3 build_engine.py \ + python3 ${RUN_DIR}/build_engine.py \ --precision ${PRECISION} \ --model ${FINAL_MODEL} \ --engine ${ENGINE_FILE} @@ -79,7 +132,7 @@ fi let step++ echo; echo [STEP ${step}] : Inference -python3 inference.py \ +python3 ${RUN_DIR}/inference.py \ --engine_file=${ENGINE_FILE} \ --datasets_dir=${DATASETS_DIR} \ --imgsz=${IMGSIZE} \ diff --git a/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_fp16_performance.sh b/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_fp16_performance.sh index 2b6b8a66..05c9986f 100644 --- a/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_fp16_performance.sh +++ b/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_fp16_performance.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. # All Rights Reserved. # @@ -12,7 +13,6 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. 
-#!/bin/bash EXIT_STATUS=0 check_status() @@ -43,22 +43,75 @@ do esac done -DATASETS_DIR=${DATASETS_DIR} -CHECKPOINTS_DIR=${CHECKPOINTS_DIR} -CONFIG_DIR=${CONFIG_DIR} source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} echo CONFIG_DIR : ${CONFIG_DIR} echo ====================== Model Info ====================== echo Model Name : ${MODEL_NAME} +echo Model Input Name : ${MODEL_INPUT_NAME} +echo Model Output Name : ${MODEL_OUTPUT_NAME} echo Onnx Path : ${ORIGINE_MODEL} step=0 SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx -FINAL_MODEL=${SIM_MODEL} + +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + 
python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi # Build Engine let step++ @@ -68,7 +121,7 @@ ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine if [ -f $ENGINE_FILE ];then echo " "Build Engine Skip, $ENGINE_FILE has been existed else - python3 build_engine.py \ + python3 ${RUN_DIR}/build_engine.py \ --precision ${PRECISION} \ --model ${FINAL_MODEL} \ --engine ${ENGINE_FILE} @@ -79,7 +132,7 @@ fi let step++ echo; echo [STEP ${step}] : Inference -python3 inference.py \ +python3 ${RUN_DIR}/inference.py \ --engine_file=${ENGINE_FILE} \ --datasets_dir=${DATASETS_DIR} \ --imgsz=${IMGSIZE} \ diff --git a/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_int8_accuracy.sh b/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_int8_accuracy.sh index 30e208be..e3a8bb78 100644 --- a/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_int8_accuracy.sh +++ b/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_int8_accuracy.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. # All Rights Reserved. # @@ -12,7 +13,6 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. 
-#!/bin/bash EXIT_STATUS=0 check_status() @@ -43,22 +43,35 @@ do esac done -DATASETS_DIR=${DATASETS_DIR} -CHECKPOINTS_DIR=${CHECKPOINTS_DIR} -CONFIG_DIR=${CONFIG_DIR} source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} echo CONFIG_DIR : ${CONFIG_DIR} echo ====================== Model Info ====================== echo Model Name : ${MODEL_NAME} +echo Model Input Name : ${MODEL_INPUT_NAME} +echo Model Output Name : ${MODEL_OUTPUT_NAME} echo Onnx Path : ${ORIGINE_MODEL} step=0 SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + # Quant Model if [ $PRECISION == "int8" ];then let step++ @@ -71,7 +84,7 @@ if [ $PRECISION == "int8" ];then SIM_MODEL=${QUANT_EXIST_ONNX} echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed else - python3 quant.py \ + python3 ${RUN_DIR}/quant.py \ --model ${SIM_MODEL} \ --model_name ${MODEL_NAME} \ --dataset_dir ${DATASETS_DIR} \ @@ -87,7 +100,18 @@ if [ $PRECISION == "int8" ];then fi fi -FINAL_MODEL=${SIM_MODEL} +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi # Build Engine let step++ @@ -97,10 +121,10 @@ ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine if [ -f $ENGINE_FILE ];then echo " "Build Engine Skip, $ENGINE_FILE has been existed 
else - python3 build_i8_engine.py \ - --onnx ${FINAL_MODEL} \ - --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \ - --engine ${ENGINE_FILE} + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} echo " "Generate Engine ${ENGINE_FILE} fi @@ -108,7 +132,7 @@ fi let step++ echo; echo [STEP ${step}] : Inference -python3 inference.py \ +python3 ${RUN_DIR}/inference.py \ --engine_file=${ENGINE_FILE} \ --datasets_dir=${DATASETS_DIR} \ --imgsz=${IMGSIZE} \ diff --git a/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_int8_performance.sh b/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_int8_performance.sh index 82ebd283..c526b81a 100644 --- a/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_int8_performance.sh +++ b/models/cv/classification/cspdarknet53/ixrt/scripts/infer_cspdarknet53_int8_performance.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. # All Rights Reserved. # @@ -12,7 +13,6 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. 
-#!/bin/bash EXIT_STATUS=0 check_status() @@ -43,22 +43,35 @@ do esac done -DATASETS_DIR=${DATASETS_DIR} -CHECKPOINTS_DIR=${CHECKPOINTS_DIR} -CONFIG_DIR=${CONFIG_DIR} source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} echo CONFIG_DIR : ${CONFIG_DIR} echo ====================== Model Info ====================== echo Model Name : ${MODEL_NAME} +echo Model Input Name : ${MODEL_INPUT_NAME} +echo Model Output Name : ${MODEL_OUTPUT_NAME} echo Onnx Path : ${ORIGINE_MODEL} step=0 SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + # Quant Model if [ $PRECISION == "int8" ];then let step++ @@ -71,7 +84,7 @@ if [ $PRECISION == "int8" ];then SIM_MODEL=${QUANT_EXIST_ONNX} echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed else - python3 quant.py \ + python3 ${RUN_DIR}/quant.py \ --model ${SIM_MODEL} \ --model_name ${MODEL_NAME} \ --dataset_dir ${DATASETS_DIR} \ @@ -87,7 +100,18 @@ if [ $PRECISION == "int8" ];then fi fi -FINAL_MODEL=${SIM_MODEL} +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi # Build Engine let step++ @@ -97,19 +121,18 @@ ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine if [ -f $ENGINE_FILE ];then echo " "Build Engine Skip, $ENGINE_FILE has been existed 
else - python3 build_i8_engine.py \ - --onnx ${FINAL_MODEL} \ - --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \ - --engine ${ENGINE_FILE} + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} echo " "Generate Engine ${ENGINE_FILE} fi - # Inference let step++ echo; echo [STEP ${step}] : Inference -python3 inference.py \ +python3 ${RUN_DIR}/inference.py \ --engine_file=${ENGINE_FILE} \ --datasets_dir=${DATASETS_DIR} \ --imgsz=${IMGSIZE} \ diff --git a/models/cv/classification/cspresnet50/ixrt/README.md b/models/cv/classification/cspresnet50/ixrt/README.md index 0dab1878..73a67292 100644 --- a/models/cv/classification/cspresnet50/ixrt/README.md +++ b/models/cv/classification/cspresnet50/ixrt/README.md @@ -26,7 +26,9 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt + +pip3 install mmcls==0.24.0 mmcv==1.5.3 ``` ### Model Conversion @@ -45,10 +47,10 @@ python3 export_onnx.py \ ```bash export PROJ_DIR=./ -export DATASETS_DIR=/path/to/imagenet_val +export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints -export RUN_DIR=./ -export CONFIG_DIR=config/CSPRESNET50_CONFIG +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/CSPRESNET50_CONFIG ``` ### FP16 diff --git a/models/cv/classification/cspresnet50/ixrt/build_engine.py b/models/cv/classification/cspresnet50/ixrt/build_engine.py deleted file mode 100644 index 32f549d8..00000000 --- a/models/cv/classification/cspresnet50/ixrt/build_engine.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - # print("precision : ", precision) - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/cspresnet50/ixrt/calibration_dataset.py b/models/cv/classification/cspresnet50/ixrt/calibration_dataset.py deleted file mode 100644 index b394c76b..00000000 --- a/models/cv/classification/cspresnet50/ixrt/calibration_dataset.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2024, 
Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, 
num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/cspresnet50/ixrt/ci/prepare.sh b/models/cv/classification/cspresnet50/ixrt/ci/prepare.sh index d3e995c4..64dbabb3 100644 --- a/models/cv/classification/cspresnet50/ixrt/ci/prepare.sh +++ b/models/cv/classification/cspresnet50/ixrt/ci/prepare.sh @@ -25,7 +25,8 @@ else echo "Not Support Os" fi -pip install -r requirements.txt +pip3 install mmcls==0.24.0 mmcv==1.5.3 +pip install -r ../../ixrt_common/requirements.txt unzip -q /root/data/repos/mmpretrain-0.24.0.zip -d ./ mkdir -p checkpoints python3 export_onnx.py \ diff --git a/models/cv/classification/cspresnet50/ixrt/common.py b/models/cv/classification/cspresnet50/ixrt/common.py deleted file mode 100644 index abdc147c..00000000 --- a/models/cv/classification/cspresnet50/ixrt/common.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright 
(c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -import pycuda.driver as cuda - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - allocation = cuda.mem_alloc(size) - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - 
"allocation": allocation, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/cspresnet50/ixrt/inference.py b/models/cv/classification/cspresnet50/ixrt/inference.py deleted file mode 100644 index 11a90c79..00000000 --- a/models/cv/classification/cspresnet50/ixrt/inference.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -import pycuda.autoinit -import pycuda.driver as cuda -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += 
batch_data.shape[0] - - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - e2e_time = end_time - start_time - print(F"E2E time : {e2e_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 
0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/cspresnet50/ixrt/modify_batchsize.py b/models/cv/classification/cspresnet50/ixrt/modify_batchsize.py deleted file mode 100644 index 4ac42a30..00000000 --- a/models/cv/classification/cspresnet50/ixrt/modify_batchsize.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. 
- dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/cspresnet50/ixrt/quant.py b/models/cv/classification/cspresnet50/ixrt/quant.py deleted file mode 100644 index 8006db24..00000000 --- a/models/cv/classification/cspresnet50/ixrt/quant.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import random -import argparse -import numpy as np -from random import shuffle -from tensorrt.deploy import static_quantize - -import torch -import torchvision.datasets -from calibration_dataset import getdataloader - -def setseed(seed=42): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=20) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=224) - args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - -args = parse_args() -setseed(args.seed) -calibration_dataloader = getdataloader(args.dataset_dir, args.step, args.bsz, img_sz=args.imgsz) -static_quantize(args.model, - calibration_dataloader=calibration_dataloader, - save_quant_onnx_path=os.path.join(args.save_dir, f"quantized_{args.model_name}.onnx"), - observer=args.observer, - data_preprocess=lambda x: x[0].to("cuda"), - quant_format="qdq", - disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/classification/cspresnet50/ixrt/refine_model.py b/models/cv/classification/cspresnet50/ixrt/refine_model.py deleted file mode 100644 index 000ee4dc..00000000 --- a/models/cv/classification/cspresnet50/ixrt/refine_model.py +++ /dev/null @@ -1,291 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. 
-# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import argparse -import dataclasses - -import torch -import onnx - -from refine_utils.matmul_to_gemm_pass import FusedGemmPass -from refine_utils.linear_pass import FusedLinearPass - -from refine_utils.common import * - -def get_constant_input_name_of_operator(graph: Graph, operator: Operator): - const = None - for input in operator.inputs: - if not graph.containe_var(input): - continue - - if not graph.is_leaf_variable(input): - continue - - input_var = graph.get_variable(input) - if input_var.value is not None: - const = input - return const - -class FuseLayerNormPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - find_sequence_subgraph( - graph, - [OP.REDUCE_MEAN, OP.SUB, OP.POW, OP.REDUCE_MEAN, OP.ADD, OP.SQRT, OP.DIV, OP.MUL, OP.ADD], - self.fuse_layer_norm, - strict=False - ) - return graph - - def fuse_layer_norm(self, graph: Graph, pattern: PatternGraph): - # 检查 REDUCE_MEAN 的输入是否和 SUB 的输入是一致的 - if pattern.nodes[0].operator.inputs[0] != pattern.nodes[1].operator.inputs[0]: - return - - # 检查 POW 的输入是否和 DIV 的输入是一致的 - if pattern.nodes[2].operator.inputs[0] != pattern.nodes[6].operator.inputs[0]: - return - - # 检查部分算子的输出是否被多个算子使用 - nodes = pattern.nodes - for node in [nodes[0]] + nodes[2:-1]: - next_ops = graph.get_next_operators(node.operator) - if len(next_ops) > 1: - return - - eps = None - for input in 
nodes[4].operator.inputs: - input_var = graph.get_variable(input) - if input_var.value is not None and graph.is_leaf_variable(input): - eps = to_py_type(input_var.value) - - scale = get_constant_input_name_of_operator(graph, nodes[-2].operator) - bias = get_constant_input_name_of_operator(graph, nodes[-1].operator) - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - bias_var = graph.get_variable(bias) - print(bias_var) - - attributes = { - "axis": nodes[0].operator.attributes.axes, - "epsilon": eps, - } - - - layer_norm_op = self.transform.make_operator( - op_type="LayerNormalization", - inputs=[nodes[0].operator.inputs[0], scale, bias], - outputs=[nodes[-1].operator.outputs[0]], - **attributes - ) - - self.transform.add_operator(layer_norm_op) - -class FusedGeluPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True - ) - return graph - - def fuse_gelu(self, graph: Graph, pattern: PatternGraph): - nodes = pattern.nodes - prev_op = self.transform.get_previous_operators(nodes[0].operator)[0] - next_ops = self.transform.get_next_operators(prev_op) - if len(next_ops) != 2: - return - - if nodes[0].operator not in next_ops or nodes[3].operator not in next_ops: - return - - gelu_op_input = None - for input in nodes[3].operator.inputs: - if input in nodes[0].operator.inputs: - gelu_op_input = input - break - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - gelu_op = self.transform.make_operator( - op_type=OP.GELU, - inputs=[gelu_op_input], - outputs=[nodes[-1].operator.outputs[0]] - ) - self.transform.add_operator(gelu_op) - -@dataclasses.dataclass -class NormalizeAttr(BaseOperatorAttr): - p: float = 2.0 - epsilon: float = 1e-12 - axis: int = 1 - - -@registe_operator(OP.GELU) -class GeluOperator(BaseOperator): - - def 
call( - self, - executor, - operator: Operator, - inputs: List, - attr: NormalizeAttr, - ): - return F.gelu(inputs[0]) - - def convert_onnx_operator( - self, ir_graph: Graph, onnx_graph: onnx.GraphProto, node: onnx.NodeProto - ) -> Operator: - return default_converter(ir_graph, onnx_graph, node, attr_cls=attr.EmptyAttr) - - def quantize( - self, - graph: Graph, - op: Operator, - operator_observer_config: QuantOperatorObserverConfig, - quant_outputs: bool = False, - ): - return quant_single_input_operator(graph, op, operator_observer_config, quant_outputs=quant_outputs) - - - -class ClearUnsedVariables(BasePass): - - def process(self, graph: Graph) -> Graph: - vars = list(graph.variables) - - for var in vars: - if len(graph.get_dst_operators(var)) == 0 and graph.is_leaf_variable(var): - graph.delete_variable(var) - - quant_params = list(graph.quant_parameters.keys()) - for var in quant_params: - if not graph.containe_var(var): - graph.quant_parameters.pop(var) - - return graph - -class FormatLayerNorm(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if "LayerNorm" in op.op_type: - self.format_layer_norm(graph, op) - return graph - - def format_layer_norm(self, graph, operator): - if not hasattr(operator.attributes, "axis"): - return - if isinstance(operator.attributes.axis, (tuple, list)): - operator.attributes.axis = operator.attributes.axis[0] - -class FormatReshape(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if op.op_type == "Reshape": - self.format_reshape(graph, op) - - return graph - - def format_reshape(self, graph, operator): - shape = graph.get_variable(operator.inputs[1]) - shape.value = torch.tensor(shape.value, dtype=torch.int64) - -class FormatScalar(BasePass): - - def process(self, graph: Graph): - for var in graph.variables.values(): - var: Variable - use_ops = graph.get_dst_operators(var) - - if len(use_ops) == 0: - continue - - if use_ops[0].op_type 
not in [OP.MUL, OP.ADD, OP.GATHER]: - continue - - if var.value is not None and var.value.ndim == 0: - var.value = var.value.reshape(1) - print(f"Reshape scalar to tensor for {var.name}.") - - return graph - -class RenamePass(BasePass): - - def process(self, graph:Graph): - - names = [name for name in graph.operators.keys()] - for old_name in names: - new_name = old_name.replace("/", "#") - - graph.rename_operator(old_name, new_name) - - names = [name for name in graph.variables.keys()] - for name in names: - new_name = name.replace("/", ".").replace("Output", "out").replace("output", "out") - - graph.rename_vaiable(name, new_name, - with_variables=True, - with_operator_outputs=True) - - return graph - -def create_pipeline(example_inputs): - return PassSequence( - # FuseLayerNormPass(), - FusedGeluPass(), - - # ClearUnsedVariables(), - # FormatLayerNorm(), - # FormatReshape(), - # FormatScalar(), - # RenamePass() - ) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--onnx_path", type=str) - parser.add_argument("--dst_onnx_path", type=str) - - parser.add_argument("--bsz", type=int, default=8, - help="Batch size") - parser.add_argument("--imgsz", type=int, default=224, - help="Image size") - - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - - example_inputs = torch.randn(args.bsz, 3, args.imgsz, args.imgsz) - - refine_pipline = Pipeline( - create_source(f"{args.onnx_path}", example_inputs=example_inputs), - create_pipeline(example_inputs), - create_target( - f"{args.dst_onnx_path}", - example_inputs=example_inputs, - ) - ) - refine_pipline.run() - - print(f"refine the model, input shape={example_inputs.shape}") diff --git a/models/cv/classification/cspresnet50/ixrt/requirements.txt b/models/cv/classification/cspresnet50/ixrt/requirements.txt deleted file mode 100644 index 566974bb..00000000 --- a/models/cv/classification/cspresnet50/ixrt/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -tqdm -tabulate 
-onnx -onnxsim -opencv-python==4.6.0.66 -mmcls==0.24.0 -mmcv==1.5.3 -pycuda \ No newline at end of file diff --git a/models/cv/classification/cspresnet50/ixrt/simplify_model.py b/models/cv/classification/cspresnet50/ixrt/simplify_model.py deleted file mode 100644 index 4d53a474..00000000 --- a/models/cv/classification/cspresnet50/ixrt/simplify_model.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/deit_tiny/ixrt/README.md b/models/cv/classification/deit_tiny/ixrt/README.md index e7391bce..73362c8b 100644 --- a/models/cv/classification/deit_tiny/ixrt/README.md +++ b/models/cv/classification/deit_tiny/ixrt/README.md @@ -27,28 +27,32 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt +pip3 install mmcv==1.5.3 mmcls==0.24.0 ``` ### Model Conversion ```bash # git clone mmpretrain -git clone --depth 1 -b v1.1.0 https://github.com/open-mmlab/mmpretrain.git -(cd mmpretrain/ && python3 setup.py develop) +git clone -b v0.24.0 https://github.com/open-mmlab/mmpretrain.git +mkdir checkpoints # export onnx model -python3 export.py --cfg mmpretrain/configs/deit/deit-tiny_4xb256_in1k.py --weight deit-tiny_pt-4xb256_in1k_20220218-13b382a0.pth --output deit_tiny.onnx +python3 export.py --cfg mmpretrain/configs/deit/deit-tiny_pt-4xb256_in1k.py --weight deit-tiny_pt-4xb256_in1k_20220218-13b382a0.pth --output checkpoints/deit_tiny.onnx # Use onnxsim optimize onnx model -onnxsim deit_tiny.onnx deit_tiny_opt.onnx - +onnxsim checkpoints/deit_tiny.onnx checkpoints/deit_tiny_opt.onnx ``` ## Model Inference ```bash -export DATASETS_DIR=/Path/to/imagenet_val/ +export PROJ_DIR=./ +export DATASETS_DIR=/path/to/imagenet_val/ 
+export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/DEIT_TINY_CONFIG ``` ### FP16 diff --git a/models/cv/classification/deit_tiny/ixrt/build_engine.py b/models/cv/classification/deit_tiny/ixrt/build_engine.py deleted file mode 100644 index 038c15d5..00000000 --- a/models/cv/classification/deit_tiny/ixrt/build_engine.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/deit_tiny/ixrt/calibration_dataset.py b/models/cv/classification/deit_tiny/ixrt/calibration_dataset.py deleted file mode 100644 index ec931c65..00000000 --- a/models/cv/classification/deit_tiny/ixrt/calibration_dataset.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 
0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=True, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader diff --git a/models/cv/classification/deit_tiny/ixrt/ci/prepare.sh b/models/cv/classification/deit_tiny/ixrt/ci/prepare.sh new file mode 100644 index 00000000..0cb06a89 --- /dev/null +++ b/models/cv/classification/deit_tiny/ixrt/ci/prepare.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r ../../ixrt_common/requirements.txt +pip install mmcv==1.5.3 mmcls==0.24.0 +unzip -q /root/data/repos/mmpretrain-0.24.0.zip -d ./ +mkdir -p checkpoints +python3 export.py --cfg mmpretrain/configs/deit/deit-tiny_pt-4xb256_in1k.py --weight deit-tiny_pt-4xb256_in1k_20220218-13b382a0.pth --output checkpoints/deit_tiny.onnx +onnxsim checkpoints/deit_tiny.onnx checkpoints/deit_tiny_opt.onnx \ No newline at end of file diff --git a/models/cv/classification/deit_tiny/ixrt/common.py b/models/cv/classification/deit_tiny/ixrt/common.py deleted file mode 100644 index fd6a84d8..00000000 --- a/models/cv/classification/deit_tiny/ixrt/common.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -from cuda import cuda, cudart - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - err, allocation = cudart.cudaMalloc(size) - assert err == cudart.cudaError_t.cudaSuccess - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - "nbytes": size, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations \ No newline at end of file diff --git a/models/cv/classification/deit_tiny/ixrt/export.py b/models/cv/classification/deit_tiny/ixrt/export.py index b2f2f142..0078670f 100644 --- a/models/cv/classification/deit_tiny/ixrt/export.py +++ 
b/models/cv/classification/deit_tiny/ixrt/export.py @@ -15,7 +15,7 @@ import argparse import torch -from mmpretrain.apis import init_model +from mmcls.apis import init_model class Model(torch.nn.Module): def __init__(self, config_file, checkpoint_file): diff --git a/models/cv/classification/deit_tiny/ixrt/inference.py b/models/cv/classification/deit_tiny/ixrt/inference.py deleted file mode 100644 index 22f1644c..00000000 --- a/models/cv/classification/deit_tiny/ixrt/inference.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -from cuda import cuda, cudart -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += batch_data.shape[0] - (err,) = cudart.cudaMemcpy( - 
inputs[0]["allocation"], - batch_data, - batch_data.nbytes, - cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, - ) - assert err == cudart.cudaError_t.cudaSuccess - # cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - (err,) = cudart.cudaMemcpy( - output, - outputs[0]["allocation"], - outputs[0]["nbytes"], - cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, - ) - assert err == cudart.cudaError_t.cudaSuccess - # cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - err, = cudart.cudaFree(inputs[0]["allocation"]) - assert err == cudart.cudaError_t.cudaSuccess - err, = cudart.cudaFree(outputs[0]["allocation"]) - assert err == cudart.cudaError_t.cudaSuccess - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - 
help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/deit_tiny/ixrt/modify_batchsize.py b/models/cv/classification/deit_tiny/ixrt/modify_batchsize.py deleted file mode 100644 index 689b7a97..00000000 --- a/models/cv/classification/deit_tiny/ixrt/modify_batchsize.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. 
- dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/deit_tiny/ixrt/requirements.txt b/models/cv/classification/deit_tiny/ixrt/requirements.txt deleted file mode 100644 index 520130b7..00000000 --- a/models/cv/classification/deit_tiny/ixrt/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -tqdm -onnx -onnxsim -tabulate -ppq -tqdm -cuda-python \ No newline at end of file diff --git a/models/cv/classification/deit_tiny/ixrt/scripts/infer_deit_tiny_fp16_accuracy.sh b/models/cv/classification/deit_tiny/ixrt/scripts/infer_deit_tiny_fp16_accuracy.sh index 0379dbe2..b743d708 100644 --- a/models/cv/classification/deit_tiny/ixrt/scripts/infer_deit_tiny_fp16_accuracy.sh +++ b/models/cv/classification/deit_tiny/ixrt/scripts/infer_deit_tiny_fp16_accuracy.sh @@ -42,11 +42,6 @@ do esac done -PROJ_DIR=${PROJ_DIR:-"."} -DATASETS_DIR="${DATASETS_DIR}" -CHECKPOINTS_DIR="${PROJ_DIR}" -RUN_DIR="${PROJ_DIR}" -CONFIG_DIR="${PROJ_DIR}/config/DEIT_TINY_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} diff --git a/models/cv/classification/deit_tiny/ixrt/scripts/infer_deit_tiny_fp16_performance.sh b/models/cv/classification/deit_tiny/ixrt/scripts/infer_deit_tiny_fp16_performance.sh index 18b48851..e7a4f1a7 100644 
--- a/models/cv/classification/deit_tiny/ixrt/scripts/infer_deit_tiny_fp16_performance.sh +++ b/models/cv/classification/deit_tiny/ixrt/scripts/infer_deit_tiny_fp16_performance.sh @@ -42,11 +42,6 @@ do esac done -PROJ_DIR=${PROJ_DIR:-"."} -DATASETS_DIR="${DATASETS_DIR}" -CHECKPOINTS_DIR="${PROJ_DIR}" -RUN_DIR="${PROJ_DIR}" -CONFIG_DIR="${PROJ_DIR}/config/DEIT_TINY_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} diff --git a/models/cv/classification/deit_tiny/ixrt/simplify_model.py b/models/cv/classification/deit_tiny/ixrt/simplify_model.py deleted file mode 100644 index 9948a9fa..00000000 --- a/models/cv/classification/deit_tiny/ixrt/simplify_model.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/densenet121/ixrt/README.md b/models/cv/classification/densenet121/ixrt/README.md index c36f7dad..0136d3ab 100644 --- a/models/cv/classification/densenet121/ixrt/README.md +++ b/models/cv/classification/densenet121/ixrt/README.md @@ -14,6 +14,8 @@ Dense Convolutional Network (DenseNet), connects each layer to every other layer ### Prepare Resources +Pretrained model: + Dataset: to download the validation dataset. 
### Install Dependencies @@ -25,23 +27,24 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash mkdir checkpoints -python3 export_onnx.py --output_model checkpoints/densenet121.onnx +python3 ../../ixrt_common/export.py --model-name densenet121 --weight densenet121-a639ec97.pth --output checkpoints/densenet121.onnx ``` ## Model Inference ```bash +export PROJ_DIR=./ export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints -export RUN_DIR=./ -export CONFIG_DIR=config/DENSENET121_CONFIG +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/DENSENET121_CONFIG ``` ### FP16 diff --git a/models/cv/classification/densenet121/ixrt/build_engine.py b/models/cv/classification/densenet121/ixrt/build_engine.py deleted file mode 100644 index 01e126bc..00000000 --- a/models/cv/classification/densenet121/ixrt/build_engine.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt -from calibration_dataset import getdataloader -import cuda.cudart as cudart - -def assertSuccess(err): - assert(err == cudart.cudaError_t.cudaSuccess) - -class EngineCalibrator(tensorrt.IInt8EntropyCalibrator2): - - def __init__(self, cache_file, datasets_dir, loop_count=10, bsz=1, img_sz=224): - super().__init__() - self.cache_file = cache_file - self.image_batcher = getdataloader(datasets_dir, loop_count, batch_size=bsz, img_sz=img_sz) - self.batch_generator = iter(self.image_batcher) - size = img_sz*img_sz*3*bsz - __import__('pdb').set_trace() - err, self.batch_allocation = cudart.cudaMalloc(size) - assertSuccess(err) - - def __del__(self): - err,= cudart.cudaFree(self.batch_allocation) - assertSuccess(err) - - def get_batch_size(self): - return self.image_batcher.batch_size - - def get_batch(self, names): - try: - batch, _ = next(self.batch_generator) - batch = batch.numpy() - __import__('pdb').set_trace() - cudart.cudaMemcpy(self.batch_allocation, - np.ascontiguousarray(batch), - batch.nbytes, - cudart.cudaMemcpyKind.cudaMemcpyHostToDevice) - return [int(self.batch_allocation)] - except StopIteration: - return None - - def read_calibration_cache(self): - if os.path.exists(self.cache_file): - with open(self.cache_file, "rb") as f: - return f.read() - - def write_calibration_cache(self, cache): - with open(self.cache_file, "wb") as f: - f.write(cache) - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.VERBOSE) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - print("precision : ", 
precision) - build_config.set_flag(precision) - if config.precision == "int8": - build_config.int8_calibrator = EngineCalibrator("int8_cache", config.datasets_dir) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - args = parser.parse_args() - return args - -if __name__ == "__main__": - # cali = EngineCalibrator("tmp", "/home/qiang.zhang/data/imagenet_val/") - # print(cali.get_batch_size()) - # print(cali.get_batch("hello")) - args = parse_args() - main(args) diff --git a/models/cv/classification/densenet121/ixrt/calibration_dataset.py b/models/cv/classification/densenet121/ixrt/calibration_dataset.py deleted file mode 100644 index ec931c65..00000000 --- a/models/cv/classification/densenet121/ixrt/calibration_dataset.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=True, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - 
batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader diff --git a/models/cv/classification/densenet121/ixrt/ci/prepare.sh b/models/cv/classification/densenet121/ixrt/ci/prepare.sh index c3103b01..3ac521c0 100644 --- a/models/cv/classification/densenet121/ixrt/ci/prepare.sh +++ b/models/cv/classification/densenet121/ixrt/ci/prepare.sh @@ -25,8 +25,6 @@ else echo "Not Support Os" fi -pip install -r requirements.txt +pip install -r ../../ixrt_common/requirements.txt mkdir checkpoints -mkdir -p /root/.cache/torch/hub/checkpoints/ -ln -s /root/data/checkpoints/densenet121.pth /root/.cache/torch/hub/checkpoints/densenet121-a639ec97.pth -python3 export_onnx.py --output_model ./checkpoints/densenet121.onnx \ No newline at end of file +python3 ../../ixrt_common/export.py --model-name densenet121 --weight densenet121-a639ec97.pth --output checkpoints/densenet121.onnx \ No newline at end of file diff --git a/models/cv/classification/densenet121/ixrt/common.py b/models/cv/classification/densenet121/ixrt/common.py deleted file mode 100644 index 0458195e..00000000 --- a/models/cv/classification/densenet121/ixrt/common.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -import pycuda.driver as cuda - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - allocation = cuda.mem_alloc(size) - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: 
- outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/densenet121/ixrt/export_onnx.py b/models/cv/classification/densenet121/ixrt/export_onnx.py deleted file mode 100644 index ff86753a..00000000 --- a/models/cv/classification/densenet121/ixrt/export_onnx.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import torch -import torchvision.models as models -import argparse - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = models.densenet121(pretrained=True) -model.cuda() -model.eval() -input = torch.randn(1, 3, 224, 224, device='cuda') -export_onnx_file = args.output_model - -torch.onnx.export(model, - input, - export_onnx_file, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names = ['input'], - output_names = ['output'],) -print(" ") -print('Model has been converted to ONNX') -print("exit") -exit() diff --git a/models/cv/classification/densenet121/ixrt/inference.py b/models/cv/classification/densenet121/ixrt/inference.py deleted file mode 100644 index 50aafd4f..00000000 --- a/models/cv/classification/densenet121/ixrt/inference.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, 
Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -import pycuda.autoinit -import pycuda.driver as cuda -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") 
- if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += batch_data.shape[0] - - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - end2end_time = end_time - start_time - - print(F"E2E time : {end2end_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, 
help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/densenet121/ixrt/modify_batchsize.py b/models/cv/classification/densenet121/ixrt/modify_batchsize.py deleted file mode 100644 index 689b7a97..00000000 --- a/models/cv/classification/densenet121/ixrt/modify_batchsize.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... 
note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. - dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/densenet121/ixrt/quant.py b/models/cv/classification/densenet121/ixrt/quant.py deleted file mode 100644 index 5d71c828..00000000 --- a/models/cv/classification/densenet121/ixrt/quant.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-"""这是一个高度自动化的 PPQ 量化的入口脚本,将你的模型和数据按要求进行打包: - -在自动化 API 中,我们使用 QuantizationSetting 对象传递量化参数。 - -This file will show you how to quantize your network with PPQ - You should prepare your model and calibration dataset as follow: - - ~/working/model.onnx <-- your model - ~/working/data/*.npy or ~/working/data/*.bin <-- your dataset - -if you are using caffe model: - ~/working/model.caffemdoel <-- your model - ~/working/model.prototext <-- your model - -### MAKE SURE YOUR INPUT LAYOUT IS [N, C, H, W] or [C, H, W] ### - -quantized model will be generated at: ~/working/quantized.onnx -""" -from ppq import * -from ppq.api import * -import os -from calibration_dataset import getdataloader -import argparse -import random -import numpy as np -import torch - - -def setseed(seed=42): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], - default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=20) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=224) - args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - - -config = parse_args() - -# modify configuration below: -WORKING_DIRECTORY = 'checkpoints' # choose your working directory -TARGET_PLATFORM = TargetPlatform.TRT_INT8 # choose your target platform -MODEL_TYPE = NetworkFramework.ONNX # or NetworkFramework.CAFFE -INPUT_LAYOUT = 'chw' # input data layout, 
chw or hwc -NETWORK_INPUTSHAPE = [1, 3, 224, 224] # input shape of your network -EXECUTING_DEVICE = 'cuda' # 'cuda' or 'cpu'. -REQUIRE_ANALYSE = False -TRAINING_YOUR_NETWORK = False # 是否需要 Finetuning 一下你的网络 -# ------------------------------------------------------------------- -# 加载你的模型文件,PPQ 将会把 onnx 或者 caffe 模型文件解析成自己的格式 -# 如果你正使用 pytorch, tensorflow 等框架,你可以先将模型导出成 onnx -# 使用 torch.onnx.export 即可,如果你在导出 torch 模型时发生错误,欢迎与我们联系。 -# ------------------------------------------------------------------- -graph = None -if MODEL_TYPE == NetworkFramework.ONNX: - graph = load_onnx_graph(onnx_import_file=config.model) -if MODEL_TYPE == NetworkFramework.CAFFE: - graph = load_caffe_graph( - caffemodel_path=os.path.join(WORKING_DIRECTORY, 'model.caffemodel'), - prototxt_path=os.path.join(WORKING_DIRECTORY, 'model.prototxt')) -assert graph is not None, 'Graph Loading Error, Check your input again.' - -# ------------------------------------------------------------------- -# SETTING 对象用于控制 PPQ 的量化逻辑,主要描述了图融合逻辑、调度方案、量化细节策略等 -# 当你的网络量化误差过高时,你需要修改 SETTING 对象中的属性来进行特定的优化 -# ------------------------------------------------------------------- -QS = QuantizationSettingFactory.default_setting() - -# ------------------------------------------------------------------- -# 下面向你展示了如何使用 finetuning 过程提升量化精度 -# 在 PPQ 中我们提供了十余种算法用来帮助你恢复精度 -# 开启他们的方式都是 QS.xxxx = True -# 按需使用,不要全部打开,容易起飞 -# ------------------------------------------------------------------- -if TRAINING_YOUR_NETWORK: - QS.lsq_optimization = True # 启动网络再训练过程,降低量化误差 - QS.lsq_optimization_setting.steps = 500 # 再训练步数,影响训练时间,500 步大概几分钟 - QS.lsq_optimization_setting.collecting_device = 'cuda' # 缓存数据放在那,cuda 就是放在gpu,如果显存超了你就换成 'cpu' - - -dataloader = getdataloader(config.dataset_dir, config.step, batch_size=config.bsz, img_sz=config.imgsz) -# ENABLE CUDA KERNEL 会加速量化效率 3x ~ 10x,但是你如果没有装相应编译环境的话是编译不了的 -# 你可以尝试安装编译环境,或者在不启动 CUDA KERNEL 的情况下完成量化:移除 with ENABLE_CUDA_KERNEL(): 即可 -with ENABLE_CUDA_KERNEL(): - print('网络正量化中,根据你的量化配置,这将需要一段时间:') - 
quantized = quantize_native_model( - setting=QS, # setting 对象用来控制标准量化逻辑 - model=graph, - calib_dataloader=dataloader, - calib_steps=config.step, - input_shape=NETWORK_INPUTSHAPE, # 如果你的网络只有一个输入,使用这个参数传参 - inputs=None, - # 如果你的网络有多个输入,使用这个参数传参,就是 input_shape=None, inputs=[torch.zeros(1,3,224,224), torch.zeros(1,3,224,224)] - collate_fn=lambda x: x[0].to(EXECUTING_DEVICE), # collate_fn 跟 torch dataloader 的 collate fn 是一样的,用于数据预处理, - # 你当然也可以用 torch dataloader 的那个,然后设置这个为 None - platform=TARGET_PLATFORM, - device=EXECUTING_DEVICE, - do_quantize=True) - - # ------------------------------------------------------------------- - # 如果你需要执行量化后的神经网络并得到结果,则需要创建一个 executor - # 这个 executor 的行为和 torch.Module 是类似的,你可以利用这个东西来获取执行结果 - # 请注意,必须在 export 之前执行此操作。 - # ------------------------------------------------------------------- - executor = TorchExecutor(graph=quantized, device=EXECUTING_DEVICE) - # output = executor.forward(input) - - # ------------------------------------------------------------------- - # PPQ 计算量化误差时,使用信噪比的倒数作为指标,即噪声能量 / 信号能量 - # 量化误差 0.1 表示在整体信号中,量化噪声的能量约为 10% - # 你应当注意,在 graphwise_error_analyse 分析中,我们衡量的是累计误差 - # 网络的最后一层往往都具有较大的累计误差,这些误差是其前面的所有层所共同造成的 - # 你需要使用 layerwise_error_analyse 逐层分析误差的来源 - # ------------------------------------------------------------------- - print('正计算网络量化误差(SNR),最后一层的误差应小于 0.1 以保证量化精度:') - reports = graphwise_error_analyse( - graph=quantized, running_device=EXECUTING_DEVICE, steps=32, - dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE)) - for op, snr in reports.items(): - if snr > 0.1: ppq_warning(f'层 {op} 的累计量化误差显著,请考虑进行优化') - - if REQUIRE_ANALYSE: - print('正计算逐层量化误差(SNR),每一层的独立量化误差应小于 0.1 以保证量化精度:') - layerwise_error_analyse(graph=quantized, running_device=EXECUTING_DEVICE, - interested_outputs=None, - dataloader=dataloader, collate_fn=lambda x: x.to(EXECUTING_DEVICE)) - - # ------------------------------------------------------------------- - # 使用 export_ppq_graph 函数来导出量化后的模型 - # PPQ 会根据你所选择的导出平台来修改模型格式 - # 
------------------------------------------------------------------- - print('网络量化结束,正在生成目标文件:') - export_ppq_graph( - graph=quantized, platform=TARGET_PLATFORM, - graph_save_to=os.path.join(config.save_dir, f"quantized_{config.model_name}.onnx"), - config_save_to=os.path.join(config.save_dir, 'quant_cfg.json')) diff --git a/models/cv/classification/densenet121/ixrt/requirements.txt b/models/cv/classification/densenet121/ixrt/requirements.txt deleted file mode 100644 index 01ec4f11..00000000 --- a/models/cv/classification/densenet121/ixrt/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -tqdm -onnx -onnxsim -tabulate -ppq -pycuda \ No newline at end of file diff --git a/models/cv/classification/densenet121/ixrt/simplify_model.py b/models/cv/classification/densenet121/ixrt/simplify_model.py deleted file mode 100644 index 9948a9fa..00000000 --- a/models/cv/classification/densenet121/ixrt/simplify_model.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/densenet161/ixrt/README.md b/models/cv/classification/densenet161/ixrt/README.md index 10113c3a..084aaf1c 100644 --- a/models/cv/classification/densenet161/ixrt/README.md +++ b/models/cv/classification/densenet161/ixrt/README.md @@ -26,19 +26,24 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash -python3 export.py --weight densenet161-8d451a50.pth --output densenet161.onnx +mkdir checkpoints +python3 export.py --weight densenet161-8d451a50.pth --output checkpoints/densenet161.onnx ``` ## Model Inference ```bash -export DATASETS_DIR=/Path/to/imagenet_val/ +export PROJ_DIR=./ +export DATASETS_DIR=/path/to/imagenet_val/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/DENSENET161_CONFIG ``` ### FP16 diff --git a/models/cv/classification/densenet161/ixrt/build_engine.py b/models/cv/classification/densenet161/ixrt/build_engine.py deleted file mode 100644 index c0a83a1e..00000000 --- a/models/cv/classification/densenet161/ixrt/build_engine.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os -import cv2 -import argparse -import numpy as np -import torch -import tensorrt -import cuda.cudart as cudart - -def assertSuccess(err): - assert(err == cudart.cudaError_t.cudaSuccess) - - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.VERBOSE) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.FP16 - - build_config.set_flag(precision) - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "float32"], default="float16", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - parser.add_argument( - "--datasets_dir", - type=str, - default="deepsparkinference/data/datasets/imagenet_val/", - help="ImageNet dir", - ) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) diff --git 
a/models/cv/classification/densenet161/ixrt/calibration_dataset.py b/models/cv/classification/densenet161/ixrt/calibration_dataset.py deleted file mode 100644 index ec931c65..00000000 --- a/models/cv/classification/densenet161/ixrt/calibration_dataset.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) 
- if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=True, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader diff --git a/models/cv/classification/densenet161/ixrt/ci/prepare.sh b/models/cv/classification/densenet161/ixrt/ci/prepare.sh index 79d6d753..0c715e25 100644 --- a/models/cv/classification/densenet161/ixrt/ci/prepare.sh +++ b/models/cv/classification/densenet161/ixrt/ci/prepare.sh @@ -25,6 +25,6 @@ else echo "Not Support Os" fi -pip install -r requirements.txt - -python3 export.py --weight /root/data/checkpoints/densenet161-8d451a50.pth --output densenet161.onnx \ No newline at end of file +pip install -r ../../ixrt_common/requirements.txt +mkdir 
checkpoints +python3 export.py --weight /root/data/checkpoints/densenet161-8d451a50.pth --output checkpoints/densenet161.onnx \ No newline at end of file diff --git a/models/cv/classification/densenet161/ixrt/common.py b/models/cv/classification/densenet161/ixrt/common.py deleted file mode 100644 index 21c2b399..00000000 --- a/models/cv/classification/densenet161/ixrt/common.py +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -from cuda import cuda, cudart - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - err, allocation = cudart.cudaMalloc(size) - assert err == cudart.cudaError_t.cudaSuccess - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - "nbytes": size, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/densenet161/ixrt/inference.py b/models/cv/classification/densenet161/ixrt/inference.py deleted file mode 100644 index e7102e50..00000000 --- a/models/cv/classification/densenet161/ixrt/inference.py +++ /dev/null @@ -1,175 +0,0 @@ 
-#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -from cuda import cuda, cudart -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance 
Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += batch_data.shape[0] - (err,) = cudart.cudaMemcpy( - inputs[0]["allocation"], - batch_data, - batch_data.nbytes, - cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, - ) - assert err == cudart.cudaError_t.cudaSuccess - # cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - (err,) = cudart.cudaMemcpy( - output, - outputs[0]["allocation"], - outputs[0]["nbytes"], - cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, - ) - assert err == cudart.cudaError_t.cudaSuccess - # cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - e2e_time = end_time - start_time - print(F"E2E time : {e2e_time:.3f} seconds") - err, = cudart.cudaFree(inputs[0]["allocation"]) - assert err == cudart.cudaError_t.cudaSuccess - err, = cudart.cudaFree(outputs[0]["allocation"]) - assert err == cudart.cudaError_t.cudaSuccess - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : 
{acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/densenet161/ixrt/modify_batchsize.py b/models/cv/classification/densenet161/ixrt/modify_batchsize.py deleted file mode 100644 index 689b7a97..00000000 --- a/models/cv/classification/densenet161/ixrt/modify_batchsize.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. - dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/densenet161/ixrt/requirements.txt b/models/cv/classification/densenet161/ixrt/requirements.txt deleted file mode 100644 index 4805e251..00000000 --- a/models/cv/classification/densenet161/ixrt/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -tqdm -onnx -onnxsim -tabulate -cuda-python \ No newline at end of file diff --git 
a/models/cv/classification/densenet161/ixrt/scripts/infer_densenet161_fp16_accuracy.sh b/models/cv/classification/densenet161/ixrt/scripts/infer_densenet161_fp16_accuracy.sh index dfaf40d7..b743d708 100644 --- a/models/cv/classification/densenet161/ixrt/scripts/infer_densenet161_fp16_accuracy.sh +++ b/models/cv/classification/densenet161/ixrt/scripts/infer_densenet161_fp16_accuracy.sh @@ -42,11 +42,6 @@ do esac done -PROJ_DIR=${PROJ_DIR:-"."} -DATASETS_DIR="${DATASETS_DIR}" -CHECKPOINTS_DIR="${PROJ_DIR}" -RUN_DIR="${PROJ_DIR}" -CONFIG_DIR="${PROJ_DIR}/config/DENSENET_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} diff --git a/models/cv/classification/densenet161/ixrt/scripts/infer_densenet161_fp16_performance.sh b/models/cv/classification/densenet161/ixrt/scripts/infer_densenet161_fp16_performance.sh index f09ccfcc..e7a4f1a7 100644 --- a/models/cv/classification/densenet161/ixrt/scripts/infer_densenet161_fp16_performance.sh +++ b/models/cv/classification/densenet161/ixrt/scripts/infer_densenet161_fp16_performance.sh @@ -42,11 +42,6 @@ do esac done -PROJ_DIR=${PROJ_DIR:-"."} -DATASETS_DIR="${DATASETS_DIR}" -CHECKPOINTS_DIR="${PROJ_DIR}" -RUN_DIR="${PROJ_DIR}" -CONFIG_DIR="${PROJ_DIR}/config/DENSENET_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} diff --git a/models/cv/classification/densenet161/ixrt/simplify_model.py b/models/cv/classification/densenet161/ixrt/simplify_model.py deleted file mode 100644 index 9948a9fa..00000000 --- a/models/cv/classification/densenet161/ixrt/simplify_model.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/densenet169/ixrt/README.md b/models/cv/classification/densenet169/ixrt/README.md index 7fc60063..079b3eab 100644 --- a/models/cv/classification/densenet169/ixrt/README.md +++ b/models/cv/classification/densenet169/ixrt/README.md @@ -27,19 +27,24 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash -python3 export.py --weight densenet169-b2777c0a.pth --output densenet169.onnx +mkdir checkpoints +python3 export.py --weight densenet169-b2777c0a.pth --output checkpoints/densenet169.onnx ``` ## Model Inference ```bash -export DATASETS_DIR=/Path/to/imagenet_val/ +export PROJ_DIR=./ +export DATASETS_DIR=/path/to/imagenet_val/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/DENSENET169_CONFIG ``` ### FP16 
diff --git a/models/cv/classification/densenet169/ixrt/build_engine.py b/models/cv/classification/densenet169/ixrt/build_engine.py deleted file mode 100644 index c0a83a1e..00000000 --- a/models/cv/classification/densenet169/ixrt/build_engine.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os -import cv2 -import argparse -import numpy as np -import torch -import tensorrt -import cuda.cudart as cudart - -def assertSuccess(err): - assert(err == cudart.cudaError_t.cudaSuccess) - - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.VERBOSE) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.FP16 - - build_config.set_flag(precision) - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "float32"], default="float16", - help="The precision of datatype") - 
parser.add_argument("--engine", type=str, default=None) - parser.add_argument( - "--datasets_dir", - type=str, - default="deepsparkinference/data/datasets/imagenet_val/", - help="ImageNet dir", - ) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) diff --git a/models/cv/classification/densenet169/ixrt/calibration_dataset.py b/models/cv/classification/densenet169/ixrt/calibration_dataset.py deleted file mode 100644 index ec931c65..00000000 --- a/models/cv/classification/densenet169/ixrt/calibration_dataset.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=True, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - 
batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader diff --git a/models/cv/classification/densenet169/ixrt/ci/prepare.sh b/models/cv/classification/densenet169/ixrt/ci/prepare.sh index da20933f..8e35d120 100644 --- a/models/cv/classification/densenet169/ixrt/ci/prepare.sh +++ b/models/cv/classification/densenet169/ixrt/ci/prepare.sh @@ -25,6 +25,6 @@ else echo "Not Support Os" fi -pip install -r requirements.txt - -python3 export.py --weight /root/data/checkpoints/densenet169-b2777c0a.pth --output densenet169.onnx \ No newline at end of file +pip install -r ../../ixrt_common/requirements.txt +mkdir checkpoints +python3 export.py --weight /root/data/checkpoints/densenet169-b2777c0a.pth --output checkpoints/densenet169.onnx \ No newline at end of file diff --git a/models/cv/classification/densenet169/ixrt/common.py b/models/cv/classification/densenet169/ixrt/common.py deleted file mode 100644 index 21c2b399..00000000 --- a/models/cv/classification/densenet169/ixrt/common.py +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -from cuda import cuda, cudart - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - err, allocation = cudart.cudaMalloc(size) - assert err == cudart.cudaError_t.cudaSuccess - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - "nbytes": size, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - 
allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/densenet169/ixrt/inference.py b/models/cv/classification/densenet169/ixrt/inference.py deleted file mode 100644 index e7102e50..00000000 --- a/models/cv/classification/densenet169/ixrt/inference.py +++ /dev/null @@ -1,175 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -from cuda import cuda, cudart -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += batch_data.shape[0] - (err,) = 
cudart.cudaMemcpy( - inputs[0]["allocation"], - batch_data, - batch_data.nbytes, - cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, - ) - assert err == cudart.cudaError_t.cudaSuccess - # cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - (err,) = cudart.cudaMemcpy( - output, - outputs[0]["allocation"], - outputs[0]["nbytes"], - cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, - ) - assert err == cudart.cudaError_t.cudaSuccess - # cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - e2e_time = end_time - start_time - print(F"E2E time : {e2e_time:.3f} seconds") - err, = cudart.cudaFree(inputs[0]["allocation"]) - assert err == cudart.cudaError_t.cudaSuccess - err, = cudart.cudaFree(outputs[0]["allocation"]) - assert err == cudart.cudaError_t.cudaSuccess - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, 
help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/densenet169/ixrt/modify_batchsize.py b/models/cv/classification/densenet169/ixrt/modify_batchsize.py deleted file mode 100644 index 689b7a97..00000000 --- a/models/cv/classification/densenet169/ixrt/modify_batchsize.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. 
- dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/densenet169/ixrt/requirements.txt b/models/cv/classification/densenet169/ixrt/requirements.txt deleted file mode 100644 index 4805e251..00000000 --- a/models/cv/classification/densenet169/ixrt/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -tqdm -onnx -onnxsim -tabulate -cuda-python \ No newline at end of file diff --git a/models/cv/classification/densenet169/ixrt/scripts/infer_densenet169_fp16_accuracy.sh b/models/cv/classification/densenet169/ixrt/scripts/infer_densenet169_fp16_accuracy.sh index eb17c406..b743d708 100644 --- a/models/cv/classification/densenet169/ixrt/scripts/infer_densenet169_fp16_accuracy.sh +++ b/models/cv/classification/densenet169/ixrt/scripts/infer_densenet169_fp16_accuracy.sh @@ -42,11 +42,6 @@ do esac done -PROJ_DIR=${PROJ_DIR:-"."} -DATASETS_DIR=${DATASETS_DIR} -CHECKPOINTS_DIR="${PROJ_DIR}" -RUN_DIR="${PROJ_DIR}" -CONFIG_DIR="${PROJ_DIR}/config/DENSENET_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} diff --git a/models/cv/classification/densenet169/ixrt/scripts/infer_densenet169_fp16_performance.sh b/models/cv/classification/densenet169/ixrt/scripts/infer_densenet169_fp16_performance.sh index 
f09ccfcc..e7a4f1a7 100644 --- a/models/cv/classification/densenet169/ixrt/scripts/infer_densenet169_fp16_performance.sh +++ b/models/cv/classification/densenet169/ixrt/scripts/infer_densenet169_fp16_performance.sh @@ -42,11 +42,6 @@ do esac done -PROJ_DIR=${PROJ_DIR:-"."} -DATASETS_DIR="${DATASETS_DIR}" -CHECKPOINTS_DIR="${PROJ_DIR}" -RUN_DIR="${PROJ_DIR}" -CONFIG_DIR="${PROJ_DIR}/config/DENSENET_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} diff --git a/models/cv/classification/densenet169/ixrt/simplify_model.py b/models/cv/classification/densenet169/ixrt/simplify_model.py deleted file mode 100644 index 9948a9fa..00000000 --- a/models/cv/classification/densenet169/ixrt/simplify_model.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/densenet201/ixrt/README.md b/models/cv/classification/densenet201/ixrt/README.md index 52b2ad36..fa772090 100644 --- a/models/cv/classification/densenet201/ixrt/README.md +++ b/models/cv/classification/densenet201/ixrt/README.md @@ -27,19 +27,24 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash -python3 export.py --weight densenet201-c1103571.pth --output densenet201.onnx +mkdir checkpoints +python3 export.py --weight densenet201-c1103571.pth --output checkpoints/densenet201.onnx ``` ## Model Inference ```bash -export DATASETS_DIR=/Path/to/imagenet_val/ +export PROJ_DIR=./ +export DATASETS_DIR=/path/to/imagenet_val/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/DENSENET201_CONFIG ``` ### FP16 diff --git a/models/cv/classification/densenet201/ixrt/build_engine.py b/models/cv/classification/densenet201/ixrt/build_engine.py deleted file mode 100644 index 038c15d5..00000000 --- a/models/cv/classification/densenet201/ixrt/build_engine.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/densenet201/ixrt/calibration_dataset.py b/models/cv/classification/densenet201/ixrt/calibration_dataset.py deleted file mode 100644 index ec931c65..00000000 --- 
a/models/cv/classification/densenet201/ixrt/calibration_dataset.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) 
- target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=True, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader diff --git a/models/cv/classification/densenet201/ixrt/ci/prepare.sh b/models/cv/classification/densenet201/ixrt/ci/prepare.sh new file mode 100644 index 00000000..41824bd3 --- /dev/null +++ b/models/cv/classification/densenet201/ixrt/ci/prepare.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r ../../ixrt_common/requirements.txt +mkdir checkpoints +python3 export.py --weight densenet201-c1103571.pth --output checkpoints/densenet201.onnx \ No newline at end of file diff --git a/models/cv/classification/densenet201/ixrt/common.py b/models/cv/classification/densenet201/ixrt/common.py deleted file mode 100644 index fd6a84d8..00000000 --- a/models/cv/classification/densenet201/ixrt/common.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -from cuda import cuda, cudart - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - err, allocation = cudart.cudaMalloc(size) - assert err == cudart.cudaError_t.cudaSuccess - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - "nbytes": size, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations \ No newline at end of file diff --git a/models/cv/classification/densenet201/ixrt/inference.py b/models/cv/classification/densenet201/ixrt/inference.py deleted file mode 100644 index 22f1644c..00000000 --- a/models/cv/classification/densenet201/ixrt/inference.py +++ 
/dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -from cuda import cuda, cudart -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", 
fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += batch_data.shape[0] - (err,) = cudart.cudaMemcpy( - inputs[0]["allocation"], - batch_data, - batch_data.nbytes, - cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, - ) - assert err == cudart.cudaError_t.cudaSuccess - # cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - (err,) = cudart.cudaMemcpy( - output, - outputs[0]["allocation"], - outputs[0]["nbytes"], - cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, - ) - assert err == cudart.cudaError_t.cudaSuccess - # cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - err, = cudart.cudaFree(inputs[0]["allocation"]) - assert err == cudart.cudaError_t.cudaSuccess - err, = cudart.cudaFree(outputs[0]["allocation"]) - assert err == cudart.cudaError_t.cudaSuccess - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test 
{acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/densenet201/ixrt/modify_batchsize.py b/models/cv/classification/densenet201/ixrt/modify_batchsize.py deleted file mode 100644 index 689b7a97..00000000 --- a/models/cv/classification/densenet201/ixrt/modify_batchsize.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. - dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/densenet201/ixrt/requirements.txt b/models/cv/classification/densenet201/ixrt/requirements.txt deleted file mode 100644 index 520130b7..00000000 --- a/models/cv/classification/densenet201/ixrt/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -tqdm -onnx -onnxsim -tabulate -ppq -tqdm -cuda-python \ No newline at end of file 
diff --git a/models/cv/classification/densenet201/ixrt/scripts/infer_densenet201_fp16_accuracy.sh b/models/cv/classification/densenet201/ixrt/scripts/infer_densenet201_fp16_accuracy.sh index 30024cbc..b743d708 100644 --- a/models/cv/classification/densenet201/ixrt/scripts/infer_densenet201_fp16_accuracy.sh +++ b/models/cv/classification/densenet201/ixrt/scripts/infer_densenet201_fp16_accuracy.sh @@ -42,11 +42,6 @@ do esac done -PROJ_DIR=${PROJ_DIR:-"."} -DATASETS_DIR="${DATASETS_DIR}" -CHECKPOINTS_DIR="${PROJ_DIR}" -RUN_DIR="${PROJ_DIR}" -CONFIG_DIR="${PROJ_DIR}/config/DENSENET201_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} diff --git a/models/cv/classification/densenet201/ixrt/scripts/infer_densenet201_fp16_performance.sh b/models/cv/classification/densenet201/ixrt/scripts/infer_densenet201_fp16_performance.sh index 42b81e12..e7a4f1a7 100644 --- a/models/cv/classification/densenet201/ixrt/scripts/infer_densenet201_fp16_performance.sh +++ b/models/cv/classification/densenet201/ixrt/scripts/infer_densenet201_fp16_performance.sh @@ -42,11 +42,6 @@ do esac done -PROJ_DIR=${PROJ_DIR:-"."} -DATASETS_DIR="${DATASETS_DIR}" -CHECKPOINTS_DIR="${PROJ_DIR}" -RUN_DIR="${PROJ_DIR}" -CONFIG_DIR="${PROJ_DIR}/config/DENSENET201_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} diff --git a/models/cv/classification/densenet201/ixrt/simplify_model.py b/models/cv/classification/densenet201/ixrt/simplify_model.py deleted file mode 100644 index 9948a9fa..00000000 --- a/models/cv/classification/densenet201/ixrt/simplify_model.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/efficientnet_b0/ixrt/README.md b/models/cv/classification/efficientnet_b0/ixrt/README.md index 7bc1444c..9ca340be 100644 --- a/models/cv/classification/efficientnet_b0/ixrt/README.md +++ b/models/cv/classification/efficientnet_b0/ixrt/README.md @@ -27,19 +27,24 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash -python3 export_onnx.py --origin_model /path/to/efficientnet_b0_rwightman-3dd342df.pth --output_model efficientnet_b0.onnx +mkdir checkpoints +python3 ../../ixrt_common/export.py --model-name efficientnet_b0 --weight /path/to/efficientnet_b0_rwightman-3dd342df.pth --output checkpoints/efficientnet_b0.onnx ``` ## Model Inference ```bash +export PROJ_DIR=./ export DATASETS_DIR=/path/to/imagenet_val/ +export CHECKPOINTS_DIR=./checkpoints +export 
RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/EFFICIENTNET_B0_CONFIG ``` ### FP16 diff --git a/models/cv/classification/efficientnet_b0/ixrt/build_engine.py b/models/cv/classification/efficientnet_b0/ixrt/build_engine.py deleted file mode 100644 index 24979eb8..00000000 --- a/models/cv/classification/efficientnet_b0/ixrt/build_engine.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt -from calibration_dataset import getdataloader -import cuda.cudart as cudart - - -def assertSuccess(err): - assert(err == cudart.cudaError_t.cudaSuccess) - - -class EngineCalibrator(tensorrt.IInt8EntropyCalibrator2): - - def __init__(self, cache_file, datasets_dir, loop_count=10, bsz=1, img_sz=224): - super().__init__() - self.cache_file = cache_file - self.image_batcher = getdataloader(datasets_dir, loop_count, batch_size=bsz, img_sz=img_sz) - self.batch_generator = iter(self.image_batcher) - size = img_sz*img_sz*3*bsz - # __import__('pdb').set_trace() - err, self.batch_allocation = cudart.cudaMalloc(size) - assertSuccess(err) - - def __del__(self): - err,= cudart.cudaFree(self.batch_allocation) - assertSuccess(err) - - def get_batch_size(self): - return self.image_batcher.batch_size - - def get_batch(self, names): - try: - batch, _ = next(self.batch_generator) - batch = batch.numpy() - __import__('pdb').set_trace() - cudart.cudaMemcpy(self.batch_allocation, - np.ascontiguousarray(batch), - batch.nbytes, - cudart.cudaMemcpyKind.cudaMemcpyHostToDevice) - return [int(self.batch_allocation)] - except StopIteration: - return None - - def read_calibration_cache(self): - if os.path.exists(self.cache_file): - with open(self.cache_file, "rb") as f: - return f.read() - - def write_calibration_cache(self, cache): - with open(self.cache_file, "wb") as f: - f.write(cache) - - -def main(config): - # IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.VERBOSE) - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision 
== "int8" else tensorrt.BuilderFlag.FP16 - print("precision : ", precision) - build_config.set_flag(precision) - if config.precision == "int8": - build_config.int8_calibrator = EngineCalibrator("int8_cache", config.datasets_dir) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - args = parser.parse_args() - return args - - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b0/ixrt/build_engine_by_write_qparams.py b/models/cv/classification/efficientnet_b0/ixrt/build_engine_by_write_qparams.py deleted file mode 100644 index 8b6678c8..00000000 --- a/models/cv/classification/efficientnet_b0/ixrt/build_engine_by_write_qparams.py +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import argparse -import json -import os - -import tensorrt -import tensorrt as trt - -TRT_LOGGER = trt.Logger(tensorrt.Logger.WARNING) - -EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - - -def GiB(val): - return val * 1 << 30 - - -def json_load(filename): - with open(filename) as json_file: - data = json.load(json_file) - return data - - -def setDynamicRange(network, json_file): - """Sets ranges for network layers.""" - quant_param_json = json_load(json_file) - act_quant = quant_param_json["act_quant_info"] - - for i in range(network.num_inputs): - input_tensor = network.get_input(i) - if act_quant.__contains__(input_tensor.name): - print(input_tensor.name) - value = act_quant[input_tensor.name] - tensor_max = abs(value) - tensor_min = -abs(value) - input_tensor.dynamic_range = (tensor_min, tensor_max) - - for i in range(network.num_layers): - layer = network.get_layer(i) - - for output_index in range(layer.num_outputs): - tensor = layer.get_output(output_index) - - if act_quant.__contains__(tensor.name): - value = act_quant[tensor.name] - tensor_max = abs(value) - tensor_min = -abs(value) - tensor.dynamic_range = (tensor_min, tensor_max) - else: - print("\033[1;32m%s\033[0m" % tensor.name) - - -def build_engine(onnx_file, json_file, engine_file): - builder = trt.Builder(TRT_LOGGER) - network = builder.create_network(EXPLICIT_BATCH) - - config = builder.create_builder_config() - - # If it is a dynamic onnx model , you need to add the following. 
- # profile = builder.create_optimization_profile() - # profile.set_shape("input_name", (batch, channels, min_h, min_w), (batch, channels, opt_h, opt_w), (batch, channels, max_h, max_w)) - # config.add_optimization_profile(profile) - - parser = trt.OnnxParser(network, TRT_LOGGER) - # config.max_workspace_size = GiB(1) - - if not os.path.exists(onnx_file): - quit("ONNX file {} not found".format(onnx_file)) - - with open(onnx_file, "rb") as model: - if not parser.parse(model.read()): - print("ERROR: Failed to parse the ONNX file.") - for error in range(parser.num_errors): - print(parser.get_error(error)) - return None - - config.set_flag(trt.BuilderFlag.INT8) - - setDynamicRange(network, json_file) - - engine = builder.build_engine(network, config) - - with open(engine_file, "wb") as f: - f.write(engine.serialize()) - - -if __name__ == "__main__": - # Add plugins if needed - # import ctypes - # ctypes.CDLL("libmmdeploy_tensorrt_ops.so") - parser = argparse.ArgumentParser( - description="Writing qparams to onnx to convert tensorrt engine." - ) - parser.add_argument("--onnx", type=str, default=None) - parser.add_argument("--qparam_json", type=str, default=None) - parser.add_argument("--engine", type=str, default=None) - arg = parser.parse_args() - - build_engine(arg.onnx, arg.qparam_json, arg.engine) - print("\033[1;32mgenerate %s\033[0m" % arg.engine) \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b0/ixrt/calibration_dataset.py b/models/cv/classification/efficientnet_b0/ixrt/calibration_dataset.py deleted file mode 100644 index 5f31de97..00000000 --- a/models/cv/classification/efficientnet_b0/ixrt/calibration_dataset.py +++ /dev/null @@ -1,115 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 
0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - number_ =3 - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(number_,(number_+num_samples)) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - # num_samples = min(total_sample, step * batch_size) - num_samples = 16 * batch_size - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b0/ixrt/ci/prepare.sh b/models/cv/classification/efficientnet_b0/ixrt/ci/prepare.sh index ec7e8404..7c7ba3e1 100644 --- a/models/cv/classification/efficientnet_b0/ixrt/ci/prepare.sh +++ b/models/cv/classification/efficientnet_b0/ixrt/ci/prepare.sh @@ -25,5 +25,6 @@ else echo "Not Support Os" fi -pip install -r requirements.txt -python3 export_onnx.py --origin_model /root/data/checkpoints/efficientnet_b0_rwightman-3dd342df.pth --output_model efficientnet_b0.onnx \ No newline at end of file +pip install -r ../../ixrt_common/requirements.txt +mkdir checkpoints +python3 ../../ixrt_common/export.py --model-name efficientnet_b0 --weight /root/data/checkpoints/efficientnet_b0_rwightman-3dd342df.pth --output checkpoints/efficientnet_b0.onnx \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b0/ixrt/common.py b/models/cv/classification/efficientnet_b0/ixrt/common.py deleted file mode 100644 index 
e287f3f1..00000000 --- a/models/cv/classification/efficientnet_b0/ixrt/common.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -import pycuda.driver as cuda - - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - allocation = 
cuda.mem_alloc(size) - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b0/ixrt/export_onnx.py b/models/cv/classification/efficientnet_b0/ixrt/export_onnx.py deleted file mode 100644 index 8970979c..00000000 --- a/models/cv/classification/efficientnet_b0/ixrt/export_onnx.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import torch -import torchvision.models as models -import argparse - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - - -args = parse_args() -model = models.efficientnet_b0() -# model = models.resnet18(pretrained=True) -model.load_state_dict(torch.load(args.origin_model)) -model.cuda() -model.eval() -inputx = torch.randn(1, 3, 224, 224, device='cuda') -export_onnx_file = args.output_model - - -torch.onnx.export(model, - inputx, - export_onnx_file, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names = ['input'], - output_names = ['output'],) -print(f"Convert onnx model in {export_onnx_file}") -exit() diff --git a/models/cv/classification/efficientnet_b0/ixrt/inference.py b/models/cv/classification/efficientnet_b0/ixrt/inference.py deleted file mode 100644 index f1155702..00000000 --- a/models/cv/classification/efficientnet_b0/ixrt/inference.py +++ /dev/null @@ -1,164 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -import pycuda.autoinit -import pycuda.driver as cuda -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += 
batch_data.shape[0] - - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - end2end_time = end_time - start_time - - print(F"E2E time : {end2end_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS ACC") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 
0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - - -if __name__ == "__main__": - config = parse_config() - main(config) \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b0/ixrt/quant.py b/models/cv/classification/efficientnet_b0/ixrt/quant.py deleted file mode 100644 index 72f85638..00000000 --- a/models/cv/classification/efficientnet_b0/ixrt/quant.py +++ /dev/null @@ -1,147 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -from ppq import * -from ppq.api import * -import os -from calibration_dataset import getdataloader -import argparse -import random -import numpy as np -import torch - -random.seed(42) -np.random.seed(42) -torch.manual_seed(42) - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], - default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=32) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=224) - args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - - -config = parse_args() - -# modify configuration below: -WORKING_DIRECTORY = 'checkpoints' # choose your working directory -TARGET_PLATFORM = TargetPlatform.TRT_INT8 # choose your target platform -MODEL_TYPE = NetworkFramework.ONNX # or NetworkFramework.CAFFE -INPUT_LAYOUT = 'chw' # input data layout, chw or hwc -NETWORK_INPUTSHAPE = [config.bsz, 3, 224, 224] # input shape of your network -EXECUTING_DEVICE = 'cuda' # 'cuda' or 'cpu'. 
-REQUIRE_ANALYSE = False -TRAINING_YOUR_NETWORK = True # 是否需要 Finetuning 一下你的网络 -# ------------------------------------------------------------------- -# 加载你的模型文件,PPQ 将会把 onnx 或者 caffe 模型文件解析成自己的格式 -# 如果你正使用 pytorch, tensorflow 等框架,你可以先将模型导出成 onnx -# 使用 torch.onnx.export 即可,如果你在导出 torch 模型时发生错误,欢迎与我们联系。 -# ------------------------------------------------------------------- -graph = None -if MODEL_TYPE == NetworkFramework.ONNX: - graph = load_onnx_graph(onnx_import_file=config.model) -if MODEL_TYPE == NetworkFramework.CAFFE: - graph = load_caffe_graph( - caffemodel_path=os.path.join(WORKING_DIRECTORY, 'model.caffemodel'), - prototxt_path=os.path.join(WORKING_DIRECTORY, 'model.prototxt')) -assert graph is not None, 'Graph Loading Error, Check your input again.' - -# ------------------------------------------------------------------- -# SETTING 对象用于控制 PPQ 的量化逻辑,主要描述了图融合逻辑、调度方案、量化细节策略等 -# 当你的网络量化误差过高时,你需要修改 SETTING 对象中的属性来进行特定的优化 -# ------------------------------------------------------------------- -QS = QuantizationSettingFactory.default_setting() - -# ------------------------------------------------------------------- -# 下面向你展示了如何使用 finetuning 过程提升量化精度 -# 在 PPQ 中我们提供了十余种算法用来帮助你恢复精度 -# 开启他们的方式都是 QS.xxxx = True -# 按需使用,不要全部打开,容易起飞 -# ------------------------------------------------------------------- -if TRAINING_YOUR_NETWORK: - QS.lsq_optimization = True # 启动网络再训练过程,降低量化误差 - QS.lsq_optimization_setting.steps = 800 # 再训练步数,影响训练时间,500 步大概几分钟 - QS.lsq_optimization_setting.collecting_device = 'cpu' # 缓存数据放在那,cuda 就是放在gpu,如果显存超了你就换成 'cpu' - - -dataloader = getdataloader(config.dataset_dir, config.step, batch_size=config.bsz, img_sz=config.imgsz) -# ENABLE CUDA KERNEL 会加速量化效率 3x ~ 10x,但是你如果没有装相应编译环境的话是编译不了的 -# 你可以尝试安装编译环境,或者在不启动 CUDA KERNEL 的情况下完成量化:移除 with ENABLE_CUDA_KERNEL(): 即可 -with ENABLE_CUDA_KERNEL(): - print('网络正量化中,根据你的量化配置,这将需要一段时间:') - quantized = quantize_native_model( - setting=QS, # setting 对象用来控制标准量化逻辑 - model=graph, - calib_dataloader=dataloader, - 
calib_steps=config.step, - input_shape=NETWORK_INPUTSHAPE, # 如果你的网络只有一个输入,使用这个参数传参 - inputs=None, - # 如果你的网络有多个输入,使用这个参数传参,就是 input_shape=None, inputs=[torch.zeros(1,3,224,224), torch.zeros(1,3,224,224)] - collate_fn=lambda x: x[0].to(EXECUTING_DEVICE), # collate_fn 跟 torch dataloader 的 collate fn 是一样的,用于数据预处理, - # 你当然也可以用 torch dataloader 的那个,然后设置这个为 None - platform=TARGET_PLATFORM, - device=EXECUTING_DEVICE, - do_quantize=True) - - # ------------------------------------------------------------------- - # 如果你需要执行量化后的神经网络并得到结果,则需要创建一个 executor - # 这个 executor 的行为和 torch.Module 是类似的,你可以利用这个东西来获取执行结果 - # 请注意,必须在 export 之前执行此操作。 - # ------------------------------------------------------------------- - executor = TorchExecutor(graph=quantized, device=EXECUTING_DEVICE) - # output = executor.forward(input) - - # ------------------------------------------------------------------- - # PPQ 计算量化误差时,使用信噪比的倒数作为指标,即噪声能量 / 信号能量 - # 量化误差 0.1 表示在整体信号中,量化噪声的能量约为 10% - # 你应当注意,在 graphwise_error_analyse 分析中,我们衡量的是累计误差 - # 网络的最后一层往往都具有较大的累计误差,这些误差是其前面的所有层所共同造成的 - # 你需要使用 layerwise_error_analyse 逐层分析误差的来源 - # ------------------------------------------------------------------- - print('正计算网络量化误差(SNR),最后一层的误差应小于 0.1 以保证量化精度:') - reports = graphwise_error_analyse( - graph=quantized, running_device=EXECUTING_DEVICE, steps=32, - dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE)) - for op, snr in reports.items(): - if snr > 0.1: ppq_warning(f'层 {op} 的累计量化误差显著,请考虑进行优化') - - if REQUIRE_ANALYSE: - print('正计算逐层量化误差(SNR),每一层的独立量化误差应小于 0.1 以保证量化精度:') - layerwise_error_analyse(graph=quantized, running_device=EXECUTING_DEVICE, - interested_outputs=None, - dataloader=dataloader, collate_fn=lambda x: x.to(EXECUTING_DEVICE)) - - # ------------------------------------------------------------------- - # 使用 export_ppq_graph 函数来导出量化后的模型 - # PPQ 会根据你所选择的导出平台来修改模型格式 - # ------------------------------------------------------------------- - print('网络量化结束,正在生成目标文件:') - export_ppq_graph( - 
graph=quantized, platform=TARGET_PLATFORM, - graph_save_to=os.path.join(config.save_dir, f"quantized_{config.model_name}.onnx"), - config_save_to=os.path.join(config.save_dir, 'quant_cfg.json')) \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b0/ixrt/requirements.txt b/models/cv/classification/efficientnet_b0/ixrt/requirements.txt deleted file mode 100644 index 46a30ba2..00000000 --- a/models/cv/classification/efficientnet_b0/ixrt/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -tqdm -onnx -onnxsim -tabulate -ppq==0.6.6 -protobuf==3.20.3 -pycuda \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_fp16_accuracy.sh b/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_fp16_accuracy.sh index dfc034b9..792c25ea 100644 --- a/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_fp16_accuracy.sh +++ b/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_fp16_accuracy.sh @@ -13,26 +13,130 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. 
-batchsize=32 -model_path="efficientnet_b0" -# model_path="resnet18" -datasets_path=${DATASETS_DIR} - -# change batchsize -python3 modify_batchsize.py \ - --batch_size ${batchsize} \ - --origin_model ${model_path}.onnx \ - --output_model ${model_path}_bs32.onnx - -# build engine -python3 build_engine.py \ - --model ${model_path}_bs32.onnx \ - --precision float16 \ - --engine ${model_path}_fp16.engine - -# inference -python3 inference.py \ - --test_mode ACC \ - --engine_file ${model_path}_fp16.engine \ - --bsz ${batchsize} \ - --datasets_dir ${datasets_path} \ No newline at end of file +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +TGT=-1 +WARM_UP=0 +LOOP_COUNT=-1 +RUN_MODE=ACC +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx + +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + 
SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --engine_file=${ENGINE_FILE} \ + --datasets_dir=${DATASETS_DIR} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --acc_target ${TGT} \ + --bsz ${BSZ}; check_status + +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_fp16_performance.sh b/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_fp16_performance.sh index 818c066d..61051638 100644 --- 
a/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_fp16_performance.sh +++ b/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_fp16_performance.sh @@ -13,28 +13,130 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. -batchsize=32 -model_path="efficientnet_b0" -# model_path="resnet18" -datasets_path=${DATASETS_DIR} - -# change batchsize -python3 modify_batchsize.py \ - --batch_size ${batchsize} \ - --origin_model ${model_path}.onnx \ - --output_model ${model_path}_bs32.onnx - -# build engine -python3 build_engine.py \ - --model ${model_path}_bs32.onnx \ - --precision float16 \ - --engine ${model_path}_fp16.engine - -# inference -python3 inference.py \ - --test_mode FPS \ - --engine_file ${model_path}_fp16.engine \ - --bsz ${batchsize} \ - --datasets_dir ${datasets_path} \ - --warm_up 5 \ - --loop_count 20 \ No newline at end of file +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +TGT=-1 +WARM_UP=3 +LOOP_COUNT=20 +RUN_MODE=FPS +PRECISION=float16 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx + +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been 
existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --engine_file=${ENGINE_FILE} \ + --datasets_dir=${DATASETS_DIR} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode 
${RUN_MODE} \ + --fps_target ${TGT} \ + --bsz ${BSZ}; check_status + +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_int8_accuracy.sh b/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_int8_accuracy.sh index 97892512..a66d6a25 100644 --- a/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_int8_accuracy.sh +++ b/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_int8_accuracy.sh @@ -13,34 +13,132 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. -batchsize=32 -model_path="efficientnet_b0" -# model_path="resnet18" -datasets_path=${DATASETS_DIR} - -# change batchsize -python3 modify_batchsize.py \ - --batch_size ${batchsize} \ - --origin_model ${model_path}.onnx \ - --output_model ${model_path}_bs32.onnx - -# quant -python3 quant.py \ - --model_name ${model_path} \ - --model ${model_path}_bs32.onnx \ - --dataset_dir ${datasets_path} \ - --bsz ${batchsize} \ - --save_dir ./ - -# build engine -python3 build_engine_by_write_qparams.py \ - --onnx quantized_${model_path}.onnx \ - --qparam_json quant_cfg.json \ - --engine ${model_path}_int8.engine - -# inference -python3 inference.py \ - --test_mode ACC \ - --engine_file ${model_path}_int8.engine \ - --bsz ${batchsize} \ - --datasets_dir ${datasets_path} \ No newline at end of file +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +TGT=-1 +WARM_UP=0 +LOOP_COUNT=-1 +RUN_MODE=ACC +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} 
+ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Model Input Name : ${MODEL_INPUT_NAME} +echo Model Output Name : ${MODEL_OUTPUT_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx + +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi + +# Build Engine +let step++ +echo; 
+echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --engine_file=${ENGINE_FILE} \ + --datasets_dir=${DATASETS_DIR} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --acc_target ${TGT} \ + --bsz ${BSZ}; check_status + +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_int8_performance.sh b/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_int8_performance.sh index e016fd1a..9e246604 100644 --- a/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_int8_performance.sh +++ b/models/cv/classification/efficientnet_b0/ixrt/scripts/infer_efficientnet_b0_int8_performance.sh @@ -13,36 +13,130 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. 
-batchsize=32 -model_path="efficientnet_b0" -# model_path="resnet18" -datasets_path=${DATASETS_DIR} - -# change batchsize -python3 modify_batchsize.py \ - --batch_size ${batchsize} \ - --origin_model ${model_path}.onnx \ - --output_model ${model_path}_bs32.onnx - -# quant -python3 quant.py \ - --model_name ${model_path} \ - --model ${model_path}_bs32.onnx \ - --dataset_dir ${datasets_path} \ - --bsz ${batchsize} \ - --save_dir ./ - -# build engine -python3 build_engine_by_write_qparams.py \ - --onnx quantized_${model_path}.onnx \ - --qparam_json quant_cfg.json \ - --engine ${model_path}_int8.engine - -# inference -python3 inference.py \ - --test_mode FPS \ - --engine_file ${model_path}_int8.engine \ - --bsz ${batchsize} \ - --datasets_dir ${datasets_path} \ - --warm_up 5 \ - --loop_count 20 +EXIT_STATUS=0 +check_status() +{ + if ((${PIPESTATUS[0]} != 0));then + EXIT_STATUS=1 + fi +} + +# Run paraments +BSZ=32 +TGT=-1 +WARM_UP=3 +LOOP_COUNT=20 +RUN_MODE=FPS +PRECISION=int8 + +# Update arguments +index=0 +options=$@ +arguments=($options) +for argument in $options +do + index=`expr $index + 1` + case $argument in + --bs) BSZ=${arguments[index]};; + --tgt) TGT=${arguments[index]};; + esac +done + +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} + +echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} +echo DATASETS_DIR : ${DATASETS_DIR} +echo RUN_DIR : ${RUN_DIR} +echo CONFIG_DIR : ${CONFIG_DIR} +echo ====================== Model Info ====================== +echo Model Name : ${MODEL_NAME} +echo Onnx Path : ${ORIGINE_MODEL} + +step=0 +SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx + +# Simplify Model +let step++ +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi + +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + 
echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi + +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi + +# Inference +let step++ +echo; +echo [STEP ${step}] : Inference +python3 ${RUN_DIR}/inference.py \ + --engine_file=${ENGINE_FILE} \ + --datasets_dir=${DATASETS_DIR} \ + --imgsz=${IMGSIZE} \ + --warm_up=${WARM_UP} \ + --loop_count ${LOOP_COUNT} \ + --test_mode ${RUN_MODE} \ + --fps_target ${TGT} \ + --bsz ${BSZ}; check_status + +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b1/ixrt/README.md 
b/models/cv/classification/efficientnet_b1/ixrt/README.md index f21bcb7b..e85cef7a 100644 --- a/models/cv/classification/efficientnet_b1/ixrt/README.md +++ b/models/cv/classification/efficientnet_b1/ixrt/README.md @@ -14,6 +14,8 @@ EfficientNet B1 is one of the variants in the EfficientNet family of neural netw ### Prepare Resources +Pretrained model: + Dataset: to download the validation dataset. ### Install Dependencies @@ -25,14 +27,14 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash mkdir checkpoints -python3 export_onnx.py --output_model checkpoints/efficientnet-b1.onnx +python3 ../../ixrt_common/export.py --model-name efficientnet_b1 --weight efficientnet_b1-c27df63c.pth --output checkpoints/efficientnet_b1.onnx ``` ## Model Inference @@ -41,8 +43,8 @@ python3 export_onnx.py --output_model checkpoints/efficientnet-b1.onnx export PROJ_DIR=./ export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints -export RUN_DIR=./ -export CONFIG_DIR=config/EFFICIENTNET_B1_CONFIG +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/EFFICIENTNET_B1_CONFIG ``` ### FP16 diff --git a/models/cv/classification/efficientnet_b1/ixrt/build_engine.py b/models/cv/classification/efficientnet_b1/ixrt/build_engine.py deleted file mode 100644 index 32f549d8..00000000 --- a/models/cv/classification/efficientnet_b1/ixrt/build_engine.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - # print("precision : ", precision) - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b1/ixrt/calibration_dataset.py b/models/cv/classification/efficientnet_b1/ixrt/calibration_dataset.py deleted file mode 100644 index d7525d51..00000000 --- a/models/cv/classification/efficientnet_b1/ixrt/calibration_dataset.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright 
(c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, 
num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b1/ixrt/ci/prepare.sh b/models/cv/classification/efficientnet_b1/ixrt/ci/prepare.sh index d9e780f2..c69f7471 100644 --- a/models/cv/classification/efficientnet_b1/ixrt/ci/prepare.sh +++ b/models/cv/classification/efficientnet_b1/ixrt/ci/prepare.sh @@ -25,9 +25,7 @@ else echo "Not Support Os" fi -pip install -r requirements.txt +pip install -r ../../ixrt_common/requirements.txt mkdir checkpoints -mkdir -p /root/.cache/torch/hub/checkpoints/ -ln -s /root/data/checkpoints/efficientnet_b1_rwightman-bac287d4.pth /root/.cache/torch/hub/checkpoints/efficientnet_b1_rwightman-bac287d4.pth -python3 export_onnx.py --output_model checkpoints/efficientnet-b1.onnx \ No newline at end of file +python3 ../../ixrt_common/export_onnx.py --model-name efficientnet_b1 --output_model 
checkpoints/efficientnet-b1.onnx \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b1/ixrt/common.py b/models/cv/classification/efficientnet_b1/ixrt/common.py deleted file mode 100644 index abdc147c..00000000 --- a/models/cv/classification/efficientnet_b1/ixrt/common.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -import pycuda.driver as cuda - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = 
engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - allocation = cuda.mem_alloc(size) - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/efficientnet_b1/ixrt/export_onnx.py b/models/cv/classification/efficientnet_b1/ixrt/export_onnx.py deleted file mode 100644 index 21b41082..00000000 --- a/models/cv/classification/efficientnet_b1/ixrt/export_onnx.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import torch -import torchvision.models as models -import argparse - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = models.efficientnet_b1(pretrained=True) -model.cuda() -model.eval() -input = torch.randn(1, 3, 224, 224, device='cuda') -export_onnx_file = args.output_model - -torch.onnx.export(model, - input, - export_onnx_file, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names = ['input'], - output_names = ['output'],) -print(" ") -print('Model has been converted to ONNX') -print("exit") -exit() \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b1/ixrt/inference.py b/models/cv/classification/efficientnet_b1/ixrt/inference.py deleted file mode 100644 index 11a90c79..00000000 --- a/models/cv/classification/efficientnet_b1/ixrt/inference.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -import pycuda.autoinit -import pycuda.driver as cuda -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += 
batch_data.shape[0] - - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - e2e_time = end_time - start_time - print(F"E2E time : {e2e_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 
0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/efficientnet_b1/ixrt/modify_batchsize.py b/models/cv/classification/efficientnet_b1/ixrt/modify_batchsize.py deleted file mode 100644 index 4ac42a30..00000000 --- a/models/cv/classification/efficientnet_b1/ixrt/modify_batchsize.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. 
- dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/efficientnet_b1/ixrt/quant.py b/models/cv/classification/efficientnet_b1/ixrt/quant.py deleted file mode 100644 index 8006db24..00000000 --- a/models/cv/classification/efficientnet_b1/ixrt/quant.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import random -import argparse -import numpy as np -from random import shuffle -from tensorrt.deploy import static_quantize - -import torch -import torchvision.datasets -from calibration_dataset import getdataloader - -def setseed(seed=42): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=20) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=224) - args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - -args = parse_args() -setseed(args.seed) -calibration_dataloader = getdataloader(args.dataset_dir, args.step, args.bsz, img_sz=args.imgsz) -static_quantize(args.model, - calibration_dataloader=calibration_dataloader, - save_quant_onnx_path=os.path.join(args.save_dir, f"quantized_{args.model_name}.onnx"), - observer=args.observer, - data_preprocess=lambda x: x[0].to("cuda"), - quant_format="qdq", - disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b1/ixrt/refine_model.py b/models/cv/classification/efficientnet_b1/ixrt/refine_model.py deleted file mode 100644 index 000ee4dc..00000000 --- a/models/cv/classification/efficientnet_b1/ixrt/refine_model.py +++ /dev/null @@ -1,291 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor 
Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import argparse -import dataclasses - -import torch -import onnx - -from refine_utils.matmul_to_gemm_pass import FusedGemmPass -from refine_utils.linear_pass import FusedLinearPass - -from refine_utils.common import * - -def get_constant_input_name_of_operator(graph: Graph, operator: Operator): - const = None - for input in operator.inputs: - if not graph.containe_var(input): - continue - - if not graph.is_leaf_variable(input): - continue - - input_var = graph.get_variable(input) - if input_var.value is not None: - const = input - return const - -class FuseLayerNormPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - find_sequence_subgraph( - graph, - [OP.REDUCE_MEAN, OP.SUB, OP.POW, OP.REDUCE_MEAN, OP.ADD, OP.SQRT, OP.DIV, OP.MUL, OP.ADD], - self.fuse_layer_norm, - strict=False - ) - return graph - - def fuse_layer_norm(self, graph: Graph, pattern: PatternGraph): - # 检查 REDUCE_MEAN 的输入是否和 SUB 的输入是一致的 - if pattern.nodes[0].operator.inputs[0] != pattern.nodes[1].operator.inputs[0]: - return - - # 检查 POW 的输入是否和 DIV 的输入是一致的 - if pattern.nodes[2].operator.inputs[0] != pattern.nodes[6].operator.inputs[0]: - return - - # 检查部分算子的输出是否被多个算子使用 - nodes = pattern.nodes - for node in [nodes[0]] + nodes[2:-1]: - next_ops = graph.get_next_operators(node.operator) - if len(next_ops) > 1: - return - - eps = None - for input in 
nodes[4].operator.inputs: - input_var = graph.get_variable(input) - if input_var.value is not None and graph.is_leaf_variable(input): - eps = to_py_type(input_var.value) - - scale = get_constant_input_name_of_operator(graph, nodes[-2].operator) - bias = get_constant_input_name_of_operator(graph, nodes[-1].operator) - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - bias_var = graph.get_variable(bias) - print(bias_var) - - attributes = { - "axis": nodes[0].operator.attributes.axes, - "epsilon": eps, - } - - - layer_norm_op = self.transform.make_operator( - op_type="LayerNormalization", - inputs=[nodes[0].operator.inputs[0], scale, bias], - outputs=[nodes[-1].operator.outputs[0]], - **attributes - ) - - self.transform.add_operator(layer_norm_op) - -class FusedGeluPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True - ) - return graph - - def fuse_gelu(self, graph: Graph, pattern: PatternGraph): - nodes = pattern.nodes - prev_op = self.transform.get_previous_operators(nodes[0].operator)[0] - next_ops = self.transform.get_next_operators(prev_op) - if len(next_ops) != 2: - return - - if nodes[0].operator not in next_ops or nodes[3].operator not in next_ops: - return - - gelu_op_input = None - for input in nodes[3].operator.inputs: - if input in nodes[0].operator.inputs: - gelu_op_input = input - break - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - gelu_op = self.transform.make_operator( - op_type=OP.GELU, - inputs=[gelu_op_input], - outputs=[nodes[-1].operator.outputs[0]] - ) - self.transform.add_operator(gelu_op) - -@dataclasses.dataclass -class NormalizeAttr(BaseOperatorAttr): - p: float = 2.0 - epsilon: float = 1e-12 - axis: int = 1 - - -@registe_operator(OP.GELU) -class GeluOperator(BaseOperator): - - def 
call( - self, - executor, - operator: Operator, - inputs: List, - attr: NormalizeAttr, - ): - return F.gelu(inputs[0]) - - def convert_onnx_operator( - self, ir_graph: Graph, onnx_graph: onnx.GraphProto, node: onnx.NodeProto - ) -> Operator: - return default_converter(ir_graph, onnx_graph, node, attr_cls=attr.EmptyAttr) - - def quantize( - self, - graph: Graph, - op: Operator, - operator_observer_config: QuantOperatorObserverConfig, - quant_outputs: bool = False, - ): - return quant_single_input_operator(graph, op, operator_observer_config, quant_outputs=quant_outputs) - - - -class ClearUnsedVariables(BasePass): - - def process(self, graph: Graph) -> Graph: - vars = list(graph.variables) - - for var in vars: - if len(graph.get_dst_operators(var)) == 0 and graph.is_leaf_variable(var): - graph.delete_variable(var) - - quant_params = list(graph.quant_parameters.keys()) - for var in quant_params: - if not graph.containe_var(var): - graph.quant_parameters.pop(var) - - return graph - -class FormatLayerNorm(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if "LayerNorm" in op.op_type: - self.format_layer_norm(graph, op) - return graph - - def format_layer_norm(self, graph, operator): - if not hasattr(operator.attributes, "axis"): - return - if isinstance(operator.attributes.axis, (tuple, list)): - operator.attributes.axis = operator.attributes.axis[0] - -class FormatReshape(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if op.op_type == "Reshape": - self.format_reshape(graph, op) - - return graph - - def format_reshape(self, graph, operator): - shape = graph.get_variable(operator.inputs[1]) - shape.value = torch.tensor(shape.value, dtype=torch.int64) - -class FormatScalar(BasePass): - - def process(self, graph: Graph): - for var in graph.variables.values(): - var: Variable - use_ops = graph.get_dst_operators(var) - - if len(use_ops) == 0: - continue - - if use_ops[0].op_type 
not in [OP.MUL, OP.ADD, OP.GATHER]: - continue - - if var.value is not None and var.value.ndim == 0: - var.value = var.value.reshape(1) - print(f"Reshape scalar to tensor for {var.name}.") - - return graph - -class RenamePass(BasePass): - - def process(self, graph:Graph): - - names = [name for name in graph.operators.keys()] - for old_name in names: - new_name = old_name.replace("/", "#") - - graph.rename_operator(old_name, new_name) - - names = [name for name in graph.variables.keys()] - for name in names: - new_name = name.replace("/", ".").replace("Output", "out").replace("output", "out") - - graph.rename_vaiable(name, new_name, - with_variables=True, - with_operator_outputs=True) - - return graph - -def create_pipeline(example_inputs): - return PassSequence( - # FuseLayerNormPass(), - FusedGeluPass(), - - # ClearUnsedVariables(), - # FormatLayerNorm(), - # FormatReshape(), - # FormatScalar(), - # RenamePass() - ) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--onnx_path", type=str) - parser.add_argument("--dst_onnx_path", type=str) - - parser.add_argument("--bsz", type=int, default=8, - help="Batch size") - parser.add_argument("--imgsz", type=int, default=224, - help="Image size") - - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - - example_inputs = torch.randn(args.bsz, 3, args.imgsz, args.imgsz) - - refine_pipline = Pipeline( - create_source(f"{args.onnx_path}", example_inputs=example_inputs), - create_pipeline(example_inputs), - create_target( - f"{args.dst_onnx_path}", - example_inputs=example_inputs, - ) - ) - refine_pipline.run() - - print(f"refine the model, input shape={example_inputs.shape}") diff --git a/models/cv/classification/efficientnet_b1/ixrt/refine_utils/__init__.py b/models/cv/classification/efficientnet_b1/ixrt/refine_utils/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/models/cv/classification/efficientnet_b1/ixrt/refine_utils/common.py 
b/models/cv/classification/efficientnet_b1/ixrt/refine_utils/common.py deleted file mode 100644 index b19dccfc..00000000 --- a/models/cv/classification/efficientnet_b1/ixrt/refine_utils/common.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -from typing import Union, Callable, List - -from tensorrt.deploy.api import * -from tensorrt.deploy.backend.onnx.converter import default_converter -from tensorrt.deploy.backend.torch.executor.operators._operators import to_py_type -from tensorrt.deploy.ir.operator_attr import BaseOperatorAttr, EmptyAttr -from tensorrt.deploy.ir.operator_type import OperatorType as OP -from tensorrt.deploy.ir import operator_attr as attr, Operator, generate_operator_name -from tensorrt.deploy.fusion import BasePass, PatternGraph, build_sequence_graph, GraphMatcher, PassSequence -from tensorrt.deploy.ir import Graph -from tensorrt.deploy.quantizer.quant_operator.base import quant_single_input_operator -from tensorrt.deploy.backend.onnx.converter import convert_onnx_operator - -def find_sequence_subgraph(graph, - pattern: Union[List[str], PatternGraph], - callback: Callable[[Graph, PatternGraph], None], - strict=True): - if isinstance(pattern, List): - pattern = build_sequence_graph(pattern) - - matcher = GraphMatcher(pattern, strict=strict) - return matcher.findall(graph, callback) \ No newline at end of file diff --git 
a/models/cv/classification/efficientnet_b1/ixrt/refine_utils/linear_pass.py b/models/cv/classification/efficientnet_b1/ixrt/refine_utils/linear_pass.py deleted file mode 100644 index bab7e575..00000000 --- a/models/cv/classification/efficientnet_b1/ixrt/refine_utils/linear_pass.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import dataclasses - -from refine_utils.common import * - -# AXB=C, Only for B is initializer - -class FusedLinearPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.MATMUL, OP.ADD], callback=self.to_linear_with_bias, strict=True - ) - find_sequence_subgraph( - graph, pattern=[OP.MATMUL], callback=self.to_linear, strict=True - ) - return graph - - def to_linear_with_bias(self, graph, pattern: PatternGraph): - matmul = pattern.nodes[0] - add = pattern.nodes[1] - if len(add.operator.inputs) != 2: - return - - b_var = graph.get_variable(matmul.operator.inputs[1]) - if not graph.is_leaf_variable(b_var) or b_var.value is None: - return - - if b_var.value.ndim != 2: - return - - bias_var = None - for input in add.operator.inputs: - if input not in matmul.operator.outputs: - bias_var = input - - inputs = matmul.operator.inputs - inputs.append(bias_var) - outputs = add.operator.outputs - - b_var.value = b_var.value.transpose(1, 
0) - b_var.shape[0],b_var.shape[1] = b_var.shape[1],b_var.shape[0] - - hidden_size = b_var.shape[1] - linear_dim = b_var.shape[0] - - attributes = { - "hidden_size": hidden_size, - "linear_dim": linear_dim, - "has_bias": 1, - "act_type":"none" - } - - self.transform.make_operator( - "LinearFP16", - inputs=inputs, - outputs=outputs, - **attributes - ) - - self.transform.delete_operator(add.operator) - self.transform.delete_operator(matmul.operator) - - def to_linear(self, graph, pattern: PatternGraph): - matmul = pattern.nodes[0] - if len(matmul.operator.inputs) != 2: - return - - b_var = graph.get_variable(matmul.operator.inputs[1]) - if not graph.is_leaf_variable(b_var) or b_var.value is None: - return - - if b_var.value.ndim != 2: - return - - attributes = { - "hidden_size": hidden_size, - "linear_dim": linear_dim, - "has_bias": 0, - "act_type": "none" - } - - b_var.value = b_var.value.transpose(1, 0) - b_var.shape[0],b_var.shape[1] = b_var.shape[1], b_var.shape[0] - - hidden_size = b_var.shape[1] - linear_dim = b_var.shape[0] - - op = self.transform.make_operator( - op_type = "LinearFP16", - inputs = pattern.nodes[0].operator.inputs, - outputs=[pattern.nodes[-1].operator.outputs[0]], - **attributes - ) - - self.transform.add_operator(op) - - self.transform.delete_operator(matmul.operator) \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b1/ixrt/refine_utils/matmul_to_gemm_pass.py b/models/cv/classification/efficientnet_b1/ixrt/refine_utils/matmul_to_gemm_pass.py deleted file mode 100644 index 5823c4a5..00000000 --- a/models/cv/classification/efficientnet_b1/ixrt/refine_utils/matmul_to_gemm_pass.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -from refine_utils.common import * - -# -# Common pattern Matmul to Gemm -# -class FusedGemmPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.MATMUL], callback=self.to_gemm, strict=True - ) - return graph - - def to_gemm(self, graph, pattern: PatternGraph): - matmul_op = pattern.nodes[0] - inputs = matmul_op.operator.inputs - outputs = matmul_op.operator.outputs - - if len(inputs)!=2 and len(outputs)!=1: - return - - for input in inputs: - if self.transform.is_leaf_variable(input): - return - - print(f"{self.transform.get_variable(inputs[0]).shape} {self.transform.get_variable(inputs[1]).shape}") - self.transform.delete_operator(matmul_op.operator) - - op = self.transform.make_operator( - op_type = "Gemm", - inputs = inputs, - outputs = outputs, - alpha = 1, - beta = 1, - transB = 1 - ) - - self.transform.add_operator(op) \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b1/ixrt/requirements.txt b/models/cv/classification/efficientnet_b1/ixrt/requirements.txt deleted file mode 100644 index bc645b22..00000000 --- a/models/cv/classification/efficientnet_b1/ixrt/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -tqdm -onnx -onnxsim -tabulate -pycuda \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b1/ixrt/simplify_model.py b/models/cv/classification/efficientnet_b1/ixrt/simplify_model.py deleted file mode 100644 index 4d53a474..00000000 --- 
a/models/cv/classification/efficientnet_b1/ixrt/simplify_model.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/efficientnet_b2/ixrt/README.md b/models/cv/classification/efficientnet_b2/ixrt/README.md index 1a37fb62..e0beff5f 100644 --- a/models/cv/classification/efficientnet_b2/ixrt/README.md +++ b/models/cv/classification/efficientnet_b2/ixrt/README.md @@ -27,19 +27,24 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash -python3 export.py --weight efficientnet_b2_rwightman-c35c1473.pth --output efficientnet_b2.onnx +mkdir checkpoints +python3 
../../ixrt_common/export.py --model-name efficientnet_b2 --weight efficientnet_b2_rwightman-c35c1473.pth --output checkpoints/efficientnet_b2.onnx ``` ## Model Inference ```bash -export DATASETS_DIR=/Path/to/imagenet_val/ +export PROJ_DIR=./ +export DATASETS_DIR=/path/to/imagenet_val/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/EFFICIENTNET_B2_CONFIG ``` ### FP16 diff --git a/models/cv/classification/efficientnet_b2/ixrt/build_engine.py b/models/cv/classification/efficientnet_b2/ixrt/build_engine.py deleted file mode 100644 index 038c15d5..00000000 --- a/models/cv/classification/efficientnet_b2/ixrt/build_engine.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b2/ixrt/calibration_dataset.py b/models/cv/classification/efficientnet_b2/ixrt/calibration_dataset.py deleted file mode 100644 index d7525d51..00000000 --- a/models/cv/classification/efficientnet_b2/ixrt/calibration_dataset.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 
0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b2/ixrt/ci/prepare.sh b/models/cv/classification/efficientnet_b2/ixrt/ci/prepare.sh index 9ec3aff1..a5f6c0c7 100644 --- a/models/cv/classification/efficientnet_b2/ixrt/ci/prepare.sh +++ b/models/cv/classification/efficientnet_b2/ixrt/ci/prepare.sh @@ -25,6 +25,6 @@ else echo "Not Support Os" fi -pip install -r requirements.txt - -python3 export.py --weight /root/data/checkpoints/efficientnet_b2_rwightman-c35c1473.pth --output efficientnet_b2.onnx \ No newline at end of file +pip install -r ../../ixrt_common/requirements.txt +mkdir checkpoints +python3 ../../ixrt_common/export.py --model-name efficientnet_b2 --weight /root/data/checkpoints/efficientnet_b2_rwightman-c35c1473.pth --output checkpoints/efficientnet_b2.onnx \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b2/ixrt/common.py b/models/cv/classification/efficientnet_b2/ixrt/common.py deleted file mode 100644 index 69bc5bd6..00000000 --- a/models/cv/classification/efficientnet_b2/ixrt/common.py 
+++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -from cuda import cuda, cudart - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - err, allocation = cudart.cudaMalloc(size) - assert err == cudart.cudaError_t.cudaSuccess - binding = { - "index": 
i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - "nbytes" : size - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/efficientnet_b2/ixrt/export.py b/models/cv/classification/efficientnet_b2/ixrt/export.py deleted file mode 100644 index 2056b473..00000000 --- a/models/cv/classification/efficientnet_b2/ixrt/export.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import torch -import torchvision -import argparse - -def parse_args(): - parser = argparse.ArgumentParser() - - parser.add_argument("--weight", - type=str, - required=True, - help="pytorch model weight.") - - parser.add_argument("--output", - type=str, - required=True, - help="export onnx model path.") - - args = parser.parse_args() - return args - -def main(): - args = parse_args() - - model = torchvision.models.efficientnet_b2() - model.load_state_dict(torch.load(args.weight)) - model.eval() - - input_names = ['input'] - output_names = ['output'] - dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} - dummy_input = torch.randn(1, 3, 224, 224) - - torch.onnx.export( - model, - dummy_input, - args.output, - input_names = input_names, - dynamic_axes = None, - output_names = output_names, - opset_version=13 - ) - - print("Export onnx model successfully! ") - -if __name__ == "__main__": - main() diff --git a/models/cv/classification/efficientnet_b2/ixrt/inference.py b/models/cv/classification/efficientnet_b2/ixrt/inference.py deleted file mode 100644 index e5a03525..00000000 --- a/models/cv/classification/efficientnet_b2/ixrt/inference.py +++ /dev/null @@ -1,176 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -from cuda import cuda, cudart -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += batch_data.shape[0] - (err,) = 
cudart.cudaMemcpy( - inputs[0]["allocation"], - batch_data, - batch_data.nbytes, - cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, - ) - assert err == cudart.cudaError_t.cudaSuccess - # cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - (err,) = cudart.cudaMemcpy( - output, - outputs[0]["allocation"], - outputs[0]["nbytes"], - cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, - ) - assert err == cudart.cudaError_t.cudaSuccess - # cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - e2e_time = end_time - start_time - print(F"E2E time : {e2e_time:.3f} seconds") - err, = cudart.cudaFree(inputs[0]["allocation"]) - assert err == cudart.cudaError_t.cudaSuccess - err, = cudart.cudaFree(outputs[0]["allocation"]) - assert err == cudart.cudaError_t.cudaSuccess - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, 
help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/efficientnet_b2/ixrt/modify_batchsize.py b/models/cv/classification/efficientnet_b2/ixrt/modify_batchsize.py deleted file mode 100644 index 4ac42a30..00000000 --- a/models/cv/classification/efficientnet_b2/ixrt/modify_batchsize.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. 
- # Add checks as needed. - dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/efficientnet_b2/ixrt/requirements.txt b/models/cv/classification/efficientnet_b2/ixrt/requirements.txt deleted file mode 100644 index e1eda59c..00000000 --- a/models/cv/classification/efficientnet_b2/ixrt/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -tqdm -onnx -onnxsim -tabulate \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b2/ixrt/scripts/infer_efficientnet_b2_fp16_accuracy.sh b/models/cv/classification/efficientnet_b2/ixrt/scripts/infer_efficientnet_b2_fp16_accuracy.sh index 48707017..ba64b248 100644 --- a/models/cv/classification/efficientnet_b2/ixrt/scripts/infer_efficientnet_b2_fp16_accuracy.sh +++ b/models/cv/classification/efficientnet_b2/ixrt/scripts/infer_efficientnet_b2_fp16_accuracy.sh @@ -42,11 +42,6 @@ do esac done -PROJ_DIR=${PROJ_DIR:-"."} -DATASETS_DIR="${DATASETS_DIR}" -CHECKPOINTS_DIR="${PROJ_DIR}" -RUN_DIR="${PROJ_DIR}" -CONFIG_DIR="${PROJ_DIR}/config/EFFICIENTNET_B2_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} diff --git a/models/cv/classification/efficientnet_b2/ixrt/scripts/infer_efficientnet_b2_fp16_performance.sh 
b/models/cv/classification/efficientnet_b2/ixrt/scripts/infer_efficientnet_b2_fp16_performance.sh index cd0ece77..01765155 100644 --- a/models/cv/classification/efficientnet_b2/ixrt/scripts/infer_efficientnet_b2_fp16_performance.sh +++ b/models/cv/classification/efficientnet_b2/ixrt/scripts/infer_efficientnet_b2_fp16_performance.sh @@ -42,11 +42,6 @@ do esac done -PROJ_DIR=${PROJ_DIR:-"."} -DATASETS_DIR="${DATASETS_DIR}" -CHECKPOINTS_DIR="${PROJ_DIR}" -RUN_DIR="${PROJ_DIR}" -CONFIG_DIR="${PROJ_DIR}/config/EFFICIENTNET_B2_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} diff --git a/models/cv/classification/efficientnet_b2/ixrt/simplify_model.py b/models/cv/classification/efficientnet_b2/ixrt/simplify_model.py deleted file mode 100644 index 4d53a474..00000000 --- a/models/cv/classification/efficientnet_b2/ixrt/simplify_model.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/efficientnet_b3/ixrt/README.md b/models/cv/classification/efficientnet_b3/ixrt/README.md index 4e007b81..d885c156 100644 --- a/models/cv/classification/efficientnet_b3/ixrt/README.md +++ b/models/cv/classification/efficientnet_b3/ixrt/README.md @@ -27,19 +27,24 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash -python3 export.py --weight efficientnet_b3_rwightman-b3899882.pth --output efficientnet_b3.onnx +mkdir checkpoints +python3 ../../ixrt_common/export.py --model-name efficientnet_b3 --weight efficientnet_b3_rwightman-b3899882.pth --output checkpoints/efficientnet_b3.onnx ``` ## Model Inference ```bash -export DATASETS_DIR=/Path/to/imagenet_val/ +export PROJ_DIR=./ +export DATASETS_DIR=/path/to/imagenet_val/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/EFFICIENTNET_B3_CONFIG ``` ### FP16 diff --git a/models/cv/classification/efficientnet_b3/ixrt/build_engine.py b/models/cv/classification/efficientnet_b3/ixrt/build_engine.py deleted file mode 100644 index 88a8e8da..00000000 --- a/models/cv/classification/efficientnet_b3/ixrt/build_engine.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) 2024, 
Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.ERROR) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b3/ixrt/calibration_dataset.py 
b/models/cv/classification/efficientnet_b3/ixrt/calibration_dataset.py deleted file mode 100644 index d7525d51..00000000 --- a/models/cv/classification/efficientnet_b3/ixrt/calibration_dataset.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = 
self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b3/ixrt/ci/prepare.sh b/models/cv/classification/efficientnet_b3/ixrt/ci/prepare.sh new file mode 100644 index 00000000..ff2b3636 --- /dev/null +++ b/models/cv/classification/efficientnet_b3/ixrt/ci/prepare.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r ../../ixrt_common/requirements.txt +mkdir checkpoints +python3 ../../ixrt_common/export.py --model-name efficientnet_b3 --weight efficientnet_b3_rwightman-b3899882.pth --output checkpoints/efficientnet_b3.onnx \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b3/ixrt/common.py b/models/cv/classification/efficientnet_b3/ixrt/common.py deleted file mode 100644 index 69bc5bd6..00000000 --- a/models/cv/classification/efficientnet_b3/ixrt/common.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -from cuda import cuda, cudart - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - err, allocation = cudart.cudaMalloc(size) - assert err == cudart.cudaError_t.cudaSuccess - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - "nbytes" : size - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/efficientnet_b3/ixrt/export.py b/models/cv/classification/efficientnet_b3/ixrt/export.py deleted file mode 100644 index bfd120cf..00000000 --- a/models/cv/classification/efficientnet_b3/ixrt/export.py +++ /dev/null @@ -1,61 +0,0 @@ -# 
Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import torch -import torchvision -import argparse - -def parse_args(): - parser = argparse.ArgumentParser() - - parser.add_argument("--weight", - type=str, - required=True, - help="pytorch model weight.") - - parser.add_argument("--output", - type=str, - required=True, - help="export onnx model path.") - - args = parser.parse_args() - return args - -def main(): - args = parse_args() - - model = torchvision.models.efficientnet_b3() - model.load_state_dict(torch.load(args.weight)) - model.eval() - - input_names = ['input'] - output_names = ['output'] - dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} - dummy_input = torch.randn(1, 3, 224, 224) - - torch.onnx.export( - model, - dummy_input, - args.output, - input_names = input_names, - dynamic_axes = None, - output_names = output_names, - opset_version=13 - ) - - print("Export onnx model successfully! ") - -if __name__ == "__main__": - main() diff --git a/models/cv/classification/efficientnet_b3/ixrt/inference.py b/models/cv/classification/efficientnet_b3/ixrt/inference.py deleted file mode 100644 index 4afba6bc..00000000 --- a/models/cv/classification/efficientnet_b3/ixrt/inference.py +++ /dev/null @@ -1,172 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -from cuda import cuda, cudart -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - 
exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += batch_data.shape[0] - (err,) = cudart.cudaMemcpy( - inputs[0]["allocation"], - batch_data, - batch_data.nbytes, - cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, - ) - assert err == cudart.cudaError_t.cudaSuccess - # cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - (err,) = cudart.cudaMemcpy( - output, - outputs[0]["allocation"], - outputs[0]["nbytes"], - cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, - ) - assert err == cudart.cudaError_t.cudaSuccess - # cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - err, = cudart.cudaFree(inputs[0]["allocation"]) - assert err == cudart.cudaError_t.cudaSuccess - err, = cudart.cudaFree(outputs[0]["allocation"]) - assert err == cudart.cudaError_t.cudaSuccess - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - 
parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/efficientnet_b3/ixrt/modify_batchsize.py b/models/cv/classification/efficientnet_b3/ixrt/modify_batchsize.py deleted file mode 100644 index 4ac42a30..00000000 --- a/models/cv/classification/efficientnet_b3/ixrt/modify_batchsize.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. - dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/efficientnet_b3/ixrt/requirements.txt b/models/cv/classification/efficientnet_b3/ixrt/requirements.txt deleted file mode 100644 index e1eda59c..00000000 --- a/models/cv/classification/efficientnet_b3/ixrt/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -tqdm -onnx -onnxsim -tabulate \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b3/ixrt/scripts/infer_efficientnet_b3_fp16_accuracy.sh b/models/cv/classification/efficientnet_b3/ixrt/scripts/infer_efficientnet_b3_fp16_accuracy.sh index ddfe9c6e..ba64b248 100644 --- a/models/cv/classification/efficientnet_b3/ixrt/scripts/infer_efficientnet_b3_fp16_accuracy.sh +++ 
b/models/cv/classification/efficientnet_b3/ixrt/scripts/infer_efficientnet_b3_fp16_accuracy.sh @@ -42,11 +42,6 @@ do esac done -PROJ_DIR=${PROJ_DIR:-"."} -DATASETS_DIR="${DATASETS_DIR}" -CHECKPOINTS_DIR="${PROJ_DIR}" -RUN_DIR="${PROJ_DIR}" -CONFIG_DIR="${PROJ_DIR}/config/EFFICIENTNET_B3_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} diff --git a/models/cv/classification/efficientnet_b3/ixrt/scripts/infer_efficientnet_b3_fp16_performance.sh b/models/cv/classification/efficientnet_b3/ixrt/scripts/infer_efficientnet_b3_fp16_performance.sh index 0817b737..01765155 100644 --- a/models/cv/classification/efficientnet_b3/ixrt/scripts/infer_efficientnet_b3_fp16_performance.sh +++ b/models/cv/classification/efficientnet_b3/ixrt/scripts/infer_efficientnet_b3_fp16_performance.sh @@ -42,11 +42,6 @@ do esac done -PROJ_DIR=${PROJ_DIR:-"."} -DATASETS_DIR="${DATASETS_DIR}" -CHECKPOINTS_DIR="${PROJ_DIR}" -RUN_DIR="${PROJ_DIR}" -CONFIG_DIR="${PROJ_DIR}/config/EFFICIENTNET_B3_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} diff --git a/models/cv/classification/efficientnet_b3/ixrt/simplify_model.py b/models/cv/classification/efficientnet_b3/ixrt/simplify_model.py deleted file mode 100644 index 4d53a474..00000000 --- a/models/cv/classification/efficientnet_b3/ixrt/simplify_model.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/efficientnet_v2/ixrt/README.md b/models/cv/classification/efficientnet_v2/ixrt/README.md index 1e131e4d..f92fce16 100755 --- a/models/cv/classification/efficientnet_v2/ixrt/README.md +++ b/models/cv/classification/efficientnet_v2/ixrt/README.md @@ -16,7 +16,7 @@ incorporates a series of enhancement strategies to further boost performance. ### Prepare Resources -Pretrained model: +Pretrained model: Dataset: to download the validation dataset. 
@@ -29,14 +29,15 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt +pip3 install timm==1.0.11 ``` ### Model Conversion ```bash mkdir checkpoints -git clone -b v1.0.11 https://github.com/huggingface/pytorch-image-models.git +git clone -b v1.0.11 --depth=1 https://github.com/huggingface/pytorch-image-models.git cp ./export_onnx.py pytorch-image-models/timm/models cp ./_builder.py pytorch-image-models/timm/models cd pytorch-image-models/timm @@ -49,12 +50,11 @@ cd ../../ ## Model Inference ```bash -export PROJ_DIR=/Path/to/efficientnet_v2/ixrt +export PROJ_DIR=./ export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints -export RUN_DIR=/Path/to/efficientnet_v2/ixrt -export CONFIG_DIR=/Path/to/config/EFFICIENTNET_V2_CONFIG -export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/EFFICIENTNET_V2_CONFIG ``` ### FP16 diff --git a/models/cv/classification/efficientnet_v2/ixrt/_builder.py b/models/cv/classification/efficientnet_v2/ixrt/_builder.py old mode 100755 new mode 100644 diff --git a/models/cv/classification/efficientnet_v2/ixrt/build_engine.py b/models/cv/classification/efficientnet_v2/ixrt/build_engine.py deleted file mode 100755 index 41e6af8d..00000000 --- a/models/cv/classification/efficientnet_v2/ixrt/build_engine.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt -from calibration_dataset import getdataloader -import cuda.cudart as cudart - -def assertSuccess(err): - assert(err == cudart.cudaError_t.cudaSuccess) - -class EngineCalibrator(tensorrt.IInt8EntropyCalibrator2): - - def __init__(self, cache_file, datasets_dir, loop_count=10, bsz=1, img_sz=224): - super().__init__() - self.cache_file = cache_file - self.image_batcher = getdataloader(datasets_dir, loop_count, batch_size=bsz, img_sz=img_sz) - self.batch_generator = iter(self.image_batcher) - size = img_sz*img_sz*3*bsz - __import__('pdb').set_trace() - err, self.batch_allocation = cudart.cudaMalloc(size) - assertSuccess(err) - - def __del__(self): - err,= cudart.cudaFree(self.batch_allocation) - assertSuccess(err) - - def get_batch_size(self): - return self.image_batcher.batch_size - - def get_batch(self, names): - try: - batch, _ = next(self.batch_generator) - batch = batch.numpy() - __import__('pdb').set_trace() - cudart.cudaMemcpy(self.batch_allocation, - np.ascontiguousarray(batch), - batch.nbytes, - cudart.cudaMemcpyKind.cudaMemcpyHostToDevice) - return [int(self.batch_allocation)] - except StopIteration: - return None - - def read_calibration_cache(self): - if os.path.exists(self.cache_file): - with open(self.cache_file, "rb") as f: - return f.read() - - def write_calibration_cache(self, cache): - with open(self.cache_file, "wb") as f: - f.write(cache) - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.VERBOSE) - builder = 
tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - print("precision : ", precision) - build_config.set_flag(precision) - if config.precision == "int8": - build_config.int8_calibrator = EngineCalibrator("int8_cache", config.datasets_dir) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/efficientnet_v2/ixrt/build_i8_engine.py b/models/cv/classification/efficientnet_v2/ixrt/build_i8_engine.py deleted file mode 100644 index 6e356260..00000000 --- a/models/cv/classification/efficientnet_v2/ixrt/build_i8_engine.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import argparse -import json -import os - -import tensorrt -import tensorrt as trt - -TRT_LOGGER = trt.Logger(tensorrt.Logger.VERBOSE) - -EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - - -def GiB(val): - return val * 1 << 30 - - -def json_load(filename): - with open(filename) as json_file: - data = json.load(json_file) - return data - - -def setDynamicRange(network, json_file): - """Sets ranges for network layers.""" - quant_param_json = json_load(json_file) - act_quant = quant_param_json["act_quant_info"] - - for i in range(network.num_inputs): - input_tensor = network.get_input(i) - if act_quant.__contains__(input_tensor.name): - print(input_tensor.name) - value = act_quant[input_tensor.name] - tensor_max = abs(value) - tensor_min = -abs(value) - input_tensor.dynamic_range = (tensor_min, tensor_max) - - for i in range(network.num_layers): - layer = network.get_layer(i) - - for output_index in range(layer.num_outputs): - tensor = layer.get_output(output_index) - - if act_quant.__contains__(tensor.name): - value = act_quant[tensor.name] - tensor_max = abs(value) - tensor_min = -abs(value) - tensor.dynamic_range = (tensor_min, tensor_max) - else: - print("\033[1;32m%s\033[0m" % tensor.name) - - -def build_engine(onnx_file, json_file, engine_file): - builder = trt.Builder(TRT_LOGGER) - network = builder.create_network(EXPLICIT_BATCH) - - config = builder.create_builder_config() - - # If it is a dynamic onnx model , you need to add the following. 
- # profile = builder.create_optimization_profile() - # profile.set_shape("input_name", (batch, channels, min_h, min_w), (batch, channels, opt_h, opt_w), (batch, channels, max_h, max_w)) - # config.add_optimization_profile(profile) - - parser = trt.OnnxParser(network, TRT_LOGGER) - # config.max_workspace_size = GiB(1) - if not os.path.exists(onnx_file): - quit("ONNX file {} not found".format(onnx_file)) - - with open(onnx_file, "rb") as model: - if not parser.parse(model.read()): - print("ERROR: Failed to parse the ONNX file.") - for error in range(parser.num_errors): - print(parser.get_error(error)) - return None - - config.set_flag(trt.BuilderFlag.INT8) - - setDynamicRange(network, json_file) - - engine = builder.build_engine(network, config) - - with open(engine_file, "wb") as f: - f.write(engine.serialize()) - - -if __name__ == "__main__": - # Add plugins if needed - # import ctypes - # ctypes.CDLL("libmmdeploy_tensorrt_ops.so") - parser = argparse.ArgumentParser( - description="Writing qparams to onnx to convert tensorrt engine." - ) - parser.add_argument("--onnx", type=str, default=None) - parser.add_argument("--qparam_json", type=str, default=None) - parser.add_argument("--engine", type=str, default=None) - arg = parser.parse_args() - - build_engine(arg.onnx, arg.qparam_json, arg.engine) - print("\033[1;32mgenerate %s\033[0m" % arg.engine) \ No newline at end of file diff --git a/models/cv/classification/efficientnet_v2/ixrt/calibration_dataset.py b/models/cv/classification/efficientnet_v2/ixrt/calibration_dataset.py deleted file mode 100644 index d7525d51..00000000 --- a/models/cv/classification/efficientnet_v2/ixrt/calibration_dataset.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 
0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/efficientnet_v2/ixrt/ci/prepare.sh b/models/cv/classification/efficientnet_v2/ixrt/ci/prepare.sh index 89b3d7e0..d4d74132 100644 --- a/models/cv/classification/efficientnet_v2/ixrt/ci/prepare.sh +++ b/models/cv/classification/efficientnet_v2/ixrt/ci/prepare.sh @@ -25,13 +25,14 @@ else echo "Not Support Os" fi -pip install -r requirements.txt -mkdir -p checkpoints -unzip /root/data/repos/pytorch-image-models-a852318b636a8.zip -d ./ +pip install -r ../../ixrt_common/requirements.txt +pip3 install timm==1.0.11 +mkdir checkpoints +git clone -b v1.0.11 --depth=1 https://github.com/huggingface/pytorch-image-models.git cp ./export_onnx.py pytorch-image-models/timm/models -rm pytorch-image-models/timm/models/_builder.py -mv ./_builder.py pytorch-image-models/timm/models +cp ./_builder.py pytorch-image-models/timm/models +cd pytorch-image-models/timm mkdir -p /root/.cache/torch/hub/checkpoints/ -ln -s /root/data/checkpoints/efficientnet_v2.pth 
/root/.cache/torch/hub/checkpoints/efficientnetv2_t_agc-3620981a.pth -cd pytorch-image-models/timm/ && python3 -m models.export_onnx --output_model ../../checkpoints/efficientnet_v2.onnx -export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python \ No newline at end of file +ln -s /root/data/checkpoints/efficientnetv2_t_agc-3620981a.pth /root/.cache/torch/hub/checkpoints/ +python3 -m models.export_onnx --output_model ../../checkpoints/efficientnet_v2.onnx +cd ../../ \ No newline at end of file diff --git a/models/cv/classification/efficientnet_v2/ixrt/common.py b/models/cv/classification/efficientnet_v2/ixrt/common.py deleted file mode 100644 index abdc147c..00000000 --- a/models/cv/classification/efficientnet_v2/ixrt/common.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -import pycuda.driver as cuda - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - allocation = cuda.mem_alloc(size) - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/efficientnet_v2/ixrt/export_onnx.py b/models/cv/classification/efficientnet_v2/ixrt/export_onnx.py old mode 100755 new mode 100644 diff --git a/models/cv/classification/efficientnet_v2/ixrt/inference.py b/models/cv/classification/efficientnet_v2/ixrt/inference.py deleted file mode 100644 index 
fcca27a3..00000000 --- a/models/cv/classification/efficientnet_v2/ixrt/inference.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -import pycuda.autoinit -import pycuda.driver as cuda -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < 
num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += batch_data.shape[0] - - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - end2end_time = end_time - start_time - - print(F"E2E time : {end2end_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - 
type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/efficientnet_v2/ixrt/modify_batchsize.py b/models/cv/classification/efficientnet_v2/ixrt/modify_batchsize.py deleted file mode 100644 index 4ac42a30..00000000 --- a/models/cv/classification/efficientnet_v2/ixrt/modify_batchsize.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. - dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/efficientnet_v2/ixrt/quant.py b/models/cv/classification/efficientnet_v2/ixrt/quant.py deleted file mode 100644 index 6c06eba2..00000000 --- a/models/cv/classification/efficientnet_v2/ixrt/quant.py +++ /dev/null @@ -1,167 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -"""这是一个高度自动化的 PPQ 量化的入口脚本,将你的模型和数据按要求进行打包: - -在自动化 API 中,我们使用 QuantizationSetting 对象传递量化参数。 - -This file will show you how to quantize your network with PPQ - You should prepare your model and calibration dataset as follow: - - ~/working/model.onnx <-- your model - ~/working/data/*.npy or ~/working/data/*.bin <-- your dataset - -if you are using caffe model: - ~/working/model.caffemdoel <-- your model - ~/working/model.prototext <-- your model - -### MAKE SURE YOUR INPUT LAYOUT IS [N, C, H, W] or [C, H, W] ### - -quantized model will be generated at: ~/working/quantized.onnx -""" -from ppq import * -from ppq.api import * -import os -from calibration_dataset import getdataloader -import argparse -import random -import numpy as np -import torch - -def setseed(seed=42): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], - default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=20) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=288) 
- args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - - -config = parse_args() - -# modify configuration below: -WORKING_DIRECTORY = 'checkpoints' # choose your working directory -TARGET_PLATFORM = TargetPlatform.TRT_INT8 # choose your target platform -MODEL_TYPE = NetworkFramework.ONNX # or NetworkFramework.CAFFE -INPUT_LAYOUT = 'chw' # input data layout, chw or hwc -NETWORK_INPUTSHAPE = [32, 3, 288, 288] # input shape of your network -EXECUTING_DEVICE = 'cuda' # 'cuda' or 'cpu'. -REQUIRE_ANALYSE = False -TRAINING_YOUR_NETWORK = False # 是否需要 Finetuning 一下你的网络 -# ------------------------------------------------------------------- -# 加载你的模型文件,PPQ 将会把 onnx 或者 caffe 模型文件解析成自己的格式 -# 如果你正使用 pytorch, tensorflow 等框架,你可以先将模型导出成 onnx -# 使用 torch.onnx.export 即可,如果你在导出 torch 模型时发生错误,欢迎与我们联系。 -# ------------------------------------------------------------------- -graph = None -if MODEL_TYPE == NetworkFramework.ONNX: - graph = load_onnx_graph(onnx_import_file=config.model) -if MODEL_TYPE == NetworkFramework.CAFFE: - graph = load_caffe_graph( - caffemodel_path=os.path.join(WORKING_DIRECTORY, 'model.caffemodel'), - prototxt_path=os.path.join(WORKING_DIRECTORY, 'model.prototxt')) -assert graph is not None, 'Graph Loading Error, Check your input again.' 
- -# ------------------------------------------------------------------- -# SETTING 对象用于控制 PPQ 的量化逻辑,主要描述了图融合逻辑、调度方案、量化细节策略等 -# 当你的网络量化误差过高时,你需要修改 SETTING 对象中的属性来进行特定的优化 -# ------------------------------------------------------------------- -QS = QuantizationSettingFactory.default_setting() - -# ------------------------------------------------------------------- -# 下面向你展示了如何使用 finetuning 过程提升量化精度 -# 在 PPQ 中我们提供了十余种算法用来帮助你恢复精度 -# 开启他们的方式都是 QS.xxxx = True -# 按需使用,不要全部打开,容易起飞 -# ------------------------------------------------------------------- -if TRAINING_YOUR_NETWORK: - QS.lsq_optimization = True # 启动网络再训练过程,降低量化误差 - QS.lsq_optimization_setting.steps = 500 # 再训练步数,影响训练时间,500 步大概几分钟 - QS.lsq_optimization_setting.collecting_device = 'cuda' # 缓存数据放在那,cuda 就是放在gpu,如果显存超了你就换成 'cpu' - - - -dataloader = getdataloader(config.dataset_dir, config.step, config.bsz, img_sz=config.imgsz) -# ENABLE CUDA KERNEL 会加速量化效率 3x ~ 10x,但是你如果没有装相应编译环境的话是编译不了的 -# 你可以尝试安装编译环境,或者在不启动 CUDA KERNEL 的情况下完成量化:移除 with ENABLE_CUDA_KERNEL(): 即可 -with ENABLE_CUDA_KERNEL(): - print('网络正量化中,根据你的量化配置,这将需要一段时间:') - quantized = quantize_native_model( - setting=QS, # setting 对象用来控制标准量化逻辑 - model=graph, - calib_dataloader=dataloader, - calib_steps=config.step, - input_shape=NETWORK_INPUTSHAPE, # 如果你的网络只有一个输入,使用这个参数传参 - inputs=None, - # 如果你的网络有多个输入,使用这个参数传参,就是 input_shape=None, inputs=[torch.zeros(1,3,224,224), torch.zeros(1,3,224,224)] - collate_fn=lambda x: x[0].to(EXECUTING_DEVICE), # collate_fn 跟 torch dataloader 的 collate fn 是一样的,用于数据预处理, - # 你当然也可以用 torch dataloader 的那个,然后设置这个为 None - platform=TARGET_PLATFORM, - device=EXECUTING_DEVICE, - do_quantize=True) - - # ------------------------------------------------------------------- - # 如果你需要执行量化后的神经网络并得到结果,则需要创建一个 executor - # 这个 executor 的行为和 torch.Module 是类似的,你可以利用这个东西来获取执行结果 - # 请注意,必须在 export 之前执行此操作。 - # ------------------------------------------------------------------- - executor = TorchExecutor(graph=quantized, device=EXECUTING_DEVICE) - # output 
= executor.forward(input) - - # ------------------------------------------------------------------- - # PPQ 计算量化误差时,使用信噪比的倒数作为指标,即噪声能量 / 信号能量 - # 量化误差 0.1 表示在整体信号中,量化噪声的能量约为 10% - # 你应当注意,在 graphwise_error_analyse 分析中,我们衡量的是累计误差 - # 网络的最后一层往往都具有较大的累计误差,这些误差是其前面的所有层所共同造成的 - # 你需要使用 layerwise_error_analyse 逐层分析误差的来源 - # ------------------------------------------------------------------- - print('正计算网络量化误差(SNR),最后一层的误差应小于 0.1 以保证量化精度:') - reports = graphwise_error_analyse( - graph=quantized, running_device=EXECUTING_DEVICE, steps=32, - dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE)) - for op, snr in reports.items(): - if snr > 0.1: ppq_warning(f'层 {op} 的累计量化误差显著,请考虑进行优化') - - if REQUIRE_ANALYSE: - print('正计算逐层量化误差(SNR),每一层的独立量化误差应小于 0.1 以保证量化精度:') - layerwise_error_analyse(graph=quantized, running_device=EXECUTING_DEVICE, - interested_outputs=None, - dataloader=dataloader, collate_fn=lambda x: x.to(EXECUTING_DEVICE)) - - # ------------------------------------------------------------------- - # 使用 export_ppq_graph 函数来导出量化后的模型 - # PPQ 会根据你所选择的导出平台来修改模型格式 - # ------------------------------------------------------------------- - print('网络量化结束,正在生成目标文件:') - export_ppq_graph( - graph=quantized, platform=TARGET_PLATFORM, - graph_save_to=os.path.join(config.save_dir, f"quantized_{config.model_name}.onnx"), - config_save_to=os.path.join(config.save_dir, 'quant_cfg.json')) \ No newline at end of file diff --git a/models/cv/classification/efficientnet_v2/ixrt/refine_model.py b/models/cv/classification/efficientnet_v2/ixrt/refine_model.py deleted file mode 100644 index 000ee4dc..00000000 --- a/models/cv/classification/efficientnet_v2/ixrt/refine_model.py +++ /dev/null @@ -1,291 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import argparse -import dataclasses - -import torch -import onnx - -from refine_utils.matmul_to_gemm_pass import FusedGemmPass -from refine_utils.linear_pass import FusedLinearPass - -from refine_utils.common import * - -def get_constant_input_name_of_operator(graph: Graph, operator: Operator): - const = None - for input in operator.inputs: - if not graph.containe_var(input): - continue - - if not graph.is_leaf_variable(input): - continue - - input_var = graph.get_variable(input) - if input_var.value is not None: - const = input - return const - -class FuseLayerNormPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - find_sequence_subgraph( - graph, - [OP.REDUCE_MEAN, OP.SUB, OP.POW, OP.REDUCE_MEAN, OP.ADD, OP.SQRT, OP.DIV, OP.MUL, OP.ADD], - self.fuse_layer_norm, - strict=False - ) - return graph - - def fuse_layer_norm(self, graph: Graph, pattern: PatternGraph): - # 检查 REDUCE_MEAN 的输入是否和 SUB 的输入是一致的 - if pattern.nodes[0].operator.inputs[0] != pattern.nodes[1].operator.inputs[0]: - return - - # 检查 POW 的输入是否和 DIV 的输入是一致的 - if pattern.nodes[2].operator.inputs[0] != pattern.nodes[6].operator.inputs[0]: - return - - # 检查部分算子的输出是否被多个算子使用 - nodes = pattern.nodes - for node in [nodes[0]] + nodes[2:-1]: - next_ops = graph.get_next_operators(node.operator) - if len(next_ops) > 1: - return - - eps = None - for input in nodes[4].operator.inputs: - input_var = graph.get_variable(input) - if input_var.value is not None and graph.is_leaf_variable(input): - eps = to_py_type(input_var.value) - - scale = 
get_constant_input_name_of_operator(graph, nodes[-2].operator) - bias = get_constant_input_name_of_operator(graph, nodes[-1].operator) - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - bias_var = graph.get_variable(bias) - print(bias_var) - - attributes = { - "axis": nodes[0].operator.attributes.axes, - "epsilon": eps, - } - - - layer_norm_op = self.transform.make_operator( - op_type="LayerNormalization", - inputs=[nodes[0].operator.inputs[0], scale, bias], - outputs=[nodes[-1].operator.outputs[0]], - **attributes - ) - - self.transform.add_operator(layer_norm_op) - -class FusedGeluPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True - ) - return graph - - def fuse_gelu(self, graph: Graph, pattern: PatternGraph): - nodes = pattern.nodes - prev_op = self.transform.get_previous_operators(nodes[0].operator)[0] - next_ops = self.transform.get_next_operators(prev_op) - if len(next_ops) != 2: - return - - if nodes[0].operator not in next_ops or nodes[3].operator not in next_ops: - return - - gelu_op_input = None - for input in nodes[3].operator.inputs: - if input in nodes[0].operator.inputs: - gelu_op_input = input - break - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - gelu_op = self.transform.make_operator( - op_type=OP.GELU, - inputs=[gelu_op_input], - outputs=[nodes[-1].operator.outputs[0]] - ) - self.transform.add_operator(gelu_op) - -@dataclasses.dataclass -class NormalizeAttr(BaseOperatorAttr): - p: float = 2.0 - epsilon: float = 1e-12 - axis: int = 1 - - -@registe_operator(OP.GELU) -class GeluOperator(BaseOperator): - - def call( - self, - executor, - operator: Operator, - inputs: List, - attr: NormalizeAttr, - ): - return F.gelu(inputs[0]) - - def convert_onnx_operator( - self, ir_graph: Graph, 
onnx_graph: onnx.GraphProto, node: onnx.NodeProto - ) -> Operator: - return default_converter(ir_graph, onnx_graph, node, attr_cls=attr.EmptyAttr) - - def quantize( - self, - graph: Graph, - op: Operator, - operator_observer_config: QuantOperatorObserverConfig, - quant_outputs: bool = False, - ): - return quant_single_input_operator(graph, op, operator_observer_config, quant_outputs=quant_outputs) - - - -class ClearUnsedVariables(BasePass): - - def process(self, graph: Graph) -> Graph: - vars = list(graph.variables) - - for var in vars: - if len(graph.get_dst_operators(var)) == 0 and graph.is_leaf_variable(var): - graph.delete_variable(var) - - quant_params = list(graph.quant_parameters.keys()) - for var in quant_params: - if not graph.containe_var(var): - graph.quant_parameters.pop(var) - - return graph - -class FormatLayerNorm(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if "LayerNorm" in op.op_type: - self.format_layer_norm(graph, op) - return graph - - def format_layer_norm(self, graph, operator): - if not hasattr(operator.attributes, "axis"): - return - if isinstance(operator.attributes.axis, (tuple, list)): - operator.attributes.axis = operator.attributes.axis[0] - -class FormatReshape(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if op.op_type == "Reshape": - self.format_reshape(graph, op) - - return graph - - def format_reshape(self, graph, operator): - shape = graph.get_variable(operator.inputs[1]) - shape.value = torch.tensor(shape.value, dtype=torch.int64) - -class FormatScalar(BasePass): - - def process(self, graph: Graph): - for var in graph.variables.values(): - var: Variable - use_ops = graph.get_dst_operators(var) - - if len(use_ops) == 0: - continue - - if use_ops[0].op_type not in [OP.MUL, OP.ADD, OP.GATHER]: - continue - - if var.value is not None and var.value.ndim == 0: - var.value = var.value.reshape(1) - print(f"Reshape scalar to tensor for 
{var.name}.") - - return graph - -class RenamePass(BasePass): - - def process(self, graph:Graph): - - names = [name for name in graph.operators.keys()] - for old_name in names: - new_name = old_name.replace("/", "#") - - graph.rename_operator(old_name, new_name) - - names = [name for name in graph.variables.keys()] - for name in names: - new_name = name.replace("/", ".").replace("Output", "out").replace("output", "out") - - graph.rename_vaiable(name, new_name, - with_variables=True, - with_operator_outputs=True) - - return graph - -def create_pipeline(example_inputs): - return PassSequence( - # FuseLayerNormPass(), - FusedGeluPass(), - - # ClearUnsedVariables(), - # FormatLayerNorm(), - # FormatReshape(), - # FormatScalar(), - # RenamePass() - ) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--onnx_path", type=str) - parser.add_argument("--dst_onnx_path", type=str) - - parser.add_argument("--bsz", type=int, default=8, - help="Batch size") - parser.add_argument("--imgsz", type=int, default=224, - help="Image size") - - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - - example_inputs = torch.randn(args.bsz, 3, args.imgsz, args.imgsz) - - refine_pipline = Pipeline( - create_source(f"{args.onnx_path}", example_inputs=example_inputs), - create_pipeline(example_inputs), - create_target( - f"{args.dst_onnx_path}", - example_inputs=example_inputs, - ) - ) - refine_pipline.run() - - print(f"refine the model, input shape={example_inputs.shape}") diff --git a/models/cv/classification/efficientnet_v2/ixrt/requirements.txt b/models/cv/classification/efficientnet_v2/ixrt/requirements.txt deleted file mode 100644 index e0bd6846..00000000 --- a/models/cv/classification/efficientnet_v2/ixrt/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -tqdm -onnx -onnxsim -tabulate -timm==1.0.11 -ppq -pycuda -protobuf==3.20.0 \ No newline at end of file diff --git 
a/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_v2_int8_accuracy.sh b/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_v2_int8_accuracy.sh index a58f44b3..edaf9f30 100644 --- a/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_v2_int8_accuracy.sh +++ b/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_v2_int8_accuracy.sh @@ -45,7 +45,6 @@ do done source ${CONFIG_DIR} -echo ${QUANT_OBSERVER} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} @@ -63,15 +62,16 @@ SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx # Simplify Model let step++ - echo [STEP ${step}] : Simplify Model - if [ -f ${SIM_MODEL} ];then - echo " "Simplify Model, ${SIM_MODEL} has been existed - else - python3 ${RUN_DIR}/simplify_model.py \ - --origin_model $ORIGINE_MODEL \ - --output_model ${SIM_MODEL} - echo " "Generate ${SIM_MODEL} - fi +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi # Quant Model if [ $PRECISION == "int8" ];then @@ -101,36 +101,36 @@ if [ $PRECISION == "int8" ];then fi fi - # Change Batchsize - let step++ - echo; - echo [STEP ${step}] : Change Batchsize - FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx - if [ -f $FINAL_MODEL ];then - echo " "Change Batchsize Skip, $FINAL_MODEL has been existed - else - python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ - --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} - echo " "Generate ${FINAL_MODEL} - fi +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + 
python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi - # Build Engine - let step++ - echo; - echo [STEP ${step}] : Build Engine - ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine - if [ -f $ENGINE_FILE ];then - echo " "Build Engine Skip, $ENGINE_FILE has been existed - else - python3 ${RUN_DIR}/build_i8_engine.py \ - --onnx ${FINAL_MODEL} \ - --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \ - --engine ${ENGINE_FILE} - echo " "Generate Engine ${ENGINE_FILE} - fi +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi # Inference -# let step++ +let step++ echo; echo [STEP ${step}] : Inference python3 ${RUN_DIR}/inference.py \ diff --git a/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_v2_int8_performance.sh b/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_v2_int8_performance.sh index 07872405..c526b81a 100644 --- a/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_v2_int8_performance.sh +++ b/models/cv/classification/efficientnet_v2/ixrt/scripts/infer_efficientnet_v2_int8_performance.sh @@ -44,7 +44,6 @@ do done source ${CONFIG_DIR} -echo ${QUANT_OBSERVER} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} @@ -62,15 +61,16 @@ SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx # Simplify Model let step++ - echo [STEP ${step}] : Simplify Model - if [ -f ${SIM_MODEL} ];then - echo " "Simplify Model, ${SIM_MODEL} has been existed - else - python3 ${RUN_DIR}/simplify_model.py \ - 
--origin_model $ORIGINE_MODEL \ - --output_model ${SIM_MODEL} - echo " "Generate ${SIM_MODEL} - fi +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi # Quant Model if [ $PRECISION == "int8" ];then @@ -100,36 +100,36 @@ if [ $PRECISION == "int8" ];then fi fi - # Change Batchsize - let step++ - echo; - echo [STEP ${step}] : Change Batchsize - FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx - if [ -f $FINAL_MODEL ];then - echo " "Change Batchsize Skip, $FINAL_MODEL has been existed - else - python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ - --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} - echo " "Generate ${FINAL_MODEL} - fi +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi - # Build Engine - let step++ - echo; - echo [STEP ${step}] : Build Engine - ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine - if [ -f $ENGINE_FILE ];then - echo " "Build Engine Skip, $ENGINE_FILE has been existed - else - python3 ${RUN_DIR}/build_i8_engine.py \ - --onnx ${FINAL_MODEL} \ - --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \ - --engine ${ENGINE_FILE} - echo " "Generate Engine ${ENGINE_FILE} - fi +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 
${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi # Inference -# let step++ +let step++ echo; echo [STEP ${step}] : Inference python3 ${RUN_DIR}/inference.py \ @@ -139,7 +139,7 @@ python3 ${RUN_DIR}/inference.py \ --warm_up=${WARM_UP} \ --loop_count ${LOOP_COUNT} \ --test_mode ${RUN_MODE} \ - --acc_target ${TGT} \ + --fps_target ${TGT} \ --bsz ${BSZ}; check_status exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/efficientnet_v2/ixrt/simplify_model.py b/models/cv/classification/efficientnet_v2/ixrt/simplify_model.py deleted file mode 100644 index 4d53a474..00000000 --- a/models/cv/classification/efficientnet_v2/ixrt/simplify_model.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/efficientnet_v2_s/ixrt/README.md b/models/cv/classification/efficientnet_v2_s/ixrt/README.md index a31e481d..6932d3a1 100644 --- a/models/cv/classification/efficientnet_v2_s/ixrt/README.md +++ b/models/cv/classification/efficientnet_v2_s/ixrt/README.md @@ -21,19 +21,24 @@ Dataset: to download the validation dat ### Install Dependencies ```bash -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash -python3 export.py --weight efficientnet_v2_s-dd5fe13b.pth --output efficientnet_v2_s.onnx +mkdir checkpoints +python3 ../../ixrt_common/export.py --model-name efficientnet_v2_s --weight efficientnet_v2_s-dd5fe13b.pth --output checkpoints/efficientnet_v2_s.onnx ``` ## Model Inference ```bash -export DATASETS_DIR=/Path/to/imagenet_val/ +export PROJ_DIR=./ +export DATASETS_DIR=/path/to/imagenet_val/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/EFFICIENTNET_V2_S_CONFIG ``` ### FP16 diff --git a/models/cv/classification/efficientnet_v2_s/ixrt/build_engine.py b/models/cv/classification/efficientnet_v2_s/ixrt/build_engine.py deleted file mode 100644 index 038c15d5..00000000 --- a/models/cv/classification/efficientnet_v2_s/ixrt/build_engine.py +++ /dev/null @@ -1,52 +0,0 @@ -# 
Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/efficientnet_v2_s/ixrt/calibration_dataset.py 
b/models/cv/classification/efficientnet_v2_s/ixrt/calibration_dataset.py deleted file mode 100644 index d7525d51..00000000 --- a/models/cv/classification/efficientnet_v2_s/ixrt/calibration_dataset.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = 
self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/efficientnet_v2_s/ixrt/ci/prepare.sh b/models/cv/classification/efficientnet_v2_s/ixrt/ci/prepare.sh new file mode 100644 index 00000000..05e30e1e --- /dev/null +++ b/models/cv/classification/efficientnet_v2_s/ixrt/ci/prepare.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r ../../ixrt_common/requirements.txt +mkdir checkpoints +python3 ../../ixrt_common/export.py --model-name efficientnet_v2_s --weight efficientnet_v2_s-dd5fe13b.pth --output checkpoints/efficientnet_v2_s.onnx \ No newline at end of file diff --git a/models/cv/classification/efficientnet_v2_s/ixrt/common.py b/models/cv/classification/efficientnet_v2_s/ixrt/common.py deleted file mode 100644 index 2279dc0c..00000000 --- a/models/cv/classification/efficientnet_v2_s/ixrt/common.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -from cuda import cuda, cudart - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - err, allocation = cudart.cudaMalloc(size) - assert err == cudart.cudaError_t.cudaSuccess - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - "nbytes": size, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/efficientnet_v2_s/ixrt/export.py b/models/cv/classification/efficientnet_v2_s/ixrt/export.py deleted file mode 100644 index 1748e69d..00000000 --- a/models/cv/classification/efficientnet_v2_s/ixrt/export.py +++ /dev/null @@ -1,61 +0,0 @@ 
-# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import torch -import torchvision -import argparse - -def parse_args(): - parser = argparse.ArgumentParser() - - parser.add_argument("--weight", - type=str, - required=True, - help="pytorch model weight.") - - parser.add_argument("--output", - type=str, - required=True, - help="export onnx model path.") - - args = parser.parse_args() - return args - -def main(): - args = parse_args() - - model = torchvision.models.efficientnet_v2_s() - model.load_state_dict(torch.load(args.weight)) - model.eval() - - input_names = ['input'] - output_names = ['output'] - dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} - dummy_input = torch.randn(32, 3, 224, 224) - - torch.onnx.export( - model, - dummy_input, - args.output, - input_names = input_names, - dynamic_axes = None, - output_names = output_names, - opset_version=13 - ) - - print("Export onnx model successfully! ") - -if __name__ == "__main__": - main() diff --git a/models/cv/classification/efficientnet_v2_s/ixrt/inference.py b/models/cv/classification/efficientnet_v2_s/ixrt/inference.py deleted file mode 100644 index 4afba6bc..00000000 --- a/models/cv/classification/efficientnet_v2_s/ixrt/inference.py +++ /dev/null @@ -1,172 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -from cuda import cuda, cudart -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - 
exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += batch_data.shape[0] - (err,) = cudart.cudaMemcpy( - inputs[0]["allocation"], - batch_data, - batch_data.nbytes, - cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, - ) - assert err == cudart.cudaError_t.cudaSuccess - # cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - (err,) = cudart.cudaMemcpy( - output, - outputs[0]["allocation"], - outputs[0]["nbytes"], - cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, - ) - assert err == cudart.cudaError_t.cudaSuccess - # cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - err, = cudart.cudaFree(inputs[0]["allocation"]) - assert err == cudart.cudaError_t.cudaSuccess - err, = cudart.cudaFree(outputs[0]["allocation"]) - assert err == cudart.cudaError_t.cudaSuccess - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - 
parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/efficientnet_v2_s/ixrt/modify_batchsize.py b/models/cv/classification/efficientnet_v2_s/ixrt/modify_batchsize.py deleted file mode 100644 index 4ac42a30..00000000 --- a/models/cv/classification/efficientnet_v2_s/ixrt/modify_batchsize.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. - dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/efficientnet_v2_s/ixrt/requirements.txt b/models/cv/classification/efficientnet_v2_s/ixrt/requirements.txt deleted file mode 100644 index 9e811126..00000000 --- a/models/cv/classification/efficientnet_v2_s/ixrt/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -onnx -tqdm diff --git a/models/cv/classification/efficientnet_v2_s/ixrt/scripts/infer_efficientnet_v2_s_fp16_accuracy.sh b/models/cv/classification/efficientnet_v2_s/ixrt/scripts/infer_efficientnet_v2_s_fp16_accuracy.sh index def36659..ba64b248 100644 --- a/models/cv/classification/efficientnet_v2_s/ixrt/scripts/infer_efficientnet_v2_s_fp16_accuracy.sh +++ b/models/cv/classification/efficientnet_v2_s/ixrt/scripts/infer_efficientnet_v2_s_fp16_accuracy.sh @@ -42,11 +42,6 @@ do 
esac done -PROJ_DIR=${PROJ_DIR:-"."} -DATASETS_DIR="${DATASETS_DIR}" -CHECKPOINTS_DIR="${PROJ_DIR}" -RUN_DIR="${PROJ_DIR}" -CONFIG_DIR="${PROJ_DIR}/config/EFFICIENTNETV2_S_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} diff --git a/models/cv/classification/efficientnet_v2_s/ixrt/scripts/infer_efficientnet_v2_s_performance.sh b/models/cv/classification/efficientnet_v2_s/ixrt/scripts/infer_efficientnet_v2_s_fp16_performance.sh similarity index 94% rename from models/cv/classification/efficientnet_v2_s/ixrt/scripts/infer_efficientnet_v2_s_performance.sh rename to models/cv/classification/efficientnet_v2_s/ixrt/scripts/infer_efficientnet_v2_s_fp16_performance.sh index 2d4ab425..dc2dde87 100644 --- a/models/cv/classification/efficientnet_v2_s/ixrt/scripts/infer_efficientnet_v2_s_performance.sh +++ b/models/cv/classification/efficientnet_v2_s/ixrt/scripts/infer_efficientnet_v2_s_fp16_performance.sh @@ -42,11 +42,6 @@ do esac done -PROJ_DIR=${PROJ_DIR:-"."} -DATASETS_DIR="${DATASETS_DIR}" -CHECKPOINTS_DIR="${PROJ_DIR}" -RUN_DIR="${PROJ_DIR}" -CONFIG_DIR="${PROJ_DIR}/config/EFFICIENTNETV2_S_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} diff --git a/models/cv/classification/efficientnet_v2_s/ixrt/simplify_model.py b/models/cv/classification/efficientnet_v2_s/ixrt/simplify_model.py deleted file mode 100644 index 4d53a474..00000000 --- a/models/cv/classification/efficientnet_v2_s/ixrt/simplify_model.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/efficientnetv2_rw_t/ixrt/README.md b/models/cv/classification/efficientnetv2_rw_t/ixrt/README.md index 9448e132..b65145f4 100644 --- a/models/cv/classification/efficientnetv2_rw_t/ixrt/README.md +++ b/models/cv/classification/efficientnetv2_rw_t/ixrt/README.md @@ -27,19 +27,25 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt +pip install timm ``` ### Model Conversion ```bash -python3 export.py --weight efficientnetv2_t_agc-3620981a.pth --output efficientnetv2_rw_t.onnx +mkdir checkpoints +python3 export.py --weight efficientnetv2_t_agc-3620981a.pth --output checkpoints/efficientnetv2_rw_t.onnx ``` ## Model Inference ```bash -export DATASETS_DIR=/Path/to/imagenet_val/ +export PROJ_DIR=./ +export DATASETS_DIR=/path/to/imagenet_val/ +export CHECKPOINTS_DIR=./checkpoints +export 
RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/EFFICIENTNETV2_RW_T_CONFIG ``` ### FP16 diff --git a/models/cv/classification/efficientnetv2_rw_t/ixrt/build_engine.py b/models/cv/classification/efficientnetv2_rw_t/ixrt/build_engine.py deleted file mode 100644 index 038c15d5..00000000 --- a/models/cv/classification/efficientnetv2_rw_t/ixrt/build_engine.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/efficientnetv2_rw_t/ixrt/calibration_dataset.py b/models/cv/classification/efficientnetv2_rw_t/ixrt/calibration_dataset.py deleted file mode 100644 index ec931c65..00000000 --- a/models/cv/classification/efficientnetv2_rw_t/ixrt/calibration_dataset.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 
0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=True, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader diff --git a/models/cv/classification/efficientnetv2_rw_t/ixrt/ci/prepare.sh b/models/cv/classification/efficientnetv2_rw_t/ixrt/ci/prepare.sh new file mode 100644 index 00000000..1c968041 --- /dev/null +++ b/models/cv/classification/efficientnetv2_rw_t/ixrt/ci/prepare.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -x + +ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') +if [[ ${ID} == "ubuntu" ]]; then + apt install -y libgl1-mesa-glx +elif [[ ${ID} == "centos" ]]; then + yum install -y mesa-libGL +else + echo "Not Support Os" +fi + +pip install -r ../../ixrt_common/requirements.txt +pip install timm +mkdir checkpoints +python3 export.py --weight efficientnetv2_t_agc-3620981a.pth --output checkpoints/efficientnetv2_rw_t.onnx \ No newline at end of file diff --git a/models/cv/classification/efficientnetv2_rw_t/ixrt/common.py b/models/cv/classification/efficientnetv2_rw_t/ixrt/common.py deleted file mode 100644 index fd6a84d8..00000000 --- a/models/cv/classification/efficientnetv2_rw_t/ixrt/common.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -from cuda import cuda, cudart - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - err, allocation = cudart.cudaMalloc(size) - assert err == cudart.cudaError_t.cudaSuccess - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - "nbytes": size, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations \ No newline at end of file diff --git a/models/cv/classification/efficientnetv2_rw_t/ixrt/inference.py b/models/cv/classification/efficientnetv2_rw_t/ixrt/inference.py deleted file mode 100644 index e04f1c3a..00000000 --- 
a/models/cv/classification/efficientnetv2_rw_t/ixrt/inference.py +++ /dev/null @@ -1,187 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -from cuda import cuda, cudart -import torch -import tensorrt - -from timm.data import create_dataset, create_loader -from common import eval_batch, create_engine_context, get_io_bindings - -def get_dataloader(data_path, batch_size, num_workers): - datasets = create_dataset(root=data_path, name="") - - dataloader = create_loader( - datasets, - input_size=(3, 288, 288), - batch_size=batch_size, - interpolation='bicubic', - mean=(0.485, 0.456, 0.406), - std=(0.229, 0.224, 0.225), - crop_pct=1.0, - use_prefetcher = False, - num_workers = num_workers - ) - return dataloader - -def main(config): - dataloader = get_dataloader(config.datasets_dir, config.bsz, 16) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - 
if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += batch_data.shape[0] - (err,) = cudart.cudaMemcpy( - inputs[0]["allocation"], - batch_data, - batch_data.nbytes, - cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, - ) - assert err == cudart.cudaError_t.cudaSuccess - # cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - (err,) = cudart.cudaMemcpy( - output, - outputs[0]["allocation"], - outputs[0]["nbytes"], - cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, - ) - assert err == cudart.cudaError_t.cudaSuccess - # cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - 
acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - err, = cudart.cudaFree(inputs[0]["allocation"]) - assert err == cudart.cudaError_t.cudaSuccess - err, = cudart.cudaFree(outputs[0]["allocation"]) - assert err == cudart.cudaError_t.cudaSuccess - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 
0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/efficientnetv2_rw_t/ixrt/modify_batchsize.py b/models/cv/classification/efficientnetv2_rw_t/ixrt/modify_batchsize.py deleted file mode 100644 index 689b7a97..00000000 --- a/models/cv/classification/efficientnetv2_rw_t/ixrt/modify_batchsize.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. 
- dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/efficientnetv2_rw_t/ixrt/requirements.txt b/models/cv/classification/efficientnetv2_rw_t/ixrt/requirements.txt deleted file mode 100644 index 72371658..00000000 --- a/models/cv/classification/efficientnetv2_rw_t/ixrt/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -tqdm -timm -onnx -onnxsim -tabulate -ppq -tqdm -cuda-python \ No newline at end of file diff --git a/models/cv/classification/efficientnetv2_rw_t/ixrt/scripts/infer_efficientnetv2_rw_t_fp16_accuracy.sh b/models/cv/classification/efficientnetv2_rw_t/ixrt/scripts/infer_efficientnetv2_rw_t_fp16_accuracy.sh index 86ecfac0..b743d708 100644 --- a/models/cv/classification/efficientnetv2_rw_t/ixrt/scripts/infer_efficientnetv2_rw_t_fp16_accuracy.sh +++ b/models/cv/classification/efficientnetv2_rw_t/ixrt/scripts/infer_efficientnetv2_rw_t_fp16_accuracy.sh @@ -42,11 +42,6 @@ do esac done -PROJ_DIR=${PROJ_DIR:-"."} -DATASETS_DIR="${DATASETS_DIR}" -CHECKPOINTS_DIR="${PROJ_DIR}" -RUN_DIR="${PROJ_DIR}" -CONFIG_DIR="${PROJ_DIR}/config/EFFICIENTNETV2_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} diff --git 
a/models/cv/classification/efficientnetv2_rw_t/ixrt/scripts/infer_efficientnetv2_rw_t_fp16_performance.sh b/models/cv/classification/efficientnetv2_rw_t/ixrt/scripts/infer_efficientnetv2_rw_t_fp16_performance.sh index 597a7a64..e7a4f1a7 100644 --- a/models/cv/classification/efficientnetv2_rw_t/ixrt/scripts/infer_efficientnetv2_rw_t_fp16_performance.sh +++ b/models/cv/classification/efficientnetv2_rw_t/ixrt/scripts/infer_efficientnetv2_rw_t_fp16_performance.sh @@ -42,11 +42,6 @@ do esac done -PROJ_DIR=${PROJ_DIR:-"."} -DATASETS_DIR="${DATASETS_DIR}" -CHECKPOINTS_DIR="${PROJ_DIR}" -RUN_DIR="${PROJ_DIR}" -CONFIG_DIR="${PROJ_DIR}/config/EFFICIENTNETV2_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} diff --git a/models/cv/classification/efficientnetv2_rw_t/ixrt/simplify_model.py b/models/cv/classification/efficientnetv2_rw_t/ixrt/simplify_model.py deleted file mode 100644 index 9948a9fa..00000000 --- a/models/cv/classification/efficientnetv2_rw_t/ixrt/simplify_model.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/googlenet/ixrt/README.md b/models/cv/classification/googlenet/ixrt/README.md index 2c5865f8..1688676e 100644 --- a/models/cv/classification/googlenet/ixrt/README.md +++ b/models/cv/classification/googlenet/ixrt/README.md @@ -27,14 +27,14 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash mkdir checkpoints -python3 export_onnx.py --origin_model /path/to/googlenet-1378be20.pth --output_model checkpoints/googlenet.onnx +python3 ../../ixrt_common/export.py --model-name googlenet --weight googlenet-1378be20.pth --output checkpoints/googlenet.onnx ``` ## Model Inference @@ -43,8 +43,8 @@ python3 export_onnx.py --origin_model /path/to/googlenet-1378be20.pth --output_m export PROJ_DIR=./ export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints -export RUN_DIR=./ -export CONFIG_DIR=config/GOOGLENET_CONFIG +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/GOOGLENET_CONFIG ``` ### FP16 diff --git a/models/cv/classification/googlenet/ixrt/build_engine.py b/models/cv/classification/googlenet/ixrt/build_engine.py deleted file mode 100644 index 126da5e6..00000000 --- a/models/cv/classification/googlenet/ixrt/build_engine.py +++ 
/dev/null @@ -1,52 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - # print("precision : ", precision) - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git 
a/models/cv/classification/googlenet/ixrt/calibration_dataset.py b/models/cv/classification/googlenet/ixrt/calibration_dataset.py deleted file mode 100644 index 442a5602..00000000 --- a/models/cv/classification/googlenet/ixrt/calibration_dataset.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if 
self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/googlenet/ixrt/ci/prepare.sh b/models/cv/classification/googlenet/ixrt/ci/prepare.sh index 8a8c7769..7108399f 100644 --- a/models/cv/classification/googlenet/ixrt/ci/prepare.sh +++ b/models/cv/classification/googlenet/ixrt/ci/prepare.sh @@ -25,6 +25,6 @@ else echo "Not Support Os" fi -pip install -r requirements.txt +pip install -r ../../ixrt_common/requirements.txt mkdir checkpoints -python3 export_onnx.py --origin_model /root/data/checkpoints/googlenet.pth --output_model 
checkpoints/googlenet.onnx \ No newline at end of file +python3 ../../ixrt_common/export.py --model-name googlenet --weight googlenet-1378be20.pth --output checkpoints/googlenet.onnx \ No newline at end of file diff --git a/models/cv/classification/googlenet/ixrt/common.py b/models/cv/classification/googlenet/ixrt/common.py deleted file mode 100644 index 0458195e..00000000 --- a/models/cv/classification/googlenet/ixrt/common.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -import pycuda.driver as cuda - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - allocation = cuda.mem_alloc(size) - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/googlenet/ixrt/export_onnx.py b/models/cv/classification/googlenet/ixrt/export_onnx.py deleted file mode 100644 index 5a7cc5b1..00000000 --- a/models/cv/classification/googlenet/ixrt/export_onnx.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. 
-# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import torch -import torchvision.models as models -import argparse - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = models.googlenet() -model.load_state_dict(torch.load(args.origin_model)) -model.cuda() -model.eval() -input = torch.randn(1, 3, 224, 224, device='cuda') -export_onnx_file = args.output_model - -torch.onnx.export(model, - input, - export_onnx_file, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names = ['input'], - output_names = ['output'],) -print(" ") -print('Model has been converted to ONNX') -print("exit") -exit() \ No newline at end of file diff --git a/models/cv/classification/googlenet/ixrt/inference.py b/models/cv/classification/googlenet/ixrt/inference.py deleted file mode 100644 index 50aafd4f..00000000 --- a/models/cv/classification/googlenet/ixrt/inference.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -import pycuda.autoinit -import pycuda.driver as cuda -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], 
outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += batch_data.shape[0] - - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - end2end_time = end_time - start_time - - print(F"E2E time : {end2end_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference 
size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/googlenet/ixrt/modify_batchsize.py b/models/cv/classification/googlenet/ixrt/modify_batchsize.py deleted file mode 100644 index 689b7a97..00000000 --- a/models/cv/classification/googlenet/ixrt/modify_batchsize.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. 
- dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/googlenet/ixrt/quant.py b/models/cv/classification/googlenet/ixrt/quant.py deleted file mode 100644 index 7c7860c9..00000000 --- a/models/cv/classification/googlenet/ixrt/quant.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import os -import cv2 -import random -import argparse -import numpy as np -from random import shuffle -from tensorrt.deploy import static_quantize - -import torch -import torchvision.datasets -from calibration_dataset import getdataloader - -def setseed(seed=42): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=20) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=224) - args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - -args = parse_args() -setseed(args.seed) -calibration_dataloader = getdataloader(args.dataset_dir, args.step, args.bsz, img_sz=args.imgsz) -static_quantize(args.model, - calibration_dataloader=calibration_dataloader, - save_quant_onnx_path=os.path.join(args.save_dir, f"quantized_{args.model_name}.onnx"), - observer=args.observer, - data_preprocess=lambda x: x[0].to("cuda"), - quant_format="qdq", - disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/classification/googlenet/ixrt/refine_utils/__init__.py b/models/cv/classification/googlenet/ixrt/refine_utils/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/models/cv/classification/googlenet/ixrt/refine_utils/common.py b/models/cv/classification/googlenet/ixrt/refine_utils/common.py deleted file 
mode 100644 index 2af19a14..00000000 --- a/models/cv/classification/googlenet/ixrt/refine_utils/common.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -from typing import Union, Callable, List - -from tensorrt.deploy.api import * -from tensorrt.deploy.backend.onnx.converter import default_converter -from tensorrt.deploy.backend.torch.executor.operators._operators import to_py_type -from tensorrt.deploy.ir.operator_attr import BaseOperatorAttr, EmptyAttr -from tensorrt.deploy.ir.operator_type import OperatorType as OP -from tensorrt.deploy.ir import operator_attr as attr, Operator, generate_operator_name -from tensorrt.deploy.fusion import BasePass, PatternGraph, build_sequence_graph, GraphMatcher, PassSequence -from tensorrt.deploy.ir import Graph -from tensorrt.deploy.quantizer.quant_operator.base import quant_single_input_operator -from tensorrt.deploy.backend.onnx.converter import convert_onnx_operator - -def find_sequence_subgraph(graph, - pattern: Union[List[str], PatternGraph], - callback: Callable[[Graph, PatternGraph], None], - strict=True): - if isinstance(pattern, List): - pattern = build_sequence_graph(pattern) - - matcher = GraphMatcher(pattern, strict=strict) - return matcher.findall(graph, callback) \ No newline at end of file diff --git a/models/cv/classification/googlenet/ixrt/refine_utils/linear_pass.py 
b/models/cv/classification/googlenet/ixrt/refine_utils/linear_pass.py deleted file mode 100644 index 29b5e4a9..00000000 --- a/models/cv/classification/googlenet/ixrt/refine_utils/linear_pass.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import dataclasses - -from refine_utils.common import * - -# AXB=C, Only for B is initializer - -class FusedLinearPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.MATMUL, OP.ADD], callback=self.to_linear_with_bias, strict=True - ) - find_sequence_subgraph( - graph, pattern=[OP.MATMUL], callback=self.to_linear, strict=True - ) - return graph - - def to_linear_with_bias(self, graph, pattern: PatternGraph): - matmul = pattern.nodes[0] - add = pattern.nodes[1] - if len(add.operator.inputs) != 2: - return - - b_var = graph.get_variable(matmul.operator.inputs[1]) - if not graph.is_leaf_variable(b_var) or b_var.value is None: - return - - if b_var.value.ndim != 2: - return - - bias_var = None - for input in add.operator.inputs: - if input not in matmul.operator.outputs: - bias_var = input - - inputs = matmul.operator.inputs - inputs.append(bias_var) - outputs = add.operator.outputs - - b_var.value = b_var.value.transpose(1, 0) - b_var.shape[0],b_var.shape[1] = b_var.shape[1],b_var.shape[0] - - hidden_size = 
b_var.shape[1] - linear_dim = b_var.shape[0] - - attributes = { - "hidden_size": hidden_size, - "linear_dim": linear_dim, - "has_bias": 1, - "act_type":"none" - } - - self.transform.make_operator( - "LinearFP16", - inputs=inputs, - outputs=outputs, - **attributes - ) - - self.transform.delete_operator(add.operator) - self.transform.delete_operator(matmul.operator) - - def to_linear(self, graph, pattern: PatternGraph): - matmul = pattern.nodes[0] - if len(matmul.operator.inputs) != 2: - return - - b_var = graph.get_variable(matmul.operator.inputs[1]) - if not graph.is_leaf_variable(b_var) or b_var.value is None: - return - - if b_var.value.ndim != 2: - return - - attributes = { - "hidden_size": hidden_size, - "linear_dim": linear_dim, - "has_bias": 0, - "act_type": "none" - } - - b_var.value = b_var.value.transpose(1, 0) - b_var.shape[0],b_var.shape[1] = b_var.shape[1], b_var.shape[0] - - hidden_size = b_var.shape[1] - linear_dim = b_var.shape[0] - - op = self.transform.make_operator( - op_type = "LinearFP16", - inputs = pattern.nodes[0].operator.inputs, - outputs=[pattern.nodes[-1].operator.outputs[0]], - **attributes - ) - - self.transform.add_operator(op) - - self.transform.delete_operator(matmul.operator) \ No newline at end of file diff --git a/models/cv/classification/googlenet/ixrt/refine_utils/matmul_to_gemm_pass.py b/models/cv/classification/googlenet/ixrt/refine_utils/matmul_to_gemm_pass.py deleted file mode 100644 index 4ebfac4d..00000000 --- a/models/cv/classification/googlenet/ixrt/refine_utils/matmul_to_gemm_pass.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -from refine_utils.common import * - -# -# Common pattern Matmul to Gemm -# -class FusedGemmPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.MATMUL], callback=self.to_gemm, strict=True - ) - return graph - - def to_gemm(self, graph, pattern: PatternGraph): - matmul_op = pattern.nodes[0] - inputs = matmul_op.operator.inputs - outputs = matmul_op.operator.outputs - - if len(inputs)!=2 and len(outputs)!=1: - return - - for input in inputs: - if self.transform.is_leaf_variable(input): - return - - print(f"{self.transform.get_variable(inputs[0]).shape} {self.transform.get_variable(inputs[1]).shape}") - self.transform.delete_operator(matmul_op.operator) - - op = self.transform.make_operator( - op_type = "Gemm", - inputs = inputs, - outputs = outputs, - alpha = 1, - beta = 1, - transB = 1 - ) - - self.transform.add_operator(op) \ No newline at end of file diff --git a/models/cv/classification/googlenet/ixrt/requirements.txt b/models/cv/classification/googlenet/ixrt/requirements.txt deleted file mode 100644 index a3ef1a19..00000000 --- a/models/cv/classification/googlenet/ixrt/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -pycuda -tqdm -onnx -onnxsim -tabulate \ No newline at end of file diff --git a/models/cv/classification/googlenet/ixrt/simplify_model.py b/models/cv/classification/googlenet/ixrt/simplify_model.py deleted file mode 100644 index 9948a9fa..00000000 --- a/models/cv/classification/googlenet/ixrt/simplify_model.py +++ /dev/null @@ -1,40 +0,0 @@ -# 
Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/hrnet_w18/ixrt/README.md b/models/cv/classification/hrnet_w18/ixrt/README.md index d924c6da..f03a9e47 100644 --- a/models/cv/classification/hrnet_w18/ixrt/README.md +++ b/models/cv/classification/hrnet_w18/ixrt/README.md @@ -14,6 +14,8 @@ HRNet-W18 is a powerful image classification model developed by Jingdong AI Rese ### Prepare Resources +Pretrained model: + Dataset: to download the validation dataset. 
### Install Dependencies @@ -25,23 +27,25 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt +pip3 install mmpretrain ``` ### Model Conversion ```bash mkdir checkpoints -python3 export_onnx.py --output_model checkpoints/hrnet-w18.onnx +python3 export_onnx.py --output_model checkpoints/hrnet_w18.onnx ``` ## Model Inference ```bash +export PROJ_DIR=./ export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints -export RUN_DIR=./ -export CONFIG_DIR=config/HRNET_W18_CONFIG +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/HRNET_W18_CONFIG ``` ### FP16 diff --git a/models/cv/classification/hrnet_w18/ixrt/build_engine.py b/models/cv/classification/hrnet_w18/ixrt/build_engine.py deleted file mode 100644 index 01e126bc..00000000 --- a/models/cv/classification/hrnet_w18/ixrt/build_engine.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt -from calibration_dataset import getdataloader -import cuda.cudart as cudart - -def assertSuccess(err): - assert(err == cudart.cudaError_t.cudaSuccess) - -class EngineCalibrator(tensorrt.IInt8EntropyCalibrator2): - - def __init__(self, cache_file, datasets_dir, loop_count=10, bsz=1, img_sz=224): - super().__init__() - self.cache_file = cache_file - self.image_batcher = getdataloader(datasets_dir, loop_count, batch_size=bsz, img_sz=img_sz) - self.batch_generator = iter(self.image_batcher) - size = img_sz*img_sz*3*bsz - __import__('pdb').set_trace() - err, self.batch_allocation = cudart.cudaMalloc(size) - assertSuccess(err) - - def __del__(self): - err,= cudart.cudaFree(self.batch_allocation) - assertSuccess(err) - - def get_batch_size(self): - return self.image_batcher.batch_size - - def get_batch(self, names): - try: - batch, _ = next(self.batch_generator) - batch = batch.numpy() - __import__('pdb').set_trace() - cudart.cudaMemcpy(self.batch_allocation, - np.ascontiguousarray(batch), - batch.nbytes, - cudart.cudaMemcpyKind.cudaMemcpyHostToDevice) - return [int(self.batch_allocation)] - except StopIteration: - return None - - def read_calibration_cache(self): - if os.path.exists(self.cache_file): - with open(self.cache_file, "rb") as f: - return f.read() - - def write_calibration_cache(self, cache): - with open(self.cache_file, "wb") as f: - f.write(cache) - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.VERBOSE) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - print("precision : ", 
precision) - build_config.set_flag(precision) - if config.precision == "int8": - build_config.int8_calibrator = EngineCalibrator("int8_cache", config.datasets_dir) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - args = parser.parse_args() - return args - -if __name__ == "__main__": - # cali = EngineCalibrator("tmp", "/home/qiang.zhang/data/imagenet_val/") - # print(cali.get_batch_size()) - # print(cali.get_batch("hello")) - args = parse_args() - main(args) diff --git a/models/cv/classification/hrnet_w18/ixrt/build_i8_engine.py b/models/cv/classification/hrnet_w18/ixrt/build_i8_engine.py deleted file mode 100644 index 04477118..00000000 --- a/models/cv/classification/hrnet_w18/ixrt/build_i8_engine.py +++ /dev/null @@ -1,38 +0,0 @@ -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - # print("precision : ", precision) - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path 
= config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/hrnet_w18/ixrt/calibration_dataset.py b/models/cv/classification/hrnet_w18/ixrt/calibration_dataset.py deleted file mode 100644 index f36c8459..00000000 --- a/models/cv/classification/hrnet_w18/ixrt/calibration_dataset.py +++ /dev/null @@ -1,111 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import os -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - 
batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/hrnet_w18/ixrt/ci/prepare.sh b/models/cv/classification/hrnet_w18/ixrt/ci/prepare.sh index dbb45d9a..07795f2a 100644 --- a/models/cv/classification/hrnet_w18/ixrt/ci/prepare.sh +++ b/models/cv/classification/hrnet_w18/ixrt/ci/prepare.sh @@ -25,8 +25,9 @@ else echo "Not Support Os" fi -pip install -r requirements.txt +pip install -r ../../ixrt_common/requirements.txt +pip3 install mmpretrain mkdir checkpoints mkdir -p /root/.cache/torch/hub/checkpoints/ ln -s /root/data/checkpoints/hrnet-w18_3rdparty_8xb32_in1k_20220120-0c10b180.pth /root/.cache/torch/hub/checkpoints/hrnet-w18_3rdparty_8xb32_in1k_20220120-0c10b180.pth -python3 export_onnx.py --output_model checkpoints/hrnet-w18.onnx \ No newline at end of file +python3 export_onnx.py --output_model checkpoints/hrnet_w18.onnx \ No newline at end of file diff --git a/models/cv/classification/hrnet_w18/ixrt/common.py b/models/cv/classification/hrnet_w18/ixrt/common.py deleted file mode 100644 index 0458195e..00000000 --- a/models/cv/classification/hrnet_w18/ixrt/common.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -import pycuda.driver as cuda - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - allocation = cuda.mem_alloc(size) - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: 
- outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/hrnet_w18/ixrt/config/HRNET_W18_CONFIG b/models/cv/classification/hrnet_w18/ixrt/config/HRNET_W18_CONFIG deleted file mode 100644 index d419e535..00000000 --- a/models/cv/classification/hrnet_w18/ixrt/config/HRNET_W18_CONFIG +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# IMGSIZE : 模型输入hw大小 -# MODEL_NAME : 生成onnx/engine的basename -# ORIGINE_MODEL : 原始onnx文件名称 -IMGSIZE=224 -MODEL_NAME=HRNet_W18 -ORIGINE_MODEL=hrnet-w18.onnx - -# QUANT CONFIG (仅PRECISION为int8时生效) - # QUANT_OBSERVER : 量化策略,可选 [hist_percentile, percentile, minmax, entropy, ema] - # QUANT_BATCHSIZE : 量化时组dataloader的batchsize, 最好和onnx中的batchsize保持一致,有些op可能推导shape错误(比如Reshape) - # QUANT_STEP : 量化步数 - # QUANT_SEED : 随机种子 保证量化结果可复现 - # QUANT_EXIST_ONNX : 如果有其他来源的量化模型则填写 -QUANT_OBSERVER=minmax -QUANT_BATCHSIZE=32 -QUANT_STEP=32 -QUANT_SEED=42 -DISABLE_QUANT_LIST= -QUANT_EXIST_ONNX= diff --git a/models/cv/classification/hrnet_w18/ixrt/inference.py b/models/cv/classification/hrnet_w18/ixrt/inference.py deleted file mode 100644 index 47a3f640..00000000 --- a/models/cv/classification/hrnet_w18/ixrt/inference.py +++ /dev/null @@ -1,159 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -import pycuda.autoinit -import pycuda.driver as cuda -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - 
print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += batch_data.shape[0] - - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - e2e_time = end_time - start_time - print(F"E2E time : {e2e_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, 
help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) \ No newline at end of file diff --git a/models/cv/classification/hrnet_w18/ixrt/modify_batchsize.py b/models/cv/classification/hrnet_w18/ixrt/modify_batchsize.py deleted file mode 100644 index 689b7a97..00000000 --- a/models/cv/classification/hrnet_w18/ixrt/modify_batchsize.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. 
- # Add checks as needed. - dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/hrnet_w18/ixrt/quant_qdq.py b/models/cv/classification/hrnet_w18/ixrt/quant_qdq.py deleted file mode 100644 index 8006db24..00000000 --- a/models/cv/classification/hrnet_w18/ixrt/quant_qdq.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import random -import argparse -import numpy as np -from random import shuffle -from tensorrt.deploy import static_quantize - -import torch -import torchvision.datasets -from calibration_dataset import getdataloader - -def setseed(seed=42): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=20) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=224) - args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - -args = parse_args() -setseed(args.seed) -calibration_dataloader = getdataloader(args.dataset_dir, args.step, args.bsz, img_sz=args.imgsz) -static_quantize(args.model, - calibration_dataloader=calibration_dataloader, - save_quant_onnx_path=os.path.join(args.save_dir, f"quantized_{args.model_name}.onnx"), - observer=args.observer, - data_preprocess=lambda x: x[0].to("cuda"), - quant_format="qdq", - disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/classification/hrnet_w18/ixrt/requirements.txt b/models/cv/classification/hrnet_w18/ixrt/requirements.txt deleted file mode 100644 index 7d0f090a..00000000 --- a/models/cv/classification/hrnet_w18/ixrt/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -tqdm -onnx -onnxsim -tabulate -ppq -mmpretrain -mmcv-lite -pycuda 
-transformers==4.37.1 \ No newline at end of file diff --git a/models/cv/classification/hrnet_w18/ixrt/scripts/infer_hrnet_w18_fp16_accuracy.sh b/models/cv/classification/hrnet_w18/ixrt/scripts/infer_hrnet_w18_fp16_accuracy.sh index b743d708..48292493 100644 --- a/models/cv/classification/hrnet_w18/ixrt/scripts/infer_hrnet_w18_fp16_accuracy.sh +++ b/models/cv/classification/hrnet_w18/ixrt/scripts/infer_hrnet_w18_fp16_accuracy.sh @@ -51,6 +51,8 @@ echo RUN_DIR : ${RUN_DIR} echo CONFIG_DIR : ${CONFIG_DIR} echo ====================== Model Info ====================== echo Model Name : ${MODEL_NAME} +echo Model Input Name : ${MODEL_INPUT_NAME} +echo Model Output Name : ${MODEL_OUTPUT_NAME} echo Onnx Path : ${ORIGINE_MODEL} step=0 @@ -69,6 +71,34 @@ else echo " "Generate ${SIM_MODEL} fi +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + # Change Batchsize let step++ echo; @@ -111,4 +141,4 @@ python3 ${RUN_DIR}/inference.py \ --acc_target ${TGT} \ --bsz ${BSZ}; check_status -exit ${EXIT_STATUS} +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/hrnet_w18/ixrt/scripts/infer_hrnet_w18_fp16_performance.sh b/models/cv/classification/hrnet_w18/ixrt/scripts/infer_hrnet_w18_fp16_performance.sh index e7a4f1a7..c843057d 100644 --- 
a/models/cv/classification/hrnet_w18/ixrt/scripts/infer_hrnet_w18_fp16_performance.sh +++ b/models/cv/classification/hrnet_w18/ixrt/scripts/infer_hrnet_w18_fp16_performance.sh @@ -51,6 +51,8 @@ echo RUN_DIR : ${RUN_DIR} echo CONFIG_DIR : ${CONFIG_DIR} echo ====================== Model Info ====================== echo Model Name : ${MODEL_NAME} +echo Model Input Name : ${MODEL_INPUT_NAME} +echo Model Output Name : ${MODEL_OUTPUT_NAME} echo Onnx Path : ${ORIGINE_MODEL} step=0 @@ -69,6 +71,34 @@ else echo " "Generate ${SIM_MODEL} fi +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + # Change Batchsize let step++ echo; @@ -111,4 +141,4 @@ python3 ${RUN_DIR}/inference.py \ --fps_target ${TGT} \ --bsz ${BSZ}; check_status -exit ${EXIT_STATUS} +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/hrnet_w18/ixrt/scripts/infer_hrnet_w18_int8_accuracy.sh b/models/cv/classification/hrnet_w18/ixrt/scripts/infer_hrnet_w18_int8_accuracy.sh index 8e4a82bd..a288e9ca 100644 --- a/models/cv/classification/hrnet_w18/ixrt/scripts/infer_hrnet_w18_int8_accuracy.sh +++ b/models/cv/classification/hrnet_w18/ixrt/scripts/infer_hrnet_w18_int8_accuracy.sh @@ -29,7 +29,7 @@ WARM_UP=0 LOOP_COUNT=-1 RUN_MODE=ACC PRECISION=int8 -export 
PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python + # Update arguments index=0 options=$@ @@ -44,7 +44,6 @@ do done source ${CONFIG_DIR} -echo ${QUANT_OBSERVER} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} @@ -62,15 +61,16 @@ SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx # Simplify Model let step++ - echo [STEP ${step}] : Simplify Model - if [ -f ${SIM_MODEL} ];then - echo " "Simplify Model, ${SIM_MODEL} has been existed - else - python3 ${RUN_DIR}/simplify_model.py \ - --origin_model $ORIGINE_MODEL \ - --output_model ${SIM_MODEL} - echo " "Generate ${SIM_MODEL} - fi +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi # Quant Model if [ $PRECISION == "int8" ];then @@ -84,7 +84,7 @@ if [ $PRECISION == "int8" ];then SIM_MODEL=${QUANT_EXIST_ONNX} echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed else - python3 ${RUN_DIR}/quant_qdq.py \ + python3 ${RUN_DIR}/quant.py \ --model ${SIM_MODEL} \ --model_name ${MODEL_NAME} \ --dataset_dir ${DATASETS_DIR} \ @@ -100,37 +100,36 @@ if [ $PRECISION == "int8" ];then fi fi - # Change Batchsize - let step++ - echo; - echo [STEP ${step}] : Change Batchsize - FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx - if [ -f $FINAL_MODEL ];then - echo " "Change Batchsize Skip, $FINAL_MODEL has been existed - else - python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ - --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} - echo " "Generate ${FINAL_MODEL} - fi +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 
${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi - # Build Engine - let step++ - echo; - echo [STEP ${step}] : Build Engine - ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine - if [ -f $ENGINE_FILE ];then - echo " "Build Engine Skip, $ENGINE_FILE has been existed - else - python3 ${RUN_DIR}/build_i8_engine.py \ +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ --precision ${PRECISION} \ --model ${FINAL_MODEL} \ --engine ${ENGINE_FILE} - echo " "Generate Engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} fi - # Inference -# let step++ +let step++ echo; echo [STEP ${step}] : Inference python3 ${RUN_DIR}/inference.py \ @@ -143,4 +142,4 @@ python3 ${RUN_DIR}/inference.py \ --acc_target ${TGT} \ --bsz ${BSZ}; check_status -exit ${EXIT_STATUS} +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/hrnet_w18/ixrt/scripts/infer_hrnet_w18_int8_performance.sh b/models/cv/classification/hrnet_w18/ixrt/scripts/infer_hrnet_w18_int8_performance.sh index 714a8284..e578762e 100644 --- a/models/cv/classification/hrnet_w18/ixrt/scripts/infer_hrnet_w18_int8_performance.sh +++ b/models/cv/classification/hrnet_w18/ixrt/scripts/infer_hrnet_w18_int8_performance.sh @@ -28,7 +28,7 @@ WARM_UP=3 LOOP_COUNT=20 RUN_MODE=FPS PRECISION=int8 -export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python + # Update arguments index=0 options=$@ @@ -43,7 +43,6 @@ do done source ${CONFIG_DIR} -echo ${QUANT_OBSERVER} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} @@ -61,15 +60,16 @@ SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx # Simplify Model let step++ - echo [STEP 
${step}] : Simplify Model - if [ -f ${SIM_MODEL} ];then - echo " "Simplify Model, ${SIM_MODEL} has been existed - else - python3 ${RUN_DIR}/simplify_model.py \ - --origin_model $ORIGINE_MODEL \ - --output_model ${SIM_MODEL} - echo " "Generate ${SIM_MODEL} - fi +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi # Quant Model if [ $PRECISION == "int8" ];then @@ -99,36 +99,36 @@ if [ $PRECISION == "int8" ];then fi fi - # Change Batchsize - let step++ - echo; - echo [STEP ${step}] : Change Batchsize - FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx - if [ -f $FINAL_MODEL ];then - echo " "Change Batchsize Skip, $FINAL_MODEL has been existed - else - python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ - --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} - echo " "Generate ${FINAL_MODEL} - fi +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi - # Build Engine - let step++ - echo; - echo [STEP ${step}] : Build Engine - ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine - if [ -f $ENGINE_FILE ];then - echo " "Build Engine Skip, $ENGINE_FILE has been existed - else - python3 ${RUN_DIR}/build_i8_engine.py \ +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 
${RUN_DIR}/build_engine.py \ --precision ${PRECISION} \ --model ${FINAL_MODEL} \ --engine ${ENGINE_FILE} - echo " "Generate Engine ${ENGINE_FILE} - fi + echo " "Generate Engine ${ENGINE_FILE} +fi # Inference -# let step++ +let step++ echo; echo [STEP ${step}] : Inference python3 ${RUN_DIR}/inference.py \ @@ -138,7 +138,7 @@ python3 ${RUN_DIR}/inference.py \ --warm_up=${WARM_UP} \ --loop_count ${LOOP_COUNT} \ --test_mode ${RUN_MODE} \ - --acc_target ${TGT} \ + --fps_target ${TGT} \ --bsz ${BSZ}; check_status exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/hrnet_w18/ixrt/simplify_model.py b/models/cv/classification/hrnet_w18/ixrt/simplify_model.py deleted file mode 100644 index 9948a9fa..00000000 --- a/models/cv/classification/hrnet_w18/ixrt/simplify_model.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/inception_resnet_v2/ixrt/README.md b/models/cv/classification/inception_resnet_v2/ixrt/README.md index 2aa4923f..fcacfd97 100755 --- a/models/cv/classification/inception_resnet_v2/ixrt/README.md +++ b/models/cv/classification/inception_resnet_v2/ixrt/README.md @@ -27,7 +27,8 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt +pip3 install Pillow ``` ### Model Conversion @@ -41,30 +42,29 @@ python3 export_model.py --output_model /Path/to/checkpoints/inceptionresnetv2.on ## Model Inference ```bash -export PROJ_DIR=/Path/to/inceptionresnetv2/ixrt +export PROJ_DIR=./ export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints -export RUN_DIR=/Path/to/inceptionresnetv2/ixrt -export CONFIG_DIR=/Path/to/config/INCEPTIONRESNETV2_CONFIG -export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/INCEPTION_RESNET_V2_CONFIG ``` ### FP16 ```bash # Accuracy -bash scripts/infer_inceptionresnetv2_fp16_accuracy.sh +bash scripts/infer_inception_resnet_v2_fp16_accuracy.sh # Performance -bash scripts/infer_inceptionresnetv2_fp16_performance.sh +bash scripts/infer_inception_resnet_v2_fp16_performance.sh ``` ### INT8 ```bash # 
Accuracy -bash scripts/infer_inceptionresnetv2_int8_accuracy.sh +bash scripts/infer_inception_resnet_v2_int8_accuracy.sh # Performance -bash scripts/infer_inceptionresnetv2_int8_performance.sh +bash scripts/infer_inception_resnet_v2_int8_performance.sh ``` ## Model Results diff --git a/models/cv/classification/inception_resnet_v2/ixrt/build_engine.py b/models/cv/classification/inception_resnet_v2/ixrt/build_engine.py deleted file mode 100755 index 41e6af8d..00000000 --- a/models/cv/classification/inception_resnet_v2/ixrt/build_engine.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt -from calibration_dataset import getdataloader -import cuda.cudart as cudart - -def assertSuccess(err): - assert(err == cudart.cudaError_t.cudaSuccess) - -class EngineCalibrator(tensorrt.IInt8EntropyCalibrator2): - - def __init__(self, cache_file, datasets_dir, loop_count=10, bsz=1, img_sz=224): - super().__init__() - self.cache_file = cache_file - self.image_batcher = getdataloader(datasets_dir, loop_count, batch_size=bsz, img_sz=img_sz) - self.batch_generator = iter(self.image_batcher) - size = img_sz*img_sz*3*bsz - __import__('pdb').set_trace() - err, self.batch_allocation = cudart.cudaMalloc(size) - assertSuccess(err) - - def __del__(self): - err,= cudart.cudaFree(self.batch_allocation) - assertSuccess(err) - - def get_batch_size(self): - return self.image_batcher.batch_size - - def get_batch(self, names): - try: - batch, _ = next(self.batch_generator) - batch = batch.numpy() - __import__('pdb').set_trace() - cudart.cudaMemcpy(self.batch_allocation, - np.ascontiguousarray(batch), - batch.nbytes, - cudart.cudaMemcpyKind.cudaMemcpyHostToDevice) - return [int(self.batch_allocation)] - except StopIteration: - return None - - def read_calibration_cache(self): - if os.path.exists(self.cache_file): - with open(self.cache_file, "rb") as f: - return f.read() - - def write_calibration_cache(self, cache): - with open(self.cache_file, "wb") as f: - f.write(cache) - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.VERBOSE) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - print("precision : ", 
precision) - build_config.set_flag(precision) - if config.precision == "int8": - build_config.int8_calibrator = EngineCalibrator("int8_cache", config.datasets_dir) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/inception_resnet_v2/ixrt/build_i8_engine.py b/models/cv/classification/inception_resnet_v2/ixrt/build_i8_engine.py deleted file mode 100644 index 6038b33f..00000000 --- a/models/cv/classification/inception_resnet_v2/ixrt/build_i8_engine.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import argparse -import json -import os - -import tensorrt -import tensorrt as trt - -TRT_LOGGER = trt.Logger(tensorrt.Logger.VERBOSE) - -EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - - -def GiB(val): - return val * 1 << 30 - - -def json_load(filename): - with open(filename) as json_file: - data = json.load(json_file) - return data - - -def setDynamicRange(network, json_file): - """Sets ranges for network layers.""" - quant_param_json = json_load(json_file) - act_quant = quant_param_json["act_quant_info"] - - for i in range(network.num_inputs): - input_tensor = network.get_input(i) - if act_quant.__contains__(input_tensor.name): - print(input_tensor.name) - value = act_quant[input_tensor.name] - tensor_max = abs(value) - tensor_min = -abs(value) - input_tensor.dynamic_range = (tensor_min, tensor_max) - - for i in range(network.num_layers): - layer = network.get_layer(i) - - for output_index in range(layer.num_outputs): - tensor = layer.get_output(output_index) - - if act_quant.__contains__(tensor.name): - value = act_quant[tensor.name] - tensor_max = abs(value) - tensor_min = -abs(value) - tensor.dynamic_range = (tensor_min, tensor_max) - else: - print("\033[1;32m%s\033[0m" % tensor.name) - - -def build_engine(onnx_file, json_file, engine_file): - builder = trt.Builder(TRT_LOGGER) - network = builder.create_network(EXPLICIT_BATCH) - - config = builder.create_builder_config() - - # If it is a dynamic onnx model , you need to add the following. 
- # profile = builder.create_optimization_profile() - # profile.set_shape("input_name", (batch, channels, min_h, min_w), (batch, channels, opt_h, opt_w), (batch, channels, max_h, max_w)) - # config.add_optimization_profile(profile) - - parser = trt.OnnxParser(network, TRT_LOGGER) - # config.max_workspace_size = GiB(1) - if not os.path.exists(onnx_file): - quit("ONNX file {} not found".format(onnx_file)) - - with open(onnx_file, "rb") as model: - if not parser.parse(model.read()): - print("ERROR: Failed to parse the ONNX file.") - for error in range(parser.num_errors): - print(parser.get_error(error)) - return None - - config.set_flag(trt.BuilderFlag.INT8) - - setDynamicRange(network, json_file) - - engine = builder.build_engine(network, config) - - with open(engine_file, "wb") as f: - f.write(engine.serialize()) - - -if __name__ == "__main__": - # Add plugins if needed - # import ctypes - # ctypes.CDLL("libmmdeploy_tensorrt_ops.so") - parser = argparse.ArgumentParser( - description="Writing qparams to onnx to convert tensorrt engine." - ) - parser.add_argument("--onnx", type=str, default=None) - parser.add_argument("--qparam_json", type=str, default=None) - parser.add_argument("--engine", type=str, default=None) - arg = parser.parse_args() - - build_engine(arg.onnx, arg.qparam_json, arg.engine) - print("\033[1;32mgenerate %s\033[0m" % arg.engine) \ No newline at end of file diff --git a/models/cv/classification/inception_resnet_v2/ixrt/calibration_dataset.py b/models/cv/classification/inception_resnet_v2/ixrt/calibration_dataset.py deleted file mode 100644 index 2037ff78..00000000 --- a/models/cv/classification/inception_resnet_v2/ixrt/calibration_dataset.py +++ /dev/null @@ -1,164 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import math -from PIL import Image - -import numpy as np -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - -class ToSpaceBGR(object): - def __init__(self, is_bgr): - self.is_bgr = is_bgr - - def __call__(self, tensor): - if self.is_bgr: - new_tensor = tensor.clone() - new_tensor[0] = tensor[2] - new_tensor[2] = tensor[0] - tensor = new_tensor - return tensor - -class ToRange255(object): - def __init__(self, is_255): - self.is_255 = is_255 - - def __call__(self, tensor): - if self.is_255: - tensor.mul_(255) - return tensor - -class TransformImage(object): - def __init__(self, imgsize=300, scale=0.875, - preserve_aspect_ratio=True): - self.input_size = [3, imgsize, imgsize] - self.input_space = 'RGB' - self.input_range = [0, 1] - self.mean = [0.5, 0.5, 0.5] - self.std = [0.5, 0.5, 0.5] - - # https://github.com/tensorflow/models/blob/master/research/inception/inception/image_processing.py#L294 - self.scale = scale - - tfs = [] - if preserve_aspect_ratio: - tfs.append(T.Resize(int(math.floor(max(self.input_size)/self.scale)))) - else: - height = int(self.input_size[1] / self.scale) - width = int(self.input_size[2] / self.scale) - tfs.append(T.Resize((height, width))) - - - tfs.append(T.CenterCrop(max(self.input_size))) - tfs.append(T.ToTensor()) - tfs.append(ToSpaceBGR(self.input_space=='BGR')) - tfs.append(ToRange255(max(self.input_range)==255)) - tfs.append(T.Normalize(mean=self.mean, std=self.std)) - - self.tf = T.Compose(tfs) - - def 
__call__(self, img): - tensor = self.tf(img) - return tensor - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - sample = sample.convert("RGB") - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, transforms, num_samples=1024, img_sz=300, batch_size=64, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=transforms, - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, transforms, step=20, batch_size=64, workers=0, 
img_sz=299, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - transforms, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/inception_resnet_v2/ixrt/ci/prepare.sh b/models/cv/classification/inception_resnet_v2/ixrt/ci/prepare.sh index 2221e9b3..38c2a28a 100644 --- a/models/cv/classification/inception_resnet_v2/ixrt/ci/prepare.sh +++ b/models/cv/classification/inception_resnet_v2/ixrt/ci/prepare.sh @@ -25,8 +25,9 @@ else echo "Not Support Os" fi -pip install -r requirements.txt -mkdir checkpoints +pip install -r ../../ixrt_common/requirements.txt +pip3 install Pillow +mkdir -p checkpoints mkdir -p /root/.cache/torch/hub/checkpoints/ ln -s /root/data/checkpoints/inceptionresnetv2-520b38e4.pth /root/.cache/torch/hub/checkpoints/inceptionresnetv2-520b38e4.pth python3 export_model.py --output_model ./checkpoints/inceptionresnetv2.onnx \ No newline at end of file diff --git a/models/cv/classification/inception_resnet_v2/ixrt/common.py b/models/cv/classification/inception_resnet_v2/ixrt/common.py deleted file mode 100644 index e941bf92..00000000 --- a/models/cv/classification/inception_resnet_v2/ixrt/common.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -import pycuda.driver as cuda - -def eval_batch(batch_score, batch_label): - batch_score = torch.from_numpy(batch_score) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - allocation = cuda.mem_alloc(size) - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations \ No newline at end of file diff --git a/models/cv/classification/inception_resnet_v2/ixrt/inference.py b/models/cv/classification/inception_resnet_v2/ixrt/inference.py deleted file mode 100644 index 17f473bf..00000000 --- 
a/models/cv/classification/inception_resnet_v2/ixrt/inference.py +++ /dev/null @@ -1,163 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -import pycuda.autoinit -import pycuda.driver as cuda -import torch -import torch.nn.functional as F -import onnxruntime -import tensorrt - -from calibration_dataset import TransformImage, getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - - val_tf = TransformImage( - imgsize=config.imgsz, - scale=0.875, - preserve_aspect_ratio=True - ) - - dataloader = getdataloader(config.datasets_dir, val_tf, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - 
context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - fps = config.loop_count * config.bsz / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - - batch_data = batch_data.numpy() - batch_data = np.ascontiguousarray(batch_data) - total_sample += batch_data.shape[0] - - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - tmp = torch.from_numpy(output) - output = F.softmax(tmp, dim=1) - - output = output.reshape(output.shape[0],output.shape[1]) - batch_top1, batch_top5 = eval_batch(output.numpy(), batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - e2e_time = time.time() - start_time - print(F"E2E time : {e2e_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - 
type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=64, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=299, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) \ No newline at end of file diff --git a/models/cv/classification/inception_resnet_v2/ixrt/modify_batchsize.py b/models/cv/classification/inception_resnet_v2/ixrt/modify_batchsize.py deleted file mode 100644 index 4ac42a30..00000000 --- a/models/cv/classification/inception_resnet_v2/ixrt/modify_batchsize.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... 
note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. - dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/inception_resnet_v2/ixrt/quant.py b/models/cv/classification/inception_resnet_v2/ixrt/quant.py deleted file mode 100644 index 4310f17e..00000000 --- a/models/cv/classification/inception_resnet_v2/ixrt/quant.py +++ /dev/null @@ -1,170 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-"""这是一个高度自动化的 PPQ 量化的入口脚本,将你的模型和数据按要求进行打包: - -在自动化 API 中,我们使用 QuantizationSetting 对象传递量化参数。 - -This file will show you how to quantize your network with PPQ - You should prepare your model and calibration dataset as follow: - - ~/working/model.onnx <-- your model - ~/working/data/*.npy or ~/working/data/*.bin <-- your dataset - -if you are using caffe model: - ~/working/model.caffemdoel <-- your model - ~/working/model.prototext <-- your model - -### MAKE SURE YOUR INPUT LAYOUT IS [N, C, H, W] or [C, H, W] ### - -quantized model will be generated at: ~/working/quantized.onnx -""" -from ppq import * -from ppq.api import * -import os -from calibration_dataset import getdataloader, TransformImage -import argparse -import random -import numpy as np -import torch - -def setseed(seed=42): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], - default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=20) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=300) - args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - - -config = parse_args() - -# modify configuration below: -WORKING_DIRECTORY = 'checkpoints' # choose your working directory -TARGET_PLATFORM = TargetPlatform.TRT_INT8 # choose your target platform -MODEL_TYPE = NetworkFramework.ONNX # or NetworkFramework.CAFFE -INPUT_LAYOUT = 'chw' # input 
data layout, chw or hwc -NETWORK_INPUTSHAPE = [64, 3, 300, 300] # input shape of your network -EXECUTING_DEVICE = 'cuda' # 'cuda' or 'cpu'. -REQUIRE_ANALYSE = False -TRAINING_YOUR_NETWORK = False # 是否需要 Finetuning 一下你的网络 -# ------------------------------------------------------------------- -# 加载你的模型文件,PPQ 将会把 onnx 或者 caffe 模型文件解析成自己的格式 -# 如果你正使用 pytorch, tensorflow 等框架,你可以先将模型导出成 onnx -# 使用 torch.onnx.export 即可,如果你在导出 torch 模型时发生错误,欢迎与我们联系。 -# ------------------------------------------------------------------- -graph = None -if MODEL_TYPE == NetworkFramework.ONNX: - graph = load_onnx_graph(onnx_import_file=config.model) -if MODEL_TYPE == NetworkFramework.CAFFE: - graph = load_caffe_graph( - caffemodel_path=os.path.join(WORKING_DIRECTORY, 'model.caffemodel'), - prototxt_path=os.path.join(WORKING_DIRECTORY, 'model.prototxt')) -assert graph is not None, 'Graph Loading Error, Check your input again.' - -# ------------------------------------------------------------------- -# SETTING 对象用于控制 PPQ 的量化逻辑,主要描述了图融合逻辑、调度方案、量化细节策略等 -# 当你的网络量化误差过高时,你需要修改 SETTING 对象中的属性来进行特定的优化 -# ------------------------------------------------------------------- -QS = QuantizationSettingFactory.default_setting() - -# ------------------------------------------------------------------- -# 下面向你展示了如何使用 finetuning 过程提升量化精度 -# 在 PPQ 中我们提供了十余种算法用来帮助你恢复精度 -# 开启他们的方式都是 QS.xxxx = True -# 按需使用,不要全部打开,容易起飞 -# ------------------------------------------------------------------- -if TRAINING_YOUR_NETWORK: - QS.lsq_optimization = True # 启动网络再训练过程,降低量化误差 - QS.lsq_optimization_setting.steps = 500 # 再训练步数,影响训练时间,500 步大概几分钟 - QS.lsq_optimization_setting.collecting_device = 'cuda' # 缓存数据放在那,cuda 就是放在gpu,如果显存超了你就换成 'cpu' - -val_tf = TransformImage( - imgsize=config.imgsz, - scale=0.875, - preserve_aspect_ratio=True - ) - -dataloader = getdataloader(config.dataset_dir, val_tf, config.step, config.bsz, img_sz=config.imgsz) -# ENABLE CUDA KERNEL 会加速量化效率 3x ~ 10x,但是你如果没有装相应编译环境的话是编译不了的 -# 你可以尝试安装编译环境,或者在不启动 CUDA KERNEL 
的情况下完成量化:移除 with ENABLE_CUDA_KERNEL(): 即可 -with ENABLE_CUDA_KERNEL(): - print('网络正量化中,根据你的量化配置,这将需要一段时间:') - quantized = quantize_native_model( - setting=QS, # setting 对象用来控制标准量化逻辑 - model=graph, - calib_dataloader=dataloader, - calib_steps=config.step, - input_shape=NETWORK_INPUTSHAPE, # 如果你的网络只有一个输入,使用这个参数传参 - inputs=None, - # 如果你的网络有多个输入,使用这个参数传参,就是 input_shape=None, inputs=[torch.zeros(1,3,224,224), torch.zeros(1,3,224,224)] - collate_fn=lambda x: x[0].to(EXECUTING_DEVICE), # collate_fn 跟 torch dataloader 的 collate fn 是一样的,用于数据预处理, - # 你当然也可以用 torch dataloader 的那个,然后设置这个为 None - platform=TARGET_PLATFORM, - device=EXECUTING_DEVICE, - do_quantize=True) - - # ------------------------------------------------------------------- - # 如果你需要执行量化后的神经网络并得到结果,则需要创建一个 executor - # 这个 executor 的行为和 torch.Module 是类似的,你可以利用这个东西来获取执行结果 - # 请注意,必须在 export 之前执行此操作。 - # ------------------------------------------------------------------- - executor = TorchExecutor(graph=quantized, device=EXECUTING_DEVICE) - # output = executor.forward(input) - - # ------------------------------------------------------------------- - # PPQ 计算量化误差时,使用信噪比的倒数作为指标,即噪声能量 / 信号能量 - # 量化误差 0.1 表示在整体信号中,量化噪声的能量约为 10% - # 你应当注意,在 graphwise_error_analyse 分析中,我们衡量的是累计误差 - # 网络的最后一层往往都具有较大的累计误差,这些误差是其前面的所有层所共同造成的 - # 你需要使用 layerwise_error_analyse 逐层分析误差的来源 - # ------------------------------------------------------------------- - print('正计算网络量化误差(SNR),最后一层的误差应小于 0.1 以保证量化精度:') - reports = graphwise_error_analyse( - graph=quantized, running_device=EXECUTING_DEVICE, steps=32, - dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE)) - for op, snr in reports.items(): - if snr > 0.1: ppq_warning(f'层 {op} 的累计量化误差显著,请考虑进行优化') - - if REQUIRE_ANALYSE: - print('正计算逐层量化误差(SNR),每一层的独立量化误差应小于 0.1 以保证量化精度:') - layerwise_error_analyse(graph=quantized, running_device=EXECUTING_DEVICE, - interested_outputs=None, - dataloader=dataloader, collate_fn=lambda x: x.to(EXECUTING_DEVICE)) - - # 
------------------------------------------------------------------- - # 使用 export_ppq_graph 函数来导出量化后的模型 - # PPQ 会根据你所选择的导出平台来修改模型格式 - # ------------------------------------------------------------------- - print('网络量化结束,正在生成目标文件:') - export_ppq_graph( - graph=quantized, platform=TARGET_PLATFORM, - graph_save_to=os.path.join(config.save_dir, f"quantized_{config.model_name}.onnx"), - config_save_to=os.path.join(config.save_dir, 'quant_cfg.json')) \ No newline at end of file diff --git a/models/cv/classification/inception_resnet_v2/ixrt/requirements.txt b/models/cv/classification/inception_resnet_v2/ixrt/requirements.txt deleted file mode 100644 index 7d96aa09..00000000 --- a/models/cv/classification/inception_resnet_v2/ixrt/requirements.txt +++ /dev/null @@ -1,11 +0,0 @@ -tqdm -onnx -onnxsim -ultralytics -pycocotools -Pillow -tabulate -pycuda -opencv-python==4.6.0.66 -ppq -protobuf==3.20.0 \ No newline at end of file diff --git a/models/cv/classification/inception_resnet_v2/ixrt/scripts/infer_inceptionresnetv2_fp16_accuracy.sh b/models/cv/classification/inception_resnet_v2/ixrt/scripts/infer_inception_resnet_v2_fp16_accuracy.sh old mode 100755 new mode 100644 similarity index 84% rename from models/cv/classification/inception_resnet_v2/ixrt/scripts/infer_inceptionresnetv2_fp16_accuracy.sh rename to models/cv/classification/inception_resnet_v2/ixrt/scripts/infer_inception_resnet_v2_fp16_accuracy.sh index fa4dc990..3c4ea224 --- a/models/cv/classification/inception_resnet_v2/ixrt/scripts/infer_inceptionresnetv2_fp16_accuracy.sh +++ b/models/cv/classification/inception_resnet_v2/ixrt/scripts/infer_inception_resnet_v2_fp16_accuracy.sh @@ -1,18 +1,4 @@ #!/bin/bash -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. EXIT_STATUS=0 check_status() diff --git a/models/cv/classification/inception_resnet_v2/ixrt/scripts/infer_inceptionresnetv2_fp16_performance.sh b/models/cv/classification/inception_resnet_v2/ixrt/scripts/infer_inception_resnet_v2_fp16_performance.sh old mode 100755 new mode 100644 similarity index 84% rename from models/cv/classification/inception_resnet_v2/ixrt/scripts/infer_inceptionresnetv2_fp16_performance.sh rename to models/cv/classification/inception_resnet_v2/ixrt/scripts/infer_inception_resnet_v2_fp16_performance.sh index 6b81d421..bb92ffda --- a/models/cv/classification/inception_resnet_v2/ixrt/scripts/infer_inceptionresnetv2_fp16_performance.sh +++ b/models/cv/classification/inception_resnet_v2/ixrt/scripts/infer_inception_resnet_v2_fp16_performance.sh @@ -1,18 +1,4 @@ #!/bin/bash -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
EXIT_STATUS=0 check_status() diff --git a/models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v10_int8_accuracy.sh b/models/cv/classification/inception_resnet_v2/ixrt/scripts/infer_inception_resnet_v2_int8_accuracy.sh similarity index 83% rename from models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v10_int8_accuracy.sh rename to models/cv/classification/inception_resnet_v2/ixrt/scripts/infer_inception_resnet_v2_int8_accuracy.sh index e35dcc10..7798599a 100644 --- a/models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v10_int8_accuracy.sh +++ b/models/cv/classification/inception_resnet_v2/ixrt/scripts/infer_inception_resnet_v2_int8_accuracy.sh @@ -1,18 +1,4 @@ #!/bin/bash -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
EXIT_STATUS=0 check_status() @@ -23,7 +9,7 @@ check_status() } # Run paraments -BSZ=32 +BSZ=64 TGT=-1 WARM_UP=0 LOOP_COUNT=-1 @@ -52,6 +38,8 @@ echo RUN_DIR : ${RUN_DIR} echo CONFIG_DIR : ${CONFIG_DIR} echo ====================== Model Info ====================== echo Model Name : ${MODEL_NAME} +echo Model Input Name : ${MODEL_INPUT_NAME} +echo Model Output Name : ${MODEL_OUTPUT_NAME} echo Onnx Path : ${ORIGINE_MODEL} step=0 diff --git a/models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v10_int8_performance.sh b/models/cv/classification/inception_resnet_v2/ixrt/scripts/infer_inception_resnet_v2_int8_performance.sh similarity index 83% rename from models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v10_int8_performance.sh rename to models/cv/classification/inception_resnet_v2/ixrt/scripts/infer_inception_resnet_v2_int8_performance.sh index 12c3bc0b..3335992a 100644 --- a/models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v10_int8_performance.sh +++ b/models/cv/classification/inception_resnet_v2/ixrt/scripts/infer_inception_resnet_v2_int8_performance.sh @@ -1,18 +1,4 @@ #!/bin/bash -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
EXIT_STATUS=0 check_status() @@ -23,7 +9,7 @@ check_status() } # Run paraments -BSZ=32 +BSZ=64 TGT=-1 WARM_UP=3 LOOP_COUNT=20 @@ -52,6 +38,8 @@ echo RUN_DIR : ${RUN_DIR} echo CONFIG_DIR : ${CONFIG_DIR} echo ====================== Model Info ====================== echo Model Name : ${MODEL_NAME} +echo Model Input Name : ${MODEL_INPUT_NAME} +echo Model Output Name : ${MODEL_OUTPUT_NAME} echo Onnx Path : ${ORIGINE_MODEL} step=0 diff --git a/models/cv/classification/inception_resnet_v2/ixrt/scripts/infer_inceptionresnetv2_int8_accuracy.sh b/models/cv/classification/inception_resnet_v2/ixrt/scripts/infer_inceptionresnetv2_int8_accuracy.sh deleted file mode 100755 index 0ed9dc0e..00000000 --- a/models/cv/classification/inception_resnet_v2/ixrt/scripts/infer_inceptionresnetv2_int8_accuracy.sh +++ /dev/null @@ -1,145 +0,0 @@ -#!/bin/bash -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-set -x -EXIT_STATUS=0 -check_status() -{ - if ((${PIPESTATUS[0]} != 0));then - EXIT_STATUS=1 - fi -} - -# Run paraments -BSZ=64 -TGT=-1 -WARM_UP=0 -LOOP_COUNT=-1 -RUN_MODE=ACC -PRECISION=int8 -export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python -# Update arguments -index=0 -options=$@ -arguments=($options) -for argument in $options -do - index=`expr $index + 1` - case $argument in - --bs) BSZ=${arguments[index]};; - --tgt) TGT=${arguments[index]};; - esac -done - -source ${CONFIG_DIR} -echo ${QUANT_OBSERVER} -ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} - -echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} -echo DATASETS_DIR : ${DATASETS_DIR} -echo RUN_DIR : ${RUN_DIR} -echo CONFIG_DIR : ${CONFIG_DIR} -echo ====================== Model Info ====================== -echo Model Name : ${MODEL_NAME} -echo Model Input Name : ${MODEL_INPUT_NAME} -echo Model Output Name : ${MODEL_OUTPUT_NAME} -echo Onnx Path : ${ORIGINE_MODEL} - -step=0 -SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx - -# Simplify Model -let step++ - echo [STEP ${step}] : Simplify Model - if [ -f ${SIM_MODEL} ];then - echo " "Simplify Model, ${SIM_MODEL} has been existed - else - python3 ${RUN_DIR}/simplify_model.py \ - --origin_model $ORIGINE_MODEL \ - --output_model ${SIM_MODEL} - echo " "Generate ${SIM_MODEL} - fi - -# Quant Model -if [ $PRECISION == "int8" ];then - let step++ - echo; - echo [STEP ${step}] : Quant Model - if [[ -z ${QUANT_EXIST_ONNX} ]];then - QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx - fi - if [[ -f ${QUANT_EXIST_ONNX} ]];then - SIM_MODEL=${QUANT_EXIST_ONNX} - echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed - else - python3 ${RUN_DIR}/quant.py \ - --model ${SIM_MODEL} \ - --model_name ${MODEL_NAME} \ - --dataset_dir ${DATASETS_DIR} \ - --observer ${QUANT_OBSERVER} \ - --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ - --save_dir $CHECKPOINTS_DIR \ - --bsz ${QUANT_BATCHSIZE} \ - --step ${QUANT_STEP} \ - --seed ${QUANT_SEED} \ - --imgsz ${IMGSIZE} 
- SIM_MODEL=${QUANT_EXIST_ONNX} - echo " "Generate ${SIM_MODEL} - fi -fi - - # Change Batchsize - let step++ - echo; - echo [STEP ${step}] : Change Batchsize - FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx - if [ -f $FINAL_MODEL ];then - echo " "Change Batchsize Skip, $FINAL_MODEL has been existed - else - python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ - --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} - echo " "Generate ${FINAL_MODEL} - fi - - # Build Engine - let step++ - echo; - echo [STEP ${step}] : Build Engine - ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine - if [ -f $ENGINE_FILE ];then - echo " "Build Engine Skip, $ENGINE_FILE has been existed - else - python3 ${RUN_DIR}/build_i8_engine.py \ - --onnx ${FINAL_MODEL} \ - --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \ - --engine ${ENGINE_FILE} - echo " "Generate Engine ${ENGINE_FILE} - fi - -# Inference -# let step++ -echo; -echo [STEP ${step}] : Inference -python3 ${RUN_DIR}/inference.py \ - --engine_file=${ENGINE_FILE} \ - --datasets_dir=${DATASETS_DIR} \ - --imgsz=${IMGSIZE} \ - --warm_up=${WARM_UP} \ - --loop_count ${LOOP_COUNT} \ - --test_mode ${RUN_MODE} \ - --acc_target ${TGT} \ - --bsz ${BSZ}; check_status - -exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/inception_resnet_v2/ixrt/scripts/infer_inceptionresnetv2_int8_performance.sh b/models/cv/classification/inception_resnet_v2/ixrt/scripts/infer_inceptionresnetv2_int8_performance.sh deleted file mode 100755 index 85852d8b..00000000 --- a/models/cv/classification/inception_resnet_v2/ixrt/scripts/infer_inceptionresnetv2_int8_performance.sh +++ /dev/null @@ -1,144 +0,0 @@ -#!/bin/bash -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -EXIT_STATUS=0 -check_status() -{ - if ((${PIPESTATUS[0]} != 0));then - EXIT_STATUS=1 - fi -} - -# Run paraments -BSZ=64 -TGT=-1 -WARM_UP=3 -LOOP_COUNT=20 -RUN_MODE=FPS -PRECISION=int8 -export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python -# Update arguments -index=0 -options=$@ -arguments=($options) -for argument in $options -do - index=`expr $index + 1` - case $argument in - --bs) BSZ=${arguments[index]};; - --tgt) TGT=${arguments[index]};; - esac -done - -source ${CONFIG_DIR} -echo ${QUANT_OBSERVER} -ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} - -echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} -echo DATASETS_DIR : ${DATASETS_DIR} -echo RUN_DIR : ${RUN_DIR} -echo CONFIG_DIR : ${CONFIG_DIR} -echo ====================== Model Info ====================== -echo Model Name : ${MODEL_NAME} -echo Model Input Name : ${MODEL_INPUT_NAME} -echo Model Output Name : ${MODEL_OUTPUT_NAME} -echo Onnx Path : ${ORIGINE_MODEL} - -step=0 -SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx - -# Simplify Model -let step++ - echo [STEP ${step}] : Simplify Model - if [ -f ${SIM_MODEL} ];then - echo " "Simplify Model, ${SIM_MODEL} has been existed - else - python3 ${RUN_DIR}/simplify_model.py \ - --origin_model $ORIGINE_MODEL \ - --output_model ${SIM_MODEL} - echo " "Generate ${SIM_MODEL} - fi - -# Quant Model -if [ $PRECISION == "int8" ];then - let step++ - echo; - echo [STEP ${step}] : Quant Model - if [[ -z ${QUANT_EXIST_ONNX} ]];then - QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx - fi - if [[ -f ${QUANT_EXIST_ONNX} ]];then - SIM_MODEL=${QUANT_EXIST_ONNX} - 
echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed - else - python3 ${RUN_DIR}/quant.py \ - --model ${SIM_MODEL} \ - --model_name ${MODEL_NAME} \ - --dataset_dir ${DATASETS_DIR} \ - --observer ${QUANT_OBSERVER} \ - --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ - --save_dir $CHECKPOINTS_DIR \ - --bsz ${QUANT_BATCHSIZE} \ - --step ${QUANT_STEP} \ - --seed ${QUANT_SEED} \ - --imgsz ${IMGSIZE} - SIM_MODEL=${QUANT_EXIST_ONNX} - echo " "Generate ${SIM_MODEL} - fi -fi - - # Change Batchsize - let step++ - echo; - echo [STEP ${step}] : Change Batchsize - FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx - if [ -f $FINAL_MODEL ];then - echo " "Change Batchsize Skip, $FINAL_MODEL has been existed - else - python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ - --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} - echo " "Generate ${FINAL_MODEL} - fi - - # Build Engine - let step++ - echo; - echo [STEP ${step}] : Build Engine - ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine - if [ -f $ENGINE_FILE ];then - echo " "Build Engine Skip, $ENGINE_FILE has been existed - else - python3 ${RUN_DIR}/build_i8_engine.py \ - --onnx ${FINAL_MODEL} \ - --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \ - --engine ${ENGINE_FILE} - echo " "Generate Engine ${ENGINE_FILE} - fi - -# Inference -# let step++ -echo; -echo [STEP ${step}] : Inference -python3 ${RUN_DIR}/inference.py \ - --engine_file=${ENGINE_FILE} \ - --datasets_dir=${DATASETS_DIR} \ - --imgsz=${IMGSIZE} \ - --warm_up=${WARM_UP} \ - --loop_count ${LOOP_COUNT} \ - --test_mode ${RUN_MODE} \ - --acc_target ${TGT} \ - --bsz ${BSZ}; check_status - -exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/inception_resnet_v2/ixrt/simplify_model.py b/models/cv/classification/inception_resnet_v2/ixrt/simplify_model.py deleted file mode 100644 index 4d53a474..00000000 --- a/models/cv/classification/inception_resnet_v2/ixrt/simplify_model.py +++ 
/dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/inception_v3/ixrt/README.md b/models/cv/classification/inception_v3/ixrt/README.md index 1f6326a2..7beb9105 100755 --- a/models/cv/classification/inception_v3/ixrt/README.md +++ b/models/cv/classification/inception_v3/ixrt/README.md @@ -27,7 +27,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion @@ -41,12 +41,11 @@ python3 export_onnx.py --origin_model inception_v3_google-0cc3c7bd.pth --output_ ## Model Inference ```bash -export PROJ_DIR=/Path/to/inception_v3/ixrt +export PROJ_DIR=./ export 
DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints -export RUN_DIR=/Path/to/inception_v3/ixrt -export CONFIG_DIR=/Path/to/config/INCEPTION_V3_CONFIG -export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/INCEPTION_V3_CONFIG ``` ### FP16 diff --git a/models/cv/classification/inception_v3/ixrt/build_engine.py b/models/cv/classification/inception_v3/ixrt/build_engine.py deleted file mode 100755 index 41e6af8d..00000000 --- a/models/cv/classification/inception_v3/ixrt/build_engine.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt -from calibration_dataset import getdataloader -import cuda.cudart as cudart - -def assertSuccess(err): - assert(err == cudart.cudaError_t.cudaSuccess) - -class EngineCalibrator(tensorrt.IInt8EntropyCalibrator2): - - def __init__(self, cache_file, datasets_dir, loop_count=10, bsz=1, img_sz=224): - super().__init__() - self.cache_file = cache_file - self.image_batcher = getdataloader(datasets_dir, loop_count, batch_size=bsz, img_sz=img_sz) - self.batch_generator = iter(self.image_batcher) - size = img_sz*img_sz*3*bsz - __import__('pdb').set_trace() - err, self.batch_allocation = cudart.cudaMalloc(size) - assertSuccess(err) - - def __del__(self): - err,= cudart.cudaFree(self.batch_allocation) - assertSuccess(err) - - def get_batch_size(self): - return self.image_batcher.batch_size - - def get_batch(self, names): - try: - batch, _ = next(self.batch_generator) - batch = batch.numpy() - __import__('pdb').set_trace() - cudart.cudaMemcpy(self.batch_allocation, - np.ascontiguousarray(batch), - batch.nbytes, - cudart.cudaMemcpyKind.cudaMemcpyHostToDevice) - return [int(self.batch_allocation)] - except StopIteration: - return None - - def read_calibration_cache(self): - if os.path.exists(self.cache_file): - with open(self.cache_file, "rb") as f: - return f.read() - - def write_calibration_cache(self, cache): - with open(self.cache_file, "wb") as f: - f.write(cache) - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.VERBOSE) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - print("precision : ", 
precision) - build_config.set_flag(precision) - if config.precision == "int8": - build_config.int8_calibrator = EngineCalibrator("int8_cache", config.datasets_dir) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/inception_v3/ixrt/build_i8_engine.py b/models/cv/classification/inception_v3/ixrt/build_i8_engine.py deleted file mode 100644 index 6e356260..00000000 --- a/models/cv/classification/inception_v3/ixrt/build_i8_engine.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import argparse -import json -import os - -import tensorrt -import tensorrt as trt - -TRT_LOGGER = trt.Logger(tensorrt.Logger.VERBOSE) - -EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - - -def GiB(val): - return val * 1 << 30 - - -def json_load(filename): - with open(filename) as json_file: - data = json.load(json_file) - return data - - -def setDynamicRange(network, json_file): - """Sets ranges for network layers.""" - quant_param_json = json_load(json_file) - act_quant = quant_param_json["act_quant_info"] - - for i in range(network.num_inputs): - input_tensor = network.get_input(i) - if act_quant.__contains__(input_tensor.name): - print(input_tensor.name) - value = act_quant[input_tensor.name] - tensor_max = abs(value) - tensor_min = -abs(value) - input_tensor.dynamic_range = (tensor_min, tensor_max) - - for i in range(network.num_layers): - layer = network.get_layer(i) - - for output_index in range(layer.num_outputs): - tensor = layer.get_output(output_index) - - if act_quant.__contains__(tensor.name): - value = act_quant[tensor.name] - tensor_max = abs(value) - tensor_min = -abs(value) - tensor.dynamic_range = (tensor_min, tensor_max) - else: - print("\033[1;32m%s\033[0m" % tensor.name) - - -def build_engine(onnx_file, json_file, engine_file): - builder = trt.Builder(TRT_LOGGER) - network = builder.create_network(EXPLICIT_BATCH) - - config = builder.create_builder_config() - - # If it is a dynamic onnx model , you need to add the following. 
- # profile = builder.create_optimization_profile() - # profile.set_shape("input_name", (batch, channels, min_h, min_w), (batch, channels, opt_h, opt_w), (batch, channels, max_h, max_w)) - # config.add_optimization_profile(profile) - - parser = trt.OnnxParser(network, TRT_LOGGER) - # config.max_workspace_size = GiB(1) - if not os.path.exists(onnx_file): - quit("ONNX file {} not found".format(onnx_file)) - - with open(onnx_file, "rb") as model: - if not parser.parse(model.read()): - print("ERROR: Failed to parse the ONNX file.") - for error in range(parser.num_errors): - print(parser.get_error(error)) - return None - - config.set_flag(trt.BuilderFlag.INT8) - - setDynamicRange(network, json_file) - - engine = builder.build_engine(network, config) - - with open(engine_file, "wb") as f: - f.write(engine.serialize()) - - -if __name__ == "__main__": - # Add plugins if needed - # import ctypes - # ctypes.CDLL("libmmdeploy_tensorrt_ops.so") - parser = argparse.ArgumentParser( - description="Writing qparams to onnx to convert tensorrt engine." - ) - parser.add_argument("--onnx", type=str, default=None) - parser.add_argument("--qparam_json", type=str, default=None) - parser.add_argument("--engine", type=str, default=None) - arg = parser.parse_args() - - build_engine(arg.onnx, arg.qparam_json, arg.engine) - print("\033[1;32mgenerate %s\033[0m" % arg.engine) \ No newline at end of file diff --git a/models/cv/classification/inception_v3/ixrt/calibration_dataset.py b/models/cv/classification/inception_v3/ixrt/calibration_dataset.py deleted file mode 100644 index d7525d51..00000000 --- a/models/cv/classification/inception_v3/ixrt/calibration_dataset.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 
0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/inception_v3/ixrt/ci/prepare.sh b/models/cv/classification/inception_v3/ixrt/ci/prepare.sh index a9b110fd..39ed126c 100644 --- a/models/cv/classification/inception_v3/ixrt/ci/prepare.sh +++ b/models/cv/classification/inception_v3/ixrt/ci/prepare.sh @@ -25,6 +25,6 @@ else echo "Not Support Os" fi -pip install -r requirements.txt +pip install -r ../../ixrt_common/requirements.txt mkdir checkpoints -python3 export_onnx.py --origin_model /root/data/checkpoints/inception_v3.pth --output_model checkpoints/inception_v3.onnx \ No newline at end of file +python3 export_onnx.py --origin_model inception_v3_google-0cc3c7bd.pth --output_model checkpoints/inception_v3.onnx \ No newline at end of file diff --git a/models/cv/classification/inception_v3/ixrt/common.py b/models/cv/classification/inception_v3/ixrt/common.py deleted file mode 100644 index abdc147c..00000000 --- a/models/cv/classification/inception_v3/ixrt/common.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX 
Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -import pycuda.driver as cuda - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - allocation = cuda.mem_alloc(size) - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - } - 
print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/inception_v3/ixrt/inference.py b/models/cv/classification/inception_v3/ixrt/inference.py deleted file mode 100644 index fcca27a3..00000000 --- a/models/cv/classification/inception_v3/ixrt/inference.py +++ /dev/null @@ -1,162 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -import pycuda.autoinit -import pycuda.driver as cuda -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = 
np.ascontiguousarray(batch_data) - total_sample += batch_data.shape[0] - - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - end2end_time = end_time - start_time - - print(F"E2E time : {end2end_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 
0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/inception_v3/ixrt/modify_batchsize.py b/models/cv/classification/inception_v3/ixrt/modify_batchsize.py deleted file mode 100644 index 4ac42a30..00000000 --- a/models/cv/classification/inception_v3/ixrt/modify_batchsize.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. 
- dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/inception_v3/ixrt/quant.py b/models/cv/classification/inception_v3/ixrt/quant.py deleted file mode 100644 index 40cd6171..00000000 --- a/models/cv/classification/inception_v3/ixrt/quant.py +++ /dev/null @@ -1,167 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -"""这是一个高度自动化的 PPQ 量化的入口脚本,将你的模型和数据按要求进行打包: - -在自动化 API 中,我们使用 QuantizationSetting 对象传递量化参数。 - -This file will show you how to quantize your network with PPQ - You should prepare your model and calibration dataset as follow: - - ~/working/model.onnx <-- your model - ~/working/data/*.npy or ~/working/data/*.bin <-- your dataset - -if you are using caffe model: - ~/working/model.caffemdoel <-- your model - ~/working/model.prototext <-- your model - -### MAKE SURE YOUR INPUT LAYOUT IS [N, C, H, W] or [C, H, W] ### - -quantized model will be generated at: ~/working/quantized.onnx -""" -from ppq import * -from ppq.api import * -import os -from calibration_dataset import getdataloader -import argparse -import random -import numpy as np -import torch - -def setseed(seed=42): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], - default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=20) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=288) - args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - - -config = parse_args() - -# modify configuration below: -WORKING_DIRECTORY = 'checkpoints' # choose your working directory -TARGET_PLATFORM = TargetPlatform.TRT_INT8 # choose your target platform -MODEL_TYPE = NetworkFramework.ONNX # or NetworkFramework.CAFFE -INPUT_LAYOUT = 'chw' # input data layout, 
chw or hwc -NETWORK_INPUTSHAPE = [32, 3, 224, 224] # input shape of your network -EXECUTING_DEVICE = 'cuda' # 'cuda' or 'cpu'. -REQUIRE_ANALYSE = False -TRAINING_YOUR_NETWORK = False # 是否需要 Finetuning 一下你的网络 -# ------------------------------------------------------------------- -# 加载你的模型文件,PPQ 将会把 onnx 或者 caffe 模型文件解析成自己的格式 -# 如果你正使用 pytorch, tensorflow 等框架,你可以先将模型导出成 onnx -# 使用 torch.onnx.export 即可,如果你在导出 torch 模型时发生错误,欢迎与我们联系。 -# ------------------------------------------------------------------- -graph = None -if MODEL_TYPE == NetworkFramework.ONNX: - graph = load_onnx_graph(onnx_import_file=config.model) -if MODEL_TYPE == NetworkFramework.CAFFE: - graph = load_caffe_graph( - caffemodel_path=os.path.join(WORKING_DIRECTORY, 'model.caffemodel'), - prototxt_path=os.path.join(WORKING_DIRECTORY, 'model.prototxt')) -assert graph is not None, 'Graph Loading Error, Check your input again.' - -# ------------------------------------------------------------------- -# SETTING 对象用于控制 PPQ 的量化逻辑,主要描述了图融合逻辑、调度方案、量化细节策略等 -# 当你的网络量化误差过高时,你需要修改 SETTING 对象中的属性来进行特定的优化 -# ------------------------------------------------------------------- -QS = QuantizationSettingFactory.default_setting() - -# ------------------------------------------------------------------- -# 下面向你展示了如何使用 finetuning 过程提升量化精度 -# 在 PPQ 中我们提供了十余种算法用来帮助你恢复精度 -# 开启他们的方式都是 QS.xxxx = True -# 按需使用,不要全部打开,容易起飞 -# ------------------------------------------------------------------- -if TRAINING_YOUR_NETWORK: - QS.lsq_optimization = True # 启动网络再训练过程,降低量化误差 - QS.lsq_optimization_setting.steps = 500 # 再训练步数,影响训练时间,500 步大概几分钟 - QS.lsq_optimization_setting.collecting_device = 'cuda' # 缓存数据放在那,cuda 就是放在gpu,如果显存超了你就换成 'cpu' - - - -dataloader = getdataloader(config.dataset_dir, config.step, config.bsz, img_sz=config.imgsz) -# ENABLE CUDA KERNEL 会加速量化效率 3x ~ 10x,但是你如果没有装相应编译环境的话是编译不了的 -# 你可以尝试安装编译环境,或者在不启动 CUDA KERNEL 的情况下完成量化:移除 with ENABLE_CUDA_KERNEL(): 即可 -with ENABLE_CUDA_KERNEL(): - print('网络正量化中,根据你的量化配置,这将需要一段时间:') - 
quantized = quantize_native_model( - setting=QS, # setting 对象用来控制标准量化逻辑 - model=graph, - calib_dataloader=dataloader, - calib_steps=config.step, - input_shape=NETWORK_INPUTSHAPE, # 如果你的网络只有一个输入,使用这个参数传参 - inputs=None, - # 如果你的网络有多个输入,使用这个参数传参,就是 input_shape=None, inputs=[torch.zeros(1,3,224,224), torch.zeros(1,3,224,224)] - collate_fn=lambda x: x[0].to(EXECUTING_DEVICE), # collate_fn 跟 torch dataloader 的 collate fn 是一样的,用于数据预处理, - # 你当然也可以用 torch dataloader 的那个,然后设置这个为 None - platform=TARGET_PLATFORM, - device=EXECUTING_DEVICE, - do_quantize=True) - - # ------------------------------------------------------------------- - # 如果你需要执行量化后的神经网络并得到结果,则需要创建一个 executor - # 这个 executor 的行为和 torch.Module 是类似的,你可以利用这个东西来获取执行结果 - # 请注意,必须在 export 之前执行此操作。 - # ------------------------------------------------------------------- - executor = TorchExecutor(graph=quantized, device=EXECUTING_DEVICE) - # output = executor.forward(input) - - # ------------------------------------------------------------------- - # PPQ 计算量化误差时,使用信噪比的倒数作为指标,即噪声能量 / 信号能量 - # 量化误差 0.1 表示在整体信号中,量化噪声的能量约为 10% - # 你应当注意,在 graphwise_error_analyse 分析中,我们衡量的是累计误差 - # 网络的最后一层往往都具有较大的累计误差,这些误差是其前面的所有层所共同造成的 - # 你需要使用 layerwise_error_analyse 逐层分析误差的来源 - # ------------------------------------------------------------------- - print('正计算网络量化误差(SNR),最后一层的误差应小于 0.1 以保证量化精度:') - reports = graphwise_error_analyse( - graph=quantized, running_device=EXECUTING_DEVICE, steps=32, - dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE)) - for op, snr in reports.items(): - if snr > 0.1: ppq_warning(f'层 {op} 的累计量化误差显著,请考虑进行优化') - - if REQUIRE_ANALYSE: - print('正计算逐层量化误差(SNR),每一层的独立量化误差应小于 0.1 以保证量化精度:') - layerwise_error_analyse(graph=quantized, running_device=EXECUTING_DEVICE, - interested_outputs=None, - dataloader=dataloader, collate_fn=lambda x: x.to(EXECUTING_DEVICE)) - - # ------------------------------------------------------------------- - # 使用 export_ppq_graph 函数来导出量化后的模型 - # PPQ 会根据你所选择的导出平台来修改模型格式 - # 
------------------------------------------------------------------- - print('网络量化结束,正在生成目标文件:') - export_ppq_graph( - graph=quantized, platform=TARGET_PLATFORM, - graph_save_to=os.path.join(config.save_dir, f"quantized_{config.model_name}.onnx"), - config_save_to=os.path.join(config.save_dir, 'quant_cfg.json')) \ No newline at end of file diff --git a/models/cv/classification/inception_v3/ixrt/refine_model.py b/models/cv/classification/inception_v3/ixrt/refine_model.py deleted file mode 100644 index 000ee4dc..00000000 --- a/models/cv/classification/inception_v3/ixrt/refine_model.py +++ /dev/null @@ -1,291 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import argparse -import dataclasses - -import torch -import onnx - -from refine_utils.matmul_to_gemm_pass import FusedGemmPass -from refine_utils.linear_pass import FusedLinearPass - -from refine_utils.common import * - -def get_constant_input_name_of_operator(graph: Graph, operator: Operator): - const = None - for input in operator.inputs: - if not graph.containe_var(input): - continue - - if not graph.is_leaf_variable(input): - continue - - input_var = graph.get_variable(input) - if input_var.value is not None: - const = input - return const - -class FuseLayerNormPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - find_sequence_subgraph( - graph, - [OP.REDUCE_MEAN, OP.SUB, OP.POW, OP.REDUCE_MEAN, OP.ADD, OP.SQRT, OP.DIV, OP.MUL, OP.ADD], - self.fuse_layer_norm, - strict=False - ) - return graph - - def fuse_layer_norm(self, graph: Graph, pattern: PatternGraph): - # 检查 REDUCE_MEAN 的输入是否和 SUB 的输入是一致的 - if pattern.nodes[0].operator.inputs[0] != pattern.nodes[1].operator.inputs[0]: - return - - # 检查 POW 的输入是否和 DIV 的输入是一致的 - if pattern.nodes[2].operator.inputs[0] != pattern.nodes[6].operator.inputs[0]: - return - - # 检查部分算子的输出是否被多个算子使用 - nodes = pattern.nodes - for node in [nodes[0]] + nodes[2:-1]: - next_ops = graph.get_next_operators(node.operator) - if len(next_ops) > 1: - return - - eps = None - for input in nodes[4].operator.inputs: - input_var = graph.get_variable(input) - if input_var.value is not None and graph.is_leaf_variable(input): - eps = to_py_type(input_var.value) - - scale = get_constant_input_name_of_operator(graph, nodes[-2].operator) - bias = get_constant_input_name_of_operator(graph, nodes[-1].operator) - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - bias_var = graph.get_variable(bias) - print(bias_var) - - attributes = { - "axis": nodes[0].operator.attributes.axes, - "epsilon": eps, - } - - - layer_norm_op = self.transform.make_operator( 
- op_type="LayerNormalization", - inputs=[nodes[0].operator.inputs[0], scale, bias], - outputs=[nodes[-1].operator.outputs[0]], - **attributes - ) - - self.transform.add_operator(layer_norm_op) - -class FusedGeluPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True - ) - return graph - - def fuse_gelu(self, graph: Graph, pattern: PatternGraph): - nodes = pattern.nodes - prev_op = self.transform.get_previous_operators(nodes[0].operator)[0] - next_ops = self.transform.get_next_operators(prev_op) - if len(next_ops) != 2: - return - - if nodes[0].operator not in next_ops or nodes[3].operator not in next_ops: - return - - gelu_op_input = None - for input in nodes[3].operator.inputs: - if input in nodes[0].operator.inputs: - gelu_op_input = input - break - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - gelu_op = self.transform.make_operator( - op_type=OP.GELU, - inputs=[gelu_op_input], - outputs=[nodes[-1].operator.outputs[0]] - ) - self.transform.add_operator(gelu_op) - -@dataclasses.dataclass -class NormalizeAttr(BaseOperatorAttr): - p: float = 2.0 - epsilon: float = 1e-12 - axis: int = 1 - - -@registe_operator(OP.GELU) -class GeluOperator(BaseOperator): - - def call( - self, - executor, - operator: Operator, - inputs: List, - attr: NormalizeAttr, - ): - return F.gelu(inputs[0]) - - def convert_onnx_operator( - self, ir_graph: Graph, onnx_graph: onnx.GraphProto, node: onnx.NodeProto - ) -> Operator: - return default_converter(ir_graph, onnx_graph, node, attr_cls=attr.EmptyAttr) - - def quantize( - self, - graph: Graph, - op: Operator, - operator_observer_config: QuantOperatorObserverConfig, - quant_outputs: bool = False, - ): - return quant_single_input_operator(graph, op, operator_observer_config, quant_outputs=quant_outputs) - - - -class 
ClearUnsedVariables(BasePass): - - def process(self, graph: Graph) -> Graph: - vars = list(graph.variables) - - for var in vars: - if len(graph.get_dst_operators(var)) == 0 and graph.is_leaf_variable(var): - graph.delete_variable(var) - - quant_params = list(graph.quant_parameters.keys()) - for var in quant_params: - if not graph.containe_var(var): - graph.quant_parameters.pop(var) - - return graph - -class FormatLayerNorm(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if "LayerNorm" in op.op_type: - self.format_layer_norm(graph, op) - return graph - - def format_layer_norm(self, graph, operator): - if not hasattr(operator.attributes, "axis"): - return - if isinstance(operator.attributes.axis, (tuple, list)): - operator.attributes.axis = operator.attributes.axis[0] - -class FormatReshape(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if op.op_type == "Reshape": - self.format_reshape(graph, op) - - return graph - - def format_reshape(self, graph, operator): - shape = graph.get_variable(operator.inputs[1]) - shape.value = torch.tensor(shape.value, dtype=torch.int64) - -class FormatScalar(BasePass): - - def process(self, graph: Graph): - for var in graph.variables.values(): - var: Variable - use_ops = graph.get_dst_operators(var) - - if len(use_ops) == 0: - continue - - if use_ops[0].op_type not in [OP.MUL, OP.ADD, OP.GATHER]: - continue - - if var.value is not None and var.value.ndim == 0: - var.value = var.value.reshape(1) - print(f"Reshape scalar to tensor for {var.name}.") - - return graph - -class RenamePass(BasePass): - - def process(self, graph:Graph): - - names = [name for name in graph.operators.keys()] - for old_name in names: - new_name = old_name.replace("/", "#") - - graph.rename_operator(old_name, new_name) - - names = [name for name in graph.variables.keys()] - for name in names: - new_name = name.replace("/", ".").replace("Output", 
"out").replace("output", "out") - - graph.rename_vaiable(name, new_name, - with_variables=True, - with_operator_outputs=True) - - return graph - -def create_pipeline(example_inputs): - return PassSequence( - # FuseLayerNormPass(), - FusedGeluPass(), - - # ClearUnsedVariables(), - # FormatLayerNorm(), - # FormatReshape(), - # FormatScalar(), - # RenamePass() - ) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--onnx_path", type=str) - parser.add_argument("--dst_onnx_path", type=str) - - parser.add_argument("--bsz", type=int, default=8, - help="Batch size") - parser.add_argument("--imgsz", type=int, default=224, - help="Image size") - - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - - example_inputs = torch.randn(args.bsz, 3, args.imgsz, args.imgsz) - - refine_pipline = Pipeline( - create_source(f"{args.onnx_path}", example_inputs=example_inputs), - create_pipeline(example_inputs), - create_target( - f"{args.dst_onnx_path}", - example_inputs=example_inputs, - ) - ) - refine_pipline.run() - - print(f"refine the model, input shape={example_inputs.shape}") diff --git a/models/cv/classification/inception_v3/ixrt/requirements.txt b/models/cv/classification/inception_v3/ixrt/requirements.txt deleted file mode 100644 index bb2683dd..00000000 --- a/models/cv/classification/inception_v3/ixrt/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -pycuda -tqdm -onnx -onnxsim -tabulate -ppq -protobuf==3.20.0 \ No newline at end of file diff --git a/models/cv/classification/inception_v3/ixrt/scripts/infer_inception_v3_int8_accuracy.sh b/models/cv/classification/inception_v3/ixrt/scripts/infer_inception_v3_int8_accuracy.sh index cf11c5a6..e3a8bb78 100755 --- a/models/cv/classification/inception_v3/ixrt/scripts/infer_inception_v3_int8_accuracy.sh +++ b/models/cv/classification/inception_v3/ixrt/scripts/infer_inception_v3_int8_accuracy.sh @@ -113,20 +113,20 @@ else echo " "Generate ${FINAL_MODEL} fi - # Build Engine - let 
step++ - echo; - echo [STEP ${step}] : Build Engine - ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine - if [ -f $ENGINE_FILE ];then - echo " "Build Engine Skip, $ENGINE_FILE has been existed - else - python3 ${RUN_DIR}/build_i8_engine.py \ - --onnx ${FINAL_MODEL} \ - --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \ - --engine ${ENGINE_FILE} - echo " "Generate Engine ${ENGINE_FILE} - fi +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi # Inference let step++ diff --git a/models/cv/classification/inception_v3/ixrt/scripts/infer_inception_v3_int8_performance.sh b/models/cv/classification/inception_v3/ixrt/scripts/infer_inception_v3_int8_performance.sh index d13d7d01..e578762e 100755 --- a/models/cv/classification/inception_v3/ixrt/scripts/infer_inception_v3_int8_performance.sh +++ b/models/cv/classification/inception_v3/ixrt/scripts/infer_inception_v3_int8_performance.sh @@ -60,15 +60,16 @@ SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx # Simplify Model let step++ - echo [STEP ${step}] : Simplify Model - if [ -f ${SIM_MODEL} ];then - echo " "Simplify Model, ${SIM_MODEL} has been existed - else - python3 ${RUN_DIR}/simplify_model.py \ - --origin_model $ORIGINE_MODEL \ - --output_model ${SIM_MODEL} - echo " "Generate ${SIM_MODEL} - fi +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi # Quant Model if [ $PRECISION == "int8" ];then @@ -98,36 +99,36 @@ if [ $PRECISION 
== "int8" ];then fi fi - # Change Batchsize - let step++ - echo; - echo [STEP ${step}] : Change Batchsize - FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx - if [ -f $FINAL_MODEL ];then - echo " "Change Batchsize Skip, $FINAL_MODEL has been existed - else - python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ - --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} - echo " "Generate ${FINAL_MODEL} - fi +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi - # Build Engine - let step++ - echo; - echo [STEP ${step}] : Build Engine - ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine - if [ -f $ENGINE_FILE ];then - echo " "Build Engine Skip, $ENGINE_FILE has been existed - else - python3 ${RUN_DIR}/build_i8_engine.py \ - --onnx ${FINAL_MODEL} \ - --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \ - --engine ${ENGINE_FILE} - echo " "Generate Engine ${ENGINE_FILE} - fi +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi # Inference -# let step++ +let step++ echo; echo [STEP ${step}] : Inference python3 ${RUN_DIR}/inference.py \ @@ -137,7 +138,7 @@ python3 ${RUN_DIR}/inference.py \ --warm_up=${WARM_UP} \ --loop_count ${LOOP_COUNT} \ --test_mode ${RUN_MODE} \ - --acc_target ${TGT} \ + --fps_target ${TGT} \ --bsz ${BSZ}; check_status 
exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/inception_v3/ixrt/simplify_model.py b/models/cv/classification/inception_v3/ixrt/simplify_model.py deleted file mode 100644 index 4d53a474..00000000 --- a/models/cv/classification/inception_v3/ixrt/simplify_model.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/ixrt_common/README.md b/models/cv/classification/ixrt_common/README.md new file mode 100644 index 00000000..c672bcca --- /dev/null +++ b/models/cv/classification/ixrt_common/README.md @@ -0,0 +1 @@ +# 此代码是分类网络基于imagenet数据集的通用实现 \ No newline at end of file diff --git a/models/cv/classification/repvgg/ixrt/build_engine.py 
b/models/cv/classification/ixrt_common/build_engine.py similarity index 31% rename from models/cv/classification/repvgg/ixrt/build_engine.py rename to models/cv/classification/ixrt_common/build_engine.py index 32f549d8..007acd08 100644 --- a/models/cv/classification/repvgg/ixrt/build_engine.py +++ b/models/cv/classification/ixrt_common/build_engine.py @@ -1,18 +1,3 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- import os import cv2 import argparse @@ -20,21 +5,63 @@ import numpy as np import torch import tensorrt +import ixrt + +TRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin(TRT_LOGGER) def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) + if config.silent: + action = tensorrt.Logger.WARNING + else: + action = tensorrt.Logger.ERROR + IXRT_LOGGER = tensorrt.Logger(action) builder = tensorrt.Builder(IXRT_LOGGER) EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) network = builder.create_network(EXPLICIT_BATCH) build_config = builder.create_builder_config() parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) + if not parser.parse_from_file(config.model): + raise Exception(f"Failed to parse {config.model}, please check detailed debug info") precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 # print("precision : ", precision) build_config.set_flag(precision) + # due to fp16 of elementwise div of swin_v2 will exceed the range of f16 representation, so set fp32. 
+ if ("swin_v2_s_model_sim" in config.model) and (config.precision == "float16"): + build_config.set_flag(ixrt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS) + float32_set = {"/features/features.3/features.3.0/attn/Div_4", + "/features/features.3/features.3.1/attn/Div_4", + "/features/features.5/features.5.0/attn/Div_4", + "/features/features.5/features.5.1/attn/Div_4", + "/features/features.5/features.5.2/attn/Div_4", + "/features/features.5/features.5.3/attn/Div_4", + "/features/features.5/features.5.4/attn/Div_4", + "/features/features.5/features.5.5/attn/Div_4", + "/features/features.5/features.5.6/attn/Div_4", + "/features/features.5/features.5.7/attn/Div_4", + "/features/features.5/features.5.8/attn/Div_4", + "/features/features.5/features.5.9/attn/Div_4", + "/features/features.5/features.5.10/attn/Div_4", + "/features/features.5/features.5.11/attn/Div_4", + "/features/features.5/features.5.12/attn/Div_4", + "/features/features.5/features.5.13/attn/Div_4", + "/features/features.5/features.5.14/attn/Div_4", + "/features/features.5/features.5.15/attn/Div_4", + "/features/features.5/features.5.16/attn/Div_4", + "/features/features.5/features.5.17/attn/Div_4", + "/features/features.7/features.7.0/attn/Div_4", + "/features/features.7/features.7.1/attn/Div_4",} + for i in range(network.num_layers): + layer = network.get_layer(i) + if layer.name in float32_set: + layer.precision = ixrt.float32 + plan = builder.build_serialized_network(network, build_config) + if not plan: + raise Exception("Failed to build engine, please check detailed debug info") engine_file_path = config.engine with open(engine_file_path, "wb") as f: f.write(plan) @@ -45,9 +72,10 @@ def parse_args(): parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", help="The precision of datatype") parser.add_argument("--engine", type=str, default=None) + parser.add_argument("--silent", action="store_true") args = parser.parse_args() return args if __name__ == 
"__main__": args = parse_args() - main(args) \ No newline at end of file + main(args) diff --git a/models/cv/classification/mobilenet_v2/ixrt/calibration_dataset.py b/models/cv/classification/ixrt_common/calibration_dataset.py similarity index 49% rename from models/cv/classification/mobilenet_v2/ixrt/calibration_dataset.py rename to models/cv/classification/ixrt_common/calibration_dataset.py index 34bd570c..9c28c342 100644 --- a/models/cv/classification/mobilenet_v2/ixrt/calibration_dataset.py +++ b/models/cv/classification/ixrt_common/calibration_dataset.py @@ -1,19 +1,6 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- import os +import numpy as np +from PIL import Image import torch import torchvision.datasets @@ -59,6 +46,97 @@ class CalibrationImageNet(torchvision.datasets.ImageFolder): target = self.img2label_map[img_name] return sample, target + +class CalibrationRGBImageNet(torchvision.datasets.ImageFolder): + def __init__(self, *args, **kwargs): + super(CalibrationRGBImageNet, self).__init__(*args, **kwargs) + img2label_path = os.path.join(self.root, "val_map.txt") + if not os.path.exists(img2label_path): + raise FileNotFoundError(f"Not found label file `{img2label_path}`.") + + self.img2label_map = self.make_img2label_map(img2label_path) + + def make_img2label_map(self, path): + with open(path) as f: + lines = f.readlines() + + img2lable_map = dict() + for line in lines: + line = line.lstrip().rstrip().split("\t") + if len(line) != 2: + continue + img_name, label = line + img_name = img_name.strip() + if img_name in [None, ""]: + continue + label = int(label.strip()) + img2lable_map[img_name] = label + return img2lable_map + + def __getitem__(self, index): + path, target = self.samples[index] + sample = self.loader(path) + sample = sample.convert("RGB") + sample = np.array(sample)[:, :, ::-1] + sample = Image.fromarray(np.uint8(sample)) + if self.transform is not None: + sample = self.transform(sample) + img_name = os.path.basename(path) + target = self.img2label_map[img_name] + + return sample, target + + +def create_mobilenetv1_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): + dataset = CalibrationRGBImageNet( + data_path, + transform=T.Compose( + [ + T.Resize(256), + T.CenterCrop(img_sz), + T.ToTensor(), + T.Normalize(mean=[103.940002441/255, 116.779998779/255, 123.680000305/255], std=[1.0/(255*0.0170000009239), 1.0/(255*0.0170000009239), 1.0/(255*0.0170000009239)]), + ] + ), + ) + + calibration_dataset = dataset + if num_samples is not None: + calibration_dataset = torch.utils.data.Subset( + dataset, indices=range(num_samples) + ) + + 
calibration_dataloader = DataLoader( + calibration_dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + + verify_dataloader = DataLoader( + dataset, + shuffle=False, + batch_size=batch_size, + drop_last=False, + num_workers=workers, + ) + + return calibration_dataloader, verify_dataloader + + +def getmobilenetv1dataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): + num_samples = min(total_sample, step * batch_size) + if step < 0: + num_samples = None + calibration_dataloader, _ = create_mobilenetv1_dataloaders( + dataset_dir, + img_sz=img_sz, + batch_size=batch_size, + workers=workers, + num_samples=num_samples, + ) + return calibration_dataloader def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): @@ -110,4 +188,4 @@ def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, to workers=workers, num_samples=num_samples, ) - return calibration_dataloader + return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/resnext101_32x8d/ixrt/common.py b/models/cv/classification/ixrt_common/common.py similarity index 55% rename from models/cv/classification/resnext101_32x8d/ixrt/common.py rename to models/cv/classification/ixrt_common/common.py index 5abaf512..3406b60b 100644 --- a/models/cv/classification/resnext101_32x8d/ixrt/common.py +++ b/models/cv/classification/ixrt_common/common.py @@ -1,18 +1,3 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - import os import cv2 import glob @@ -44,7 +29,7 @@ def create_engine_context(engine_path, logger): return engine, context -def get_io_bindings(engine): +def _get_engine_io_bindings(engine): # Setup I/O bindings inputs = [] outputs = [] @@ -79,3 +64,58 @@ def get_io_bindings(engine): else: outputs.append(binding) return inputs, outputs, allocations + +def _convert_dtype_ort_to_np(type): + type = type.replace("tensor", "") + type = type.replace("(", "") + type = type.replace(")", "") + if type == "float": + type = "float32" + return np.dtype(type) + +def _get_bytes_of_tensor(shape, type: np.dtype): + size = type.itemsize + for s in shape: + size *= s + return size +def _alloc_gpu_tensor(shape, dtype): + size = _get_bytes_of_tensor(shape, dtype) + err, allocation = cudart.cudaMalloc(size) + assert err == cudart.cudaError_t.cudaSuccess + return allocation + +def _alloc_onnx_io_binding(io, index): + type = _convert_dtype_ort_to_np(io.type) + binding = { + "index": index, + "name": io.name, + "dtype": type, + "shape": io.shape, + "allocation": None, + "nbytes": _get_bytes_of_tensor(io.shape, type), + } + return binding +def _get_onnx_io_bindings(ort_session): + # Setup I/O bindings + inputs = [] + outputs = [] + allocations = [] + + index = 0 + for input in ort_session.get_inputs(): + binding = _alloc_onnx_io_binding(input, index) + index+=1 + inputs.append(binding) + allocations.append(binding) + for output in ort_session.get_outputs(): + binding = _alloc_onnx_io_binding(output, index) + index+=1 + outputs.append(binding) + allocations.append(binding) + return 
inputs, outputs, allocations + +def get_io_bindings(engine): + if isinstance(engine, tensorrt.ICudaEngine): + return _get_engine_io_bindings(engine) + else: + return _get_onnx_io_bindings(engine) diff --git a/models/cv/classification/alexnet/ixrt/config/ALEXNET_CONFIG b/models/cv/classification/ixrt_common/config/ALEXNET_CONFIG similarity index 52% rename from models/cv/classification/alexnet/ixrt/config/ALEXNET_CONFIG rename to models/cv/classification/ixrt_common/config/ALEXNET_CONFIG index 3081a326..fb5a8a73 100644 --- a/models/cv/classification/alexnet/ixrt/config/ALEXNET_CONFIG +++ b/models/cv/classification/ixrt_common/config/ALEXNET_CONFIG @@ -1,17 +1,3 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
# IMGSIZE : 模型输入hw大小 # MODEL_NAME : 生成onnx/engine的basename # ORIGINE_MODEL : 原始onnx文件名称 diff --git a/models/cv/classification/convnext_base/ixrt/config/CONVNEXT_CONFIG b/models/cv/classification/ixrt_common/config/CONVNEXT_BASE_CONFIG similarity index 100% rename from models/cv/classification/convnext_base/ixrt/config/CONVNEXT_CONFIG rename to models/cv/classification/ixrt_common/config/CONVNEXT_BASE_CONFIG diff --git a/models/cv/classification/convnext_small/ixrt/config/CONVNEXT_CONFIG b/models/cv/classification/ixrt_common/config/CONVNEXT_SMALL_CONFIG similarity index 96% rename from models/cv/classification/convnext_small/ixrt/config/CONVNEXT_CONFIG rename to models/cv/classification/ixrt_common/config/CONVNEXT_SMALL_CONFIG index 26112ba6..2cab7206 100644 --- a/models/cv/classification/convnext_small/ixrt/config/CONVNEXT_CONFIG +++ b/models/cv/classification/ixrt_common/config/CONVNEXT_SMALL_CONFIG @@ -16,7 +16,7 @@ # MODEL_NAME : 生成onnx/engine的basename # ORIGINE_MODEL : 原始onnx文件名称 IMGSIZE=224 -MODEL_NAME=Convnext_small +MODEL_NAME=convnext_small ORIGINE_MODEL=convnext_small.onnx # QUANT CONFIG (仅PRECISION为int8时生效) @@ -30,4 +30,4 @@ QUANT_BATCHSIZE=1 QUANT_STEP=32 QUANT_SEED=42 DISABLE_QUANT_LIST= -QUANT_EXIST_ONNX= +QUANT_EXIST_ONNX= \ No newline at end of file diff --git a/models/cv/classification/cspdarknet53/ixrt/config/CSPDARKNET53_CONFIG b/models/cv/classification/ixrt_common/config/CSPDARKNET53_CONFIG similarity index 100% rename from models/cv/classification/cspdarknet53/ixrt/config/CSPDARKNET53_CONFIG rename to models/cv/classification/ixrt_common/config/CSPDARKNET53_CONFIG diff --git a/models/cv/classification/cspresnet50/ixrt/config/CSPRESNET50_CONFIG b/models/cv/classification/ixrt_common/config/CSPRESNET50_CONFIG similarity index 53% rename from models/cv/classification/cspresnet50/ixrt/config/CSPRESNET50_CONFIG rename to models/cv/classification/ixrt_common/config/CSPRESNET50_CONFIG index cd0cb715..b44fd50f 100644 --- 
a/models/cv/classification/cspresnet50/ixrt/config/CSPRESNET50_CONFIG +++ b/models/cv/classification/ixrt_common/config/CSPRESNET50_CONFIG @@ -1,18 +1,3 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - # IMGSIZE : 模型输入hw大小 # MODEL_NAME : 生成onnx/engine的basename # ORIGINE_MODEL : 原始onnx文件名称 diff --git a/models/cv/classification/deit_tiny/ixrt/config/DEIT_TINY_CONFIG b/models/cv/classification/ixrt_common/config/DEIT_TINY_CONFIG similarity index 100% rename from models/cv/classification/deit_tiny/ixrt/config/DEIT_TINY_CONFIG rename to models/cv/classification/ixrt_common/config/DEIT_TINY_CONFIG diff --git a/models/cv/classification/densenet121/ixrt/config/DENSENET121_CONFIG b/models/cv/classification/ixrt_common/config/DENSENET121_CONFIG similarity index 100% rename from models/cv/classification/densenet121/ixrt/config/DENSENET121_CONFIG rename to models/cv/classification/ixrt_common/config/DENSENET121_CONFIG diff --git a/models/cv/classification/densenet161/ixrt/config/DENSENET_CONFIG b/models/cv/classification/ixrt_common/config/DENSENET161_CONFIG similarity index 100% rename from models/cv/classification/densenet161/ixrt/config/DENSENET_CONFIG rename to models/cv/classification/ixrt_common/config/DENSENET161_CONFIG diff --git a/models/cv/classification/densenet169/ixrt/config/DENSENET_CONFIG b/models/cv/classification/ixrt_common/config/DENSENET169_CONFIG similarity index 
98% rename from models/cv/classification/densenet169/ixrt/config/DENSENET_CONFIG rename to models/cv/classification/ixrt_common/config/DENSENET169_CONFIG index 73aeedee..960b6d47 100644 --- a/models/cv/classification/densenet169/ixrt/config/DENSENET_CONFIG +++ b/models/cv/classification/ixrt_common/config/DENSENET169_CONFIG @@ -30,4 +30,4 @@ QUANT_BATCHSIZE=1 QUANT_STEP=32 QUANT_SEED=42 DISABLE_QUANT_LIST= -QUANT_EXIST_ONNX= +QUANT_EXIST_ONNX= \ No newline at end of file diff --git a/models/cv/classification/densenet201/ixrt/config/DENSENET201_CONFIG b/models/cv/classification/ixrt_common/config/DENSENET201_CONFIG similarity index 100% rename from models/cv/classification/densenet201/ixrt/config/DENSENET201_CONFIG rename to models/cv/classification/ixrt_common/config/DENSENET201_CONFIG diff --git a/models/cv/classification/efficientnetv2_rw_t/ixrt/config/EFFICIENTNETV2_CONFIG b/models/cv/classification/ixrt_common/config/EFFICIENTNETV2_RW_T_CONFIG similarity index 100% rename from models/cv/classification/efficientnetv2_rw_t/ixrt/config/EFFICIENTNETV2_CONFIG rename to models/cv/classification/ixrt_common/config/EFFICIENTNETV2_RW_T_CONFIG diff --git a/models/cv/classification/efficientnet_b0/ixrt/config/EFFICIENTNET_B0_CONFIG b/models/cv/classification/ixrt_common/config/EFFICIENTNET_B0_CONFIG similarity index 100% rename from models/cv/classification/efficientnet_b0/ixrt/config/EFFICIENTNET_B0_CONFIG rename to models/cv/classification/ixrt_common/config/EFFICIENTNET_B0_CONFIG diff --git a/models/cv/classification/efficientnet_b1/ixrt/config/EFFICIENTNET_B1_CONFIG b/models/cv/classification/ixrt_common/config/EFFICIENTNET_B1_CONFIG similarity index 100% rename from models/cv/classification/efficientnet_b1/ixrt/config/EFFICIENTNET_B1_CONFIG rename to models/cv/classification/ixrt_common/config/EFFICIENTNET_B1_CONFIG diff --git a/models/cv/classification/efficientnet_b2/ixrt/config/EFFICIENTNET_B2_CONFIG 
b/models/cv/classification/ixrt_common/config/EFFICIENTNET_B2_CONFIG similarity index 100% rename from models/cv/classification/efficientnet_b2/ixrt/config/EFFICIENTNET_B2_CONFIG rename to models/cv/classification/ixrt_common/config/EFFICIENTNET_B2_CONFIG diff --git a/models/cv/classification/efficientnet_b3/ixrt/config/EFFICIENTNET_B3_CONFIG b/models/cv/classification/ixrt_common/config/EFFICIENTNET_B3_CONFIG similarity index 100% rename from models/cv/classification/efficientnet_b3/ixrt/config/EFFICIENTNET_B3_CONFIG rename to models/cv/classification/ixrt_common/config/EFFICIENTNET_B3_CONFIG diff --git a/models/cv/classification/efficientnet_v2/ixrt/config/EFFICIENTNET_V2_CONFIG b/models/cv/classification/ixrt_common/config/EFFICIENTNET_V2_CONFIG similarity index 100% rename from models/cv/classification/efficientnet_v2/ixrt/config/EFFICIENTNET_V2_CONFIG rename to models/cv/classification/ixrt_common/config/EFFICIENTNET_V2_CONFIG diff --git a/models/cv/classification/efficientnet_v2_s/ixrt/config/EFFICIENTNETV2_S_CONFIG b/models/cv/classification/ixrt_common/config/EFFICIENTNET_V2_S_CONFIG similarity index 100% rename from models/cv/classification/efficientnet_v2_s/ixrt/config/EFFICIENTNETV2_S_CONFIG rename to models/cv/classification/ixrt_common/config/EFFICIENTNET_V2_S_CONFIG diff --git a/models/cv/classification/googlenet/ixrt/config/GOOGLENET_CONFIG b/models/cv/classification/ixrt_common/config/GOOGLENET_CONFIG similarity index 100% rename from models/cv/classification/googlenet/ixrt/config/GOOGLENET_CONFIG rename to models/cv/classification/ixrt_common/config/GOOGLENET_CONFIG diff --git a/models/cv/classification/inception_v3/ixrt/config/INCEPTION_V3_CONFIG b/models/cv/classification/ixrt_common/config/HRNET_W18_CONFIG similarity index 49% rename from models/cv/classification/inception_v3/ixrt/config/INCEPTION_V3_CONFIG rename to models/cv/classification/ixrt_common/config/HRNET_W18_CONFIG index 041b0910..567fbedf 100644 --- 
a/models/cv/classification/inception_v3/ixrt/config/INCEPTION_V3_CONFIG +++ b/models/cv/classification/ixrt_common/config/HRNET_W18_CONFIG @@ -1,24 +1,9 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - # IMGSIZE : 模型输入hw大小 # MODEL_NAME : 生成onnx/engine的basename # ORIGINE_MODEL : 原始onnx文件名称 IMGSIZE=224 -MODEL_NAME=Inception_v3 -ORIGINE_MODEL=inception_v3.onnx +MODEL_NAME=hrnet_w18 +ORIGINE_MODEL=hrnet_w18.onnx # QUANT CONFIG (仅PRECISION为int8时生效) # QUANT_OBSERVER : 量化策略,可选 [hist_percentile, percentile, minmax, entropy, ema] @@ -32,4 +17,3 @@ QUANT_STEP=32 QUANT_SEED=42 DISABLE_QUANT_LIST= QUANT_EXIST_ONNX= - diff --git a/models/cv/classification/inception_resnet_v2/ixrt/config/INCEPTIONRESNETV2_CONFIG b/models/cv/classification/ixrt_common/config/INCEPTION_RESNET_V2_CONFIG similarity index 50% rename from models/cv/classification/inception_resnet_v2/ixrt/config/INCEPTIONRESNETV2_CONFIG rename to models/cv/classification/ixrt_common/config/INCEPTION_RESNET_V2_CONFIG index 1a5e6158..eafce555 100644 --- a/models/cv/classification/inception_resnet_v2/ixrt/config/INCEPTIONRESNETV2_CONFIG +++ b/models/cv/classification/ixrt_common/config/INCEPTION_RESNET_V2_CONFIG @@ -1,23 +1,8 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - # IMGSIZE : 模型输入hw大小 # MODEL_NAME : 生成onnx/engine的basename # ORIGINE_MODEL : 原始onnx文件名称 -IMGSIZE=300 -MODEL_NAME=InceptionResnetV2 +IMGSIZE=299 +MODEL_NAME=inceptionresnetv2 ORIGINE_MODEL=inceptionresnetv2.onnx # QUANT CONFIG (仅PRECISION为int8时生效) diff --git a/models/cv/classification/squeezenet_v1_0/ixrt/config/SQUEEZENET_V10_CONFIG b/models/cv/classification/ixrt_common/config/INCEPTION_V3_CONFIG similarity index 92% rename from models/cv/classification/squeezenet_v1_0/ixrt/config/SQUEEZENET_V10_CONFIG rename to models/cv/classification/ixrt_common/config/INCEPTION_V3_CONFIG index 0242c207..7204e676 100644 --- a/models/cv/classification/squeezenet_v1_0/ixrt/config/SQUEEZENET_V10_CONFIG +++ b/models/cv/classification/ixrt_common/config/INCEPTION_V3_CONFIG @@ -2,8 +2,8 @@ # MODEL_NAME : 生成onnx/engine的basename # ORIGINE_MODEL : 原始onnx文件名称 IMGSIZE=224 -MODEL_NAME=SqueezeNet_v10 -ORIGINE_MODEL=squeezenetv10.onnx +MODEL_NAME=inception_v3 +ORIGINE_MODEL=inception_v3.onnx # QUANT CONFIG (仅PRECISION为int8时生效) # QUANT_OBSERVER : 量化策略,可选 [hist_percentile, percentile, minmax, entropy, ema] @@ -17,3 +17,4 @@ QUANT_STEP=32 QUANT_SEED=42 DISABLE_QUANT_LIST= QUANT_EXIST_ONNX= + diff --git a/models/cv/classification/ixrt_common/config/MOBILENET_V2_CONFIG b/models/cv/classification/ixrt_common/config/MOBILENET_V2_CONFIG new file mode 100644 index 00000000..e37a8fd0 --- /dev/null +++ 
b/models/cv/classification/ixrt_common/config/MOBILENET_V2_CONFIG @@ -0,0 +1,19 @@ +# IMGSIZE : 模型输入hw大小 +# MODEL_NAME : 生成onnx/engine的basename +# ORIGINE_MODEL : 原始onnx文件名称 +IMGSIZE=224 +MODEL_NAME=mobilenet_v2 +ORIGINE_MODEL=mobilenet_v2.onnx + +# QUANT CONFIG (仅PRECISION为int8时生效) + # QUANT_OBSERVER : 量化策略,可选 [hist_percentile, percentile, minmax, entropy, ema] + # QUANT_BATCHSIZE : 量化时组dataloader的batchsize, 最好和onnx中的batchsize保持一致,有些op可能推导shape错误(比如Reshape) + # QUANT_STEP : 量化步数 + # QUANT_SEED : 随机种子 保证量化结果可复现 + # QUANT_EXIST_ONNX : 如果有其他来源的量化模型则填写 +QUANT_OBSERVER=hist_percentile +QUANT_BATCHSIZE=32 +QUANT_STEP=32 +QUANT_SEED=42 +DISABLE_QUANT_LIST= +QUANT_EXIST_ONNX= \ No newline at end of file diff --git a/models/cv/classification/mobilenet_v3/ixrt/config/MOBILENET_V3_CONFIG b/models/cv/classification/ixrt_common/config/MOBILENET_V3_CONFIG similarity index 100% rename from models/cv/classification/mobilenet_v3/ixrt/config/MOBILENET_V3_CONFIG rename to models/cv/classification/ixrt_common/config/MOBILENET_V3_CONFIG diff --git a/models/cv/classification/repvgg/ixrt/config/REPVGG_CONFIG b/models/cv/classification/ixrt_common/config/REPVGG_CONFIG similarity index 100% rename from models/cv/classification/repvgg/ixrt/config/REPVGG_CONFIG rename to models/cv/classification/ixrt_common/config/REPVGG_CONFIG diff --git a/models/cv/classification/ixrt_common/config/REPVIT_CONFIG b/models/cv/classification/ixrt_common/config/REPVIT_CONFIG new file mode 100644 index 00000000..d33db2e7 --- /dev/null +++ b/models/cv/classification/ixrt_common/config/REPVIT_CONFIG @@ -0,0 +1,19 @@ +# IMGSIZE : 模型输入hw大小 +# MODEL_NAME : 生成onnx/engine的basename +# ORIGINE_MODEL : 原始onnx文件名称 +IMGSIZE=224 +MODEL_NAME=RepViT +ORIGINE_MODEL=repvit.onnx + +# QUANT CONFIG (仅PRECISION为int8时生效) + # QUANT_OBSERVER : 量化策略,可选 [hist_percentile, percentile, minmax, entropy, ema] + # QUANT_BATCHSIZE : 量化时组dataloader的batchsize, 最好和onnx中的batchsize保持一致,有些op可能推导shape错误(比如Reshape) + # QUANT_STEP : 量化步数 + # 
QUANT_SEED : 随机种子 保证量化结果可复现 + # QUANT_EXIST_ONNX : 如果有其他来源的量化模型则填写 +QUANT_OBSERVER=hist_percentile +QUANT_BATCHSIZE=1 +QUANT_STEP=32 +QUANT_SEED=42 +DISABLE_QUANT_LIST= +QUANT_EXIST_ONNX= diff --git a/models/cv/classification/res2net50/ixrt/config/RES2NET50_CONFIG b/models/cv/classification/ixrt_common/config/RES2NET50_CONFIG similarity index 100% rename from models/cv/classification/res2net50/ixrt/config/RES2NET50_CONFIG rename to models/cv/classification/ixrt_common/config/RES2NET50_CONFIG diff --git a/models/cv/classification/resnet101/ixrt/config/RESNET101_CONFIG b/models/cv/classification/ixrt_common/config/RESNET101_CONFIG similarity index 100% rename from models/cv/classification/resnet101/ixrt/config/RESNET101_CONFIG rename to models/cv/classification/ixrt_common/config/RESNET101_CONFIG diff --git a/models/cv/classification/resnet18/ixrt/config/RESNET18_CONFIG b/models/cv/classification/ixrt_common/config/RESNET18_CONFIG similarity index 100% rename from models/cv/classification/resnet18/ixrt/config/RESNET18_CONFIG rename to models/cv/classification/ixrt_common/config/RESNET18_CONFIG diff --git a/models/cv/classification/resnet34/ixrt/config/RESNET34_CONFIG b/models/cv/classification/ixrt_common/config/RESNET34_CONFIG similarity index 100% rename from models/cv/classification/resnet34/ixrt/config/RESNET34_CONFIG rename to models/cv/classification/ixrt_common/config/RESNET34_CONFIG diff --git a/models/cv/classification/resnet50/ixrt/config/RESNET50_CONFIG b/models/cv/classification/ixrt_common/config/RESNET50_CONFIG similarity index 100% rename from models/cv/classification/resnet50/ixrt/config/RESNET50_CONFIG rename to models/cv/classification/ixrt_common/config/RESNET50_CONFIG diff --git a/models/cv/classification/resnetv1d50/ixrt/config/RESNETV1D50_CONFIG b/models/cv/classification/ixrt_common/config/RESNETV1D50_CONFIG similarity index 98% rename from models/cv/classification/resnetv1d50/ixrt/config/RESNETV1D50_CONFIG rename to 
models/cv/classification/ixrt_common/config/RESNETV1D50_CONFIG index 5d1cb05d..2c87e1d7 100644 --- a/models/cv/classification/resnetv1d50/ixrt/config/RESNETV1D50_CONFIG +++ b/models/cv/classification/ixrt_common/config/RESNETV1D50_CONFIG @@ -16,7 +16,7 @@ # MODEL_NAME : 生成onnx/engine的basename # ORIGINE_MODEL : 原始onnx文件名称 IMGSIZE=224 -MODEL_NAME=Resnet_V1_D50 +MODEL_NAME=resnet_v1_d50 ORIGINE_MODEL=resnet_v1_d50.onnx # QUANT CONFIG (仅PRECISION为int8时生效) diff --git a/models/cv/classification/resnext101_32x8d/ixrt/config/RESNEXT101_32X8D_CONFIG b/models/cv/classification/ixrt_common/config/RESNEXT101_32X8D_CONFIG similarity index 100% rename from models/cv/classification/resnext101_32x8d/ixrt/config/RESNEXT101_32X8D_CONFIG rename to models/cv/classification/ixrt_common/config/RESNEXT101_32X8D_CONFIG diff --git a/models/cv/classification/resnext101_64x4d/ixrt/config/RESNEXT101_64X4D_CONFIG b/models/cv/classification/ixrt_common/config/RESNEXT101_64X4D_CONFIG similarity index 100% rename from models/cv/classification/resnext101_64x4d/ixrt/config/RESNEXT101_64X4D_CONFIG rename to models/cv/classification/ixrt_common/config/RESNEXT101_64X4D_CONFIG diff --git a/models/cv/classification/resnext50_32x4d/ixrt/config/RESNEXT50_CONFIG b/models/cv/classification/ixrt_common/config/RESNEXT50_32X4D_CONFIG similarity index 100% rename from models/cv/classification/resnext50_32x4d/ixrt/config/RESNEXT50_CONFIG rename to models/cv/classification/ixrt_common/config/RESNEXT50_32X4D_CONFIG diff --git a/models/cv/classification/shufflenetv2_x0_5/ixrt/config/SHUFFLENET_V2_X0_5_CONFIG b/models/cv/classification/ixrt_common/config/SHUFFLENETV2_X0_5_CONFIG similarity index 100% rename from models/cv/classification/shufflenetv2_x0_5/ixrt/config/SHUFFLENET_V2_X0_5_CONFIG rename to models/cv/classification/ixrt_common/config/SHUFFLENETV2_X0_5_CONFIG diff --git a/models/cv/classification/shufflenetv2_x1_0/ixrt/config/SHUFFLENETV2_X1_0_CONFIG 
b/models/cv/classification/ixrt_common/config/SHUFFLENETV2_X1_0_CONFIG similarity index 100% rename from models/cv/classification/shufflenetv2_x1_0/ixrt/config/SHUFFLENETV2_X1_0_CONFIG rename to models/cv/classification/ixrt_common/config/SHUFFLENETV2_X1_0_CONFIG diff --git a/models/cv/classification/shufflenetv2_x1_5/ixrt/config/SHUFFLENETV2_X1_5_CONFIG b/models/cv/classification/ixrt_common/config/SHUFFLENETV2_X1_5_CONFIG similarity index 100% rename from models/cv/classification/shufflenetv2_x1_5/ixrt/config/SHUFFLENETV2_X1_5_CONFIG rename to models/cv/classification/ixrt_common/config/SHUFFLENETV2_X1_5_CONFIG diff --git a/models/cv/classification/shufflenetv2_x2_0/ixrt/config/SHUFFLENETV2_X2_0_CONFIG b/models/cv/classification/ixrt_common/config/SHUFFLENETV2_X2_0_CONFIG similarity index 100% rename from models/cv/classification/shufflenetv2_x2_0/ixrt/config/SHUFFLENETV2_X2_0_CONFIG rename to models/cv/classification/ixrt_common/config/SHUFFLENETV2_X2_0_CONFIG diff --git a/models/cv/classification/shufflenet_v1/ixrt/config/SHUFFLENET_V1_CONFIG b/models/cv/classification/ixrt_common/config/SHUFFLENET_V1_CONFIG similarity index 100% rename from models/cv/classification/shufflenet_v1/ixrt/config/SHUFFLENET_V1_CONFIG rename to models/cv/classification/ixrt_common/config/SHUFFLENET_V1_CONFIG diff --git a/models/cv/classification/ixrt_common/config/SHUFFLENET_V1_CONFIG copy b/models/cv/classification/ixrt_common/config/SHUFFLENET_V1_CONFIG copy new file mode 100644 index 00000000..35f0f8be --- /dev/null +++ b/models/cv/classification/ixrt_common/config/SHUFFLENET_V1_CONFIG copy @@ -0,0 +1,19 @@ +# IMGSIZE : 模型输入hw大小 +# MODEL_NAME : 生成onnx/engine的basename +# ORIGINE_MODEL : 原始onnx文件名称 +IMGSIZE=224 +MODEL_NAME=ShuffleNet_v1 +ORIGINE_MODEL=shufflenet_v1.onnx + +# QUANT CONFIG (仅PRECISION为int8时生效) + # QUANT_OBSERVER : 量化策略,可选 [hist_percentile, percentile, minmax, entropy, ema] + # QUANT_BATCHSIZE : 量化时组dataloader的batchsize, 
最好和onnx中的batchsize保持一致,有些op可能推导shape错误(比如Reshape) + # QUANT_STEP : 量化步数 + # QUANT_SEED : 随机种子 保证量化结果可复现 + # QUANT_EXIST_ONNX : 如果有其他来源的量化模型则填写 +QUANT_OBSERVER=hist_percentile +QUANT_BATCHSIZE=1 +QUANT_STEP=32 +QUANT_SEED=42 +DISABLE_QUANT_LIST= +QUANT_EXIST_ONNX= diff --git a/models/cv/classification/squeezenet_v1_0/ixrt/config/SQUEEZENET_V1_0_CONFIG b/models/cv/classification/ixrt_common/config/SQUEEZENET_V1_0_CONFIG similarity index 100% rename from models/cv/classification/squeezenet_v1_0/ixrt/config/SQUEEZENET_V1_0_CONFIG rename to models/cv/classification/ixrt_common/config/SQUEEZENET_V1_0_CONFIG diff --git a/models/cv/classification/squeezenet_v1_1/ixrt/config/SQUEEZENET_V1_1_CONFIG b/models/cv/classification/ixrt_common/config/SQUEEZENET_V1_1_CONFIG similarity index 100% rename from models/cv/classification/squeezenet_v1_1/ixrt/config/SQUEEZENET_V1_1_CONFIG rename to models/cv/classification/ixrt_common/config/SQUEEZENET_V1_1_CONFIG diff --git a/models/cv/classification/vgg16/ixrt/config/VGG16_CONFIG b/models/cv/classification/ixrt_common/config/VGG16_CONFIG similarity index 100% rename from models/cv/classification/vgg16/ixrt/config/VGG16_CONFIG rename to models/cv/classification/ixrt_common/config/VGG16_CONFIG diff --git a/models/cv/classification/wide_resnet50/ixrt/config/WIDE_RESNET50_CONFIG b/models/cv/classification/ixrt_common/config/WIDE_RESNET50_CONFIG similarity index 100% rename from models/cv/classification/wide_resnet50/ixrt/config/WIDE_RESNET50_CONFIG rename to models/cv/classification/ixrt_common/config/WIDE_RESNET50_CONFIG diff --git a/models/cv/classification/wide_resnet50/ixrt/export.py b/models/cv/classification/ixrt_common/export.py similarity index 78% rename from models/cv/classification/wide_resnet50/ixrt/export.py rename to models/cv/classification/ixrt_common/export.py index 1d3c64c8..506ecac2 100644 --- a/models/cv/classification/wide_resnet50/ixrt/export.py +++ b/models/cv/classification/ixrt_common/export.py @@ -20,6 +20,11 @@ 
import argparse def parse_args(): parser = argparse.ArgumentParser() + parser.add_argument("--model-name", + type=str, + required=True, + help="Name of the model from torchvision.models.") + parser.add_argument("--weight", type=str, required=True, @@ -35,8 +40,13 @@ def parse_args(): def main(): args = parse_args() - - model = torchvision.models.wide_resnet50_2() + print(f"Loading model: {args.model_name}...") + try: + model = getattr(torchvision.models, args.model_name)(pretrained=False) + except TypeError: + # Fallback for models that do not accept 'pretrained' parameter + model = getattr(torchvision.models, args.model_name)() + model.load_state_dict(torch.load(args.weight)) model.eval() diff --git a/models/cv/classification/convnext_small/ixrt/inference.py b/models/cv/classification/ixrt_common/inference.py similarity index 48% rename from models/cv/classification/convnext_small/ixrt/inference.py rename to models/cv/classification/ixrt_common/inference.py index 3d3cf572..0a730c59 100644 --- a/models/cv/classification/convnext_small/ixrt/inference.py +++ b/models/cv/classification/ixrt_common/inference.py @@ -1,19 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
+ import argparse import json import os @@ -27,33 +14,115 @@ from cuda import cuda, cudart import torch import tensorrt -from calibration_dataset import getdataloader +from calibration_dataset import getdataloader, getmobilenetv1dataloader from common import eval_batch, create_engine_context, get_io_bindings -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) +TRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) +from load_ixrt_plugin import load_ixrt_plugin +load_ixrt_plugin(TRT_LOGGER) - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) +class ModelRunner: + def __init__(self, model_path, logger): + self.model_path = model_path + self.logger = logger - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) + if model_path.endswith(".onnx"): + self.backend = "onnxruntime" + elif model_path.endswith(".engine"): + self.backend = "ixrt" + else: + raise Exception("No supported backend for executing ", model_path, "only support engine/onnx format") - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") + if self.is_ixrt_backend(): + self.init_ixrt() + elif self.is_ort_backend(): + self.init_onnxruntime() + else: + raise Exception("No supported backend for", self.backend) + def is_ixrt_backend(self): + return self.backend == "ixrt" + + def is_ort_backend(self): + return self.backend == "onnxruntime" + def init_ixrt(self): + self.engine, self.context = create_engine_context(self.model_path, self.logger) + self.inputs, self.outputs, self.allocations = get_io_bindings(self.engine) + + def init_onnxruntime(self): + import onnxruntime, onnx + raw_onnx = onnx.load(self.model_path) + self.ort_session = onnxruntime.InferenceSession( + raw_onnx.SerializeToString(), 
providers=["CPUExecutionProvider"] + ) + self.inputs, self.outputs, self.allocations = get_io_bindings(self.ort_session) + + def run(self): + if self.is_ixrt_backend(): + self.run_ixrt() + elif self.is_ort_backend(): + self.run_onnxruntime() + else: + raise Exception("No supported backend for", self.backend) + + def run_onnxruntime(self): + input_buffers = {} + for input in self.inputs: + input_buffers[input["name"]] = input["allocation"] + + output_names = [output["name"] for output in self.outputs] + ort_outs = self.ort_session.run(output_names, input_buffers) + + for i in range(len(output_names)): + self.outputs[i]["allocation"] = ort_outs[i] + def run_ixrt(self): + self.context.execute_v2(self.allocations) + +class ClassificationRunner(ModelRunner): + def load_input(self, batch_data): + if self.is_ixrt_backend(): + err, = cuda.cuMemcpyHtoD(self.inputs[0]["allocation"], batch_data, batch_data.nbytes) + assert(err == cuda.CUresult.CUDA_SUCCESS) + + elif self.is_ort_backend(): + self.inputs[0]["allocation"] = batch_data + else: + raise + + def fetch_output(self): + if self.is_ixrt_backend(): + output = self.outputs[0] + result = np.zeros(output["shape"],output["dtype"]) + err, = cuda.cuMemcpyDtoH(result, output["allocation"], output["nbytes"]) + assert(err == cuda.CUresult.CUDA_SUCCESS) + return [result] + + elif self.is_ort_backend(): + return [output["allocation"] for output in self.outputs] + else: + raise + +def main(config): + if "MobileNet_v1" in config.engine_file: + dataloader = getmobilenetv1dataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) + else: + dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) + + logger = tensorrt.Logger(tensorrt.Logger.ERROR) + runner = ClassificationRunner(config.engine_file, logger) # Inference if config.test_mode == "FPS": + # Warm up + if config.warm_up > 0: + print("\nWarm Start.") + for i in range(config.warm_up): + runner.run() + print("Warm 
Done.") torch.cuda.synchronize() start_time = time.time() for i in range(config.loop_count): - context.execute_v2(allocations) + runner.run() torch.cuda.synchronize() end_time = time.time() @@ -74,37 +143,19 @@ def main(config): exit(1) elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - total_sample = 0 acc_top1, acc_top5 = 0, 0 start_time = time.time() with tqdm(total= len(dataloader)) as _tqdm: for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) + batch_data = batch_data.numpy().astype(runner.inputs[0]["dtype"]) batch_data = np.ascontiguousarray(batch_data) total_sample += batch_data.shape[0] - (err,) = cudart.cudaMemcpy( - inputs[0]["allocation"], - batch_data, - batch_data.nbytes, - cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, - ) - assert err == cudart.cudaError_t.cudaSuccess - # cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - (err,) = cudart.cudaMemcpy( - output, - outputs[0]["allocation"], - outputs[0]["nbytes"], - cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, - ) - assert err == cudart.cudaError_t.cudaSuccess - # cuda.memcpy_dtoh(output, outputs[0]["allocation"]) + + runner.load_input(batch_data) + runner.run() + output = runner.fetch_output()[0] # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model if len(output.shape) == 4: @@ -117,13 +168,11 @@ def main(config): _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), acc_5='{:.4f}'.format(acc_top5/total_sample)) _tqdm.update(1) + end_time = time.time() end2end_time = end_time - start_time + print(F"E2E time : {end2end_time:.3f} seconds") - err, = cudart.cudaFree(inputs[0]["allocation"]) - assert err == cudart.cudaError_t.cudaSuccess - err, = cudart.cudaFree(outputs[0]["allocation"]) - assert err == 
cudart.cudaError_t.cudaSuccess print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") acc1 = acc_top1/total_sample diff --git a/models/cv/classification/ixrt_common/load_ixrt_plugin.py b/models/cv/classification/ixrt_common/load_ixrt_plugin.py new file mode 100644 index 00000000..b40f6910 --- /dev/null +++ b/models/cv/classification/ixrt_common/load_ixrt_plugin.py @@ -0,0 +1,13 @@ +from os.path import join, dirname, exists +import tensorrt as trt +import ctypes + +def load_ixrt_plugin(logger=trt.Logger(trt.Logger.WARNING), namespace="", dynamic_path=""): + if not dynamic_path: + dynamic_path = join(dirname(trt.__file__), "lib", "libixrt_plugin.so") + if not exists(dynamic_path): + raise FileNotFoundError( + f"The ixrt_plugin lib {dynamic_path} is not existed, please provided effective plugin path!") + ctypes.CDLL(dynamic_path, mode=ctypes.RTLD_GLOBAL) + trt.init_libnvinfer_plugins(logger, namespace) + print(f"Loaded plugin from {dynamic_path}") \ No newline at end of file diff --git a/models/cv/classification/efficientnet_b0/ixrt/modify_batchsize.py b/models/cv/classification/ixrt_common/modify_batchsize.py similarity index 59% rename from models/cv/classification/efficientnet_b0/ixrt/modify_batchsize.py rename to models/cv/classification/ixrt_common/modify_batchsize.py index 4ffc02b0..bbe7d72c 100644 --- a/models/cv/classification/efficientnet_b0/ixrt/modify_batchsize.py +++ b/models/cv/classification/ixrt_common/modify_batchsize.py @@ -1,22 +1,6 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - import onnx import argparse - def change_input_dim(model, bsz): batch_size = bsz @@ -39,7 +23,6 @@ def change_input_dim(model, bsz): # set batch size of 1 dim1.dim_value = 1 - def parse_args(): parser = argparse.ArgumentParser() parser.add_argument("--batch_size", type=int) @@ -48,8 +31,8 @@ def parse_args(): args = parser.parse_args() return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) \ No newline at end of file +if __name__ == "__main__": + args = parse_args() + model = onnx.load(args.origin_model) + change_input_dim(model, args.batch_size) + onnx.save(model, args.output_model) diff --git a/models/cv/classification/res2net50/ixrt/quant.py b/models/cv/classification/ixrt_common/quant.py similarity index 71% rename from models/cv/classification/res2net50/ixrt/quant.py rename to models/cv/classification/ixrt_common/quant.py index 7c7860c9..2726bf02 100644 --- a/models/cv/classification/res2net50/ixrt/quant.py +++ b/models/cv/classification/ixrt_common/quant.py @@ -1,17 +1,3 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. import os import cv2 import random diff --git a/models/cv/classification/googlenet/ixrt/refine_model.py b/models/cv/classification/ixrt_common/refine_model.py similarity index 91% rename from models/cv/classification/googlenet/ixrt/refine_model.py rename to models/cv/classification/ixrt_common/refine_model.py index 6f1e6c2f..b2eef21d 100644 --- a/models/cv/classification/googlenet/ixrt/refine_model.py +++ b/models/cv/classification/ixrt_common/refine_model.py @@ -1,17 +1,3 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
import os import argparse import dataclasses @@ -249,12 +235,12 @@ class RenamePass(BasePass): def create_pipeline(example_inputs): return PassSequence( - # FuseLayerNormPass(), + FuseLayerNormPass(), FusedGeluPass(), - # ClearUnsedVariables(), - # FormatLayerNorm(), - # FormatReshape(), + ClearUnsedVariables(), + FormatLayerNorm(), + FormatReshape(), # FormatScalar(), # RenamePass() ) diff --git a/models/cv/classification/alexnet/ixrt/refine_utils/__init__.py b/models/cv/classification/ixrt_common/refine_utils/__init__.py similarity index 100% rename from models/cv/classification/alexnet/ixrt/refine_utils/__init__.py rename to models/cv/classification/ixrt_common/refine_utils/__init__.py diff --git a/models/cv/classification/cspdarknet53/ixrt/refine_utils/common.py b/models/cv/classification/ixrt_common/refine_utils/common.py similarity index 63% rename from models/cv/classification/cspdarknet53/ixrt/refine_utils/common.py rename to models/cv/classification/ixrt_common/refine_utils/common.py index 2af19a14..a7a2370c 100644 --- a/models/cv/classification/cspdarknet53/ixrt/refine_utils/common.py +++ b/models/cv/classification/ixrt_common/refine_utils/common.py @@ -1,17 +1,3 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
from typing import Union, Callable, List from tensorrt.deploy.api import * diff --git a/models/cv/classification/res2net50/ixrt/refine_utils/linear_pass.py b/models/cv/classification/ixrt_common/refine_utils/linear_pass.py similarity index 81% rename from models/cv/classification/res2net50/ixrt/refine_utils/linear_pass.py rename to models/cv/classification/ixrt_common/refine_utils/linear_pass.py index 29b5e4a9..d3f1b4a0 100644 --- a/models/cv/classification/res2net50/ixrt/refine_utils/linear_pass.py +++ b/models/cv/classification/ixrt_common/refine_utils/linear_pass.py @@ -1,17 +1,3 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. import dataclasses from refine_utils.common import * diff --git a/models/cv/classification/alexnet/ixrt/refine_utils/matmul_to_gemm_pass.py b/models/cv/classification/ixrt_common/refine_utils/matmul_to_gemm_pass.py similarity index 62% rename from models/cv/classification/alexnet/ixrt/refine_utils/matmul_to_gemm_pass.py rename to models/cv/classification/ixrt_common/refine_utils/matmul_to_gemm_pass.py index 4ebfac4d..ca13c1ee 100644 --- a/models/cv/classification/alexnet/ixrt/refine_utils/matmul_to_gemm_pass.py +++ b/models/cv/classification/ixrt_common/refine_utils/matmul_to_gemm_pass.py @@ -1,17 +1,3 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. from refine_utils.common import * # diff --git a/models/cv/classification/squeezenet_v1_1/ixrt/requirements.txt b/models/cv/classification/ixrt_common/requirements.txt similarity index 93% rename from models/cv/classification/squeezenet_v1_1/ixrt/requirements.txt rename to models/cv/classification/ixrt_common/requirements.txt index e4d443b8..560b910c 100644 --- a/models/cv/classification/squeezenet_v1_1/ixrt/requirements.txt +++ b/models/cv/classification/ixrt_common/requirements.txt @@ -1,7 +1,6 @@ tqdm -onnx -onnxsim tabulate -ppq pycuda +onnx +onnxsim opencv-python==4.6.0.66 \ No newline at end of file diff --git a/models/cv/classification/convnext_base/ixrt/simplify_model.py b/models/cv/classification/ixrt_common/simplify_model.py similarity index 49% rename from models/cv/classification/convnext_base/ixrt/simplify_model.py rename to models/cv/classification/ixrt_common/simplify_model.py index 9948a9fa..9efb5f82 100644 --- a/models/cv/classification/convnext_base/ixrt/simplify_model.py +++ b/models/cv/classification/ixrt_common/simplify_model.py @@ -1,17 +1,3 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. import onnx import argparse from onnxsim import simplify diff --git a/models/cv/classification/mobilenet_v2/ixrt/README.md b/models/cv/classification/mobilenet_v2/ixrt/README.md index f42b47a3..81226ca8 100644 --- a/models/cv/classification/mobilenet_v2/ixrt/README.md +++ b/models/cv/classification/mobilenet_v2/ixrt/README.md @@ -21,14 +21,20 @@ Download the [imagenet](https://www.image-net.org/download.php) to download the ### Install Dependencies ```bash -pip3 install -r requirements.txt +# Install libGL +## CentOS +yum install -y mesa-libGL +## Ubuntu +apt install -y libgl1-mesa-glx + +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash mkdir checkpoints -python3 export_onnx.py --origin_model /path/to/mobilenet_v2-b0353104 --output_model checkpoints/mobilenet_v2.onnx +python3 ../../ixrt_common/export.py --model-name mobilenet_v2 --weight mobilenet_v2-b0353104.pth --output checkpoints/mobilenet_v2.onnx ``` ## Model Inference @@ -37,7 +43,8 @@ python3 export_onnx.py --origin_model /path/to/mobilenet_v2-b0353104 --output_mo export PROJ_DIR=./ export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints -export RUN_DIR=./ +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/MOBILENET_V2_CONFIG ``` ### FP16 diff --git a/models/cv/classification/mobilenet_v2/ixrt/build_engine.py b/models/cv/classification/mobilenet_v2/ixrt/build_engine.py deleted file mode 100644 index d52a9368..00000000 --- a/models/cv/classification/mobilenet_v2/ixrt/build_engine.py +++ /dev/null @@ -1,113 +0,0 @@ 
-# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt -from calibration_dataset import getdataloader -import cuda.cudart as cudart - - -def assertSuccess(err): - assert(err == cudart.cudaError_t.cudaSuccess) - - -class EngineCalibrator(tensorrt.IInt8EntropyCalibrator2): - - def __init__(self, cache_file, datasets_dir, loop_count=10, bsz=1, img_sz=224): - super().__init__() - self.cache_file = cache_file - self.image_batcher = getdataloader(datasets_dir, loop_count, batch_size=bsz, img_sz=img_sz) - self.batch_generator = iter(self.image_batcher) - size = img_sz*img_sz*3*bsz - # __import__('pdb').set_trace() - err, self.batch_allocation = cudart.cudaMalloc(size) - assertSuccess(err) - - def __del__(self): - err,= cudart.cudaFree(self.batch_allocation) - assertSuccess(err) - - def get_batch_size(self): - return self.image_batcher.batch_size - - def get_batch(self, names): - try: - batch, _ = next(self.batch_generator) - batch = batch.numpy() - __import__('pdb').set_trace() - cudart.cudaMemcpy(self.batch_allocation, - np.ascontiguousarray(batch), - batch.nbytes, - cudart.cudaMemcpyKind.cudaMemcpyHostToDevice) - return [int(self.batch_allocation)] - except StopIteration: - return None - - def read_calibration_cache(self): - if os.path.exists(self.cache_file): - with open(self.cache_file, "rb") 
as f: - return f.read() - - def write_calibration_cache(self, cache): - with open(self.cache_file, "wb") as f: - f.write(cache) - - -def main(config): - # IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.VERBOSE) - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - print("precision : ", precision) - build_config.set_flag(precision) - if config.precision == "int8": - build_config.int8_calibrator = EngineCalibrator("int8_cache", config.datasets_dir) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - args = parser.parse_args() - return args - - -if __name__ == "__main__": - args = parse_args() - main(args) diff --git a/models/cv/classification/mobilenet_v2/ixrt/build_engine_by_write_qparams.py b/models/cv/classification/mobilenet_v2/ixrt/build_engine_by_write_qparams.py deleted file mode 100644 index ea29ccc6..00000000 --- a/models/cv/classification/mobilenet_v2/ixrt/build_engine_by_write_qparams.py +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. 
-# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import argparse -import json -import os - -import tensorrt -import tensorrt as trt - -TRT_LOGGER = trt.Logger(tensorrt.Logger.VERBOSE) - -EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - - -def GiB(val): - return val * 1 << 30 - - -def json_load(filename): - with open(filename) as json_file: - data = json.load(json_file) - return data - - -def setDynamicRange(network, json_file): - """Sets ranges for network layers.""" - quant_param_json = json_load(json_file) - act_quant = quant_param_json["act_quant_info"] - - for i in range(network.num_inputs): - input_tensor = network.get_input(i) - if act_quant.__contains__(input_tensor.name): - print(input_tensor.name) - value = act_quant[input_tensor.name] - tensor_max = abs(value) - tensor_min = -abs(value) - input_tensor.dynamic_range = (tensor_min, tensor_max) - - for i in range(network.num_layers): - layer = network.get_layer(i) - - for output_index in range(layer.num_outputs): - tensor = layer.get_output(output_index) - - if act_quant.__contains__(tensor.name): - value = act_quant[tensor.name] - tensor_max = abs(value) - tensor_min = -abs(value) - tensor.dynamic_range = (tensor_min, tensor_max) - else: - print("\033[1;32m%s\033[0m" % tensor.name) - - -def build_engine(onnx_file, json_file, engine_file): - builder = trt.Builder(TRT_LOGGER) - network = builder.create_network(EXPLICIT_BATCH) - - config = 
builder.create_builder_config() - - # If it is a dynamic onnx model , you need to add the following. - # profile = builder.create_optimization_profile() - # profile.set_shape("input_name", (batch, channels, min_h, min_w), (batch, channels, opt_h, opt_w), (batch, channels, max_h, max_w)) - # config.add_optimization_profile(profile) - - parser = trt.OnnxParser(network, TRT_LOGGER) - # config.max_workspace_size = GiB(1) - - if not os.path.exists(onnx_file): - quit("ONNX file {} not found".format(onnx_file)) - - with open(onnx_file, "rb") as model: - if not parser.parse(model.read()): - print("ERROR: Failed to parse the ONNX file.") - for error in range(parser.num_errors): - print(parser.get_error(error)) - return None - - config.set_flag(trt.BuilderFlag.INT8) - - setDynamicRange(network, json_file) - - engine = builder.build_engine(network, config) - - with open(engine_file, "wb") as f: - f.write(engine.serialize()) - - -if __name__ == "__main__": - # Add plugins if needed - # import ctypes - # ctypes.CDLL("libmmdeploy_tensorrt_ops.so") - parser = argparse.ArgumentParser( - description="Writing qparams to onnx to convert tensorrt engine." 
- ) - parser.add_argument("--onnx", type=str, default=None) - parser.add_argument("--qparam_json", type=str, default=None) - parser.add_argument("--engine", type=str, default=None) - arg = parser.parse_args() - - build_engine(arg.onnx, arg.qparam_json, arg.engine) - print("\033[1;32mgenerate %s\033[0m" % arg.engine) diff --git a/models/cv/classification/mobilenet_v2/ixrt/ci/prepare.sh b/models/cv/classification/mobilenet_v2/ixrt/ci/prepare.sh index 277cd41a..0f0fcc09 100644 --- a/models/cv/classification/mobilenet_v2/ixrt/ci/prepare.sh +++ b/models/cv/classification/mobilenet_v2/ixrt/ci/prepare.sh @@ -25,6 +25,6 @@ else echo "Not Support Os" fi -pip install -r requirements.txt +pip install -r ../../ixrt_common/requirements.txt mkdir checkpoints -python3 export_onnx.py --origin_model /root/data/checkpoints/mobilenet_v2.pth --output_model checkpoints/mobilenet_v2.onnx \ No newline at end of file +python3 ../../ixrt_common/export.py --model-name mobilenet_v2 --weight mobilenet_v2-b0353104.pth --output checkpoints/mobilenet_v2.onnx \ No newline at end of file diff --git a/models/cv/classification/mobilenet_v2/ixrt/common.py b/models/cv/classification/mobilenet_v2/ixrt/common.py deleted file mode 100644 index 9500c8d1..00000000 --- a/models/cv/classification/mobilenet_v2/ixrt/common.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -import pycuda.driver as cuda - - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - allocation = cuda.mem_alloc(size) - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/mobilenet_v2/ixrt/export_onnx.py b/models/cv/classification/mobilenet_v2/ixrt/export_onnx.py deleted file mode 100644 index 4881a4c1..00000000 --- a/models/cv/classification/mobilenet_v2/ixrt/export_onnx.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., 
Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import torch -import torchvision.models as models -import argparse - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - - -args = parse_args() -model = models.mobilenet_v2() -model.load_state_dict(torch.load(args.origin_model)) -model.cuda() -model.eval() -inputx = torch.randn(1, 3, 224, 224, device='cuda') -export_onnx_file = args.output_model - - -torch.onnx.export(model, - inputx, - export_onnx_file, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names = ['input'], - output_names = ['output'],) -print(f"Convert onnx model in {export_onnx_file}") -exit() diff --git a/models/cv/classification/mobilenet_v2/ixrt/inference.py b/models/cv/classification/mobilenet_v2/ixrt/inference.py deleted file mode 100644 index ea3f7f6b..00000000 --- a/models/cv/classification/mobilenet_v2/ixrt/inference.py +++ /dev/null @@ -1,164 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -import pycuda.autoinit -import pycuda.driver as cuda -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], 
outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += batch_data.shape[0] - - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - end2end_time = end_time - start_time - - print(F"E2E time : {end2end_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS ACC") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - 
help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/mobilenet_v2/ixrt/modify_batchsize.py b/models/cv/classification/mobilenet_v2/ixrt/modify_batchsize.py deleted file mode 100644 index f0f0e84f..00000000 --- a/models/cv/classification/mobilenet_v2/ixrt/modify_batchsize.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse - - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. 
- dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) diff --git a/models/cv/classification/mobilenet_v2/ixrt/quant.py b/models/cv/classification/mobilenet_v2/ixrt/quant.py deleted file mode 100644 index 4b397451..00000000 --- a/models/cv/classification/mobilenet_v2/ixrt/quant.py +++ /dev/null @@ -1,149 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -from ppq import * -from ppq.api import * -import os -from calibration_dataset import getdataloader -import argparse -import random -import numpy as np -import torch - - -def setseed(seed=42): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], - default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=20) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=224) - args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - - -config = parse_args() - -# modify configuration below: -WORKING_DIRECTORY = 'checkpoints' # choose your working directory -TARGET_PLATFORM = TargetPlatform.TRT_INT8 # choose your target platform -MODEL_TYPE = NetworkFramework.ONNX # or NetworkFramework.CAFFE -INPUT_LAYOUT = 'chw' # input data layout, chw or hwc -NETWORK_INPUTSHAPE = [32, 3, 224, 224] # input shape of your network -EXECUTING_DEVICE = 'cuda' # 'cuda' or 'cpu'. 
-REQUIRE_ANALYSE = False -TRAINING_YOUR_NETWORK = False # 是否需要 Finetuning 一下你的网络 -# ------------------------------------------------------------------- -# 加载你的模型文件,PPQ 将会把 onnx 或者 caffe 模型文件解析成自己的格式 -# 如果你正使用 pytorch, tensorflow 等框架,你可以先将模型导出成 onnx -# 使用 torch.onnx.export 即可,如果你在导出 torch 模型时发生错误,欢迎与我们联系。 -# ------------------------------------------------------------------- -graph = None -if MODEL_TYPE == NetworkFramework.ONNX: - graph = load_onnx_graph(onnx_import_file=config.model) -if MODEL_TYPE == NetworkFramework.CAFFE: - graph = load_caffe_graph( - caffemodel_path=os.path.join(WORKING_DIRECTORY, 'model.caffemodel'), - prototxt_path=os.path.join(WORKING_DIRECTORY, 'model.prototxt')) -assert graph is not None, 'Graph Loading Error, Check your input again.' - -# ------------------------------------------------------------------- -# SETTING 对象用于控制 PPQ 的量化逻辑,主要描述了图融合逻辑、调度方案、量化细节策略等 -# 当你的网络量化误差过高时,你需要修改 SETTING 对象中的属性来进行特定的优化 -# ------------------------------------------------------------------- -QS = QuantizationSettingFactory.default_setting() - -# ------------------------------------------------------------------- -# 下面向你展示了如何使用 finetuning 过程提升量化精度 -# 在 PPQ 中我们提供了十余种算法用来帮助你恢复精度 -# 开启他们的方式都是 QS.xxxx = True -# 按需使用,不要全部打开,容易起飞 -# ------------------------------------------------------------------- -if TRAINING_YOUR_NETWORK: - QS.lsq_optimization = True # 启动网络再训练过程,降低量化误差 - QS.lsq_optimization_setting.steps = 500 # 再训练步数,影响训练时间,500 步大概几分钟 - QS.lsq_optimization_setting.collecting_device = 'cuda' # 缓存数据放在那,cuda 就是放在gpu,如果显存超了你就换成 'cpu' - - -dataloader = getdataloader(config.dataset_dir, config.step, batch_size=config.bsz, img_sz=config.imgsz) -# ENABLE CUDA KERNEL 会加速量化效率 3x ~ 10x,但是你如果没有装相应编译环境的话是编译不了的 -# 你可以尝试安装编译环境,或者在不启动 CUDA KERNEL 的情况下完成量化:移除 with ENABLE_CUDA_KERNEL(): 即可 -with ENABLE_CUDA_KERNEL(): - print('网络正量化中,根据你的量化配置,这将需要一段时间:') - quantized = quantize_native_model( - setting=QS, # setting 对象用来控制标准量化逻辑 - model=graph, - calib_dataloader=dataloader, - 
calib_steps=config.step, - input_shape=NETWORK_INPUTSHAPE, # 如果你的网络只有一个输入,使用这个参数传参 - inputs=None, - # 如果你的网络有多个输入,使用这个参数传参,就是 input_shape=None, inputs=[torch.zeros(1,3,224,224), torch.zeros(1,3,224,224)] - collate_fn=lambda x: x[0].to(EXECUTING_DEVICE), # collate_fn 跟 torch dataloader 的 collate fn 是一样的,用于数据预处理, - # 你当然也可以用 torch dataloader 的那个,然后设置这个为 None - platform=TARGET_PLATFORM, - device=EXECUTING_DEVICE, - do_quantize=True) - - # ------------------------------------------------------------------- - # 如果你需要执行量化后的神经网络并得到结果,则需要创建一个 executor - # 这个 executor 的行为和 torch.Module 是类似的,你可以利用这个东西来获取执行结果 - # 请注意,必须在 export 之前执行此操作。 - # ------------------------------------------------------------------- - executor = TorchExecutor(graph=quantized, device=EXECUTING_DEVICE) - # output = executor.forward(input) - - # ------------------------------------------------------------------- - # PPQ 计算量化误差时,使用信噪比的倒数作为指标,即噪声能量 / 信号能量 - # 量化误差 0.1 表示在整体信号中,量化噪声的能量约为 10% - # 你应当注意,在 graphwise_error_analyse 分析中,我们衡量的是累计误差 - # 网络的最后一层往往都具有较大的累计误差,这些误差是其前面的所有层所共同造成的 - # 你需要使用 layerwise_error_analyse 逐层分析误差的来源 - # ------------------------------------------------------------------- - print('正计算网络量化误差(SNR),最后一层的误差应小于 0.1 以保证量化精度:') - reports = graphwise_error_analyse( - graph=quantized, running_device=EXECUTING_DEVICE, steps=32, - dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE)) - for op, snr in reports.items(): - if snr > 0.1: ppq_warning(f'层 {op} 的累计量化误差显著,请考虑进行优化') - - if REQUIRE_ANALYSE: - print('正计算逐层量化误差(SNR),每一层的独立量化误差应小于 0.1 以保证量化精度:') - layerwise_error_analyse(graph=quantized, running_device=EXECUTING_DEVICE, - interested_outputs=None, - dataloader=dataloader, collate_fn=lambda x: x.to(EXECUTING_DEVICE)) - - # ------------------------------------------------------------------- - # 使用 export_ppq_graph 函数来导出量化后的模型 - # PPQ 会根据你所选择的导出平台来修改模型格式 - # ------------------------------------------------------------------- - print('网络量化结束,正在生成目标文件:') - export_ppq_graph( - 
graph=quantized, platform=TARGET_PLATFORM, - graph_save_to=os.path.join(config.save_dir, f"quantized_{config.model_name}.onnx"), - config_save_to=os.path.join(config.save_dir, 'quant_cfg.json')) diff --git a/models/cv/classification/mobilenet_v2/ixrt/requirements.txt b/models/cv/classification/mobilenet_v2/ixrt/requirements.txt deleted file mode 100644 index b68b421f..00000000 --- a/models/cv/classification/mobilenet_v2/ixrt/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -tqdm -onnxsim -opencv-python==4.6.0.66 -ppq -protobuf==3.20.0 -pycuda \ No newline at end of file diff --git a/models/cv/classification/mobilenet_v2/ixrt/scripts/infer_mobilenet_v2_fp16_accuracy.sh b/models/cv/classification/mobilenet_v2/ixrt/scripts/infer_mobilenet_v2_fp16_accuracy.sh index 09108e32..e62cc5d7 100644 --- a/models/cv/classification/mobilenet_v2/ixrt/scripts/infer_mobilenet_v2_fp16_accuracy.sh +++ b/models/cv/classification/mobilenet_v2/ixrt/scripts/infer_mobilenet_v2_fp16_accuracy.sh @@ -29,7 +29,6 @@ WARM_UP=0 LOOP_COUNT=-1 RUN_MODE=ACC PRECISION=float16 -IMGSIZE=224 # Update arguments index=0 @@ -44,18 +43,8 @@ do esac done -RUN_DIR=${RUN_DIR} -PROJ_DIR=${PROJ_DIR} -DATASETS_DIR=${DATASETS_DIR} -CHECKPOINTS_DIR=${CHECKPOINTS_DIR} - -if [ ! 
-d $CHECKPOINTS_DIR ]; then - mkdir -p $CHECKPOINTS_DIR -fi - - -MODEL_NAME="mobilenet_v2" -ORIGINE_MODEL="${CHECKPOINTS_DIR}/mobilenet_v2.onnx" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} echo DATASETS_DIR : ${DATASETS_DIR} @@ -63,14 +52,13 @@ echo RUN_DIR : ${RUN_DIR} echo CONFIG_DIR : ${CONFIG_DIR} echo ====================== Model Info ====================== echo Model Name : ${MODEL_NAME} +echo Model Input Name : ${MODEL_INPUT_NAME} +echo Model Output Name : ${MODEL_OUTPUT_NAME} echo Onnx Path : ${ORIGINE_MODEL} step=0 -# Export Onnx Model -let step++ -echo; - SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx + # Simplify Model let step++ echo; @@ -78,12 +66,40 @@ echo [STEP ${step}] : Simplify Model if [ -f ${SIM_MODEL} ];then echo " "Simplify Model, ${SIM_MODEL} has been existed else - python3 ${RUN_DIR}simplify_model.py \ - --origin_model $ORIGINE_MODEL \ - --output_model ${SIM_MODEL} + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} echo " "Generate ${SIM_MODEL} fi +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + # Change Batchsize let step++ echo; @@ -92,10 +108,8 @@ 
FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx if [ -f $FINAL_MODEL ];then echo " "Change Batchsize Skip, $FINAL_MODEL has been existed else - python3 ${RUN_DIR}modify_batchsize.py \ - --batch_size ${BSZ} \ - --origin_model ${SIM_MODEL} \ - --output_model ${FINAL_MODEL} + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} echo " "Generate ${FINAL_MODEL} fi @@ -107,9 +121,9 @@ ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine if [ -f $ENGINE_FILE ];then echo " "Build Engine Skip, $ENGINE_FILE has been existed else - python3 ${RUN_DIR}build_engine.py \ + python3 ${RUN_DIR}/build_engine.py \ --precision ${PRECISION} \ - --model ${FINAL_MODEL} \ + --model ${FINAL_MODEL} \ --engine ${ENGINE_FILE} echo " "Generate Engine ${ENGINE_FILE} fi @@ -118,7 +132,7 @@ fi let step++ echo; echo [STEP ${step}] : Inference -python3 ${RUN_DIR}inference.py \ +python3 ${RUN_DIR}/inference.py \ --engine_file=${ENGINE_FILE} \ --datasets_dir=${DATASETS_DIR} \ --imgsz=${IMGSIZE} \ @@ -128,4 +142,4 @@ python3 ${RUN_DIR}inference.py \ --acc_target ${TGT} \ --bsz ${BSZ}; check_status -exit ${EXIT_STATUS} +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/mobilenet_v2/ixrt/scripts/infer_mobilenet_v2_fp16_performance.sh b/models/cv/classification/mobilenet_v2/ixrt/scripts/infer_mobilenet_v2_fp16_performance.sh index 63a1d6bd..55daf64a 100644 --- a/models/cv/classification/mobilenet_v2/ixrt/scripts/infer_mobilenet_v2_fp16_performance.sh +++ b/models/cv/classification/mobilenet_v2/ixrt/scripts/infer_mobilenet_v2_fp16_performance.sh @@ -1,5 +1,4 @@ -#!/bin/bash -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +#!/bin/bash# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. # All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); you may @@ -25,11 +24,10 @@ check_status() # Run paraments BSZ=32 TGT=-1 -WARM_UP=5 +WARM_UP=3 LOOP_COUNT=20 RUN_MODE=FPS PRECISION=float16 -IMGSIZE=224 # Update arguments index=0 @@ -44,18 +42,8 @@ do esac done -RUN_DIR=${RUN_DIR} -PROJ_DIR=${PROJ_DIR} -DATASETS_DIR=${DATASETS_DIR} -CHECKPOINTS_DIR=${CHECKPOINTS_DIR} - -if [ ! -d $CHECKPOINTS_DIR ]; then - mkdir -p $CHECKPOINTS_DIR -fi - - -MODEL_NAME="mobilenet_v2" -ORIGINE_MODEL="${CHECKPOINTS_DIR}/mobilenet_v2.onnx" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} echo DATASETS_DIR : ${DATASETS_DIR} @@ -63,14 +51,13 @@ echo RUN_DIR : ${RUN_DIR} echo CONFIG_DIR : ${CONFIG_DIR} echo ====================== Model Info ====================== echo Model Name : ${MODEL_NAME} +echo Model Input Name : ${MODEL_INPUT_NAME} +echo Model Output Name : ${MODEL_OUTPUT_NAME} echo Onnx Path : ${ORIGINE_MODEL} step=0 -# Export Onnx Model -let step++ -echo; - SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx + # Simplify Model let step++ echo; @@ -78,12 +65,40 @@ echo [STEP ${step}] : Simplify Model if [ -f ${SIM_MODEL} ];then echo " "Simplify Model, ${SIM_MODEL} has been existed else - python3 ${RUN_DIR}simplify_model.py \ - --origin_model $ORIGINE_MODEL \ - --output_model ${SIM_MODEL} + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} echo " "Generate ${SIM_MODEL} fi +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + 
--observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + # Change Batchsize let step++ echo; @@ -92,10 +107,8 @@ FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx if [ -f $FINAL_MODEL ];then echo " "Change Batchsize Skip, $FINAL_MODEL has been existed else - python3 ${RUN_DIR}modify_batchsize.py \ - --batch_size ${BSZ} \ - --origin_model ${SIM_MODEL} \ - --output_model ${FINAL_MODEL} + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} echo " "Generate ${FINAL_MODEL} fi @@ -107,9 +120,9 @@ ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine if [ -f $ENGINE_FILE ];then echo " "Build Engine Skip, $ENGINE_FILE has been existed else - python3 ${RUN_DIR}build_engine.py \ + python3 ${RUN_DIR}/build_engine.py \ --precision ${PRECISION} \ - --model ${FINAL_MODEL} \ + --model ${FINAL_MODEL} \ --engine ${ENGINE_FILE} echo " "Generate Engine ${ENGINE_FILE} fi @@ -118,7 +131,7 @@ fi let step++ echo; echo [STEP ${step}] : Inference -python3 ${RUN_DIR}inference.py \ +python3 ${RUN_DIR}/inference.py \ --engine_file=${ENGINE_FILE} \ --datasets_dir=${DATASETS_DIR} \ --imgsz=${IMGSIZE} \ @@ -128,4 +141,4 @@ python3 ${RUN_DIR}inference.py \ --fps_target ${TGT} \ --bsz ${BSZ}; check_status -exit ${EXIT_STATUS} +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/mobilenet_v2/ixrt/scripts/infer_mobilenet_v2_int8_accuracy.sh b/models/cv/classification/mobilenet_v2/ixrt/scripts/infer_mobilenet_v2_int8_accuracy.sh index f1348818..062a04cb 100644 --- a/models/cv/classification/mobilenet_v2/ixrt/scripts/infer_mobilenet_v2_int8_accuracy.sh +++ b/models/cv/classification/mobilenet_v2/ixrt/scripts/infer_mobilenet_v2_int8_accuracy.sh @@ 
-1,5 +1,4 @@ -#!/bin/bash -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +#!/bin/bash# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. # All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may @@ -29,7 +28,6 @@ WARM_UP=0 LOOP_COUNT=-1 RUN_MODE=ACC PRECISION=int8 -IMGSIZE=224 # Update arguments index=0 @@ -44,18 +42,8 @@ do esac done -RUN_DIR=${RUN_DIR} -PROJ_DIR=${PROJ_DIR} -DATASETS_DIR=${DATASETS_DIR} -CHECKPOINTS_DIR=${CHECKPOINTS_DIR} - -if [ ! -d $CHECKPOINTS_DIR ]; then - mkdir -p $CHECKPOINTS_DIR -fi - - -MODEL_NAME="mobilenet_v2" -ORIGINE_MODEL="${CHECKPOINTS_DIR}/mobilenet_v2.onnx" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} echo DATASETS_DIR : ${DATASETS_DIR} @@ -63,14 +51,13 @@ echo RUN_DIR : ${RUN_DIR} echo CONFIG_DIR : ${CONFIG_DIR} echo ====================== Model Info ====================== echo Model Name : ${MODEL_NAME} +echo Model Input Name : ${MODEL_INPUT_NAME} +echo Model Output Name : ${MODEL_OUTPUT_NAME} echo Onnx Path : ${ORIGINE_MODEL} step=0 -# Export Onnx Model -let step++ -echo; - SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx + # Simplify Model let step++ echo; @@ -78,55 +65,64 @@ echo [STEP ${step}] : Simplify Model if [ -f ${SIM_MODEL} ];then echo " "Simplify Model, ${SIM_MODEL} has been existed else - python3 ${RUN_DIR}simplify_model.py \ - --origin_model $ORIGINE_MODEL \ - --output_model ${SIM_MODEL} + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} echo " "Generate ${SIM_MODEL} fi +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been 
existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + # Change Batchsize let step++ echo; echo [STEP ${step}] : Change Batchsize -FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx if [ -f $FINAL_MODEL ];then echo " "Change Batchsize Skip, $FINAL_MODEL has been existed else - python3 ${RUN_DIR}modify_batchsize.py \ - --batch_size ${BSZ} \ - --origin_model ${SIM_MODEL} \ - --output_model ${FINAL_MODEL} + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} echo " "Generate ${FINAL_MODEL} fi -# Quantize Model -let step++ -echo; -echo [STEP ${step}] : Quantize Model By PPQ -QUANTIZED_MODEL=${CHECKPOINTS_DIR}/quantized_${MODEL_NAME}.onnx -QUANTIZED_Q_PARAMS_JSON=${CHECKPOINTS_DIR}/quant_cfg.json -if [ -f $QUANTIZED_MODEL ];then - echo " "Quantized Model Skip By PPQ, $QUANTIZED_MODEL has been existed -else - python3 ${RUN_DIR}quant.py \ - --model_name ${MODEL_NAME} \ - --model ${FINAL_MODEL} \ - --dataset_dir ${DATASETS_DIR} \ - --save_dir ${CHECKPOINTS_DIR} - echo " "Generate ${QUANTIZED_MODEL} -fi - # Build Engine let step++ echo; -echo [STEP ${step}] : Build Engine By writing the ppq params onnx +echo [STEP ${step}] : Build Engine ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine if [ -f $ENGINE_FILE ];then echo " "Build Engine Skip, $ENGINE_FILE has been existed else - python3 ${RUN_DIR}build_engine_by_write_qparams.py \ - --onnx ${QUANTIZED_MODEL} \ - --qparam_json ${QUANTIZED_Q_PARAMS_JSON} \ + python3 ${RUN_DIR}/build_engine.py \ + 
--precision ${PRECISION} \ + --model ${FINAL_MODEL} \ --engine ${ENGINE_FILE} echo " "Generate Engine ${ENGINE_FILE} fi @@ -135,7 +131,7 @@ fi let step++ echo; echo [STEP ${step}] : Inference -python3 ${RUN_DIR}inference.py \ +python3 ${RUN_DIR}/inference.py \ --engine_file=${ENGINE_FILE} \ --datasets_dir=${DATASETS_DIR} \ --imgsz=${IMGSIZE} \ @@ -145,4 +141,4 @@ python3 ${RUN_DIR}inference.py \ --acc_target ${TGT} \ --bsz ${BSZ}; check_status -exit ${EXIT_STATUS} +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/mobilenet_v2/ixrt/scripts/infer_mobilenet_v2_int8_performance.sh b/models/cv/classification/mobilenet_v2/ixrt/scripts/infer_mobilenet_v2_int8_performance.sh index efd5b930..c526b81a 100644 --- a/models/cv/classification/mobilenet_v2/ixrt/scripts/infer_mobilenet_v2_int8_performance.sh +++ b/models/cv/classification/mobilenet_v2/ixrt/scripts/infer_mobilenet_v2_int8_performance.sh @@ -25,11 +25,10 @@ check_status() # Run paraments BSZ=32 TGT=-1 -WARM_UP=5 +WARM_UP=3 LOOP_COUNT=20 RUN_MODE=FPS PRECISION=int8 -IMGSIZE=224 # Update arguments index=0 @@ -44,18 +43,8 @@ do esac done -RUN_DIR=${RUN_DIR} -PROJ_DIR=${PROJ_DIR} -DATASETS_DIR=${DATASETS_DIR} -CHECKPOINTS_DIR=${CHECKPOINTS_DIR} - -if [ ! 
-d $CHECKPOINTS_DIR ]; then - mkdir -p $CHECKPOINTS_DIR -fi - - -MODEL_NAME="mobilenet_v2" -ORIGINE_MODEL="${CHECKPOINTS_DIR}/mobilenet_v2.onnx" +source ${CONFIG_DIR} +ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} echo DATASETS_DIR : ${DATASETS_DIR} @@ -63,14 +52,13 @@ echo RUN_DIR : ${RUN_DIR} echo CONFIG_DIR : ${CONFIG_DIR} echo ====================== Model Info ====================== echo Model Name : ${MODEL_NAME} +echo Model Input Name : ${MODEL_INPUT_NAME} +echo Model Output Name : ${MODEL_OUTPUT_NAME} echo Onnx Path : ${ORIGINE_MODEL} step=0 -# Export Onnx Model -let step++ -echo; - SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx + # Simplify Model let step++ echo; @@ -78,55 +66,64 @@ echo [STEP ${step}] : Simplify Model if [ -f ${SIM_MODEL} ];then echo " "Simplify Model, ${SIM_MODEL} has been existed else - python3 ${RUN_DIR}simplify_model.py \ - --origin_model $ORIGINE_MODEL \ - --output_model ${SIM_MODEL} + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} echo " "Generate ${SIM_MODEL} fi +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + # Change Batchsize let step++ echo; echo [STEP ${step}] : Change Batchsize 
-FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx if [ -f $FINAL_MODEL ];then echo " "Change Batchsize Skip, $FINAL_MODEL has been existed else - python3 ${RUN_DIR}modify_batchsize.py \ - --batch_size ${BSZ} \ - --origin_model ${SIM_MODEL} \ - --output_model ${FINAL_MODEL} + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} echo " "Generate ${FINAL_MODEL} fi -# Quantize Model -let step++ -echo; -echo [STEP ${step}] : Quantize Model By PPQ -QUANTIZED_MODEL=${CHECKPOINTS_DIR}/quantized_${MODEL_NAME}.onnx -QUANTIZED_Q_PARAMS_JSON=${CHECKPOINTS_DIR}/quant_cfg.json -if [ -f $QUANTIZED_MODEL ];then - echo " "Quantized Model Skip By PPQ, $QUANTIZED_MODEL has been existed -else - python3 ${RUN_DIR}quant.py \ - --model_name ${MODEL_NAME} \ - --model ${FINAL_MODEL} \ - --dataset_dir ${DATASETS_DIR} \ - --save_dir ${CHECKPOINTS_DIR} - echo " "Generate ${QUANTIZED_MODEL} -fi - # Build Engine let step++ echo; -echo [STEP ${step}] : Build Engine By writing the ppq params onnx +echo [STEP ${step}] : Build Engine ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine if [ -f $ENGINE_FILE ];then echo " "Build Engine Skip, $ENGINE_FILE has been existed else - python3 ${RUN_DIR}build_engine_by_write_qparams.py \ - --onnx ${QUANTIZED_MODEL} \ - --qparam_json ${QUANTIZED_Q_PARAMS_JSON} \ + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ --engine ${ENGINE_FILE} echo " "Generate Engine ${ENGINE_FILE} fi @@ -135,14 +132,14 @@ fi let step++ echo; echo [STEP ${step}] : Inference -python3 ${RUN_DIR}inference.py \ +python3 ${RUN_DIR}/inference.py \ --engine_file=${ENGINE_FILE} \ --datasets_dir=${DATASETS_DIR} \ --imgsz=${IMGSIZE} \ --warm_up=${WARM_UP} \ --loop_count ${LOOP_COUNT} \ --test_mode ${RUN_MODE} \ - --acc_target ${TGT} \ + --fps_target ${TGT} \ --bsz ${BSZ}; check_status -exit 
${EXIT_STATUS} +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/mobilenet_v2/ixrt/simplify_model.py b/models/cv/classification/mobilenet_v2/ixrt/simplify_model.py deleted file mode 100644 index 36a3b056..00000000 --- a/models/cv/classification/mobilenet_v2/ixrt/simplify_model.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) diff --git a/models/cv/classification/mobilenet_v3/ixrt/README.md b/models/cv/classification/mobilenet_v3/ixrt/README.md index fa01f303..f08c982e 100644 --- a/models/cv/classification/mobilenet_v3/ixrt/README.md +++ b/models/cv/classification/mobilenet_v3/ixrt/README.md @@ -27,14 +27,14 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install 
-r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash mkdir checkpoints -python3 export_onnx.py --origin_model /path/to/mobilenet_v3_small-047dcff4.pth --output_model checkpoints/mobilenet_v3.onnx +python3 ../../ixrt_common/export.py --model-name mobilenet_v3_small --weight mobilenet_v3_small-047dcff4.pth --output checkpoints/mobilenet_v3.onnx ``` ## Model Inference @@ -43,8 +43,8 @@ python3 export_onnx.py --origin_model /path/to/mobilenet_v3_small-047dcff4.pth - export PROJ_DIR=./ export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints -export RUN_DIR=./ -export CONFIG_DIR=config/MOBILENET_V3_CONFIG +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/MOBILENET_V3_CONFIG ``` ### FP16 diff --git a/models/cv/classification/mobilenet_v3/ixrt/build_engine.py b/models/cv/classification/mobilenet_v3/ixrt/build_engine.py deleted file mode 100644 index 126da5e6..00000000 --- a/models/cv/classification/mobilenet_v3/ixrt/build_engine.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - # print("precision : ", precision) - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/mobilenet_v3/ixrt/calibration_dataset.py b/models/cv/classification/mobilenet_v3/ixrt/calibration_dataset.py deleted file mode 100644 index 442a5602..00000000 --- a/models/cv/classification/mobilenet_v3/ixrt/calibration_dataset.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 
0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/mobilenet_v3/ixrt/ci/prepare.sh b/models/cv/classification/mobilenet_v3/ixrt/ci/prepare.sh index 31817b28..32a8454e 100644 --- a/models/cv/classification/mobilenet_v3/ixrt/ci/prepare.sh +++ b/models/cv/classification/mobilenet_v3/ixrt/ci/prepare.sh @@ -25,6 +25,6 @@ else echo "Not Support Os" fi -pip install -r requirements.txt +pip install -r ../../ixrt_common/requirements.txt mkdir checkpoints -python3 export_onnx.py --origin_model /root/data/checkpoints/mobilenet_v3.pth --output_model checkpoints/mobilenet_v3.onnx \ No newline at end of file +python3 ../../ixrt_common/export.py --model-name mobilenet_v3_small --weight mobilenet_v3_small-047dcff4.pth --output checkpoints/mobilenet_v3.onnx \ No newline at end of file diff --git a/models/cv/classification/mobilenet_v3/ixrt/common.py b/models/cv/classification/mobilenet_v3/ixrt/common.py deleted file mode 100644 index 0458195e..00000000 --- a/models/cv/classification/mobilenet_v3/ixrt/common.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) 
2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -import pycuda.driver as cuda - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - allocation = cuda.mem_alloc(size) - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": 
allocation, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/mobilenet_v3/ixrt/export_onnx.py b/models/cv/classification/mobilenet_v3/ixrt/export_onnx.py deleted file mode 100644 index ebbd111a..00000000 --- a/models/cv/classification/mobilenet_v3/ixrt/export_onnx.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import torch -import torchvision.models as models -import argparse - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = models.mobilenet_v3_small() -model.load_state_dict(torch.load(args.origin_model)) -model.cuda() -model.eval() -input = torch.randn(1, 3, 224, 224, device='cuda') -export_onnx_file = args.output_model - -torch.onnx.export(model, - input, - export_onnx_file, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names = ['input'], - output_names = ['output'], - dynamic_axes={'input' : {0 : 'batch_size'}, - 'output' : {0 : 'batch_size'}}) -print(" ") -print('Model has been converted to ONNX') -print("exit") -exit() \ No newline at end of file diff --git a/models/cv/classification/mobilenet_v3/ixrt/inference.py b/models/cv/classification/mobilenet_v3/ixrt/inference.py deleted file mode 100644 index 50aafd4f..00000000 --- a/models/cv/classification/mobilenet_v3/ixrt/inference.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -import pycuda.autoinit -import pycuda.driver as cuda -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += 
batch_data.shape[0] - - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - end2end_time = end_time - start_time - - print(F"E2E time : {end2end_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 
0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/mobilenet_v3/ixrt/modify_batchsize.py b/models/cv/classification/mobilenet_v3/ixrt/modify_batchsize.py deleted file mode 100644 index 689b7a97..00000000 --- a/models/cv/classification/mobilenet_v3/ixrt/modify_batchsize.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. 
- dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/mobilenet_v3/ixrt/requirements.txt b/models/cv/classification/mobilenet_v3/ixrt/requirements.txt deleted file mode 100644 index a3ef1a19..00000000 --- a/models/cv/classification/mobilenet_v3/ixrt/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -pycuda -tqdm -onnx -onnxsim -tabulate \ No newline at end of file diff --git a/models/cv/classification/mobilenet_v3/ixrt/simplify_model.py b/models/cv/classification/mobilenet_v3/ixrt/simplify_model.py deleted file mode 100644 index 9948a9fa..00000000 --- a/models/cv/classification/mobilenet_v3/ixrt/simplify_model.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the -# License for the specific language governing permissions and limitations -# under the License. -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/repvgg/ixrt/README.md b/models/cv/classification/repvgg/ixrt/README.md index 6830c1c5..335ac98f 100644 --- a/models/cv/classification/repvgg/ixrt/README.md +++ b/models/cv/classification/repvgg/ixrt/README.md @@ -26,7 +26,8 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt +pip3 install mmcls==0.24.0 mmcv==1.5.3 ``` ### Model Conversion @@ -47,9 +48,8 @@ python3 export_onnx.py \ export PROJ_DIR=./ export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints -export RUN_DIR=./ -export CONFIG_DIR=config/REPVGG_CONFIG - +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/REPVGG_CONFIG ``` ### FP16 diff --git a/models/cv/classification/repvgg/ixrt/calibration_dataset.py b/models/cv/classification/repvgg/ixrt/calibration_dataset.py deleted file mode 100644 index b394c76b..00000000 --- a/models/cv/classification/repvgg/ixrt/calibration_dataset.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( 
- data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/repvgg/ixrt/ci/prepare.sh b/models/cv/classification/repvgg/ixrt/ci/prepare.sh index 221eb5dd..55ad046b 100644 --- a/models/cv/classification/repvgg/ixrt/ci/prepare.sh +++ b/models/cv/classification/repvgg/ixrt/ci/prepare.sh @@ -25,7 +25,8 @@ else echo "Not Support Os" fi -pip install -r requirements.txt +pip install -r ../../ixrt_common/requirements.txt +pip3 install mmcls==0.24.0 mmcv==1.5.3 unzip -q /root/data/repos/mmpretrain-0.24.0.zip -d ./ mkdir -p checkpoints python3 export_onnx.py \ diff --git a/models/cv/classification/repvgg/ixrt/common.py b/models/cv/classification/repvgg/ixrt/common.py deleted file mode 100644 index abdc147c..00000000 --- a/models/cv/classification/repvgg/ixrt/common.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -import pycuda.driver as cuda - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - allocation = cuda.mem_alloc(size) - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - } - print(f"binding {i}, name : {name} dtype : 
{np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/repvgg/ixrt/inference.py b/models/cv/classification/repvgg/ixrt/inference.py deleted file mode 100644 index 11a90c79..00000000 --- a/models/cv/classification/repvgg/ixrt/inference.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -import pycuda.autoinit -import pycuda.driver as cuda -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += 
batch_data.shape[0] - - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - e2e_time = end_time - start_time - print(F"E2E time : {e2e_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 
0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/repvgg/ixrt/modify_batchsize.py b/models/cv/classification/repvgg/ixrt/modify_batchsize.py deleted file mode 100644 index 4ac42a30..00000000 --- a/models/cv/classification/repvgg/ixrt/modify_batchsize.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. 
- dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/repvgg/ixrt/quant.py b/models/cv/classification/repvgg/ixrt/quant.py deleted file mode 100644 index 8006db24..00000000 --- a/models/cv/classification/repvgg/ixrt/quant.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import random -import argparse -import numpy as np -from random import shuffle -from tensorrt.deploy import static_quantize - -import torch -import torchvision.datasets -from calibration_dataset import getdataloader - -def setseed(seed=42): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=20) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=224) - args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - -args = parse_args() -setseed(args.seed) -calibration_dataloader = getdataloader(args.dataset_dir, args.step, args.bsz, img_sz=args.imgsz) -static_quantize(args.model, - calibration_dataloader=calibration_dataloader, - save_quant_onnx_path=os.path.join(args.save_dir, f"quantized_{args.model_name}.onnx"), - observer=args.observer, - data_preprocess=lambda x: x[0].to("cuda"), - quant_format="qdq", - disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/classification/repvgg/ixrt/refine_model.py b/models/cv/classification/repvgg/ixrt/refine_model.py deleted file mode 100644 index 000ee4dc..00000000 --- a/models/cv/classification/repvgg/ixrt/refine_model.py +++ /dev/null @@ -1,291 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. 
-# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import argparse -import dataclasses - -import torch -import onnx - -from refine_utils.matmul_to_gemm_pass import FusedGemmPass -from refine_utils.linear_pass import FusedLinearPass - -from refine_utils.common import * - -def get_constant_input_name_of_operator(graph: Graph, operator: Operator): - const = None - for input in operator.inputs: - if not graph.containe_var(input): - continue - - if not graph.is_leaf_variable(input): - continue - - input_var = graph.get_variable(input) - if input_var.value is not None: - const = input - return const - -class FuseLayerNormPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - find_sequence_subgraph( - graph, - [OP.REDUCE_MEAN, OP.SUB, OP.POW, OP.REDUCE_MEAN, OP.ADD, OP.SQRT, OP.DIV, OP.MUL, OP.ADD], - self.fuse_layer_norm, - strict=False - ) - return graph - - def fuse_layer_norm(self, graph: Graph, pattern: PatternGraph): - # 检查 REDUCE_MEAN 的输入是否和 SUB 的输入是一致的 - if pattern.nodes[0].operator.inputs[0] != pattern.nodes[1].operator.inputs[0]: - return - - # 检查 POW 的输入是否和 DIV 的输入是一致的 - if pattern.nodes[2].operator.inputs[0] != pattern.nodes[6].operator.inputs[0]: - return - - # 检查部分算子的输出是否被多个算子使用 - nodes = pattern.nodes - for node in [nodes[0]] + nodes[2:-1]: - next_ops = graph.get_next_operators(node.operator) - if len(next_ops) > 1: - return - - eps = None - for input in 
nodes[4].operator.inputs: - input_var = graph.get_variable(input) - if input_var.value is not None and graph.is_leaf_variable(input): - eps = to_py_type(input_var.value) - - scale = get_constant_input_name_of_operator(graph, nodes[-2].operator) - bias = get_constant_input_name_of_operator(graph, nodes[-1].operator) - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - bias_var = graph.get_variable(bias) - print(bias_var) - - attributes = { - "axis": nodes[0].operator.attributes.axes, - "epsilon": eps, - } - - - layer_norm_op = self.transform.make_operator( - op_type="LayerNormalization", - inputs=[nodes[0].operator.inputs[0], scale, bias], - outputs=[nodes[-1].operator.outputs[0]], - **attributes - ) - - self.transform.add_operator(layer_norm_op) - -class FusedGeluPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True - ) - return graph - - def fuse_gelu(self, graph: Graph, pattern: PatternGraph): - nodes = pattern.nodes - prev_op = self.transform.get_previous_operators(nodes[0].operator)[0] - next_ops = self.transform.get_next_operators(prev_op) - if len(next_ops) != 2: - return - - if nodes[0].operator not in next_ops or nodes[3].operator not in next_ops: - return - - gelu_op_input = None - for input in nodes[3].operator.inputs: - if input in nodes[0].operator.inputs: - gelu_op_input = input - break - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - gelu_op = self.transform.make_operator( - op_type=OP.GELU, - inputs=[gelu_op_input], - outputs=[nodes[-1].operator.outputs[0]] - ) - self.transform.add_operator(gelu_op) - -@dataclasses.dataclass -class NormalizeAttr(BaseOperatorAttr): - p: float = 2.0 - epsilon: float = 1e-12 - axis: int = 1 - - -@registe_operator(OP.GELU) -class GeluOperator(BaseOperator): - - def 
call( - self, - executor, - operator: Operator, - inputs: List, - attr: NormalizeAttr, - ): - return F.gelu(inputs[0]) - - def convert_onnx_operator( - self, ir_graph: Graph, onnx_graph: onnx.GraphProto, node: onnx.NodeProto - ) -> Operator: - return default_converter(ir_graph, onnx_graph, node, attr_cls=attr.EmptyAttr) - - def quantize( - self, - graph: Graph, - op: Operator, - operator_observer_config: QuantOperatorObserverConfig, - quant_outputs: bool = False, - ): - return quant_single_input_operator(graph, op, operator_observer_config, quant_outputs=quant_outputs) - - - -class ClearUnsedVariables(BasePass): - - def process(self, graph: Graph) -> Graph: - vars = list(graph.variables) - - for var in vars: - if len(graph.get_dst_operators(var)) == 0 and graph.is_leaf_variable(var): - graph.delete_variable(var) - - quant_params = list(graph.quant_parameters.keys()) - for var in quant_params: - if not graph.containe_var(var): - graph.quant_parameters.pop(var) - - return graph - -class FormatLayerNorm(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if "LayerNorm" in op.op_type: - self.format_layer_norm(graph, op) - return graph - - def format_layer_norm(self, graph, operator): - if not hasattr(operator.attributes, "axis"): - return - if isinstance(operator.attributes.axis, (tuple, list)): - operator.attributes.axis = operator.attributes.axis[0] - -class FormatReshape(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if op.op_type == "Reshape": - self.format_reshape(graph, op) - - return graph - - def format_reshape(self, graph, operator): - shape = graph.get_variable(operator.inputs[1]) - shape.value = torch.tensor(shape.value, dtype=torch.int64) - -class FormatScalar(BasePass): - - def process(self, graph: Graph): - for var in graph.variables.values(): - var: Variable - use_ops = graph.get_dst_operators(var) - - if len(use_ops) == 0: - continue - - if use_ops[0].op_type 
not in [OP.MUL, OP.ADD, OP.GATHER]: - continue - - if var.value is not None and var.value.ndim == 0: - var.value = var.value.reshape(1) - print(f"Reshape scalar to tensor for {var.name}.") - - return graph - -class RenamePass(BasePass): - - def process(self, graph:Graph): - - names = [name for name in graph.operators.keys()] - for old_name in names: - new_name = old_name.replace("/", "#") - - graph.rename_operator(old_name, new_name) - - names = [name for name in graph.variables.keys()] - for name in names: - new_name = name.replace("/", ".").replace("Output", "out").replace("output", "out") - - graph.rename_vaiable(name, new_name, - with_variables=True, - with_operator_outputs=True) - - return graph - -def create_pipeline(example_inputs): - return PassSequence( - # FuseLayerNormPass(), - FusedGeluPass(), - - # ClearUnsedVariables(), - # FormatLayerNorm(), - # FormatReshape(), - # FormatScalar(), - # RenamePass() - ) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--onnx_path", type=str) - parser.add_argument("--dst_onnx_path", type=str) - - parser.add_argument("--bsz", type=int, default=8, - help="Batch size") - parser.add_argument("--imgsz", type=int, default=224, - help="Image size") - - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - - example_inputs = torch.randn(args.bsz, 3, args.imgsz, args.imgsz) - - refine_pipline = Pipeline( - create_source(f"{args.onnx_path}", example_inputs=example_inputs), - create_pipeline(example_inputs), - create_target( - f"{args.dst_onnx_path}", - example_inputs=example_inputs, - ) - ) - refine_pipline.run() - - print(f"refine the model, input shape={example_inputs.shape}") diff --git a/models/cv/classification/repvgg/ixrt/requirements.txt b/models/cv/classification/repvgg/ixrt/requirements.txt deleted file mode 100644 index 566974bb..00000000 --- a/models/cv/classification/repvgg/ixrt/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -tqdm -tabulate -onnx -onnxsim 
-opencv-python==4.6.0.66 -mmcls==0.24.0 -mmcv==1.5.3 -pycuda \ No newline at end of file diff --git a/models/cv/classification/repvgg/ixrt/simplify_model.py b/models/cv/classification/repvgg/ixrt/simplify_model.py deleted file mode 100644 index 4d53a474..00000000 --- a/models/cv/classification/repvgg/ixrt/simplify_model.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/res2net50/ixrt/README.md b/models/cv/classification/res2net50/ixrt/README.md index 1dfee284..03a3abda 100644 --- a/models/cv/classification/res2net50/ixrt/README.md +++ b/models/cv/classification/res2net50/ixrt/README.md @@ -27,7 +27,7 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion @@ -43,8 +43,8 @@ python3 export_onnx.py --origin_model /path/to/res2net50_14w_8s-6527dddc.pth --o export PROJ_DIR=./ export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints -export RUN_DIR=./ -export CONFIG_DIR=config/RES2NET50_CONFIG +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/RES2NET50_CONFIG ``` ### FP16 diff --git a/models/cv/classification/res2net50/ixrt/build_engine.py b/models/cv/classification/res2net50/ixrt/build_engine.py deleted file mode 100644 index 126da5e6..00000000 --- a/models/cv/classification/res2net50/ixrt/build_engine.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - # print("precision : ", precision) - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/res2net50/ixrt/calibration_dataset.py b/models/cv/classification/res2net50/ixrt/calibration_dataset.py deleted file mode 100644 index 442a5602..00000000 --- a/models/cv/classification/res2net50/ixrt/calibration_dataset.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2024, Shanghai 
Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, 
img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/res2net50/ixrt/ci/prepare.sh b/models/cv/classification/res2net50/ixrt/ci/prepare.sh index 9725f366..c3e5f566 100644 --- a/models/cv/classification/res2net50/ixrt/ci/prepare.sh +++ b/models/cv/classification/res2net50/ixrt/ci/prepare.sh @@ -25,6 +25,6 @@ else echo "Not Support Os" fi -pip install -r requirements.txt +pip install -r ../../ixrt_common/requirements.txt mkdir checkpoints python3 export_onnx.py --origin_model /root/data/checkpoints/res2net50.pth --output_model checkpoints/res2net50.onnx \ No newline at end of file diff --git a/models/cv/classification/res2net50/ixrt/common.py b/models/cv/classification/res2net50/ixrt/common.py deleted file mode 100644 index 0458195e..00000000 --- a/models/cv/classification/res2net50/ixrt/common.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) 
2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -import pycuda.driver as cuda - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - allocation = cuda.mem_alloc(size) - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": 
allocation, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/res2net50/ixrt/inference.py b/models/cv/classification/res2net50/ixrt/inference.py deleted file mode 100644 index 50aafd4f..00000000 --- a/models/cv/classification/res2net50/ixrt/inference.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -import pycuda.autoinit -import pycuda.driver as cuda -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += 
batch_data.shape[0] - - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - end2end_time = end_time - start_time - - print(F"E2E time : {end2end_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 
0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/res2net50/ixrt/modify_batchsize.py b/models/cv/classification/res2net50/ixrt/modify_batchsize.py deleted file mode 100644 index 689b7a97..00000000 --- a/models/cv/classification/res2net50/ixrt/modify_batchsize.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. 
- dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/res2net50/ixrt/refine_model.py b/models/cv/classification/res2net50/ixrt/refine_model.py deleted file mode 100644 index 6f1e6c2f..00000000 --- a/models/cv/classification/res2net50/ixrt/refine_model.py +++ /dev/null @@ -1,290 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import os -import argparse -import dataclasses - -import torch -import onnx - -from refine_utils.matmul_to_gemm_pass import FusedGemmPass -from refine_utils.linear_pass import FusedLinearPass - -from refine_utils.common import * - -def get_constant_input_name_of_operator(graph: Graph, operator: Operator): - const = None - for input in operator.inputs: - if not graph.containe_var(input): - continue - - if not graph.is_leaf_variable(input): - continue - - input_var = graph.get_variable(input) - if input_var.value is not None: - const = input - return const - -class FuseLayerNormPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - find_sequence_subgraph( - graph, - [OP.REDUCE_MEAN, OP.SUB, OP.POW, OP.REDUCE_MEAN, OP.ADD, OP.SQRT, OP.DIV, OP.MUL, OP.ADD], - self.fuse_layer_norm, - strict=False - ) - return graph - - def fuse_layer_norm(self, graph: Graph, pattern: PatternGraph): - # 检查 REDUCE_MEAN 的输入是否和 SUB 的输入是一致的 - if pattern.nodes[0].operator.inputs[0] != pattern.nodes[1].operator.inputs[0]: - return - - # 检查 POW 的输入是否和 DIV 的输入是一致的 - if pattern.nodes[2].operator.inputs[0] != pattern.nodes[6].operator.inputs[0]: - return - - # 检查部分算子的输出是否被多个算子使用 - nodes = pattern.nodes - for node in [nodes[0]] + nodes[2:-1]: - next_ops = graph.get_next_operators(node.operator) - if len(next_ops) > 1: - return - - eps = None - for input in nodes[4].operator.inputs: - input_var = graph.get_variable(input) - if input_var.value is not None and graph.is_leaf_variable(input): - eps = to_py_type(input_var.value) - - scale = get_constant_input_name_of_operator(graph, nodes[-2].operator) - bias = get_constant_input_name_of_operator(graph, nodes[-1].operator) - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - bias_var = graph.get_variable(bias) - print(bias_var) - - attributes = { - "axis": nodes[0].operator.attributes.axes, - "epsilon": eps, - } - - - layer_norm_op = self.transform.make_operator( - 
op_type="LayerNormalization", - inputs=[nodes[0].operator.inputs[0], scale, bias], - outputs=[nodes[-1].operator.outputs[0]], - **attributes - ) - - self.transform.add_operator(layer_norm_op) - -class FusedGeluPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True - ) - return graph - - def fuse_gelu(self, graph: Graph, pattern: PatternGraph): - nodes = pattern.nodes - prev_op = self.transform.get_previous_operators(nodes[0].operator)[0] - next_ops = self.transform.get_next_operators(prev_op) - if len(next_ops) != 2: - return - - if nodes[0].operator not in next_ops or nodes[3].operator not in next_ops: - return - - gelu_op_input = None - for input in nodes[3].operator.inputs: - if input in nodes[0].operator.inputs: - gelu_op_input = input - break - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - gelu_op = self.transform.make_operator( - op_type=OP.GELU, - inputs=[gelu_op_input], - outputs=[nodes[-1].operator.outputs[0]] - ) - self.transform.add_operator(gelu_op) - -@dataclasses.dataclass -class NormalizeAttr(BaseOperatorAttr): - p: float = 2.0 - epsilon: float = 1e-12 - axis: int = 1 - - -@registe_operator(OP.GELU) -class GeluOperator(BaseOperator): - - def call( - self, - executor, - operator: Operator, - inputs: List, - attr: NormalizeAttr, - ): - return F.gelu(inputs[0]) - - def convert_onnx_operator( - self, ir_graph: Graph, onnx_graph: onnx.GraphProto, node: onnx.NodeProto - ) -> Operator: - return default_converter(ir_graph, onnx_graph, node, attr_cls=attr.EmptyAttr) - - def quantize( - self, - graph: Graph, - op: Operator, - operator_observer_config: QuantOperatorObserverConfig, - quant_outputs: bool = False, - ): - return quant_single_input_operator(graph, op, operator_observer_config, quant_outputs=quant_outputs) - - - -class 
ClearUnsedVariables(BasePass): - - def process(self, graph: Graph) -> Graph: - vars = list(graph.variables) - - for var in vars: - if len(graph.get_dst_operators(var)) == 0 and graph.is_leaf_variable(var): - graph.delete_variable(var) - - quant_params = list(graph.quant_parameters.keys()) - for var in quant_params: - if not graph.containe_var(var): - graph.quant_parameters.pop(var) - - return graph - -class FormatLayerNorm(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if "LayerNorm" in op.op_type: - self.format_layer_norm(graph, op) - return graph - - def format_layer_norm(self, graph, operator): - if not hasattr(operator.attributes, "axis"): - return - if isinstance(operator.attributes.axis, (tuple, list)): - operator.attributes.axis = operator.attributes.axis[0] - -class FormatReshape(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if op.op_type == "Reshape": - self.format_reshape(graph, op) - - return graph - - def format_reshape(self, graph, operator): - shape = graph.get_variable(operator.inputs[1]) - shape.value = torch.tensor(shape.value, dtype=torch.int64) - -class FormatScalar(BasePass): - - def process(self, graph: Graph): - for var in graph.variables.values(): - var: Variable - use_ops = graph.get_dst_operators(var) - - if len(use_ops) == 0: - continue - - if use_ops[0].op_type not in [OP.MUL, OP.ADD, OP.GATHER]: - continue - - if var.value is not None and var.value.ndim == 0: - var.value = var.value.reshape(1) - print(f"Reshape scalar to tensor for {var.name}.") - - return graph - -class RenamePass(BasePass): - - def process(self, graph:Graph): - - names = [name for name in graph.operators.keys()] - for old_name in names: - new_name = old_name.replace("/", "#") - - graph.rename_operator(old_name, new_name) - - names = [name for name in graph.variables.keys()] - for name in names: - new_name = name.replace("/", ".").replace("Output", 
"out").replace("output", "out") - - graph.rename_vaiable(name, new_name, - with_variables=True, - with_operator_outputs=True) - - return graph - -def create_pipeline(example_inputs): - return PassSequence( - # FuseLayerNormPass(), - FusedGeluPass(), - - # ClearUnsedVariables(), - # FormatLayerNorm(), - # FormatReshape(), - # FormatScalar(), - # RenamePass() - ) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--onnx_path", type=str) - parser.add_argument("--dst_onnx_path", type=str) - - parser.add_argument("--bsz", type=int, default=8, - help="Batch size") - parser.add_argument("--imgsz", type=int, default=224, - help="Image size") - - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - - example_inputs = torch.randn(args.bsz, 3, args.imgsz, args.imgsz) - - refine_pipline = Pipeline( - create_source(f"{args.onnx_path}", example_inputs=example_inputs), - create_pipeline(example_inputs), - create_target( - f"{args.dst_onnx_path}", - example_inputs=example_inputs, - ) - ) - refine_pipline.run() - - print(f"refine the model, input shape={example_inputs.shape}") diff --git a/models/cv/classification/res2net50/ixrt/refine_utils/__init__.py b/models/cv/classification/res2net50/ixrt/refine_utils/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/models/cv/classification/res2net50/ixrt/refine_utils/common.py b/models/cv/classification/res2net50/ixrt/refine_utils/common.py deleted file mode 100644 index 2af19a14..00000000 --- a/models/cv/classification/res2net50/ixrt/refine_utils/common.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -from typing import Union, Callable, List - -from tensorrt.deploy.api import * -from tensorrt.deploy.backend.onnx.converter import default_converter -from tensorrt.deploy.backend.torch.executor.operators._operators import to_py_type -from tensorrt.deploy.ir.operator_attr import BaseOperatorAttr, EmptyAttr -from tensorrt.deploy.ir.operator_type import OperatorType as OP -from tensorrt.deploy.ir import operator_attr as attr, Operator, generate_operator_name -from tensorrt.deploy.fusion import BasePass, PatternGraph, build_sequence_graph, GraphMatcher, PassSequence -from tensorrt.deploy.ir import Graph -from tensorrt.deploy.quantizer.quant_operator.base import quant_single_input_operator -from tensorrt.deploy.backend.onnx.converter import convert_onnx_operator - -def find_sequence_subgraph(graph, - pattern: Union[List[str], PatternGraph], - callback: Callable[[Graph, PatternGraph], None], - strict=True): - if isinstance(pattern, List): - pattern = build_sequence_graph(pattern) - - matcher = GraphMatcher(pattern, strict=strict) - return matcher.findall(graph, callback) \ No newline at end of file diff --git a/models/cv/classification/res2net50/ixrt/refine_utils/matmul_to_gemm_pass.py b/models/cv/classification/res2net50/ixrt/refine_utils/matmul_to_gemm_pass.py deleted file mode 100644 index 4ebfac4d..00000000 --- a/models/cv/classification/res2net50/ixrt/refine_utils/matmul_to_gemm_pass.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -from refine_utils.common import * - -# -# Common pattern Matmul to Gemm -# -class FusedGemmPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.MATMUL], callback=self.to_gemm, strict=True - ) - return graph - - def to_gemm(self, graph, pattern: PatternGraph): - matmul_op = pattern.nodes[0] - inputs = matmul_op.operator.inputs - outputs = matmul_op.operator.outputs - - if len(inputs)!=2 and len(outputs)!=1: - return - - for input in inputs: - if self.transform.is_leaf_variable(input): - return - - print(f"{self.transform.get_variable(inputs[0]).shape} {self.transform.get_variable(inputs[1]).shape}") - self.transform.delete_operator(matmul_op.operator) - - op = self.transform.make_operator( - op_type = "Gemm", - inputs = inputs, - outputs = outputs, - alpha = 1, - beta = 1, - transB = 1 - ) - - self.transform.add_operator(op) \ No newline at end of file diff --git a/models/cv/classification/res2net50/ixrt/requirements.txt b/models/cv/classification/res2net50/ixrt/requirements.txt deleted file mode 100644 index a3ef1a19..00000000 --- a/models/cv/classification/res2net50/ixrt/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -pycuda -tqdm -onnx -onnxsim -tabulate \ No newline at end of file diff --git a/models/cv/classification/res2net50/ixrt/simplify_model.py b/models/cv/classification/res2net50/ixrt/simplify_model.py deleted 
file mode 100644 index 9948a9fa..00000000 --- a/models/cv/classification/res2net50/ixrt/simplify_model.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/resnet101/ixrt/README.md b/models/cv/classification/resnet101/ixrt/README.md index e44db6db..fef44e37 100644 --- a/models/cv/classification/resnet101/ixrt/README.md +++ b/models/cv/classification/resnet101/ixrt/README.md @@ -14,6 +14,8 @@ ResNet-101 is a variant of the ResNet (Residual Network) architecture, and it be ### Prepare Resources +Pretrained model: + Dataset: to download the validation dataset. 
### Install Dependencies @@ -25,14 +27,14 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r reuirements.txt +pip3 install -r ../../ixrt_common/reuirements.txt ``` ### Model Conversion ```bash mkdir checkpoints -python3 export_onnx.py --output_model checkpoints/resnet101.onnx +python3 ../../ixrt_common/export.py --model-name resnet101 --weight resnet101-63fe2227.pth --output checkpoints/resnet101.onnx ``` ## Model Inference @@ -41,8 +43,8 @@ python3 export_onnx.py --output_model checkpoints/resnet101.onnx export PROJ_DIR=./ export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints -export RUN_DIR=./ -export CONFIG_DIR=config/RESNET101_CONFIG +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/RESNET101_CONFIG ``` ### FP16 diff --git a/models/cv/classification/resnet101/ixrt/build_engine.py b/models/cv/classification/resnet101/ixrt/build_engine.py deleted file mode 100644 index 32f549d8..00000000 --- a/models/cv/classification/resnet101/ixrt/build_engine.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - # print("precision : ", precision) - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/resnet101/ixrt/calibration_dataset.py b/models/cv/classification/resnet101/ixrt/calibration_dataset.py deleted file mode 100644 index d7525d51..00000000 --- a/models/cv/classification/resnet101/ixrt/calibration_dataset.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 
0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/resnet101/ixrt/ci/prepare.sh b/models/cv/classification/resnet101/ixrt/ci/prepare.sh index 701c19ee..846e94d3 100644 --- a/models/cv/classification/resnet101/ixrt/ci/prepare.sh +++ b/models/cv/classification/resnet101/ixrt/ci/prepare.sh @@ -25,8 +25,6 @@ else echo "Not Support Os" fi -pip install -r requirements.txt +pip install -r ../../ixrt_common/requirements.txt mkdir checkpoints -mkdir -p /root/.cache/torch/hub/checkpoints/ -ln -s /root/data/checkpoints/resnet101-63fe2227.pth /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth -python3 export_onnx.py --output_model checkpoints/resnet101.onnx \ No newline at end of file +python3 ../../ixrt_common/export.py --model-name resnet101 --weight resnet101-63fe2227.pth --output checkpoints/resnet101.onnx \ No newline at end of file diff --git a/models/cv/classification/resnet101/ixrt/common.py b/models/cv/classification/resnet101/ixrt/common.py deleted file mode 100644 index abdc147c..00000000 --- 
a/models/cv/classification/resnet101/ixrt/common.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -import pycuda.driver as cuda - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - allocation = cuda.mem_alloc(size) - binding = { - "index": i, - 
"name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/resnet101/ixrt/export_onnx.py b/models/cv/classification/resnet101/ixrt/export_onnx.py deleted file mode 100644 index 47d215a5..00000000 --- a/models/cv/classification/resnet101/ixrt/export_onnx.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import torch -import torchvision.models as models -import argparse - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = models.resnet101(pretrained=True) -model.cuda() -model.eval() -input = torch.randn(1, 3, 224, 224, device='cuda') -export_onnx_file = args.output_model - -torch.onnx.export(model, - input, - export_onnx_file, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names = ['input'], - output_names = ['output'],) -print(" ") -print('Model has been converted to ONNX') -print("exit") -exit() \ No newline at end of file diff --git a/models/cv/classification/resnet101/ixrt/inference.py b/models/cv/classification/resnet101/ixrt/inference.py deleted file mode 100644 index 11a90c79..00000000 --- a/models/cv/classification/resnet101/ixrt/inference.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -import pycuda.autoinit -import pycuda.driver as cuda -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += 
batch_data.shape[0] - - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - e2e_time = end_time - start_time - print(F"E2E time : {e2e_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 
0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/resnet101/ixrt/modify_batchsize.py b/models/cv/classification/resnet101/ixrt/modify_batchsize.py deleted file mode 100644 index 4ac42a30..00000000 --- a/models/cv/classification/resnet101/ixrt/modify_batchsize.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. 
- dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/resnet101/ixrt/quant.py b/models/cv/classification/resnet101/ixrt/quant.py deleted file mode 100644 index 8006db24..00000000 --- a/models/cv/classification/resnet101/ixrt/quant.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import random -import argparse -import numpy as np -from random import shuffle -from tensorrt.deploy import static_quantize - -import torch -import torchvision.datasets -from calibration_dataset import getdataloader - -def setseed(seed=42): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=20) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=224) - args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - -args = parse_args() -setseed(args.seed) -calibration_dataloader = getdataloader(args.dataset_dir, args.step, args.bsz, img_sz=args.imgsz) -static_quantize(args.model, - calibration_dataloader=calibration_dataloader, - save_quant_onnx_path=os.path.join(args.save_dir, f"quantized_{args.model_name}.onnx"), - observer=args.observer, - data_preprocess=lambda x: x[0].to("cuda"), - quant_format="qdq", - disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/classification/resnet101/ixrt/refine_model.py b/models/cv/classification/resnet101/ixrt/refine_model.py deleted file mode 100644 index 000ee4dc..00000000 --- a/models/cv/classification/resnet101/ixrt/refine_model.py +++ /dev/null @@ -1,291 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. 
-# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import argparse -import dataclasses - -import torch -import onnx - -from refine_utils.matmul_to_gemm_pass import FusedGemmPass -from refine_utils.linear_pass import FusedLinearPass - -from refine_utils.common import * - -def get_constant_input_name_of_operator(graph: Graph, operator: Operator): - const = None - for input in operator.inputs: - if not graph.containe_var(input): - continue - - if not graph.is_leaf_variable(input): - continue - - input_var = graph.get_variable(input) - if input_var.value is not None: - const = input - return const - -class FuseLayerNormPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - find_sequence_subgraph( - graph, - [OP.REDUCE_MEAN, OP.SUB, OP.POW, OP.REDUCE_MEAN, OP.ADD, OP.SQRT, OP.DIV, OP.MUL, OP.ADD], - self.fuse_layer_norm, - strict=False - ) - return graph - - def fuse_layer_norm(self, graph: Graph, pattern: PatternGraph): - # 检查 REDUCE_MEAN 的输入是否和 SUB 的输入是一致的 - if pattern.nodes[0].operator.inputs[0] != pattern.nodes[1].operator.inputs[0]: - return - - # 检查 POW 的输入是否和 DIV 的输入是一致的 - if pattern.nodes[2].operator.inputs[0] != pattern.nodes[6].operator.inputs[0]: - return - - # 检查部分算子的输出是否被多个算子使用 - nodes = pattern.nodes - for node in [nodes[0]] + nodes[2:-1]: - next_ops = graph.get_next_operators(node.operator) - if len(next_ops) > 1: - return - - eps = None - for input in 
nodes[4].operator.inputs: - input_var = graph.get_variable(input) - if input_var.value is not None and graph.is_leaf_variable(input): - eps = to_py_type(input_var.value) - - scale = get_constant_input_name_of_operator(graph, nodes[-2].operator) - bias = get_constant_input_name_of_operator(graph, nodes[-1].operator) - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - bias_var = graph.get_variable(bias) - print(bias_var) - - attributes = { - "axis": nodes[0].operator.attributes.axes, - "epsilon": eps, - } - - - layer_norm_op = self.transform.make_operator( - op_type="LayerNormalization", - inputs=[nodes[0].operator.inputs[0], scale, bias], - outputs=[nodes[-1].operator.outputs[0]], - **attributes - ) - - self.transform.add_operator(layer_norm_op) - -class FusedGeluPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True - ) - return graph - - def fuse_gelu(self, graph: Graph, pattern: PatternGraph): - nodes = pattern.nodes - prev_op = self.transform.get_previous_operators(nodes[0].operator)[0] - next_ops = self.transform.get_next_operators(prev_op) - if len(next_ops) != 2: - return - - if nodes[0].operator not in next_ops or nodes[3].operator not in next_ops: - return - - gelu_op_input = None - for input in nodes[3].operator.inputs: - if input in nodes[0].operator.inputs: - gelu_op_input = input - break - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - gelu_op = self.transform.make_operator( - op_type=OP.GELU, - inputs=[gelu_op_input], - outputs=[nodes[-1].operator.outputs[0]] - ) - self.transform.add_operator(gelu_op) - -@dataclasses.dataclass -class NormalizeAttr(BaseOperatorAttr): - p: float = 2.0 - epsilon: float = 1e-12 - axis: int = 1 - - -@registe_operator(OP.GELU) -class GeluOperator(BaseOperator): - - def 
call( - self, - executor, - operator: Operator, - inputs: List, - attr: NormalizeAttr, - ): - return F.gelu(inputs[0]) - - def convert_onnx_operator( - self, ir_graph: Graph, onnx_graph: onnx.GraphProto, node: onnx.NodeProto - ) -> Operator: - return default_converter(ir_graph, onnx_graph, node, attr_cls=attr.EmptyAttr) - - def quantize( - self, - graph: Graph, - op: Operator, - operator_observer_config: QuantOperatorObserverConfig, - quant_outputs: bool = False, - ): - return quant_single_input_operator(graph, op, operator_observer_config, quant_outputs=quant_outputs) - - - -class ClearUnsedVariables(BasePass): - - def process(self, graph: Graph) -> Graph: - vars = list(graph.variables) - - for var in vars: - if len(graph.get_dst_operators(var)) == 0 and graph.is_leaf_variable(var): - graph.delete_variable(var) - - quant_params = list(graph.quant_parameters.keys()) - for var in quant_params: - if not graph.containe_var(var): - graph.quant_parameters.pop(var) - - return graph - -class FormatLayerNorm(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if "LayerNorm" in op.op_type: - self.format_layer_norm(graph, op) - return graph - - def format_layer_norm(self, graph, operator): - if not hasattr(operator.attributes, "axis"): - return - if isinstance(operator.attributes.axis, (tuple, list)): - operator.attributes.axis = operator.attributes.axis[0] - -class FormatReshape(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if op.op_type == "Reshape": - self.format_reshape(graph, op) - - return graph - - def format_reshape(self, graph, operator): - shape = graph.get_variable(operator.inputs[1]) - shape.value = torch.tensor(shape.value, dtype=torch.int64) - -class FormatScalar(BasePass): - - def process(self, graph: Graph): - for var in graph.variables.values(): - var: Variable - use_ops = graph.get_dst_operators(var) - - if len(use_ops) == 0: - continue - - if use_ops[0].op_type 
not in [OP.MUL, OP.ADD, OP.GATHER]: - continue - - if var.value is not None and var.value.ndim == 0: - var.value = var.value.reshape(1) - print(f"Reshape scalar to tensor for {var.name}.") - - return graph - -class RenamePass(BasePass): - - def process(self, graph:Graph): - - names = [name for name in graph.operators.keys()] - for old_name in names: - new_name = old_name.replace("/", "#") - - graph.rename_operator(old_name, new_name) - - names = [name for name in graph.variables.keys()] - for name in names: - new_name = name.replace("/", ".").replace("Output", "out").replace("output", "out") - - graph.rename_vaiable(name, new_name, - with_variables=True, - with_operator_outputs=True) - - return graph - -def create_pipeline(example_inputs): - return PassSequence( - # FuseLayerNormPass(), - FusedGeluPass(), - - # ClearUnsedVariables(), - # FormatLayerNorm(), - # FormatReshape(), - # FormatScalar(), - # RenamePass() - ) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--onnx_path", type=str) - parser.add_argument("--dst_onnx_path", type=str) - - parser.add_argument("--bsz", type=int, default=8, - help="Batch size") - parser.add_argument("--imgsz", type=int, default=224, - help="Image size") - - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - - example_inputs = torch.randn(args.bsz, 3, args.imgsz, args.imgsz) - - refine_pipline = Pipeline( - create_source(f"{args.onnx_path}", example_inputs=example_inputs), - create_pipeline(example_inputs), - create_target( - f"{args.dst_onnx_path}", - example_inputs=example_inputs, - ) - ) - refine_pipline.run() - - print(f"refine the model, input shape={example_inputs.shape}") diff --git a/models/cv/classification/resnet101/ixrt/refine_utils/__init__.py b/models/cv/classification/resnet101/ixrt/refine_utils/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/models/cv/classification/resnet101/ixrt/refine_utils/common.py 
b/models/cv/classification/resnet101/ixrt/refine_utils/common.py deleted file mode 100644 index b19dccfc..00000000 --- a/models/cv/classification/resnet101/ixrt/refine_utils/common.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -from typing import Union, Callable, List - -from tensorrt.deploy.api import * -from tensorrt.deploy.backend.onnx.converter import default_converter -from tensorrt.deploy.backend.torch.executor.operators._operators import to_py_type -from tensorrt.deploy.ir.operator_attr import BaseOperatorAttr, EmptyAttr -from tensorrt.deploy.ir.operator_type import OperatorType as OP -from tensorrt.deploy.ir import operator_attr as attr, Operator, generate_operator_name -from tensorrt.deploy.fusion import BasePass, PatternGraph, build_sequence_graph, GraphMatcher, PassSequence -from tensorrt.deploy.ir import Graph -from tensorrt.deploy.quantizer.quant_operator.base import quant_single_input_operator -from tensorrt.deploy.backend.onnx.converter import convert_onnx_operator - -def find_sequence_subgraph(graph, - pattern: Union[List[str], PatternGraph], - callback: Callable[[Graph, PatternGraph], None], - strict=True): - if isinstance(pattern, List): - pattern = build_sequence_graph(pattern) - - matcher = GraphMatcher(pattern, strict=strict) - return matcher.findall(graph, callback) \ No newline at end of file diff --git 
a/models/cv/classification/resnet101/ixrt/refine_utils/linear_pass.py b/models/cv/classification/resnet101/ixrt/refine_utils/linear_pass.py deleted file mode 100644 index bab7e575..00000000 --- a/models/cv/classification/resnet101/ixrt/refine_utils/linear_pass.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import dataclasses - -from refine_utils.common import * - -# AXB=C, Only for B is initializer - -class FusedLinearPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.MATMUL, OP.ADD], callback=self.to_linear_with_bias, strict=True - ) - find_sequence_subgraph( - graph, pattern=[OP.MATMUL], callback=self.to_linear, strict=True - ) - return graph - - def to_linear_with_bias(self, graph, pattern: PatternGraph): - matmul = pattern.nodes[0] - add = pattern.nodes[1] - if len(add.operator.inputs) != 2: - return - - b_var = graph.get_variable(matmul.operator.inputs[1]) - if not graph.is_leaf_variable(b_var) or b_var.value is None: - return - - if b_var.value.ndim != 2: - return - - bias_var = None - for input in add.operator.inputs: - if input not in matmul.operator.outputs: - bias_var = input - - inputs = matmul.operator.inputs - inputs.append(bias_var) - outputs = add.operator.outputs - - b_var.value = b_var.value.transpose(1, 0) - 
b_var.shape[0],b_var.shape[1] = b_var.shape[1],b_var.shape[0] - - hidden_size = b_var.shape[1] - linear_dim = b_var.shape[0] - - attributes = { - "hidden_size": hidden_size, - "linear_dim": linear_dim, - "has_bias": 1, - "act_type":"none" - } - - self.transform.make_operator( - "LinearFP16", - inputs=inputs, - outputs=outputs, - **attributes - ) - - self.transform.delete_operator(add.operator) - self.transform.delete_operator(matmul.operator) - - def to_linear(self, graph, pattern: PatternGraph): - matmul = pattern.nodes[0] - if len(matmul.operator.inputs) != 2: - return - - b_var = graph.get_variable(matmul.operator.inputs[1]) - if not graph.is_leaf_variable(b_var) or b_var.value is None: - return - - if b_var.value.ndim != 2: - return - - attributes = { - "hidden_size": hidden_size, - "linear_dim": linear_dim, - "has_bias": 0, - "act_type": "none" - } - - b_var.value = b_var.value.transpose(1, 0) - b_var.shape[0],b_var.shape[1] = b_var.shape[1], b_var.shape[0] - - hidden_size = b_var.shape[1] - linear_dim = b_var.shape[0] - - op = self.transform.make_operator( - op_type = "LinearFP16", - inputs = pattern.nodes[0].operator.inputs, - outputs=[pattern.nodes[-1].operator.outputs[0]], - **attributes - ) - - self.transform.add_operator(op) - - self.transform.delete_operator(matmul.operator) \ No newline at end of file diff --git a/models/cv/classification/resnet101/ixrt/refine_utils/matmul_to_gemm_pass.py b/models/cv/classification/resnet101/ixrt/refine_utils/matmul_to_gemm_pass.py deleted file mode 100644 index 5823c4a5..00000000 --- a/models/cv/classification/resnet101/ixrt/refine_utils/matmul_to_gemm_pass.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -from refine_utils.common import * - -# -# Common pattern Matmul to Gemm -# -class FusedGemmPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.MATMUL], callback=self.to_gemm, strict=True - ) - return graph - - def to_gemm(self, graph, pattern: PatternGraph): - matmul_op = pattern.nodes[0] - inputs = matmul_op.operator.inputs - outputs = matmul_op.operator.outputs - - if len(inputs)!=2 and len(outputs)!=1: - return - - for input in inputs: - if self.transform.is_leaf_variable(input): - return - - print(f"{self.transform.get_variable(inputs[0]).shape} {self.transform.get_variable(inputs[1]).shape}") - self.transform.delete_operator(matmul_op.operator) - - op = self.transform.make_operator( - op_type = "Gemm", - inputs = inputs, - outputs = outputs, - alpha = 1, - beta = 1, - transB = 1 - ) - - self.transform.add_operator(op) \ No newline at end of file diff --git a/models/cv/classification/resnet101/ixrt/requirements.txt b/models/cv/classification/resnet101/ixrt/requirements.txt deleted file mode 100644 index bc645b22..00000000 --- a/models/cv/classification/resnet101/ixrt/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -tqdm -onnx -onnxsim -tabulate -pycuda \ No newline at end of file diff --git a/models/cv/classification/resnet101/ixrt/simplify_model.py b/models/cv/classification/resnet101/ixrt/simplify_model.py deleted file mode 100644 index 4d53a474..00000000 --- a/models/cv/classification/resnet101/ixrt/simplify_model.py +++ /dev/null @@ -1,41 +0,0 @@ -# 
Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/resnet18/ixrt/README.md b/models/cv/classification/resnet18/ixrt/README.md index 406182b4..9f534d6d 100644 --- a/models/cv/classification/resnet18/ixrt/README.md +++ b/models/cv/classification/resnet18/ixrt/README.md @@ -27,14 +27,14 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash mkdir checkpoints -python3 export_onnx.py --origin_model /path/to/resnet18-f37072fd.pth --output_model checkpoints/resnet18.onnx +python3 ../../ixrt_common/export.py --model-name resnet18 --weight resnet18-f37072fd.pth --output checkpoints/resnet18.onnx 
``` ## Model Inference @@ -43,8 +43,8 @@ python3 export_onnx.py --origin_model /path/to/resnet18-f37072fd.pth --output_mo export PROJ_DIR=./ export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints -export RUN_DIR=./ -export CONFIG_DIR=config/RESNET18_CONFIG +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/RESNET18_CONFIG ``` ### FP16 diff --git a/models/cv/classification/resnet18/ixrt/build_engine.py b/models/cv/classification/resnet18/ixrt/build_engine.py deleted file mode 100644 index 32f549d8..00000000 --- a/models/cv/classification/resnet18/ixrt/build_engine.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - # print("precision : ", precision) - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/resnet18/ixrt/calibration_dataset.py b/models/cv/classification/resnet18/ixrt/calibration_dataset.py deleted file mode 100644 index d7525d51..00000000 --- a/models/cv/classification/resnet18/ixrt/calibration_dataset.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 
0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/resnet18/ixrt/ci/prepare.sh b/models/cv/classification/resnet18/ixrt/ci/prepare.sh index eadb2c8f..43470329 100644 --- a/models/cv/classification/resnet18/ixrt/ci/prepare.sh +++ b/models/cv/classification/resnet18/ixrt/ci/prepare.sh @@ -25,6 +25,6 @@ else echo "Not Support Os" fi -pip install -r requirements.txt +pip install -r ../../ixrt_common/requirements.txt mkdir checkpoints -python3 export_onnx.py --origin_model /root/data/checkpoints/resnet18.pth --output_model checkpoints/resnet18.onnx \ No newline at end of file +python3 ../../ixrt_common/export.py --model-name resnet18 --weight resnet18-f37072fd.pth --output checkpoints/resnet18.onnx \ No newline at end of file diff --git a/models/cv/classification/resnet18/ixrt/common.py b/models/cv/classification/resnet18/ixrt/common.py deleted file mode 100644 index abdc147c..00000000 --- a/models/cv/classification/resnet18/ixrt/common.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. 
-# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -import pycuda.driver as cuda - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - allocation = cuda.mem_alloc(size) - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - } - print(f"binding {i}, name : {name} 
dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/resnet18/ixrt/export_onnx.py b/models/cv/classification/resnet18/ixrt/export_onnx.py deleted file mode 100644 index 708a205a..00000000 --- a/models/cv/classification/resnet18/ixrt/export_onnx.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import torch -import torchvision.models as models -import argparse - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = models.resnet18() -model.load_state_dict(torch.load(args.origin_model)) -model.cuda() -model.eval() -input = torch.randn(1, 3, 224, 224, device='cuda') -export_onnx_file = args.output_model - -torch.onnx.export(model, - input, - export_onnx_file, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names = ['input'], - output_names = ['output'],) -print(" ") -print('Model has been converted to ONNX') -print("exit") -exit() \ No newline at end of file diff --git a/models/cv/classification/resnet18/ixrt/inference.py b/models/cv/classification/resnet18/ixrt/inference.py deleted file mode 100644 index 4e178df4..00000000 --- a/models/cv/classification/resnet18/ixrt/inference.py +++ /dev/null @@ -1,161 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -import pycuda.autoinit -import pycuda.driver as cuda -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += 
batch_data.shape[0] - - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - end2end_time = end_time - start_time - - print(F"E2E time : {end2end_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 
0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/resnet18/ixrt/modify_batchsize.py b/models/cv/classification/resnet18/ixrt/modify_batchsize.py deleted file mode 100644 index 4ac42a30..00000000 --- a/models/cv/classification/resnet18/ixrt/modify_batchsize.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. 
- dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/resnet18/ixrt/quant.py b/models/cv/classification/resnet18/ixrt/quant.py deleted file mode 100644 index 8006db24..00000000 --- a/models/cv/classification/resnet18/ixrt/quant.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import random -import argparse -import numpy as np -from random import shuffle -from tensorrt.deploy import static_quantize - -import torch -import torchvision.datasets -from calibration_dataset import getdataloader - -def setseed(seed=42): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=20) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=224) - args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - -args = parse_args() -setseed(args.seed) -calibration_dataloader = getdataloader(args.dataset_dir, args.step, args.bsz, img_sz=args.imgsz) -static_quantize(args.model, - calibration_dataloader=calibration_dataloader, - save_quant_onnx_path=os.path.join(args.save_dir, f"quantized_{args.model_name}.onnx"), - observer=args.observer, - data_preprocess=lambda x: x[0].to("cuda"), - quant_format="qdq", - disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/classification/resnet18/ixrt/refine_model.py b/models/cv/classification/resnet18/ixrt/refine_model.py deleted file mode 100644 index 000ee4dc..00000000 --- a/models/cv/classification/resnet18/ixrt/refine_model.py +++ /dev/null @@ -1,291 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. 
-# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import argparse -import dataclasses - -import torch -import onnx - -from refine_utils.matmul_to_gemm_pass import FusedGemmPass -from refine_utils.linear_pass import FusedLinearPass - -from refine_utils.common import * - -def get_constant_input_name_of_operator(graph: Graph, operator: Operator): - const = None - for input in operator.inputs: - if not graph.containe_var(input): - continue - - if not graph.is_leaf_variable(input): - continue - - input_var = graph.get_variable(input) - if input_var.value is not None: - const = input - return const - -class FuseLayerNormPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - find_sequence_subgraph( - graph, - [OP.REDUCE_MEAN, OP.SUB, OP.POW, OP.REDUCE_MEAN, OP.ADD, OP.SQRT, OP.DIV, OP.MUL, OP.ADD], - self.fuse_layer_norm, - strict=False - ) - return graph - - def fuse_layer_norm(self, graph: Graph, pattern: PatternGraph): - # 检查 REDUCE_MEAN 的输入是否和 SUB 的输入是一致的 - if pattern.nodes[0].operator.inputs[0] != pattern.nodes[1].operator.inputs[0]: - return - - # 检查 POW 的输入是否和 DIV 的输入是一致的 - if pattern.nodes[2].operator.inputs[0] != pattern.nodes[6].operator.inputs[0]: - return - - # 检查部分算子的输出是否被多个算子使用 - nodes = pattern.nodes - for node in [nodes[0]] + nodes[2:-1]: - next_ops = graph.get_next_operators(node.operator) - if len(next_ops) > 1: - return - - eps = None - for input in 
nodes[4].operator.inputs: - input_var = graph.get_variable(input) - if input_var.value is not None and graph.is_leaf_variable(input): - eps = to_py_type(input_var.value) - - scale = get_constant_input_name_of_operator(graph, nodes[-2].operator) - bias = get_constant_input_name_of_operator(graph, nodes[-1].operator) - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - bias_var = graph.get_variable(bias) - print(bias_var) - - attributes = { - "axis": nodes[0].operator.attributes.axes, - "epsilon": eps, - } - - - layer_norm_op = self.transform.make_operator( - op_type="LayerNormalization", - inputs=[nodes[0].operator.inputs[0], scale, bias], - outputs=[nodes[-1].operator.outputs[0]], - **attributes - ) - - self.transform.add_operator(layer_norm_op) - -class FusedGeluPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True - ) - return graph - - def fuse_gelu(self, graph: Graph, pattern: PatternGraph): - nodes = pattern.nodes - prev_op = self.transform.get_previous_operators(nodes[0].operator)[0] - next_ops = self.transform.get_next_operators(prev_op) - if len(next_ops) != 2: - return - - if nodes[0].operator not in next_ops or nodes[3].operator not in next_ops: - return - - gelu_op_input = None - for input in nodes[3].operator.inputs: - if input in nodes[0].operator.inputs: - gelu_op_input = input - break - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - gelu_op = self.transform.make_operator( - op_type=OP.GELU, - inputs=[gelu_op_input], - outputs=[nodes[-1].operator.outputs[0]] - ) - self.transform.add_operator(gelu_op) - -@dataclasses.dataclass -class NormalizeAttr(BaseOperatorAttr): - p: float = 2.0 - epsilon: float = 1e-12 - axis: int = 1 - - -@registe_operator(OP.GELU) -class GeluOperator(BaseOperator): - - def 
call( - self, - executor, - operator: Operator, - inputs: List, - attr: NormalizeAttr, - ): - return F.gelu(inputs[0]) - - def convert_onnx_operator( - self, ir_graph: Graph, onnx_graph: onnx.GraphProto, node: onnx.NodeProto - ) -> Operator: - return default_converter(ir_graph, onnx_graph, node, attr_cls=attr.EmptyAttr) - - def quantize( - self, - graph: Graph, - op: Operator, - operator_observer_config: QuantOperatorObserverConfig, - quant_outputs: bool = False, - ): - return quant_single_input_operator(graph, op, operator_observer_config, quant_outputs=quant_outputs) - - - -class ClearUnsedVariables(BasePass): - - def process(self, graph: Graph) -> Graph: - vars = list(graph.variables) - - for var in vars: - if len(graph.get_dst_operators(var)) == 0 and graph.is_leaf_variable(var): - graph.delete_variable(var) - - quant_params = list(graph.quant_parameters.keys()) - for var in quant_params: - if not graph.containe_var(var): - graph.quant_parameters.pop(var) - - return graph - -class FormatLayerNorm(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if "LayerNorm" in op.op_type: - self.format_layer_norm(graph, op) - return graph - - def format_layer_norm(self, graph, operator): - if not hasattr(operator.attributes, "axis"): - return - if isinstance(operator.attributes.axis, (tuple, list)): - operator.attributes.axis = operator.attributes.axis[0] - -class FormatReshape(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if op.op_type == "Reshape": - self.format_reshape(graph, op) - - return graph - - def format_reshape(self, graph, operator): - shape = graph.get_variable(operator.inputs[1]) - shape.value = torch.tensor(shape.value, dtype=torch.int64) - -class FormatScalar(BasePass): - - def process(self, graph: Graph): - for var in graph.variables.values(): - var: Variable - use_ops = graph.get_dst_operators(var) - - if len(use_ops) == 0: - continue - - if use_ops[0].op_type 
not in [OP.MUL, OP.ADD, OP.GATHER]: - continue - - if var.value is not None and var.value.ndim == 0: - var.value = var.value.reshape(1) - print(f"Reshape scalar to tensor for {var.name}.") - - return graph - -class RenamePass(BasePass): - - def process(self, graph:Graph): - - names = [name for name in graph.operators.keys()] - for old_name in names: - new_name = old_name.replace("/", "#") - - graph.rename_operator(old_name, new_name) - - names = [name for name in graph.variables.keys()] - for name in names: - new_name = name.replace("/", ".").replace("Output", "out").replace("output", "out") - - graph.rename_vaiable(name, new_name, - with_variables=True, - with_operator_outputs=True) - - return graph - -def create_pipeline(example_inputs): - return PassSequence( - # FuseLayerNormPass(), - FusedGeluPass(), - - # ClearUnsedVariables(), - # FormatLayerNorm(), - # FormatReshape(), - # FormatScalar(), - # RenamePass() - ) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--onnx_path", type=str) - parser.add_argument("--dst_onnx_path", type=str) - - parser.add_argument("--bsz", type=int, default=8, - help="Batch size") - parser.add_argument("--imgsz", type=int, default=224, - help="Image size") - - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - - example_inputs = torch.randn(args.bsz, 3, args.imgsz, args.imgsz) - - refine_pipline = Pipeline( - create_source(f"{args.onnx_path}", example_inputs=example_inputs), - create_pipeline(example_inputs), - create_target( - f"{args.dst_onnx_path}", - example_inputs=example_inputs, - ) - ) - refine_pipline.run() - - print(f"refine the model, input shape={example_inputs.shape}") diff --git a/models/cv/classification/resnet18/ixrt/refine_utils/__init__.py b/models/cv/classification/resnet18/ixrt/refine_utils/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/models/cv/classification/resnet18/ixrt/refine_utils/common.py 
b/models/cv/classification/resnet18/ixrt/refine_utils/common.py deleted file mode 100644 index b19dccfc..00000000 --- a/models/cv/classification/resnet18/ixrt/refine_utils/common.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -from typing import Union, Callable, List - -from tensorrt.deploy.api import * -from tensorrt.deploy.backend.onnx.converter import default_converter -from tensorrt.deploy.backend.torch.executor.operators._operators import to_py_type -from tensorrt.deploy.ir.operator_attr import BaseOperatorAttr, EmptyAttr -from tensorrt.deploy.ir.operator_type import OperatorType as OP -from tensorrt.deploy.ir import operator_attr as attr, Operator, generate_operator_name -from tensorrt.deploy.fusion import BasePass, PatternGraph, build_sequence_graph, GraphMatcher, PassSequence -from tensorrt.deploy.ir import Graph -from tensorrt.deploy.quantizer.quant_operator.base import quant_single_input_operator -from tensorrt.deploy.backend.onnx.converter import convert_onnx_operator - -def find_sequence_subgraph(graph, - pattern: Union[List[str], PatternGraph], - callback: Callable[[Graph, PatternGraph], None], - strict=True): - if isinstance(pattern, List): - pattern = build_sequence_graph(pattern) - - matcher = GraphMatcher(pattern, strict=strict) - return matcher.findall(graph, callback) \ No newline at end of file diff --git 
a/models/cv/classification/resnet18/ixrt/refine_utils/linear_pass.py b/models/cv/classification/resnet18/ixrt/refine_utils/linear_pass.py deleted file mode 100644 index bab7e575..00000000 --- a/models/cv/classification/resnet18/ixrt/refine_utils/linear_pass.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import dataclasses - -from refine_utils.common import * - -# AXB=C, Only for B is initializer - -class FusedLinearPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.MATMUL, OP.ADD], callback=self.to_linear_with_bias, strict=True - ) - find_sequence_subgraph( - graph, pattern=[OP.MATMUL], callback=self.to_linear, strict=True - ) - return graph - - def to_linear_with_bias(self, graph, pattern: PatternGraph): - matmul = pattern.nodes[0] - add = pattern.nodes[1] - if len(add.operator.inputs) != 2: - return - - b_var = graph.get_variable(matmul.operator.inputs[1]) - if not graph.is_leaf_variable(b_var) or b_var.value is None: - return - - if b_var.value.ndim != 2: - return - - bias_var = None - for input in add.operator.inputs: - if input not in matmul.operator.outputs: - bias_var = input - - inputs = matmul.operator.inputs - inputs.append(bias_var) - outputs = add.operator.outputs - - b_var.value = b_var.value.transpose(1, 0) - 
b_var.shape[0],b_var.shape[1] = b_var.shape[1],b_var.shape[0] - - hidden_size = b_var.shape[1] - linear_dim = b_var.shape[0] - - attributes = { - "hidden_size": hidden_size, - "linear_dim": linear_dim, - "has_bias": 1, - "act_type":"none" - } - - self.transform.make_operator( - "LinearFP16", - inputs=inputs, - outputs=outputs, - **attributes - ) - - self.transform.delete_operator(add.operator) - self.transform.delete_operator(matmul.operator) - - def to_linear(self, graph, pattern: PatternGraph): - matmul = pattern.nodes[0] - if len(matmul.operator.inputs) != 2: - return - - b_var = graph.get_variable(matmul.operator.inputs[1]) - if not graph.is_leaf_variable(b_var) or b_var.value is None: - return - - if b_var.value.ndim != 2: - return - - attributes = { - "hidden_size": hidden_size, - "linear_dim": linear_dim, - "has_bias": 0, - "act_type": "none" - } - - b_var.value = b_var.value.transpose(1, 0) - b_var.shape[0],b_var.shape[1] = b_var.shape[1], b_var.shape[0] - - hidden_size = b_var.shape[1] - linear_dim = b_var.shape[0] - - op = self.transform.make_operator( - op_type = "LinearFP16", - inputs = pattern.nodes[0].operator.inputs, - outputs=[pattern.nodes[-1].operator.outputs[0]], - **attributes - ) - - self.transform.add_operator(op) - - self.transform.delete_operator(matmul.operator) \ No newline at end of file diff --git a/models/cv/classification/resnet18/ixrt/refine_utils/matmul_to_gemm_pass.py b/models/cv/classification/resnet18/ixrt/refine_utils/matmul_to_gemm_pass.py deleted file mode 100644 index 5823c4a5..00000000 --- a/models/cv/classification/resnet18/ixrt/refine_utils/matmul_to_gemm_pass.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -from refine_utils.common import * - -# -# Common pattern Matmul to Gemm -# -class FusedGemmPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.MATMUL], callback=self.to_gemm, strict=True - ) - return graph - - def to_gemm(self, graph, pattern: PatternGraph): - matmul_op = pattern.nodes[0] - inputs = matmul_op.operator.inputs - outputs = matmul_op.operator.outputs - - if len(inputs)!=2 and len(outputs)!=1: - return - - for input in inputs: - if self.transform.is_leaf_variable(input): - return - - print(f"{self.transform.get_variable(inputs[0]).shape} {self.transform.get_variable(inputs[1]).shape}") - self.transform.delete_operator(matmul_op.operator) - - op = self.transform.make_operator( - op_type = "Gemm", - inputs = inputs, - outputs = outputs, - alpha = 1, - beta = 1, - transB = 1 - ) - - self.transform.add_operator(op) \ No newline at end of file diff --git a/models/cv/classification/resnet18/ixrt/requirements.txt b/models/cv/classification/resnet18/ixrt/requirements.txt deleted file mode 100644 index a3ef1a19..00000000 --- a/models/cv/classification/resnet18/ixrt/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -pycuda -tqdm -onnx -onnxsim -tabulate \ No newline at end of file diff --git a/models/cv/classification/resnet18/ixrt/simplify_model.py b/models/cv/classification/resnet18/ixrt/simplify_model.py deleted file mode 100644 index 4d53a474..00000000 --- a/models/cv/classification/resnet18/ixrt/simplify_model.py +++ /dev/null @@ -1,41 +0,0 @@ -# 
Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/resnet34/ixrt/README.md b/models/cv/classification/resnet34/ixrt/README.md index 4aeeab38..85f8f9d1 100644 --- a/models/cv/classification/resnet34/ixrt/README.md +++ b/models/cv/classification/resnet34/ixrt/README.md @@ -14,6 +14,8 @@ Residual Networks, or ResNets, learn residual functions with reference to the la ### Prepare Resources +Pretrained model: + Dataset: to download the validation dataset. 
### Install Dependencies @@ -25,14 +27,14 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash mkdir checkpoints -python3 export_onnx.py --output_model checkpoints/resnet34.onnx +python3 ../../ixrt_common/export.py --model-name resnet34 --weight resnet34-b627a593.pth --output checkpoints/resnet34.onnx ``` ## Model Inference @@ -41,8 +43,8 @@ python3 export_onnx.py --output_model checkpoints/resnet34.onnx export PROJ_DIR=./ export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints -export RUN_DIR=./ -export CONFIG_DIR=config/RESNET34_CONFIG +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/RESNET34_CONFIG ``` ### FP16 diff --git a/models/cv/classification/resnet34/ixrt/build_engine.py b/models/cv/classification/resnet34/ixrt/build_engine.py deleted file mode 100644 index 126da5e6..00000000 --- a/models/cv/classification/resnet34/ixrt/build_engine.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - # print("precision : ", precision) - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/resnet34/ixrt/calibration_dataset.py b/models/cv/classification/resnet34/ixrt/calibration_dataset.py deleted file mode 100644 index 442a5602..00000000 --- a/models/cv/classification/resnet34/ixrt/calibration_dataset.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 
0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/resnet34/ixrt/ci/prepare.sh b/models/cv/classification/resnet34/ixrt/ci/prepare.sh index 7916795d..7d7763ca 100644 --- a/models/cv/classification/resnet34/ixrt/ci/prepare.sh +++ b/models/cv/classification/resnet34/ixrt/ci/prepare.sh @@ -25,8 +25,6 @@ else echo "Not Support Os" fi -pip install -r requirements.txt +pip install -r ../../ixrt_common/requirements.txt mkdir checkpoints -mkdir -p /root/.cache/torch/hub/checkpoints/ -ln -s /root/data/checkpoints/resnet34-b627a593.pth /root/.cache/torch/hub/checkpoints/resnet34-b627a593.pth -python3 export_onnx.py --output_model checkpoints/resnet34.onnx \ No newline at end of file +python3 ../../ixrt_common/export.py --model-name resnet34 --weight resnet34-b627a593.pth --output checkpoints/resnet34.onnx \ No newline at end of file diff --git a/models/cv/classification/resnet34/ixrt/common.py b/models/cv/classification/resnet34/ixrt/common.py deleted file mode 100644 index 0458195e..00000000 --- a/models/cv/classification/resnet34/ixrt/common.py +++ 
/dev/null @@ -1,78 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -import pycuda.driver as cuda - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - allocation = cuda.mem_alloc(size) - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), 
- "shape": list(shape), - "allocation": allocation, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/resnet34/ixrt/export_onnx.py b/models/cv/classification/resnet34/ixrt/export_onnx.py deleted file mode 100644 index 182532bc..00000000 --- a/models/cv/classification/resnet34/ixrt/export_onnx.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import torch -import torchvision.models as models -import argparse - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = models.resnet34(pretrained=True) -model.cuda() -model.eval() -input = torch.randn(1, 3, 224, 224, device='cuda') -export_onnx_file = args.output_model - -torch.onnx.export(model, - input, - export_onnx_file, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names = ['input'], - output_names = ['output'],) -print(" ") -print('Model has been converted to ONNX') -print("exit") -exit() \ No newline at end of file diff --git a/models/cv/classification/resnet34/ixrt/inference.py b/models/cv/classification/resnet34/ixrt/inference.py deleted file mode 100644 index 77a1888d..00000000 --- a/models/cv/classification/resnet34/ixrt/inference.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -import pycuda.autoinit -import pycuda.driver as cuda -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += 
batch_data.shape[0] - - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - - end_time = time.time() - e2e_time = end_time - start_time - print(F"E2E time : {e2e_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 
0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/resnet34/ixrt/modify_batchsize.py b/models/cv/classification/resnet34/ixrt/modify_batchsize.py deleted file mode 100644 index 689b7a97..00000000 --- a/models/cv/classification/resnet34/ixrt/modify_batchsize.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. 
- dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/resnet34/ixrt/quant.py b/models/cv/classification/resnet34/ixrt/quant.py deleted file mode 100644 index 7c7860c9..00000000 --- a/models/cv/classification/resnet34/ixrt/quant.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import os -import cv2 -import random -import argparse -import numpy as np -from random import shuffle -from tensorrt.deploy import static_quantize - -import torch -import torchvision.datasets -from calibration_dataset import getdataloader - -def setseed(seed=42): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=20) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=224) - args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - -args = parse_args() -setseed(args.seed) -calibration_dataloader = getdataloader(args.dataset_dir, args.step, args.bsz, img_sz=args.imgsz) -static_quantize(args.model, - calibration_dataloader=calibration_dataloader, - save_quant_onnx_path=os.path.join(args.save_dir, f"quantized_{args.model_name}.onnx"), - observer=args.observer, - data_preprocess=lambda x: x[0].to("cuda"), - quant_format="qdq", - disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/classification/resnet34/ixrt/refine_model.py b/models/cv/classification/resnet34/ixrt/refine_model.py deleted file mode 100644 index 6f1e6c2f..00000000 --- a/models/cv/classification/resnet34/ixrt/refine_model.py +++ /dev/null @@ -1,290 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. 
-# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os -import argparse -import dataclasses - -import torch -import onnx - -from refine_utils.matmul_to_gemm_pass import FusedGemmPass -from refine_utils.linear_pass import FusedLinearPass - -from refine_utils.common import * - -def get_constant_input_name_of_operator(graph: Graph, operator: Operator): - const = None - for input in operator.inputs: - if not graph.containe_var(input): - continue - - if not graph.is_leaf_variable(input): - continue - - input_var = graph.get_variable(input) - if input_var.value is not None: - const = input - return const - -class FuseLayerNormPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - find_sequence_subgraph( - graph, - [OP.REDUCE_MEAN, OP.SUB, OP.POW, OP.REDUCE_MEAN, OP.ADD, OP.SQRT, OP.DIV, OP.MUL, OP.ADD], - self.fuse_layer_norm, - strict=False - ) - return graph - - def fuse_layer_norm(self, graph: Graph, pattern: PatternGraph): - # 检查 REDUCE_MEAN 的输入是否和 SUB 的输入是一致的 - if pattern.nodes[0].operator.inputs[0] != pattern.nodes[1].operator.inputs[0]: - return - - # 检查 POW 的输入是否和 DIV 的输入是一致的 - if pattern.nodes[2].operator.inputs[0] != pattern.nodes[6].operator.inputs[0]: - return - - # 检查部分算子的输出是否被多个算子使用 - nodes = pattern.nodes - for node in [nodes[0]] + nodes[2:-1]: - next_ops = graph.get_next_operators(node.operator) - if len(next_ops) > 1: - return - - eps = None - for input in nodes[4].operator.inputs: 
- input_var = graph.get_variable(input) - if input_var.value is not None and graph.is_leaf_variable(input): - eps = to_py_type(input_var.value) - - scale = get_constant_input_name_of_operator(graph, nodes[-2].operator) - bias = get_constant_input_name_of_operator(graph, nodes[-1].operator) - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - bias_var = graph.get_variable(bias) - print(bias_var) - - attributes = { - "axis": nodes[0].operator.attributes.axes, - "epsilon": eps, - } - - - layer_norm_op = self.transform.make_operator( - op_type="LayerNormalization", - inputs=[nodes[0].operator.inputs[0], scale, bias], - outputs=[nodes[-1].operator.outputs[0]], - **attributes - ) - - self.transform.add_operator(layer_norm_op) - -class FusedGeluPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True - ) - return graph - - def fuse_gelu(self, graph: Graph, pattern: PatternGraph): - nodes = pattern.nodes - prev_op = self.transform.get_previous_operators(nodes[0].operator)[0] - next_ops = self.transform.get_next_operators(prev_op) - if len(next_ops) != 2: - return - - if nodes[0].operator not in next_ops or nodes[3].operator not in next_ops: - return - - gelu_op_input = None - for input in nodes[3].operator.inputs: - if input in nodes[0].operator.inputs: - gelu_op_input = input - break - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - gelu_op = self.transform.make_operator( - op_type=OP.GELU, - inputs=[gelu_op_input], - outputs=[nodes[-1].operator.outputs[0]] - ) - self.transform.add_operator(gelu_op) - -@dataclasses.dataclass -class NormalizeAttr(BaseOperatorAttr): - p: float = 2.0 - epsilon: float = 1e-12 - axis: int = 1 - - -@registe_operator(OP.GELU) -class GeluOperator(BaseOperator): - - def call( - self, - executor, - 
operator: Operator, - inputs: List, - attr: NormalizeAttr, - ): - return F.gelu(inputs[0]) - - def convert_onnx_operator( - self, ir_graph: Graph, onnx_graph: onnx.GraphProto, node: onnx.NodeProto - ) -> Operator: - return default_converter(ir_graph, onnx_graph, node, attr_cls=attr.EmptyAttr) - - def quantize( - self, - graph: Graph, - op: Operator, - operator_observer_config: QuantOperatorObserverConfig, - quant_outputs: bool = False, - ): - return quant_single_input_operator(graph, op, operator_observer_config, quant_outputs=quant_outputs) - - - -class ClearUnsedVariables(BasePass): - - def process(self, graph: Graph) -> Graph: - vars = list(graph.variables) - - for var in vars: - if len(graph.get_dst_operators(var)) == 0 and graph.is_leaf_variable(var): - graph.delete_variable(var) - - quant_params = list(graph.quant_parameters.keys()) - for var in quant_params: - if not graph.containe_var(var): - graph.quant_parameters.pop(var) - - return graph - -class FormatLayerNorm(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if "LayerNorm" in op.op_type: - self.format_layer_norm(graph, op) - return graph - - def format_layer_norm(self, graph, operator): - if not hasattr(operator.attributes, "axis"): - return - if isinstance(operator.attributes.axis, (tuple, list)): - operator.attributes.axis = operator.attributes.axis[0] - -class FormatReshape(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if op.op_type == "Reshape": - self.format_reshape(graph, op) - - return graph - - def format_reshape(self, graph, operator): - shape = graph.get_variable(operator.inputs[1]) - shape.value = torch.tensor(shape.value, dtype=torch.int64) - -class FormatScalar(BasePass): - - def process(self, graph: Graph): - for var in graph.variables.values(): - var: Variable - use_ops = graph.get_dst_operators(var) - - if len(use_ops) == 0: - continue - - if use_ops[0].op_type not in [OP.MUL, OP.ADD, 
OP.GATHER]: - continue - - if var.value is not None and var.value.ndim == 0: - var.value = var.value.reshape(1) - print(f"Reshape scalar to tensor for {var.name}.") - - return graph - -class RenamePass(BasePass): - - def process(self, graph:Graph): - - names = [name for name in graph.operators.keys()] - for old_name in names: - new_name = old_name.replace("/", "#") - - graph.rename_operator(old_name, new_name) - - names = [name for name in graph.variables.keys()] - for name in names: - new_name = name.replace("/", ".").replace("Output", "out").replace("output", "out") - - graph.rename_vaiable(name, new_name, - with_variables=True, - with_operator_outputs=True) - - return graph - -def create_pipeline(example_inputs): - return PassSequence( - # FuseLayerNormPass(), - FusedGeluPass(), - - # ClearUnsedVariables(), - # FormatLayerNorm(), - # FormatReshape(), - # FormatScalar(), - # RenamePass() - ) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--onnx_path", type=str) - parser.add_argument("--dst_onnx_path", type=str) - - parser.add_argument("--bsz", type=int, default=8, - help="Batch size") - parser.add_argument("--imgsz", type=int, default=224, - help="Image size") - - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - - example_inputs = torch.randn(args.bsz, 3, args.imgsz, args.imgsz) - - refine_pipline = Pipeline( - create_source(f"{args.onnx_path}", example_inputs=example_inputs), - create_pipeline(example_inputs), - create_target( - f"{args.dst_onnx_path}", - example_inputs=example_inputs, - ) - ) - refine_pipline.run() - - print(f"refine the model, input shape={example_inputs.shape}") diff --git a/models/cv/classification/resnet34/ixrt/refine_utils/__init__.py b/models/cv/classification/resnet34/ixrt/refine_utils/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/models/cv/classification/resnet34/ixrt/refine_utils/common.py 
b/models/cv/classification/resnet34/ixrt/refine_utils/common.py deleted file mode 100644 index 2af19a14..00000000 --- a/models/cv/classification/resnet34/ixrt/refine_utils/common.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -from typing import Union, Callable, List - -from tensorrt.deploy.api import * -from tensorrt.deploy.backend.onnx.converter import default_converter -from tensorrt.deploy.backend.torch.executor.operators._operators import to_py_type -from tensorrt.deploy.ir.operator_attr import BaseOperatorAttr, EmptyAttr -from tensorrt.deploy.ir.operator_type import OperatorType as OP -from tensorrt.deploy.ir import operator_attr as attr, Operator, generate_operator_name -from tensorrt.deploy.fusion import BasePass, PatternGraph, build_sequence_graph, GraphMatcher, PassSequence -from tensorrt.deploy.ir import Graph -from tensorrt.deploy.quantizer.quant_operator.base import quant_single_input_operator -from tensorrt.deploy.backend.onnx.converter import convert_onnx_operator - -def find_sequence_subgraph(graph, - pattern: Union[List[str], PatternGraph], - callback: Callable[[Graph, PatternGraph], None], - strict=True): - if isinstance(pattern, List): - pattern = build_sequence_graph(pattern) - - matcher = GraphMatcher(pattern, strict=strict) - return matcher.findall(graph, callback) \ No newline at end of file diff --git 
a/models/cv/classification/resnet34/ixrt/refine_utils/linear_pass.py b/models/cv/classification/resnet34/ixrt/refine_utils/linear_pass.py deleted file mode 100644 index 29b5e4a9..00000000 --- a/models/cv/classification/resnet34/ixrt/refine_utils/linear_pass.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import dataclasses - -from refine_utils.common import * - -# AXB=C, Only for B is initializer - -class FusedLinearPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.MATMUL, OP.ADD], callback=self.to_linear_with_bias, strict=True - ) - find_sequence_subgraph( - graph, pattern=[OP.MATMUL], callback=self.to_linear, strict=True - ) - return graph - - def to_linear_with_bias(self, graph, pattern: PatternGraph): - matmul = pattern.nodes[0] - add = pattern.nodes[1] - if len(add.operator.inputs) != 2: - return - - b_var = graph.get_variable(matmul.operator.inputs[1]) - if not graph.is_leaf_variable(b_var) or b_var.value is None: - return - - if b_var.value.ndim != 2: - return - - bias_var = None - for input in add.operator.inputs: - if input not in matmul.operator.outputs: - bias_var = input - - inputs = matmul.operator.inputs - inputs.append(bias_var) - outputs = add.operator.outputs - - b_var.value = b_var.value.transpose(1, 0) - 
b_var.shape[0],b_var.shape[1] = b_var.shape[1],b_var.shape[0] - - hidden_size = b_var.shape[1] - linear_dim = b_var.shape[0] - - attributes = { - "hidden_size": hidden_size, - "linear_dim": linear_dim, - "has_bias": 1, - "act_type":"none" - } - - self.transform.make_operator( - "LinearFP16", - inputs=inputs, - outputs=outputs, - **attributes - ) - - self.transform.delete_operator(add.operator) - self.transform.delete_operator(matmul.operator) - - def to_linear(self, graph, pattern: PatternGraph): - matmul = pattern.nodes[0] - if len(matmul.operator.inputs) != 2: - return - - b_var = graph.get_variable(matmul.operator.inputs[1]) - if not graph.is_leaf_variable(b_var) or b_var.value is None: - return - - if b_var.value.ndim != 2: - return - - attributes = { - "hidden_size": hidden_size, - "linear_dim": linear_dim, - "has_bias": 0, - "act_type": "none" - } - - b_var.value = b_var.value.transpose(1, 0) - b_var.shape[0],b_var.shape[1] = b_var.shape[1], b_var.shape[0] - - hidden_size = b_var.shape[1] - linear_dim = b_var.shape[0] - - op = self.transform.make_operator( - op_type = "LinearFP16", - inputs = pattern.nodes[0].operator.inputs, - outputs=[pattern.nodes[-1].operator.outputs[0]], - **attributes - ) - - self.transform.add_operator(op) - - self.transform.delete_operator(matmul.operator) \ No newline at end of file diff --git a/models/cv/classification/resnet34/ixrt/refine_utils/matmul_to_gemm_pass.py b/models/cv/classification/resnet34/ixrt/refine_utils/matmul_to_gemm_pass.py deleted file mode 100644 index 4ebfac4d..00000000 --- a/models/cv/classification/resnet34/ixrt/refine_utils/matmul_to_gemm_pass.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -from refine_utils.common import * - -# -# Common pattern Matmul to Gemm -# -class FusedGemmPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.MATMUL], callback=self.to_gemm, strict=True - ) - return graph - - def to_gemm(self, graph, pattern: PatternGraph): - matmul_op = pattern.nodes[0] - inputs = matmul_op.operator.inputs - outputs = matmul_op.operator.outputs - - if len(inputs)!=2 and len(outputs)!=1: - return - - for input in inputs: - if self.transform.is_leaf_variable(input): - return - - print(f"{self.transform.get_variable(inputs[0]).shape} {self.transform.get_variable(inputs[1]).shape}") - self.transform.delete_operator(matmul_op.operator) - - op = self.transform.make_operator( - op_type = "Gemm", - inputs = inputs, - outputs = outputs, - alpha = 1, - beta = 1, - transB = 1 - ) - - self.transform.add_operator(op) \ No newline at end of file diff --git a/models/cv/classification/resnet34/ixrt/requirements.txt b/models/cv/classification/resnet34/ixrt/requirements.txt deleted file mode 100644 index bc645b22..00000000 --- a/models/cv/classification/resnet34/ixrt/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -tqdm -onnx -onnxsim -tabulate -pycuda \ No newline at end of file diff --git a/models/cv/classification/resnet34/ixrt/simplify_model.py b/models/cv/classification/resnet34/ixrt/simplify_model.py deleted file mode 100644 index 9948a9fa..00000000 --- a/models/cv/classification/resnet34/ixrt/simplify_model.py +++ /dev/null @@ -1,40 +0,0 @@ -# 
Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/resnet50/ixrt/README.md b/models/cv/classification/resnet50/ixrt/README.md index 8a72bafc..254a4bf9 100644 --- a/models/cv/classification/resnet50/ixrt/README.md +++ b/models/cv/classification/resnet50/ixrt/README.md @@ -27,23 +27,24 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash mkdir checkpoints -python3 export_onnx.py --origin_model /path/to/resnet50-0676ba61.pth --output_model checkpoints/resnet50.onnx +python3 ../../ixrt_common/export.py --model-name resnet50 --weight resnet50-0676ba61.pth --output checkpoints/resnet50.onnx ``` 
## Model Inference ```bash +export PROJ_DIR=./ export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints -export RUN_DIR=./ -export CONFIG_DIR=config/RESNET50_CONFIG +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/RESNET50_CONFIG ``` ### FP16 diff --git a/models/cv/classification/resnet50/ixrt/build_engine.py b/models/cv/classification/resnet50/ixrt/build_engine.py deleted file mode 100644 index 01e126bc..00000000 --- a/models/cv/classification/resnet50/ixrt/build_engine.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt -from calibration_dataset import getdataloader -import cuda.cudart as cudart - -def assertSuccess(err): - assert(err == cudart.cudaError_t.cudaSuccess) - -class EngineCalibrator(tensorrt.IInt8EntropyCalibrator2): - - def __init__(self, cache_file, datasets_dir, loop_count=10, bsz=1, img_sz=224): - super().__init__() - self.cache_file = cache_file - self.image_batcher = getdataloader(datasets_dir, loop_count, batch_size=bsz, img_sz=img_sz) - self.batch_generator = iter(self.image_batcher) - size = img_sz*img_sz*3*bsz - __import__('pdb').set_trace() - err, self.batch_allocation = cudart.cudaMalloc(size) - assertSuccess(err) - - def __del__(self): - err,= cudart.cudaFree(self.batch_allocation) - assertSuccess(err) - - def get_batch_size(self): - return self.image_batcher.batch_size - - def get_batch(self, names): - try: - batch, _ = next(self.batch_generator) - batch = batch.numpy() - __import__('pdb').set_trace() - cudart.cudaMemcpy(self.batch_allocation, - np.ascontiguousarray(batch), - batch.nbytes, - cudart.cudaMemcpyKind.cudaMemcpyHostToDevice) - return [int(self.batch_allocation)] - except StopIteration: - return None - - def read_calibration_cache(self): - if os.path.exists(self.cache_file): - with open(self.cache_file, "rb") as f: - return f.read() - - def write_calibration_cache(self, cache): - with open(self.cache_file, "wb") as f: - f.write(cache) - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.VERBOSE) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - print("precision : ", 
precision) - build_config.set_flag(precision) - if config.precision == "int8": - build_config.int8_calibrator = EngineCalibrator("int8_cache", config.datasets_dir) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - args = parser.parse_args() - return args - -if __name__ == "__main__": - # cali = EngineCalibrator("tmp", "/home/qiang.zhang/data/imagenet_val/") - # print(cali.get_batch_size()) - # print(cali.get_batch("hello")) - args = parse_args() - main(args) diff --git a/models/cv/classification/resnet50/ixrt/build_i8_engine.py b/models/cv/classification/resnet50/ixrt/build_i8_engine.py deleted file mode 100644 index 6038b33f..00000000 --- a/models/cv/classification/resnet50/ixrt/build_i8_engine.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import argparse -import json -import os - -import tensorrt -import tensorrt as trt - -TRT_LOGGER = trt.Logger(tensorrt.Logger.VERBOSE) - -EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - - -def GiB(val): - return val * 1 << 30 - - -def json_load(filename): - with open(filename) as json_file: - data = json.load(json_file) - return data - - -def setDynamicRange(network, json_file): - """Sets ranges for network layers.""" - quant_param_json = json_load(json_file) - act_quant = quant_param_json["act_quant_info"] - - for i in range(network.num_inputs): - input_tensor = network.get_input(i) - if act_quant.__contains__(input_tensor.name): - print(input_tensor.name) - value = act_quant[input_tensor.name] - tensor_max = abs(value) - tensor_min = -abs(value) - input_tensor.dynamic_range = (tensor_min, tensor_max) - - for i in range(network.num_layers): - layer = network.get_layer(i) - - for output_index in range(layer.num_outputs): - tensor = layer.get_output(output_index) - - if act_quant.__contains__(tensor.name): - value = act_quant[tensor.name] - tensor_max = abs(value) - tensor_min = -abs(value) - tensor.dynamic_range = (tensor_min, tensor_max) - else: - print("\033[1;32m%s\033[0m" % tensor.name) - - -def build_engine(onnx_file, json_file, engine_file): - builder = trt.Builder(TRT_LOGGER) - network = builder.create_network(EXPLICIT_BATCH) - - config = builder.create_builder_config() - - # If it is a dynamic onnx model , you need to add the following. 
- # profile = builder.create_optimization_profile() - # profile.set_shape("input_name", (batch, channels, min_h, min_w), (batch, channels, opt_h, opt_w), (batch, channels, max_h, max_w)) - # config.add_optimization_profile(profile) - - parser = trt.OnnxParser(network, TRT_LOGGER) - # config.max_workspace_size = GiB(1) - if not os.path.exists(onnx_file): - quit("ONNX file {} not found".format(onnx_file)) - - with open(onnx_file, "rb") as model: - if not parser.parse(model.read()): - print("ERROR: Failed to parse the ONNX file.") - for error in range(parser.num_errors): - print(parser.get_error(error)) - return None - - config.set_flag(trt.BuilderFlag.INT8) - - setDynamicRange(network, json_file) - - engine = builder.build_engine(network, config) - - with open(engine_file, "wb") as f: - f.write(engine.serialize()) - - -if __name__ == "__main__": - # Add plugins if needed - # import ctypes - # ctypes.CDLL("libmmdeploy_tensorrt_ops.so") - parser = argparse.ArgumentParser( - description="Writing qparams to onnx to convert tensorrt engine." - ) - parser.add_argument("--onnx", type=str, default=None) - parser.add_argument("--qparam_json", type=str, default=None) - parser.add_argument("--engine", type=str, default=None) - arg = parser.parse_args() - - build_engine(arg.onnx, arg.qparam_json, arg.engine) - print("\033[1;32mgenerate %s\033[0m" % arg.engine) \ No newline at end of file diff --git a/models/cv/classification/resnet50/ixrt/calibration_dataset.py b/models/cv/classification/resnet50/ixrt/calibration_dataset.py deleted file mode 100644 index ec931c65..00000000 --- a/models/cv/classification/resnet50/ixrt/calibration_dataset.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 
0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=True, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader diff --git a/models/cv/classification/resnet50/ixrt/ci/prepare.sh b/models/cv/classification/resnet50/ixrt/ci/prepare.sh index 7bdae79a..0a43d88a 100644 --- a/models/cv/classification/resnet50/ixrt/ci/prepare.sh +++ b/models/cv/classification/resnet50/ixrt/ci/prepare.sh @@ -25,6 +25,6 @@ else echo "Not Support Os" fi -pip install -r requirements.txt +pip install -r ../../ixrt_common/requirements.txt mkdir checkpoints -python3 export_onnx.py --origin_model /root/data/checkpoints/resnet50.pth --output_model checkpoints/resnet50.onnx \ No newline at end of file +python3 ../../ixrt_common/export.py --model-name resnet50 --weight resnet50-0676ba61.pth --output checkpoints/resnet50.onnx \ No newline at end of file diff --git a/models/cv/classification/resnet50/ixrt/common.py b/models/cv/classification/resnet50/ixrt/common.py deleted file mode 100644 index 0458195e..00000000 --- a/models/cv/classification/resnet50/ixrt/common.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -import pycuda.driver as cuda - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - allocation = cuda.mem_alloc(size) - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - } - print(f"binding {i}, name : {name} dtype : 
{np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/resnet50/ixrt/export_onnx.py b/models/cv/classification/resnet50/ixrt/export_onnx.py deleted file mode 100644 index 88e85c33..00000000 --- a/models/cv/classification/resnet50/ixrt/export_onnx.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import torch -import torchvision.models as models -import argparse - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = models.resnet50() -model.load_state_dict(torch.load(args.origin_model)) -model.cuda() -model.eval() -input = torch.randn(1, 3, 224, 224, device='cuda') -export_onnx_file = args.output_model - -torch.onnx.export(model, - input, - export_onnx_file, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names = ['input'], - output_names = ['output'],) -print(" ") -print('Model has been converted to ONNX') -print("exit") -exit() diff --git a/models/cv/classification/resnet50/ixrt/inference.py b/models/cv/classification/resnet50/ixrt/inference.py deleted file mode 100644 index 50aafd4f..00000000 --- a/models/cv/classification/resnet50/ixrt/inference.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -import pycuda.autoinit -import pycuda.driver as cuda -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += 
batch_data.shape[0] - - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - end2end_time = end_time - start_time - - print(F"E2E time : {end2end_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 
0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/resnet50/ixrt/modify_batchsize.py b/models/cv/classification/resnet50/ixrt/modify_batchsize.py deleted file mode 100644 index 689b7a97..00000000 --- a/models/cv/classification/resnet50/ixrt/modify_batchsize.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. 
- dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/resnet50/ixrt/quant.py b/models/cv/classification/resnet50/ixrt/quant.py deleted file mode 100644 index 5d71c828..00000000 --- a/models/cv/classification/resnet50/ixrt/quant.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-"""这是一个高度自动化的 PPQ 量化的入口脚本,将你的模型和数据按要求进行打包: - -在自动化 API 中,我们使用 QuantizationSetting 对象传递量化参数。 - -This file will show you how to quantize your network with PPQ - You should prepare your model and calibration dataset as follow: - - ~/working/model.onnx <-- your model - ~/working/data/*.npy or ~/working/data/*.bin <-- your dataset - -if you are using caffe model: - ~/working/model.caffemdoel <-- your model - ~/working/model.prototext <-- your model - -### MAKE SURE YOUR INPUT LAYOUT IS [N, C, H, W] or [C, H, W] ### - -quantized model will be generated at: ~/working/quantized.onnx -""" -from ppq import * -from ppq.api import * -import os -from calibration_dataset import getdataloader -import argparse -import random -import numpy as np -import torch - - -def setseed(seed=42): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], - default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=20) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=224) - args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - - -config = parse_args() - -# modify configuration below: -WORKING_DIRECTORY = 'checkpoints' # choose your working directory -TARGET_PLATFORM = TargetPlatform.TRT_INT8 # choose your target platform -MODEL_TYPE = NetworkFramework.ONNX # or NetworkFramework.CAFFE -INPUT_LAYOUT = 'chw' # input data layout, 
chw or hwc -NETWORK_INPUTSHAPE = [1, 3, 224, 224] # input shape of your network -EXECUTING_DEVICE = 'cuda' # 'cuda' or 'cpu'. -REQUIRE_ANALYSE = False -TRAINING_YOUR_NETWORK = False # 是否需要 Finetuning 一下你的网络 -# ------------------------------------------------------------------- -# 加载你的模型文件,PPQ 将会把 onnx 或者 caffe 模型文件解析成自己的格式 -# 如果你正使用 pytorch, tensorflow 等框架,你可以先将模型导出成 onnx -# 使用 torch.onnx.export 即可,如果你在导出 torch 模型时发生错误,欢迎与我们联系。 -# ------------------------------------------------------------------- -graph = None -if MODEL_TYPE == NetworkFramework.ONNX: - graph = load_onnx_graph(onnx_import_file=config.model) -if MODEL_TYPE == NetworkFramework.CAFFE: - graph = load_caffe_graph( - caffemodel_path=os.path.join(WORKING_DIRECTORY, 'model.caffemodel'), - prototxt_path=os.path.join(WORKING_DIRECTORY, 'model.prototxt')) -assert graph is not None, 'Graph Loading Error, Check your input again.' - -# ------------------------------------------------------------------- -# SETTING 对象用于控制 PPQ 的量化逻辑,主要描述了图融合逻辑、调度方案、量化细节策略等 -# 当你的网络量化误差过高时,你需要修改 SETTING 对象中的属性来进行特定的优化 -# ------------------------------------------------------------------- -QS = QuantizationSettingFactory.default_setting() - -# ------------------------------------------------------------------- -# 下面向你展示了如何使用 finetuning 过程提升量化精度 -# 在 PPQ 中我们提供了十余种算法用来帮助你恢复精度 -# 开启他们的方式都是 QS.xxxx = True -# 按需使用,不要全部打开,容易起飞 -# ------------------------------------------------------------------- -if TRAINING_YOUR_NETWORK: - QS.lsq_optimization = True # 启动网络再训练过程,降低量化误差 - QS.lsq_optimization_setting.steps = 500 # 再训练步数,影响训练时间,500 步大概几分钟 - QS.lsq_optimization_setting.collecting_device = 'cuda' # 缓存数据放在那,cuda 就是放在gpu,如果显存超了你就换成 'cpu' - - -dataloader = getdataloader(config.dataset_dir, config.step, batch_size=config.bsz, img_sz=config.imgsz) -# ENABLE CUDA KERNEL 会加速量化效率 3x ~ 10x,但是你如果没有装相应编译环境的话是编译不了的 -# 你可以尝试安装编译环境,或者在不启动 CUDA KERNEL 的情况下完成量化:移除 with ENABLE_CUDA_KERNEL(): 即可 -with ENABLE_CUDA_KERNEL(): - print('网络正量化中,根据你的量化配置,这将需要一段时间:') - 
quantized = quantize_native_model( - setting=QS, # setting 对象用来控制标准量化逻辑 - model=graph, - calib_dataloader=dataloader, - calib_steps=config.step, - input_shape=NETWORK_INPUTSHAPE, # 如果你的网络只有一个输入,使用这个参数传参 - inputs=None, - # 如果你的网络有多个输入,使用这个参数传参,就是 input_shape=None, inputs=[torch.zeros(1,3,224,224), torch.zeros(1,3,224,224)] - collate_fn=lambda x: x[0].to(EXECUTING_DEVICE), # collate_fn 跟 torch dataloader 的 collate fn 是一样的,用于数据预处理, - # 你当然也可以用 torch dataloader 的那个,然后设置这个为 None - platform=TARGET_PLATFORM, - device=EXECUTING_DEVICE, - do_quantize=True) - - # ------------------------------------------------------------------- - # 如果你需要执行量化后的神经网络并得到结果,则需要创建一个 executor - # 这个 executor 的行为和 torch.Module 是类似的,你可以利用这个东西来获取执行结果 - # 请注意,必须在 export 之前执行此操作。 - # ------------------------------------------------------------------- - executor = TorchExecutor(graph=quantized, device=EXECUTING_DEVICE) - # output = executor.forward(input) - - # ------------------------------------------------------------------- - # PPQ 计算量化误差时,使用信噪比的倒数作为指标,即噪声能量 / 信号能量 - # 量化误差 0.1 表示在整体信号中,量化噪声的能量约为 10% - # 你应当注意,在 graphwise_error_analyse 分析中,我们衡量的是累计误差 - # 网络的最后一层往往都具有较大的累计误差,这些误差是其前面的所有层所共同造成的 - # 你需要使用 layerwise_error_analyse 逐层分析误差的来源 - # ------------------------------------------------------------------- - print('正计算网络量化误差(SNR),最后一层的误差应小于 0.1 以保证量化精度:') - reports = graphwise_error_analyse( - graph=quantized, running_device=EXECUTING_DEVICE, steps=32, - dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE)) - for op, snr in reports.items(): - if snr > 0.1: ppq_warning(f'层 {op} 的累计量化误差显著,请考虑进行优化') - - if REQUIRE_ANALYSE: - print('正计算逐层量化误差(SNR),每一层的独立量化误差应小于 0.1 以保证量化精度:') - layerwise_error_analyse(graph=quantized, running_device=EXECUTING_DEVICE, - interested_outputs=None, - dataloader=dataloader, collate_fn=lambda x: x.to(EXECUTING_DEVICE)) - - # ------------------------------------------------------------------- - # 使用 export_ppq_graph 函数来导出量化后的模型 - # PPQ 会根据你所选择的导出平台来修改模型格式 - # 
------------------------------------------------------------------- - print('网络量化结束,正在生成目标文件:') - export_ppq_graph( - graph=quantized, platform=TARGET_PLATFORM, - graph_save_to=os.path.join(config.save_dir, f"quantized_{config.model_name}.onnx"), - config_save_to=os.path.join(config.save_dir, 'quant_cfg.json')) diff --git a/models/cv/classification/resnet50/ixrt/refine_model.py b/models/cv/classification/resnet50/ixrt/refine_model.py deleted file mode 100644 index 6f1e6c2f..00000000 --- a/models/cv/classification/resnet50/ixrt/refine_model.py +++ /dev/null @@ -1,290 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import os -import argparse -import dataclasses - -import torch -import onnx - -from refine_utils.matmul_to_gemm_pass import FusedGemmPass -from refine_utils.linear_pass import FusedLinearPass - -from refine_utils.common import * - -def get_constant_input_name_of_operator(graph: Graph, operator: Operator): - const = None - for input in operator.inputs: - if not graph.containe_var(input): - continue - - if not graph.is_leaf_variable(input): - continue - - input_var = graph.get_variable(input) - if input_var.value is not None: - const = input - return const - -class FuseLayerNormPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - find_sequence_subgraph( - graph, - [OP.REDUCE_MEAN, OP.SUB, OP.POW, OP.REDUCE_MEAN, OP.ADD, OP.SQRT, OP.DIV, OP.MUL, OP.ADD], - self.fuse_layer_norm, - strict=False - ) - return graph - - def fuse_layer_norm(self, graph: Graph, pattern: PatternGraph): - # 检查 REDUCE_MEAN 的输入是否和 SUB 的输入是一致的 - if pattern.nodes[0].operator.inputs[0] != pattern.nodes[1].operator.inputs[0]: - return - - # 检查 POW 的输入是否和 DIV 的输入是一致的 - if pattern.nodes[2].operator.inputs[0] != pattern.nodes[6].operator.inputs[0]: - return - - # 检查部分算子的输出是否被多个算子使用 - nodes = pattern.nodes - for node in [nodes[0]] + nodes[2:-1]: - next_ops = graph.get_next_operators(node.operator) - if len(next_ops) > 1: - return - - eps = None - for input in nodes[4].operator.inputs: - input_var = graph.get_variable(input) - if input_var.value is not None and graph.is_leaf_variable(input): - eps = to_py_type(input_var.value) - - scale = get_constant_input_name_of_operator(graph, nodes[-2].operator) - bias = get_constant_input_name_of_operator(graph, nodes[-1].operator) - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - bias_var = graph.get_variable(bias) - print(bias_var) - - attributes = { - "axis": nodes[0].operator.attributes.axes, - "epsilon": eps, - } - - - layer_norm_op = self.transform.make_operator( - 
op_type="LayerNormalization", - inputs=[nodes[0].operator.inputs[0], scale, bias], - outputs=[nodes[-1].operator.outputs[0]], - **attributes - ) - - self.transform.add_operator(layer_norm_op) - -class FusedGeluPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True - ) - return graph - - def fuse_gelu(self, graph: Graph, pattern: PatternGraph): - nodes = pattern.nodes - prev_op = self.transform.get_previous_operators(nodes[0].operator)[0] - next_ops = self.transform.get_next_operators(prev_op) - if len(next_ops) != 2: - return - - if nodes[0].operator not in next_ops or nodes[3].operator not in next_ops: - return - - gelu_op_input = None - for input in nodes[3].operator.inputs: - if input in nodes[0].operator.inputs: - gelu_op_input = input - break - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - gelu_op = self.transform.make_operator( - op_type=OP.GELU, - inputs=[gelu_op_input], - outputs=[nodes[-1].operator.outputs[0]] - ) - self.transform.add_operator(gelu_op) - -@dataclasses.dataclass -class NormalizeAttr(BaseOperatorAttr): - p: float = 2.0 - epsilon: float = 1e-12 - axis: int = 1 - - -@registe_operator(OP.GELU) -class GeluOperator(BaseOperator): - - def call( - self, - executor, - operator: Operator, - inputs: List, - attr: NormalizeAttr, - ): - return F.gelu(inputs[0]) - - def convert_onnx_operator( - self, ir_graph: Graph, onnx_graph: onnx.GraphProto, node: onnx.NodeProto - ) -> Operator: - return default_converter(ir_graph, onnx_graph, node, attr_cls=attr.EmptyAttr) - - def quantize( - self, - graph: Graph, - op: Operator, - operator_observer_config: QuantOperatorObserverConfig, - quant_outputs: bool = False, - ): - return quant_single_input_operator(graph, op, operator_observer_config, quant_outputs=quant_outputs) - - - -class 
ClearUnsedVariables(BasePass): - - def process(self, graph: Graph) -> Graph: - vars = list(graph.variables) - - for var in vars: - if len(graph.get_dst_operators(var)) == 0 and graph.is_leaf_variable(var): - graph.delete_variable(var) - - quant_params = list(graph.quant_parameters.keys()) - for var in quant_params: - if not graph.containe_var(var): - graph.quant_parameters.pop(var) - - return graph - -class FormatLayerNorm(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if "LayerNorm" in op.op_type: - self.format_layer_norm(graph, op) - return graph - - def format_layer_norm(self, graph, operator): - if not hasattr(operator.attributes, "axis"): - return - if isinstance(operator.attributes.axis, (tuple, list)): - operator.attributes.axis = operator.attributes.axis[0] - -class FormatReshape(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if op.op_type == "Reshape": - self.format_reshape(graph, op) - - return graph - - def format_reshape(self, graph, operator): - shape = graph.get_variable(operator.inputs[1]) - shape.value = torch.tensor(shape.value, dtype=torch.int64) - -class FormatScalar(BasePass): - - def process(self, graph: Graph): - for var in graph.variables.values(): - var: Variable - use_ops = graph.get_dst_operators(var) - - if len(use_ops) == 0: - continue - - if use_ops[0].op_type not in [OP.MUL, OP.ADD, OP.GATHER]: - continue - - if var.value is not None and var.value.ndim == 0: - var.value = var.value.reshape(1) - print(f"Reshape scalar to tensor for {var.name}.") - - return graph - -class RenamePass(BasePass): - - def process(self, graph:Graph): - - names = [name for name in graph.operators.keys()] - for old_name in names: - new_name = old_name.replace("/", "#") - - graph.rename_operator(old_name, new_name) - - names = [name for name in graph.variables.keys()] - for name in names: - new_name = name.replace("/", ".").replace("Output", 
"out").replace("output", "out") - - graph.rename_vaiable(name, new_name, - with_variables=True, - with_operator_outputs=True) - - return graph - -def create_pipeline(example_inputs): - return PassSequence( - # FuseLayerNormPass(), - FusedGeluPass(), - - # ClearUnsedVariables(), - # FormatLayerNorm(), - # FormatReshape(), - # FormatScalar(), - # RenamePass() - ) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--onnx_path", type=str) - parser.add_argument("--dst_onnx_path", type=str) - - parser.add_argument("--bsz", type=int, default=8, - help="Batch size") - parser.add_argument("--imgsz", type=int, default=224, - help="Image size") - - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - - example_inputs = torch.randn(args.bsz, 3, args.imgsz, args.imgsz) - - refine_pipline = Pipeline( - create_source(f"{args.onnx_path}", example_inputs=example_inputs), - create_pipeline(example_inputs), - create_target( - f"{args.dst_onnx_path}", - example_inputs=example_inputs, - ) - ) - refine_pipline.run() - - print(f"refine the model, input shape={example_inputs.shape}") diff --git a/models/cv/classification/resnet50/ixrt/refine_utils/__init__.py b/models/cv/classification/resnet50/ixrt/refine_utils/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/models/cv/classification/resnet50/ixrt/refine_utils/common.py b/models/cv/classification/resnet50/ixrt/refine_utils/common.py deleted file mode 100644 index 2af19a14..00000000 --- a/models/cv/classification/resnet50/ixrt/refine_utils/common.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -from typing import Union, Callable, List - -from tensorrt.deploy.api import * -from tensorrt.deploy.backend.onnx.converter import default_converter -from tensorrt.deploy.backend.torch.executor.operators._operators import to_py_type -from tensorrt.deploy.ir.operator_attr import BaseOperatorAttr, EmptyAttr -from tensorrt.deploy.ir.operator_type import OperatorType as OP -from tensorrt.deploy.ir import operator_attr as attr, Operator, generate_operator_name -from tensorrt.deploy.fusion import BasePass, PatternGraph, build_sequence_graph, GraphMatcher, PassSequence -from tensorrt.deploy.ir import Graph -from tensorrt.deploy.quantizer.quant_operator.base import quant_single_input_operator -from tensorrt.deploy.backend.onnx.converter import convert_onnx_operator - -def find_sequence_subgraph(graph, - pattern: Union[List[str], PatternGraph], - callback: Callable[[Graph, PatternGraph], None], - strict=True): - if isinstance(pattern, List): - pattern = build_sequence_graph(pattern) - - matcher = GraphMatcher(pattern, strict=strict) - return matcher.findall(graph, callback) \ No newline at end of file diff --git a/models/cv/classification/resnet50/ixrt/refine_utils/linear_pass.py b/models/cv/classification/resnet50/ixrt/refine_utils/linear_pass.py deleted file mode 100644 index 29b5e4a9..00000000 --- a/models/cv/classification/resnet50/ixrt/refine_utils/linear_pass.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import dataclasses - -from refine_utils.common import * - -# AXB=C, Only for B is initializer - -class FusedLinearPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.MATMUL, OP.ADD], callback=self.to_linear_with_bias, strict=True - ) - find_sequence_subgraph( - graph, pattern=[OP.MATMUL], callback=self.to_linear, strict=True - ) - return graph - - def to_linear_with_bias(self, graph, pattern: PatternGraph): - matmul = pattern.nodes[0] - add = pattern.nodes[1] - if len(add.operator.inputs) != 2: - return - - b_var = graph.get_variable(matmul.operator.inputs[1]) - if not graph.is_leaf_variable(b_var) or b_var.value is None: - return - - if b_var.value.ndim != 2: - return - - bias_var = None - for input in add.operator.inputs: - if input not in matmul.operator.outputs: - bias_var = input - - inputs = matmul.operator.inputs - inputs.append(bias_var) - outputs = add.operator.outputs - - b_var.value = b_var.value.transpose(1, 0) - b_var.shape[0],b_var.shape[1] = b_var.shape[1],b_var.shape[0] - - hidden_size = b_var.shape[1] - linear_dim = b_var.shape[0] - - attributes = { - "hidden_size": hidden_size, - "linear_dim": linear_dim, - "has_bias": 1, - "act_type":"none" - } - - self.transform.make_operator( - "LinearFP16", - inputs=inputs, - outputs=outputs, - **attributes - ) - - self.transform.delete_operator(add.operator) - 
self.transform.delete_operator(matmul.operator) - - def to_linear(self, graph, pattern: PatternGraph): - matmul = pattern.nodes[0] - if len(matmul.operator.inputs) != 2: - return - - b_var = graph.get_variable(matmul.operator.inputs[1]) - if not graph.is_leaf_variable(b_var) or b_var.value is None: - return - - if b_var.value.ndim != 2: - return - - attributes = { - "hidden_size": hidden_size, - "linear_dim": linear_dim, - "has_bias": 0, - "act_type": "none" - } - - b_var.value = b_var.value.transpose(1, 0) - b_var.shape[0],b_var.shape[1] = b_var.shape[1], b_var.shape[0] - - hidden_size = b_var.shape[1] - linear_dim = b_var.shape[0] - - op = self.transform.make_operator( - op_type = "LinearFP16", - inputs = pattern.nodes[0].operator.inputs, - outputs=[pattern.nodes[-1].operator.outputs[0]], - **attributes - ) - - self.transform.add_operator(op) - - self.transform.delete_operator(matmul.operator) \ No newline at end of file diff --git a/models/cv/classification/resnet50/ixrt/refine_utils/matmul_to_gemm_pass.py b/models/cv/classification/resnet50/ixrt/refine_utils/matmul_to_gemm_pass.py deleted file mode 100644 index 4ebfac4d..00000000 --- a/models/cv/classification/resnet50/ixrt/refine_utils/matmul_to_gemm_pass.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-from refine_utils.common import * - -# -# Common pattern Matmul to Gemm -# -class FusedGemmPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.MATMUL], callback=self.to_gemm, strict=True - ) - return graph - - def to_gemm(self, graph, pattern: PatternGraph): - matmul_op = pattern.nodes[0] - inputs = matmul_op.operator.inputs - outputs = matmul_op.operator.outputs - - if len(inputs)!=2 and len(outputs)!=1: - return - - for input in inputs: - if self.transform.is_leaf_variable(input): - return - - print(f"{self.transform.get_variable(inputs[0]).shape} {self.transform.get_variable(inputs[1]).shape}") - self.transform.delete_operator(matmul_op.operator) - - op = self.transform.make_operator( - op_type = "Gemm", - inputs = inputs, - outputs = outputs, - alpha = 1, - beta = 1, - transB = 1 - ) - - self.transform.add_operator(op) \ No newline at end of file diff --git a/models/cv/classification/resnet50/ixrt/requirements.txt b/models/cv/classification/resnet50/ixrt/requirements.txt deleted file mode 100644 index 01ec4f11..00000000 --- a/models/cv/classification/resnet50/ixrt/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -tqdm -onnx -onnxsim -tabulate -ppq -pycuda \ No newline at end of file diff --git a/models/cv/classification/resnet50/ixrt/scripts/infer_resnet50_fp16_accuracy.sh b/models/cv/classification/resnet50/ixrt/scripts/infer_resnet50_fp16_accuracy.sh index b743d708..02f61a26 100644 --- a/models/cv/classification/resnet50/ixrt/scripts/infer_resnet50_fp16_accuracy.sh +++ b/models/cv/classification/resnet50/ixrt/scripts/infer_resnet50_fp16_accuracy.sh @@ -23,7 +23,7 @@ check_status() # Run paraments BSZ=32 -TGT=-1 +TGT=0.76 WARM_UP=0 LOOP_COUNT=-1 RUN_MODE=ACC @@ -69,6 +69,34 @@ else echo " "Generate ${SIM_MODEL} fi +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + 
QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + # Change Batchsize let step++ echo; @@ -111,4 +139,4 @@ python3 ${RUN_DIR}/inference.py \ --acc_target ${TGT} \ --bsz ${BSZ}; check_status -exit ${EXIT_STATUS} +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/resnet50/ixrt/scripts/infer_resnet50_fp16_performance.sh b/models/cv/classification/resnet50/ixrt/scripts/infer_resnet50_fp16_performance.sh index e7a4f1a7..61051638 100644 --- a/models/cv/classification/resnet50/ixrt/scripts/infer_resnet50_fp16_performance.sh +++ b/models/cv/classification/resnet50/ixrt/scripts/infer_resnet50_fp16_performance.sh @@ -69,6 +69,34 @@ else echo " "Generate ${SIM_MODEL} fi +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + 
SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + # Change Batchsize let step++ echo; @@ -111,4 +139,4 @@ python3 ${RUN_DIR}/inference.py \ --fps_target ${TGT} \ --bsz ${BSZ}; check_status -exit ${EXIT_STATUS} +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/resnet50/ixrt/scripts/infer_resnet50_int8_accuracy.sh b/models/cv/classification/resnet50/ixrt/scripts/infer_resnet50_int8_accuracy.sh index 2b2db01a..b1d66597 100644 --- a/models/cv/classification/resnet50/ixrt/scripts/infer_resnet50_int8_accuracy.sh +++ b/models/cv/classification/resnet50/ixrt/scripts/infer_resnet50_int8_accuracy.sh @@ -24,12 +24,12 @@ check_status() # Run paraments BSZ=32 -TGT=-1 +TGT=0.757 WARM_UP=0 LOOP_COUNT=-1 RUN_MODE=ACC PRECISION=int8 -export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python + # Update arguments index=0 options=$@ @@ -44,7 +44,6 @@ do done source ${CONFIG_DIR} -echo ${QUANT_OBSERVER} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} @@ -62,15 +61,16 @@ SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx # Simplify Model let step++ - echo [STEP ${step}] : Simplify Model - if [ -f ${SIM_MODEL} ];then - echo " "Simplify Model, ${SIM_MODEL} has been existed - else - python3 ${RUN_DIR}/simplify_model.py \ - --origin_model $ORIGINE_MODEL \ - --output_model ${SIM_MODEL} - echo " "Generate ${SIM_MODEL} - fi +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi # Quant Model if [ $PRECISION == "int8" ];then @@ -100,36 +100,36 @@ if [ $PRECISION == "int8" ];then fi fi - # Change Batchsize - let step++ - echo; - echo [STEP ${step}] : Change Batchsize - FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx - if [ -f $FINAL_MODEL ];then - echo " "Change 
Batchsize Skip, $FINAL_MODEL has been existed - else - python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ - --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} - echo " "Generate ${FINAL_MODEL} - fi +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi - # Build Engine - let step++ - echo; - echo [STEP ${step}] : Build Engine - ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine - if [ -f $ENGINE_FILE ];then - echo " "Build Engine Skip, $ENGINE_FILE has been existed - else - python3 ${RUN_DIR}/build_i8_engine.py \ - --onnx ${FINAL_MODEL} \ - --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \ - --engine ${ENGINE_FILE} - echo " "Generate Engine ${ENGINE_FILE} - fi +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi # Inference -# let step++ +let step++ echo; echo [STEP ${step}] : Inference python3 ${RUN_DIR}/inference.py \ @@ -142,4 +142,4 @@ python3 ${RUN_DIR}/inference.py \ --acc_target ${TGT} \ --bsz ${BSZ}; check_status -exit ${EXIT_STATUS} +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/resnet50/ixrt/scripts/infer_resnet50_int8_performance.sh b/models/cv/classification/resnet50/ixrt/scripts/infer_resnet50_int8_performance.sh index bec51520..9e246604 100644 --- 
a/models/cv/classification/resnet50/ixrt/scripts/infer_resnet50_int8_performance.sh +++ b/models/cv/classification/resnet50/ixrt/scripts/infer_resnet50_int8_performance.sh @@ -28,7 +28,7 @@ WARM_UP=3 LOOP_COUNT=20 RUN_MODE=FPS PRECISION=int8 -export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python + # Update arguments index=0 options=$@ @@ -43,7 +43,6 @@ do done source ${CONFIG_DIR} -echo ${QUANT_OBSERVER} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} @@ -52,8 +51,6 @@ echo RUN_DIR : ${RUN_DIR} echo CONFIG_DIR : ${CONFIG_DIR} echo ====================== Model Info ====================== echo Model Name : ${MODEL_NAME} -echo Model Input Name : ${MODEL_INPUT_NAME} -echo Model Output Name : ${MODEL_OUTPUT_NAME} echo Onnx Path : ${ORIGINE_MODEL} step=0 @@ -61,15 +58,16 @@ SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx # Simplify Model let step++ - echo [STEP ${step}] : Simplify Model - if [ -f ${SIM_MODEL} ];then - echo " "Simplify Model, ${SIM_MODEL} has been existed - else - python3 ${RUN_DIR}/simplify_model.py \ - --origin_model $ORIGINE_MODEL \ - --output_model ${SIM_MODEL} - echo " "Generate ${SIM_MODEL} - fi +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi # Quant Model if [ $PRECISION == "int8" ];then @@ -99,36 +97,36 @@ if [ $PRECISION == "int8" ];then fi fi - # Change Batchsize - let step++ - echo; - echo [STEP ${step}] : Change Batchsize - FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx - if [ -f $FINAL_MODEL ];then - echo " "Change Batchsize Skip, $FINAL_MODEL has been existed - else - python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ - --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} - echo " "Generate ${FINAL_MODEL} - fi +# Change Batchsize +let 
step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi - # Build Engine - let step++ - echo; - echo [STEP ${step}] : Build Engine - ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine - if [ -f $ENGINE_FILE ];then - echo " "Build Engine Skip, $ENGINE_FILE has been existed - else - python3 ${RUN_DIR}/build_i8_engine.py \ - --onnx ${FINAL_MODEL} \ - --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \ - --engine ${ENGINE_FILE} - echo " "Generate Engine ${ENGINE_FILE} - fi +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi # Inference -# let step++ +let step++ echo; echo [STEP ${step}] : Inference python3 ${RUN_DIR}/inference.py \ @@ -138,7 +136,7 @@ python3 ${RUN_DIR}/inference.py \ --warm_up=${WARM_UP} \ --loop_count ${LOOP_COUNT} \ --test_mode ${RUN_MODE} \ - --acc_target ${TGT} \ + --fps_target ${TGT} \ --bsz ${BSZ}; check_status exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/resnet50/ixrt/simplify_model.py b/models/cv/classification/resnet50/ixrt/simplify_model.py deleted file mode 100644 index 9948a9fa..00000000 --- a/models/cv/classification/resnet50/ixrt/simplify_model.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/resnetv1d50/ixrt/README.md b/models/cv/classification/resnetv1d50/ixrt/README.md index 1b195efc..b81fadc6 100644 --- a/models/cv/classification/resnetv1d50/ixrt/README.md +++ b/models/cv/classification/resnetv1d50/ixrt/README.md @@ -14,6 +14,8 @@ Residual Networks, or ResNets, learn residual functions with reference to the la ### Prepare Resources +Pretrained model: + Dataset: to download the validation dataset. 
### Install Dependencies @@ -25,7 +27,8 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirments.txt +pip3 install -r ../../ixrt_common/requirments.txt +pip3 install mmpretrain ``` ### Model Conversion @@ -38,10 +41,11 @@ python3 export_onnx.py --output_model checkpoints/resnet_v1_d50.onnx ## Model Inference ```bash +export PROJ_DIR=./ export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints -export RUN_DIR=./ -export CONFIG_DIR=config/RESNETV1D50_CONFIG +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/config/RESNETV1D50_CONFIG ``` ### FP16 diff --git a/models/cv/classification/resnetv1d50/ixrt/build_engine.py b/models/cv/classification/resnetv1d50/ixrt/build_engine.py deleted file mode 100644 index 4fbcc015..00000000 --- a/models/cv/classification/resnetv1d50/ixrt/build_engine.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt -from calibration_dataset import getdataloader -import cuda.cudart as cudart - -def assertSuccess(err): - assert(err == cudart.cudaError_t.cudaSuccess) - -class EngineCalibrator(tensorrt.IInt8EntropyCalibrator2): - - def __init__(self, cache_file, datasets_dir, loop_count=10, bsz=1, img_sz=224): - super().__init__() - self.cache_file = cache_file - self.image_batcher = getdataloader(datasets_dir, loop_count, batch_size=bsz, img_sz=img_sz) - self.batch_generator = iter(self.image_batcher) - size = img_sz*img_sz*3*bsz - __import__('pdb').set_trace() - err, self.batch_allocation = cudart.cudaMalloc(size) - assertSuccess(err) - - def __del__(self): - err,= cudart.cudaFree(self.batch_allocation) - assertSuccess(err) - - def get_batch_size(self): - return self.image_batcher.batch_size - - def get_batch(self, names): - try: - batch, _ = next(self.batch_generator) - batch = batch.numpy() - __import__('pdb').set_trace() - cudart.cudaMemcpy(self.batch_allocation, - np.ascontiguousarray(batch), - batch.nbytes, - cudart.cudaMemcpyKind.cudaMemcpyHostToDevice) - return [int(self.batch_allocation)] - except StopIteration: - return None - - def read_calibration_cache(self): - if os.path.exists(self.cache_file): - with open(self.cache_file, "rb") as f: - return f.read() - - def write_calibration_cache(self, cache): - with open(self.cache_file, "wb") as f: - f.write(cache) - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - print("precision : ", 
precision) - build_config.set_flag(precision) - if config.precision == "int8": - build_config.int8_calibrator = EngineCalibrator("int8_cache", config.datasets_dir) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - args = parser.parse_args() - return args - -if __name__ == "__main__": - # cali = EngineCalibrator("tmp", "/home/qiang.zhang/data/imagenet_val/") - # print(cali.get_batch_size()) - # print(cali.get_batch("hello")) - args = parse_args() - main(args) diff --git a/models/cv/classification/resnetv1d50/ixrt/build_i8_engine.py b/models/cv/classification/resnetv1d50/ixrt/build_i8_engine.py deleted file mode 100644 index 6038b33f..00000000 --- a/models/cv/classification/resnetv1d50/ixrt/build_i8_engine.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import argparse -import json -import os - -import tensorrt -import tensorrt as trt - -TRT_LOGGER = trt.Logger(tensorrt.Logger.VERBOSE) - -EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - - -def GiB(val): - return val * 1 << 30 - - -def json_load(filename): - with open(filename) as json_file: - data = json.load(json_file) - return data - - -def setDynamicRange(network, json_file): - """Sets ranges for network layers.""" - quant_param_json = json_load(json_file) - act_quant = quant_param_json["act_quant_info"] - - for i in range(network.num_inputs): - input_tensor = network.get_input(i) - if act_quant.__contains__(input_tensor.name): - print(input_tensor.name) - value = act_quant[input_tensor.name] - tensor_max = abs(value) - tensor_min = -abs(value) - input_tensor.dynamic_range = (tensor_min, tensor_max) - - for i in range(network.num_layers): - layer = network.get_layer(i) - - for output_index in range(layer.num_outputs): - tensor = layer.get_output(output_index) - - if act_quant.__contains__(tensor.name): - value = act_quant[tensor.name] - tensor_max = abs(value) - tensor_min = -abs(value) - tensor.dynamic_range = (tensor_min, tensor_max) - else: - print("\033[1;32m%s\033[0m" % tensor.name) - - -def build_engine(onnx_file, json_file, engine_file): - builder = trt.Builder(TRT_LOGGER) - network = builder.create_network(EXPLICIT_BATCH) - - config = builder.create_builder_config() - - # If it is a dynamic onnx model , you need to add the following. 
- # profile = builder.create_optimization_profile() - # profile.set_shape("input_name", (batch, channels, min_h, min_w), (batch, channels, opt_h, opt_w), (batch, channels, max_h, max_w)) - # config.add_optimization_profile(profile) - - parser = trt.OnnxParser(network, TRT_LOGGER) - # config.max_workspace_size = GiB(1) - if not os.path.exists(onnx_file): - quit("ONNX file {} not found".format(onnx_file)) - - with open(onnx_file, "rb") as model: - if not parser.parse(model.read()): - print("ERROR: Failed to parse the ONNX file.") - for error in range(parser.num_errors): - print(parser.get_error(error)) - return None - - config.set_flag(trt.BuilderFlag.INT8) - - setDynamicRange(network, json_file) - - engine = builder.build_engine(network, config) - - with open(engine_file, "wb") as f: - f.write(engine.serialize()) - - -if __name__ == "__main__": - # Add plugins if needed - # import ctypes - # ctypes.CDLL("libmmdeploy_tensorrt_ops.so") - parser = argparse.ArgumentParser( - description="Writing qparams to onnx to convert tensorrt engine." - ) - parser.add_argument("--onnx", type=str, default=None) - parser.add_argument("--qparam_json", type=str, default=None) - parser.add_argument("--engine", type=str, default=None) - arg = parser.parse_args() - - build_engine(arg.onnx, arg.qparam_json, arg.engine) - print("\033[1;32mgenerate %s\033[0m" % arg.engine) \ No newline at end of file diff --git a/models/cv/classification/resnetv1d50/ixrt/calibration_dataset.py b/models/cv/classification/resnetv1d50/ixrt/calibration_dataset.py deleted file mode 100644 index ec931c65..00000000 --- a/models/cv/classification/resnetv1d50/ixrt/calibration_dataset.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 
0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=True, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader diff --git a/models/cv/classification/resnetv1d50/ixrt/ci/prepare.sh b/models/cv/classification/resnetv1d50/ixrt/ci/prepare.sh index e4f74d0f..30873aff 100644 --- a/models/cv/classification/resnetv1d50/ixrt/ci/prepare.sh +++ b/models/cv/classification/resnetv1d50/ixrt/ci/prepare.sh @@ -25,7 +25,8 @@ else echo "Not Support Os" fi -pip install -r requirements.txt +pip install -r ../../ixrt_common/requirements.txt +pip install mmpretrain mkdir checkpoints mkdir -p /root/.cache/torch/hub/checkpoints/ ln -s /root/data/checkpoints/resnetv1d50_b32x8_imagenet_20210531-db14775a.pth /root/.cache/torch/hub/checkpoints/resnetv1d50_b32x8_imagenet_20210531-db14775a.pth diff --git a/models/cv/classification/resnetv1d50/ixrt/common.py b/models/cv/classification/resnetv1d50/ixrt/common.py deleted file mode 100644 index 0458195e..00000000 --- a/models/cv/classification/resnetv1d50/ixrt/common.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -import pycuda.driver as cuda - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - allocation = cuda.mem_alloc(size) - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - } - print(f"binding {i}, name : {name} dtype : 
{np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/resnetv1d50/ixrt/inference.py b/models/cv/classification/resnetv1d50/ixrt/inference.py deleted file mode 100644 index 6d34c88c..00000000 --- a/models/cv/classification/resnetv1d50/ixrt/inference.py +++ /dev/null @@ -1,159 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -import pycuda.autoinit -import pycuda.driver as cuda -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += 
batch_data.shape[0] - - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - e2e_time = end_time - start_time - print(F"E2E time : {e2e_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 
0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/resnetv1d50/ixrt/modify_batchsize.py b/models/cv/classification/resnetv1d50/ixrt/modify_batchsize.py deleted file mode 100644 index 5466cb07..00000000 --- a/models/cv/classification/resnetv1d50/ixrt/modify_batchsize.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. 
- dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/resnetv1d50/ixrt/quant.py b/models/cv/classification/resnetv1d50/ixrt/quant.py deleted file mode 100644 index c728c7a1..00000000 --- a/models/cv/classification/resnetv1d50/ixrt/quant.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-"""这是一个高度自动化的 PPQ 量化的入口脚本,将你的模型和数据按要求进行打包: - -在自动化 API 中,我们使用 QuantizationSetting 对象传递量化参数。 - -This file will show you how to quantize your network with PPQ - You should prepare your model and calibration dataset as follow: - - ~/working/model.onnx <-- your model - ~/working/data/*.npy or ~/working/data/*.bin <-- your dataset - -if you are using caffe model: - ~/working/model.caffemdoel <-- your model - ~/working/model.prototext <-- your model - -### MAKE SURE YOUR INPUT LAYOUT IS [N, C, H, W] or [C, H, W] ### - -quantized model will be generated at: ~/working/quantized.onnx -""" -from ppq import * -from ppq.api import * -import os -from calibration_dataset import getdataloader -import argparse -import random -import numpy as np -import torch - - -def setseed(seed=42): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], - default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=20) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=224) - args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - - -config = parse_args() - -# modify configuration below: -WORKING_DIRECTORY = 'checkpoints' # choose your working directory -TARGET_PLATFORM = TargetPlatform.TRT_INT8 # choose your target platform -MODEL_TYPE = NetworkFramework.ONNX # or NetworkFramework.CAFFE -INPUT_LAYOUT = 'chw' # input data layout, 
chw or hwc -NETWORK_INPUTSHAPE = [32, 3, 224, 224] # input shape of your network -EXECUTING_DEVICE = 'cuda' # 'cuda' or 'cpu'. -REQUIRE_ANALYSE = False -TRAINING_YOUR_NETWORK = False # 是否需要 Finetuning 一下你的网络 -# ------------------------------------------------------------------- -# 加载你的模型文件,PPQ 将会把 onnx 或者 caffe 模型文件解析成自己的格式 -# 如果你正使用 pytorch, tensorflow 等框架,你可以先将模型导出成 onnx -# 使用 torch.onnx.export 即可,如果你在导出 torch 模型时发生错误,欢迎与我们联系。 -# ------------------------------------------------------------------- -graph = None -if MODEL_TYPE == NetworkFramework.ONNX: - graph = load_onnx_graph(onnx_import_file=config.model) -if MODEL_TYPE == NetworkFramework.CAFFE: - graph = load_caffe_graph( - caffemodel_path=os.path.join(WORKING_DIRECTORY, 'model.caffemodel'), - prototxt_path=os.path.join(WORKING_DIRECTORY, 'model.prototxt')) -assert graph is not None, 'Graph Loading Error, Check your input again.' - -# ------------------------------------------------------------------- -# SETTING 对象用于控制 PPQ 的量化逻辑,主要描述了图融合逻辑、调度方案、量化细节策略等 -# 当你的网络量化误差过高时,你需要修改 SETTING 对象中的属性来进行特定的优化 -# ------------------------------------------------------------------- -QS = QuantizationSettingFactory.default_setting() - -# ------------------------------------------------------------------- -# 下面向你展示了如何使用 finetuning 过程提升量化精度 -# 在 PPQ 中我们提供了十余种算法用来帮助你恢复精度 -# 开启他们的方式都是 QS.xxxx = True -# 按需使用,不要全部打开,容易起飞 -# ------------------------------------------------------------------- -if TRAINING_YOUR_NETWORK: - QS.lsq_optimization = True # 启动网络再训练过程,降低量化误差 - QS.lsq_optimization_setting.steps = 500 # 再训练步数,影响训练时间,500 步大概几分钟 - QS.lsq_optimization_setting.collecting_device = 'cuda' # 缓存数据放在那,cuda 就是放在gpu,如果显存超了你就换成 'cpu' - - -dataloader = getdataloader(config.dataset_dir, config.step, batch_size=config.bsz, img_sz=config.imgsz) -# ENABLE CUDA KERNEL 会加速量化效率 3x ~ 10x,但是你如果没有装相应编译环境的话是编译不了的 -# 你可以尝试安装编译环境,或者在不启动 CUDA KERNEL 的情况下完成量化:移除 with ENABLE_CUDA_KERNEL(): 即可 -with ENABLE_CUDA_KERNEL(): - print('网络正量化中,根据你的量化配置,这将需要一段时间:') 
- quantized = quantize_native_model( - setting=QS, # setting 对象用来控制标准量化逻辑 - model=graph, - calib_dataloader=dataloader, - calib_steps=config.step, - input_shape=NETWORK_INPUTSHAPE, # 如果你的网络只有一个输入,使用这个参数传参 - inputs=None, - # 如果你的网络有多个输入,使用这个参数传参,就是 input_shape=None, inputs=[torch.zeros(1,3,224,224), torch.zeros(1,3,224,224)] - collate_fn=lambda x: x[0].to(EXECUTING_DEVICE), # collate_fn 跟 torch dataloader 的 collate fn 是一样的,用于数据预处理, - # 你当然也可以用 torch dataloader 的那个,然后设置这个为 None - platform=TARGET_PLATFORM, - device=EXECUTING_DEVICE, - do_quantize=True) - - # ------------------------------------------------------------------- - # 如果你需要执行量化后的神经网络并得到结果,则需要创建一个 executor - # 这个 executor 的行为和 torch.Module 是类似的,你可以利用这个东西来获取执行结果 - # 请注意,必须在 export 之前执行此操作。 - # ------------------------------------------------------------------- - executor = TorchExecutor(graph=quantized, device=EXECUTING_DEVICE) - # output = executor.forward(input) - - # ------------------------------------------------------------------- - # PPQ 计算量化误差时,使用信噪比的倒数作为指标,即噪声能量 / 信号能量 - # 量化误差 0.1 表示在整体信号中,量化噪声的能量约为 10% - # 你应当注意,在 graphwise_error_analyse 分析中,我们衡量的是累计误差 - # 网络的最后一层往往都具有较大的累计误差,这些误差是其前面的所有层所共同造成的 - # 你需要使用 layerwise_error_analyse 逐层分析误差的来源 - # ------------------------------------------------------------------- - print('正计算网络量化误差(SNR),最后一层的误差应小于 0.1 以保证量化精度:') - reports = graphwise_error_analyse( - graph=quantized, running_device=EXECUTING_DEVICE, steps=32, - dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE)) - for op, snr in reports.items(): - if snr > 0.1: ppq_warning(f'层 {op} 的累计量化误差显著,请考虑进行优化') - - if REQUIRE_ANALYSE: - print('正计算逐层量化误差(SNR),每一层的独立量化误差应小于 0.1 以保证量化精度:') - layerwise_error_analyse(graph=quantized, running_device=EXECUTING_DEVICE, - interested_outputs=None, - dataloader=dataloader, collate_fn=lambda x: x.to(EXECUTING_DEVICE)) - - # ------------------------------------------------------------------- - # 使用 export_ppq_graph 函数来导出量化后的模型 - # PPQ 会根据你所选择的导出平台来修改模型格式 - 
# ------------------------------------------------------------------- - print('网络量化结束,正在生成目标文件:') - export_ppq_graph( - graph=quantized, platform=TARGET_PLATFORM, - graph_save_to=os.path.join(config.save_dir, f"quantized_{config.model_name}.onnx"), - config_save_to=os.path.join(config.save_dir, 'quant_cfg.json')) diff --git a/models/cv/classification/resnetv1d50/ixrt/refine_model.py b/models/cv/classification/resnetv1d50/ixrt/refine_model.py deleted file mode 100644 index 6f1e6c2f..00000000 --- a/models/cv/classification/resnetv1d50/ixrt/refine_model.py +++ /dev/null @@ -1,290 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import os -import argparse -import dataclasses - -import torch -import onnx - -from refine_utils.matmul_to_gemm_pass import FusedGemmPass -from refine_utils.linear_pass import FusedLinearPass - -from refine_utils.common import * - -def get_constant_input_name_of_operator(graph: Graph, operator: Operator): - const = None - for input in operator.inputs: - if not graph.containe_var(input): - continue - - if not graph.is_leaf_variable(input): - continue - - input_var = graph.get_variable(input) - if input_var.value is not None: - const = input - return const - -class FuseLayerNormPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - find_sequence_subgraph( - graph, - [OP.REDUCE_MEAN, OP.SUB, OP.POW, OP.REDUCE_MEAN, OP.ADD, OP.SQRT, OP.DIV, OP.MUL, OP.ADD], - self.fuse_layer_norm, - strict=False - ) - return graph - - def fuse_layer_norm(self, graph: Graph, pattern: PatternGraph): - # 检查 REDUCE_MEAN 的输入是否和 SUB 的输入是一致的 - if pattern.nodes[0].operator.inputs[0] != pattern.nodes[1].operator.inputs[0]: - return - - # 检查 POW 的输入是否和 DIV 的输入是一致的 - if pattern.nodes[2].operator.inputs[0] != pattern.nodes[6].operator.inputs[0]: - return - - # 检查部分算子的输出是否被多个算子使用 - nodes = pattern.nodes - for node in [nodes[0]] + nodes[2:-1]: - next_ops = graph.get_next_operators(node.operator) - if len(next_ops) > 1: - return - - eps = None - for input in nodes[4].operator.inputs: - input_var = graph.get_variable(input) - if input_var.value is not None and graph.is_leaf_variable(input): - eps = to_py_type(input_var.value) - - scale = get_constant_input_name_of_operator(graph, nodes[-2].operator) - bias = get_constant_input_name_of_operator(graph, nodes[-1].operator) - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - bias_var = graph.get_variable(bias) - print(bias_var) - - attributes = { - "axis": nodes[0].operator.attributes.axes, - "epsilon": eps, - } - - - layer_norm_op = self.transform.make_operator( - 
op_type="LayerNormalization", - inputs=[nodes[0].operator.inputs[0], scale, bias], - outputs=[nodes[-1].operator.outputs[0]], - **attributes - ) - - self.transform.add_operator(layer_norm_op) - -class FusedGeluPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True - ) - return graph - - def fuse_gelu(self, graph: Graph, pattern: PatternGraph): - nodes = pattern.nodes - prev_op = self.transform.get_previous_operators(nodes[0].operator)[0] - next_ops = self.transform.get_next_operators(prev_op) - if len(next_ops) != 2: - return - - if nodes[0].operator not in next_ops or nodes[3].operator not in next_ops: - return - - gelu_op_input = None - for input in nodes[3].operator.inputs: - if input in nodes[0].operator.inputs: - gelu_op_input = input - break - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - gelu_op = self.transform.make_operator( - op_type=OP.GELU, - inputs=[gelu_op_input], - outputs=[nodes[-1].operator.outputs[0]] - ) - self.transform.add_operator(gelu_op) - -@dataclasses.dataclass -class NormalizeAttr(BaseOperatorAttr): - p: float = 2.0 - epsilon: float = 1e-12 - axis: int = 1 - - -@registe_operator(OP.GELU) -class GeluOperator(BaseOperator): - - def call( - self, - executor, - operator: Operator, - inputs: List, - attr: NormalizeAttr, - ): - return F.gelu(inputs[0]) - - def convert_onnx_operator( - self, ir_graph: Graph, onnx_graph: onnx.GraphProto, node: onnx.NodeProto - ) -> Operator: - return default_converter(ir_graph, onnx_graph, node, attr_cls=attr.EmptyAttr) - - def quantize( - self, - graph: Graph, - op: Operator, - operator_observer_config: QuantOperatorObserverConfig, - quant_outputs: bool = False, - ): - return quant_single_input_operator(graph, op, operator_observer_config, quant_outputs=quant_outputs) - - - -class 
ClearUnsedVariables(BasePass): - - def process(self, graph: Graph) -> Graph: - vars = list(graph.variables) - - for var in vars: - if len(graph.get_dst_operators(var)) == 0 and graph.is_leaf_variable(var): - graph.delete_variable(var) - - quant_params = list(graph.quant_parameters.keys()) - for var in quant_params: - if not graph.containe_var(var): - graph.quant_parameters.pop(var) - - return graph - -class FormatLayerNorm(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if "LayerNorm" in op.op_type: - self.format_layer_norm(graph, op) - return graph - - def format_layer_norm(self, graph, operator): - if not hasattr(operator.attributes, "axis"): - return - if isinstance(operator.attributes.axis, (tuple, list)): - operator.attributes.axis = operator.attributes.axis[0] - -class FormatReshape(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if op.op_type == "Reshape": - self.format_reshape(graph, op) - - return graph - - def format_reshape(self, graph, operator): - shape = graph.get_variable(operator.inputs[1]) - shape.value = torch.tensor(shape.value, dtype=torch.int64) - -class FormatScalar(BasePass): - - def process(self, graph: Graph): - for var in graph.variables.values(): - var: Variable - use_ops = graph.get_dst_operators(var) - - if len(use_ops) == 0: - continue - - if use_ops[0].op_type not in [OP.MUL, OP.ADD, OP.GATHER]: - continue - - if var.value is not None and var.value.ndim == 0: - var.value = var.value.reshape(1) - print(f"Reshape scalar to tensor for {var.name}.") - - return graph - -class RenamePass(BasePass): - - def process(self, graph:Graph): - - names = [name for name in graph.operators.keys()] - for old_name in names: - new_name = old_name.replace("/", "#") - - graph.rename_operator(old_name, new_name) - - names = [name for name in graph.variables.keys()] - for name in names: - new_name = name.replace("/", ".").replace("Output", 
"out").replace("output", "out") - - graph.rename_vaiable(name, new_name, - with_variables=True, - with_operator_outputs=True) - - return graph - -def create_pipeline(example_inputs): - return PassSequence( - # FuseLayerNormPass(), - FusedGeluPass(), - - # ClearUnsedVariables(), - # FormatLayerNorm(), - # FormatReshape(), - # FormatScalar(), - # RenamePass() - ) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--onnx_path", type=str) - parser.add_argument("--dst_onnx_path", type=str) - - parser.add_argument("--bsz", type=int, default=8, - help="Batch size") - parser.add_argument("--imgsz", type=int, default=224, - help="Image size") - - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - - example_inputs = torch.randn(args.bsz, 3, args.imgsz, args.imgsz) - - refine_pipline = Pipeline( - create_source(f"{args.onnx_path}", example_inputs=example_inputs), - create_pipeline(example_inputs), - create_target( - f"{args.dst_onnx_path}", - example_inputs=example_inputs, - ) - ) - refine_pipline.run() - - print(f"refine the model, input shape={example_inputs.shape}") diff --git a/models/cv/classification/resnetv1d50/ixrt/refine_utils/__init__.py b/models/cv/classification/resnetv1d50/ixrt/refine_utils/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/models/cv/classification/resnetv1d50/ixrt/refine_utils/common.py b/models/cv/classification/resnetv1d50/ixrt/refine_utils/common.py deleted file mode 100644 index 2af19a14..00000000 --- a/models/cv/classification/resnetv1d50/ixrt/refine_utils/common.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -from typing import Union, Callable, List - -from tensorrt.deploy.api import * -from tensorrt.deploy.backend.onnx.converter import default_converter -from tensorrt.deploy.backend.torch.executor.operators._operators import to_py_type -from tensorrt.deploy.ir.operator_attr import BaseOperatorAttr, EmptyAttr -from tensorrt.deploy.ir.operator_type import OperatorType as OP -from tensorrt.deploy.ir import operator_attr as attr, Operator, generate_operator_name -from tensorrt.deploy.fusion import BasePass, PatternGraph, build_sequence_graph, GraphMatcher, PassSequence -from tensorrt.deploy.ir import Graph -from tensorrt.deploy.quantizer.quant_operator.base import quant_single_input_operator -from tensorrt.deploy.backend.onnx.converter import convert_onnx_operator - -def find_sequence_subgraph(graph, - pattern: Union[List[str], PatternGraph], - callback: Callable[[Graph, PatternGraph], None], - strict=True): - if isinstance(pattern, List): - pattern = build_sequence_graph(pattern) - - matcher = GraphMatcher(pattern, strict=strict) - return matcher.findall(graph, callback) \ No newline at end of file diff --git a/models/cv/classification/resnetv1d50/ixrt/refine_utils/linear_pass.py b/models/cv/classification/resnetv1d50/ixrt/refine_utils/linear_pass.py deleted file mode 100644 index 29b5e4a9..00000000 --- a/models/cv/classification/resnetv1d50/ixrt/refine_utils/linear_pass.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import dataclasses - -from refine_utils.common import * - -# AXB=C, Only for B is initializer - -class FusedLinearPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.MATMUL, OP.ADD], callback=self.to_linear_with_bias, strict=True - ) - find_sequence_subgraph( - graph, pattern=[OP.MATMUL], callback=self.to_linear, strict=True - ) - return graph - - def to_linear_with_bias(self, graph, pattern: PatternGraph): - matmul = pattern.nodes[0] - add = pattern.nodes[1] - if len(add.operator.inputs) != 2: - return - - b_var = graph.get_variable(matmul.operator.inputs[1]) - if not graph.is_leaf_variable(b_var) or b_var.value is None: - return - - if b_var.value.ndim != 2: - return - - bias_var = None - for input in add.operator.inputs: - if input not in matmul.operator.outputs: - bias_var = input - - inputs = matmul.operator.inputs - inputs.append(bias_var) - outputs = add.operator.outputs - - b_var.value = b_var.value.transpose(1, 0) - b_var.shape[0],b_var.shape[1] = b_var.shape[1],b_var.shape[0] - - hidden_size = b_var.shape[1] - linear_dim = b_var.shape[0] - - attributes = { - "hidden_size": hidden_size, - "linear_dim": linear_dim, - "has_bias": 1, - "act_type":"none" - } - - self.transform.make_operator( - "LinearFP16", - inputs=inputs, - outputs=outputs, - **attributes - ) - - self.transform.delete_operator(add.operator) - 
self.transform.delete_operator(matmul.operator) - - def to_linear(self, graph, pattern: PatternGraph): - matmul = pattern.nodes[0] - if len(matmul.operator.inputs) != 2: - return - - b_var = graph.get_variable(matmul.operator.inputs[1]) - if not graph.is_leaf_variable(b_var) or b_var.value is None: - return - - if b_var.value.ndim != 2: - return - - attributes = { - "hidden_size": hidden_size, - "linear_dim": linear_dim, - "has_bias": 0, - "act_type": "none" - } - - b_var.value = b_var.value.transpose(1, 0) - b_var.shape[0],b_var.shape[1] = b_var.shape[1], b_var.shape[0] - - hidden_size = b_var.shape[1] - linear_dim = b_var.shape[0] - - op = self.transform.make_operator( - op_type = "LinearFP16", - inputs = pattern.nodes[0].operator.inputs, - outputs=[pattern.nodes[-1].operator.outputs[0]], - **attributes - ) - - self.transform.add_operator(op) - - self.transform.delete_operator(matmul.operator) \ No newline at end of file diff --git a/models/cv/classification/resnetv1d50/ixrt/refine_utils/matmul_to_gemm_pass.py b/models/cv/classification/resnetv1d50/ixrt/refine_utils/matmul_to_gemm_pass.py deleted file mode 100644 index 4ebfac4d..00000000 --- a/models/cv/classification/resnetv1d50/ixrt/refine_utils/matmul_to_gemm_pass.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-from refine_utils.common import * - -# -# Common pattern Matmul to Gemm -# -class FusedGemmPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.MATMUL], callback=self.to_gemm, strict=True - ) - return graph - - def to_gemm(self, graph, pattern: PatternGraph): - matmul_op = pattern.nodes[0] - inputs = matmul_op.operator.inputs - outputs = matmul_op.operator.outputs - - if len(inputs)!=2 and len(outputs)!=1: - return - - for input in inputs: - if self.transform.is_leaf_variable(input): - return - - print(f"{self.transform.get_variable(inputs[0]).shape} {self.transform.get_variable(inputs[1]).shape}") - self.transform.delete_operator(matmul_op.operator) - - op = self.transform.make_operator( - op_type = "Gemm", - inputs = inputs, - outputs = outputs, - alpha = 1, - beta = 1, - transB = 1 - ) - - self.transform.add_operator(op) \ No newline at end of file diff --git a/models/cv/classification/resnetv1d50/ixrt/requirements.txt b/models/cv/classification/resnetv1d50/ixrt/requirements.txt deleted file mode 100644 index 7d0f090a..00000000 --- a/models/cv/classification/resnetv1d50/ixrt/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -tqdm -onnx -onnxsim -tabulate -ppq -mmpretrain -mmcv-lite -pycuda -transformers==4.37.1 \ No newline at end of file diff --git a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_accuracy.sh b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_accuracy.sh index b743d708..48292493 100644 --- a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_accuracy.sh +++ b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_accuracy.sh @@ -51,6 +51,8 @@ echo RUN_DIR : ${RUN_DIR} echo CONFIG_DIR : ${CONFIG_DIR} echo ====================== Model Info ====================== echo Model Name : ${MODEL_NAME} +echo Model Input Name : ${MODEL_INPUT_NAME} +echo Model Output Name : 
${MODEL_OUTPUT_NAME} echo Onnx Path : ${ORIGINE_MODEL} step=0 @@ -69,6 +71,34 @@ else echo " "Generate ${SIM_MODEL} fi +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + # Change Batchsize let step++ echo; @@ -111,4 +141,4 @@ python3 ${RUN_DIR}/inference.py \ --acc_target ${TGT} \ --bsz ${BSZ}; check_status -exit ${EXIT_STATUS} +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_performance.sh b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_performance.sh index e7a4f1a7..c843057d 100644 --- a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_performance.sh +++ b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_fp16_performance.sh @@ -51,6 +51,8 @@ echo RUN_DIR : ${RUN_DIR} echo CONFIG_DIR : ${CONFIG_DIR} echo ====================== Model Info ====================== echo Model Name : ${MODEL_NAME} +echo Model Input Name : ${MODEL_INPUT_NAME} +echo Model Output Name : ${MODEL_OUTPUT_NAME} echo Onnx Path : ${ORIGINE_MODEL} step=0 @@ -69,6 +71,34 @@ else echo " "Generate ${SIM_MODEL} fi +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z 
${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + # Change Batchsize let step++ echo; @@ -111,4 +141,4 @@ python3 ${RUN_DIR}/inference.py \ --fps_target ${TGT} \ --bsz ${BSZ}; check_status -exit ${EXIT_STATUS} +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_accuracy.sh b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_accuracy.sh index 2b2db01a..a66d6a25 100644 --- a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_accuracy.sh +++ b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_accuracy.sh @@ -13,7 +13,6 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. 
-set -x EXIT_STATUS=0 check_status() { @@ -29,7 +28,7 @@ WARM_UP=0 LOOP_COUNT=-1 RUN_MODE=ACC PRECISION=int8 -export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python + # Update arguments index=0 options=$@ @@ -44,7 +43,6 @@ do done source ${CONFIG_DIR} -echo ${QUANT_OBSERVER} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} @@ -62,15 +60,16 @@ SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx # Simplify Model let step++ - echo [STEP ${step}] : Simplify Model - if [ -f ${SIM_MODEL} ];then - echo " "Simplify Model, ${SIM_MODEL} has been existed - else - python3 ${RUN_DIR}/simplify_model.py \ - --origin_model $ORIGINE_MODEL \ - --output_model ${SIM_MODEL} - echo " "Generate ${SIM_MODEL} - fi +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi # Quant Model if [ $PRECISION == "int8" ];then @@ -100,36 +99,36 @@ if [ $PRECISION == "int8" ];then fi fi - # Change Batchsize - let step++ - echo; - echo [STEP ${step}] : Change Batchsize - FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx - if [ -f $FINAL_MODEL ];then - echo " "Change Batchsize Skip, $FINAL_MODEL has been existed - else - python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ - --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} - echo " "Generate ${FINAL_MODEL} - fi +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi - # Build Engine - let step++ - echo; - echo [STEP ${step}] : 
Build Engine - ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine - if [ -f $ENGINE_FILE ];then - echo " "Build Engine Skip, $ENGINE_FILE has been existed - else - python3 ${RUN_DIR}/build_i8_engine.py \ - --onnx ${FINAL_MODEL} \ - --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \ - --engine ${ENGINE_FILE} - echo " "Generate Engine ${ENGINE_FILE} - fi +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi # Inference -# let step++ +let step++ echo; echo [STEP ${step}] : Inference python3 ${RUN_DIR}/inference.py \ @@ -142,4 +141,4 @@ python3 ${RUN_DIR}/inference.py \ --acc_target ${TGT} \ --bsz ${BSZ}; check_status -exit ${EXIT_STATUS} +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_performance.sh b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_performance.sh index bec51520..e578762e 100644 --- a/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_performance.sh +++ b/models/cv/classification/resnetv1d50/ixrt/scripts/infer_resnetv1d50_int8_performance.sh @@ -28,7 +28,7 @@ WARM_UP=3 LOOP_COUNT=20 RUN_MODE=FPS PRECISION=int8 -export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python + # Update arguments index=0 options=$@ @@ -43,7 +43,6 @@ do done source ${CONFIG_DIR} -echo ${QUANT_OBSERVER} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} @@ -61,15 +60,16 @@ SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx # Simplify Model let step++ - echo [STEP ${step}] : Simplify Model - if [ -f ${SIM_MODEL} ];then - echo " "Simplify Model, 
${SIM_MODEL} has been existed - else - python3 ${RUN_DIR}/simplify_model.py \ - --origin_model $ORIGINE_MODEL \ - --output_model ${SIM_MODEL} - echo " "Generate ${SIM_MODEL} - fi +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi # Quant Model if [ $PRECISION == "int8" ];then @@ -99,36 +99,36 @@ if [ $PRECISION == "int8" ];then fi fi - # Change Batchsize - let step++ - echo; - echo [STEP ${step}] : Change Batchsize - FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx - if [ -f $FINAL_MODEL ];then - echo " "Change Batchsize Skip, $FINAL_MODEL has been existed - else - python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ - --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} - echo " "Generate ${FINAL_MODEL} - fi +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi - # Build Engine - let step++ - echo; - echo [STEP ${step}] : Build Engine - ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine - if [ -f $ENGINE_FILE ];then - echo " "Build Engine Skip, $ENGINE_FILE has been existed - else - python3 ${RUN_DIR}/build_i8_engine.py \ - --onnx ${FINAL_MODEL} \ - --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \ - --engine ${ENGINE_FILE} - echo " "Generate Engine ${ENGINE_FILE} - fi +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " 
"Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi # Inference -# let step++ +let step++ echo; echo [STEP ${step}] : Inference python3 ${RUN_DIR}/inference.py \ @@ -138,7 +138,7 @@ python3 ${RUN_DIR}/inference.py \ --warm_up=${WARM_UP} \ --loop_count ${LOOP_COUNT} \ --test_mode ${RUN_MODE} \ - --acc_target ${TGT} \ + --fps_target ${TGT} \ --bsz ${BSZ}; check_status exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/resnetv1d50/ixrt/simplify_model.py b/models/cv/classification/resnetv1d50/ixrt/simplify_model.py deleted file mode 100644 index 9948a9fa..00000000 --- a/models/cv/classification/resnetv1d50/ixrt/simplify_model.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/resnext101_32x8d/ixrt/README.md b/models/cv/classification/resnext101_32x8d/ixrt/README.md index 520a0b7f..82bc2db8 100644 --- a/models/cv/classification/resnext101_32x8d/ixrt/README.md +++ b/models/cv/classification/resnext101_32x8d/ixrt/README.md @@ -21,19 +21,30 @@ Dataset: to download the validation dat ### Install Dependencies ```bash -pip3 install -r requirements.txt +# Install libGL +## CentOS +yum install -y mesa-libGL +## Ubuntu +apt install -y libgl1-mesa-glx + +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash -python3 export.py --weight resnext101_32x8d-8ba56ff5.pth --output resnext101_32x8d.onnx +mkdir checkpoints +python3 ../../ixrt_common/export.py --model-name resnext101_32x8d --weight resnext101_32x8d-8ba56ff5.pth --output checkpoints/resnext101_32x8d.onnx ``` ## Model Inference ```bash -export DATASETS_DIR=/Path/to/imagenet_val/ +export PROJ_DIR=./ +export DATASETS_DIR=/path/to/imagenet_val/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/RESNEXT101_32X8D_CONFIG ``` ### FP16 diff --git a/models/cv/classification/resnext101_32x8d/ixrt/build_engine.py b/models/cv/classification/resnext101_32x8d/ixrt/build_engine.py deleted file mode 100644 index 109ad08c..00000000 --- 
a/models/cv/classification/resnext101_32x8d/ixrt/build_engine.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git 
a/models/cv/classification/resnext101_32x8d/ixrt/calibration_dataset.py b/models/cv/classification/resnext101_32x8d/ixrt/calibration_dataset.py deleted file mode 100644 index 46449ba9..00000000 --- a/models/cv/classification/resnext101_32x8d/ixrt/calibration_dataset.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = 
self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/resnext101_32x8d/ixrt/ci/prepare.sh b/models/cv/classification/resnext101_32x8d/ixrt/ci/prepare.sh index 5fd42f16..1e6a5001 100644 --- a/models/cv/classification/resnext101_32x8d/ixrt/ci/prepare.sh +++ b/models/cv/classification/resnext101_32x8d/ixrt/ci/prepare.sh @@ -25,5 +25,6 @@ else echo "Not Support Os" fi -pip install -r requirements.txt -python3 export.py --weight resnext101_32x8d-8ba56ff5.pth --output resnext101_32x8d.onnx \ No newline at end of file +pip3 install 
-r ../../ixrt_common/requirements.txt +mkdir checkpoints +python3 ../../ixrt_common/export.py --model-name resnext101_32x8d --weight resnext101_32x8d-8ba56ff5.pth --output checkpoints/resnext101_32x8d.onnx \ No newline at end of file diff --git a/models/cv/classification/resnext101_32x8d/ixrt/export.py b/models/cv/classification/resnext101_32x8d/ixrt/export.py deleted file mode 100644 index e692c84c..00000000 --- a/models/cv/classification/resnext101_32x8d/ixrt/export.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import torch -import torchvision -import argparse - -def parse_args(): - parser = argparse.ArgumentParser() - - parser.add_argument("--weight", - type=str, - required=True, - help="pytorch model weight.") - - parser.add_argument("--output", - type=str, - required=True, - help="export onnx model path.") - - args = parser.parse_args() - return args - -def main(): - args = parse_args() - - model = torchvision.models.resnext101_32x8d() - model.load_state_dict(torch.load(args.weight)) - model.eval() - - input_names = ['input'] - output_names = ['output'] - dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} - dummy_input = torch.randn(1, 3, 224, 224) - - torch.onnx.export( - model, - dummy_input, - args.output, - input_names = input_names, - dynamic_axes = dynamic_axes, - output_names = output_names, - opset_version=13 - ) - - print("Export onnx model successfully! ") - -if __name__ == "__main__": - main() diff --git a/models/cv/classification/resnext101_32x8d/ixrt/inference.py b/models/cv/classification/resnext101_32x8d/ixrt/inference.py deleted file mode 100644 index ef56158a..00000000 --- a/models/cv/classification/resnext101_32x8d/ixrt/inference.py +++ /dev/null @@ -1,176 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -from cuda import cuda, cudart -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += batch_data.shape[0] - (err,) = 
cudart.cudaMemcpy( - inputs[0]["allocation"], - batch_data, - batch_data.nbytes, - cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, - ) - assert err == cudart.cudaError_t.cudaSuccess - # cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - (err,) = cudart.cudaMemcpy( - output, - outputs[0]["allocation"], - outputs[0]["nbytes"], - cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, - ) - assert err == cudart.cudaError_t.cudaSuccess - # cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - e2e_time = end_time - start_time - print(F"E2E time : {e2e_time:.3f} seconds") - err, = cudart.cudaFree(inputs[0]["allocation"]) - assert err == cudart.cudaError_t.cudaSuccess - err, = cudart.cudaFree(outputs[0]["allocation"]) - assert err == cudart.cudaError_t.cudaSuccess - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, 
help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/resnext101_32x8d/ixrt/modify_batchsize.py b/models/cv/classification/resnext101_32x8d/ixrt/modify_batchsize.py deleted file mode 100644 index 2e8d086b..00000000 --- a/models/cv/classification/resnext101_32x8d/ixrt/modify_batchsize.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. 
- # Add checks as needed. - dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/resnext101_32x8d/ixrt/requirements.txt b/models/cv/classification/resnext101_32x8d/ixrt/requirements.txt deleted file mode 100644 index 54599ec2..00000000 --- a/models/cv/classification/resnext101_32x8d/ixrt/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -onnx -tqdm -tqdm -onnx -onnxsim -tabulate -ppq -cuda-python \ No newline at end of file diff --git a/models/cv/classification/resnext101_32x8d/ixrt/scripts/infer_resnext101_32x8d_fp16_accuracy.sh b/models/cv/classification/resnext101_32x8d/ixrt/scripts/infer_resnext101_32x8d_fp16_accuracy.sh index ce017c77..5e591a30 100644 --- a/models/cv/classification/resnext101_32x8d/ixrt/scripts/infer_resnext101_32x8d_fp16_accuracy.sh +++ b/models/cv/classification/resnext101_32x8d/ixrt/scripts/infer_resnext101_32x8d_fp16_accuracy.sh @@ -42,11 +42,6 @@ do esac done -PROJ_DIR=${PROJ_DIR:-"."} -DATASETS_DIR="${DATASETS_DIR}" -CHECKPOINTS_DIR="${PROJ_DIR}" -RUN_DIR="${PROJ_DIR}" -CONFIG_DIR="${PROJ_DIR}/config/RESNEXT101_32X8D_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} diff --git 
a/models/cv/classification/resnext101_32x8d/ixrt/scripts/infer_resnext101_32x8d_fp16_performance.sh b/models/cv/classification/resnext101_32x8d/ixrt/scripts/infer_resnext101_32x8d_fp16_performance.sh index 48cd1de5..01aeb89b 100644 --- a/models/cv/classification/resnext101_32x8d/ixrt/scripts/infer_resnext101_32x8d_fp16_performance.sh +++ b/models/cv/classification/resnext101_32x8d/ixrt/scripts/infer_resnext101_32x8d_fp16_performance.sh @@ -42,11 +42,6 @@ do esac done -PROJ_DIR=${PROJ_DIR:-"."} -DATASETS_DIR="${DATASETS_DIR}" -CHECKPOINTS_DIR="${PROJ_DIR}" -RUN_DIR="${PROJ_DIR}" -CONFIG_DIR="${PROJ_DIR}/config/RESNEXT101_32X8D_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} diff --git a/models/cv/classification/resnext101_32x8d/ixrt/simplify_model.py b/models/cv/classification/resnext101_32x8d/ixrt/simplify_model.py deleted file mode 100644 index bef33576..00000000 --- a/models/cv/classification/resnext101_32x8d/ixrt/simplify_model.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/resnext101_64x4d/ixrt/README.md b/models/cv/classification/resnext101_64x4d/ixrt/README.md index 6551c222..30191dd4 100644 --- a/models/cv/classification/resnext101_64x4d/ixrt/README.md +++ b/models/cv/classification/resnext101_64x4d/ixrt/README.md @@ -21,19 +21,30 @@ Dataset: to download the validation dat ### Install Dependencies ```bash -pip3 install -r requirements.txt +# Install libGL +## CentOS +yum install -y mesa-libGL +## Ubuntu +apt install -y libgl1-mesa-glx + +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash -python3 export.py --weight resnext101_64x4d-173b62eb.pth --output resnext101_64x4d.onnx +mkdir checkpoints +python3 ../../ixrt_common/export.py --model-name resnext101_64x4d --weight resnext101_64x4d-173b62eb.pth --output checkpoints/resnext101_64x4d.onnx ``` ## Model Inference ```bash -export DATASETS_DIR=/Path/to/imagenet_val/ +export PROJ_DIR=./ +export DATASETS_DIR=/path/to/imagenet_val/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/RESNEXT101_64X4D_CONFIG ``` ### FP16 diff --git a/models/cv/classification/resnext101_64x4d/ixrt/build_engine.py b/models/cv/classification/resnext101_64x4d/ixrt/build_engine.py deleted file mode 100644 index 109ad08c..00000000 --- 
a/models/cv/classification/resnext101_64x4d/ixrt/build_engine.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git 
a/models/cv/classification/resnext101_64x4d/ixrt/calibration_dataset.py b/models/cv/classification/resnext101_64x4d/ixrt/calibration_dataset.py deleted file mode 100644 index 46449ba9..00000000 --- a/models/cv/classification/resnext101_64x4d/ixrt/calibration_dataset.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = 
self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/resnext101_64x4d/ixrt/ci/prepare.sh b/models/cv/classification/resnext101_64x4d/ixrt/ci/prepare.sh index 74cb6057..fbd526ab 100644 --- a/models/cv/classification/resnext101_64x4d/ixrt/ci/prepare.sh +++ b/models/cv/classification/resnext101_64x4d/ixrt/ci/prepare.sh @@ -25,5 +25,6 @@ else echo "Not Support Os" fi -pip install -r requirements.txt -python3 export.py --weight resnext101_64x4d-173b62eb.pth --output resnext101_64x4d.onnx \ No newline at end of file +pip3 install 
-r ../../ixrt_common/requirements.txt +mkdir checkpoints +python3 ../../ixrt_common/export.py --model-name resnext101_64x4d --weight resnext101_64x4d-173b62eb.pth --output checkpoints/resnext101_64x4d.onnx \ No newline at end of file diff --git a/models/cv/classification/resnext101_64x4d/ixrt/common.py b/models/cv/classification/resnext101_64x4d/ixrt/common.py deleted file mode 100644 index 5abaf512..00000000 --- a/models/cv/classification/resnext101_64x4d/ixrt/common.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -from cuda import cuda, cudart - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - err, allocation = cudart.cudaMalloc(size) - assert err == cudart.cudaError_t.cudaSuccess - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - "nbytes": size, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/resnext101_64x4d/ixrt/export.py b/models/cv/classification/resnext101_64x4d/ixrt/export.py deleted file mode 100644 index 3f7b1c19..00000000 --- a/models/cv/classification/resnext101_64x4d/ixrt/export.py +++ /dev/null @@ -1,61 +0,0 @@ -# 
Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import torch -import torchvision -import argparse - -def parse_args(): - parser = argparse.ArgumentParser() - - parser.add_argument("--weight", - type=str, - required=True, - help="pytorch model weight.") - - parser.add_argument("--output", - type=str, - required=True, - help="export onnx model path.") - - args = parser.parse_args() - return args - -def main(): - args = parse_args() - - model = torchvision.models.resnext101_64x4d() - model.load_state_dict(torch.load(args.weight)) - model.eval() - - input_names = ['input'] - output_names = ['output'] - dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} - dummy_input = torch.randn(1, 3, 224, 224) - - torch.onnx.export( - model, - dummy_input, - args.output, - input_names = input_names, - dynamic_axes = dynamic_axes, - output_names = output_names, - opset_version=13 - ) - - print("Export onnx model successfully! ") - -if __name__ == "__main__": - main() diff --git a/models/cv/classification/resnext101_64x4d/ixrt/inference.py b/models/cv/classification/resnext101_64x4d/ixrt/inference.py deleted file mode 100644 index ef56158a..00000000 --- a/models/cv/classification/resnext101_64x4d/ixrt/inference.py +++ /dev/null @@ -1,176 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -from cuda import cuda, cudart -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - 
exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += batch_data.shape[0] - (err,) = cudart.cudaMemcpy( - inputs[0]["allocation"], - batch_data, - batch_data.nbytes, - cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, - ) - assert err == cudart.cudaError_t.cudaSuccess - # cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - (err,) = cudart.cudaMemcpy( - output, - outputs[0]["allocation"], - outputs[0]["nbytes"], - cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, - ) - assert err == cudart.cudaError_t.cudaSuccess - # cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - e2e_time = end_time - start_time - print(F"E2E time : {e2e_time:.3f} seconds") - err, = cudart.cudaFree(inputs[0]["allocation"]) - assert err == cudart.cudaError_t.cudaSuccess - err, = cudart.cudaFree(outputs[0]["allocation"]) - assert err == cudart.cudaError_t.cudaSuccess - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target 
{config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/resnext101_64x4d/ixrt/modify_batchsize.py b/models/cv/classification/resnext101_64x4d/ixrt/modify_batchsize.py deleted file mode 100644 index 2e8d086b..00000000 --- a/models/cv/classification/resnext101_64x4d/ixrt/modify_batchsize.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. - dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/resnext101_64x4d/ixrt/requirements.txt b/models/cv/classification/resnext101_64x4d/ixrt/requirements.txt deleted file mode 100644 index 54599ec2..00000000 --- a/models/cv/classification/resnext101_64x4d/ixrt/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -onnx -tqdm -tqdm -onnx -onnxsim -tabulate -ppq -cuda-python \ No 
newline at end of file diff --git a/models/cv/classification/resnext101_64x4d/ixrt/scripts/infer_resnext101_64x4d_fp16_accuracy.sh b/models/cv/classification/resnext101_64x4d/ixrt/scripts/infer_resnext101_64x4d_fp16_accuracy.sh index 6be73073..5e591a30 100644 --- a/models/cv/classification/resnext101_64x4d/ixrt/scripts/infer_resnext101_64x4d_fp16_accuracy.sh +++ b/models/cv/classification/resnext101_64x4d/ixrt/scripts/infer_resnext101_64x4d_fp16_accuracy.sh @@ -42,11 +42,6 @@ do esac done -PROJ_DIR=${PROJ_DIR:-"."} -DATASETS_DIR="${DATASETS_DIR}" -CHECKPOINTS_DIR="${PROJ_DIR}" -RUN_DIR="${PROJ_DIR}" -CONFIG_DIR="${PROJ_DIR}/config/RESNEXT101_64X4D_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} diff --git a/models/cv/classification/resnext101_64x4d/ixrt/scripts/infer_resnext101_64x4d_fp16_performance.sh b/models/cv/classification/resnext101_64x4d/ixrt/scripts/infer_resnext101_64x4d_fp16_performance.sh index d5a0958a..01aeb89b 100644 --- a/models/cv/classification/resnext101_64x4d/ixrt/scripts/infer_resnext101_64x4d_fp16_performance.sh +++ b/models/cv/classification/resnext101_64x4d/ixrt/scripts/infer_resnext101_64x4d_fp16_performance.sh @@ -42,11 +42,6 @@ do esac done -PROJ_DIR=${PROJ_DIR:-"."} -DATASETS_DIR="${DATASETS_DIR}" -CHECKPOINTS_DIR="${PROJ_DIR}" -RUN_DIR="${PROJ_DIR}" -CONFIG_DIR="${PROJ_DIR}/config/RESNEXT101_64X4D_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} diff --git a/models/cv/classification/resnext101_64x4d/ixrt/simplify_model.py b/models/cv/classification/resnext101_64x4d/ixrt/simplify_model.py deleted file mode 100644 index bef33576..00000000 --- a/models/cv/classification/resnext101_64x4d/ixrt/simplify_model.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/resnext50_32x4d/ixrt/README.md b/models/cv/classification/resnext50_32x4d/ixrt/README.md index 09cafeea..964dfd5b 100644 --- a/models/cv/classification/resnext50_32x4d/ixrt/README.md +++ b/models/cv/classification/resnext50_32x4d/ixrt/README.md @@ -21,19 +21,30 @@ Dataset: to download the validation dat ### Install Dependencies ```bash -pip3 install -r requirements.txt +# Install libGL +## CentOS +yum install -y mesa-libGL +## Ubuntu +apt install -y libgl1-mesa-glx + +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash -python3 export.py --weight resnext50_32x4d-7cdf4587.pth --output resnext50_32x4d.onnx +mkdir checkpoints +python3 ../../ixrt_common/export.py --model-name resnext50_32x4d --weight resnext50_32x4d-7cdf4587.pth --output checkpoints/resnext50_32x4d.onnx ``` ## Model Inference ```bash -export DATASETS_DIR=/Path/to/imagenet_val/ +export PROJ_DIR=./ 
+export DATASETS_DIR=/path/to/imagenet_val/ +export CHECKPOINTS_DIR=./checkpoints +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/RESNEXT50_32X4D_CONFIG ``` ### FP16 diff --git a/models/cv/classification/resnext50_32x4d/ixrt/build_engine.py b/models/cv/classification/resnext50_32x4d/ixrt/build_engine.py deleted file mode 100644 index 038c15d5..00000000 --- a/models/cv/classification/resnext50_32x4d/ixrt/build_engine.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/resnext50_32x4d/ixrt/calibration_dataset.py b/models/cv/classification/resnext50_32x4d/ixrt/calibration_dataset.py deleted file mode 100644 index d7525d51..00000000 --- a/models/cv/classification/resnext50_32x4d/ixrt/calibration_dataset.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 
0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/resnext50_32x4d/ixrt/ci/prepare.sh b/models/cv/classification/resnext50_32x4d/ixrt/ci/prepare.sh index f264fc49..cd826b97 100644 --- a/models/cv/classification/resnext50_32x4d/ixrt/ci/prepare.sh +++ b/models/cv/classification/resnext50_32x4d/ixrt/ci/prepare.sh @@ -25,5 +25,6 @@ else echo "Not Support Os" fi -pip install -r requirements.txt -python3 export.py --weight /root/data/checkpoints/resnext50_32x4d-7cdf4587.pth --output resnext50_32x4d.onnx \ No newline at end of file +pip install -r ../../ixrt_common/requirements.txt +mkdir checkpoints +python3 ../../ixrt_common/export.py --model-name resnext50_32x4d --weight /root/data/checkpoints/resnext50_32x4d-7cdf4587.pth --output checkpoints/resnext50_32x4d.onnx \ No newline at end of file diff --git a/models/cv/classification/resnext50_32x4d/ixrt/common.py b/models/cv/classification/resnext50_32x4d/ixrt/common.py deleted file mode 100644 index 2279dc0c..00000000 --- a/models/cv/classification/resnext50_32x4d/ixrt/common.py +++ /dev/null @@ -1,81 +0,0 @@ -# 
Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -from cuda import cuda, cudart - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - err, allocation = cudart.cudaMalloc(size) - assert err == cudart.cudaError_t.cudaSuccess - binding = { - "index": i, - "name": name, - "dtype": 
np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - "nbytes": size, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/resnext50_32x4d/ixrt/export.py b/models/cv/classification/resnext50_32x4d/ixrt/export.py deleted file mode 100644 index 52130c66..00000000 --- a/models/cv/classification/resnext50_32x4d/ixrt/export.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import torch -import torchvision -import argparse - -def parse_args(): - parser = argparse.ArgumentParser() - - parser.add_argument("--weight", - type=str, - required=True, - help="pytorch model weight.") - - parser.add_argument("--output", - type=str, - required=True, - help="export onnx model path.") - - args = parser.parse_args() - return args - -def main(): - args = parse_args() - - model = torchvision.models.resnext50_32x4d() - model.load_state_dict(torch.load(args.weight)) - model.eval() - - input_names = ['input'] - output_names = ['output'] - dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} - dummy_input = torch.randn(1, 3, 224, 224) - - torch.onnx.export( - model, - dummy_input, - args.output, - input_names = input_names, - dynamic_axes = None, - output_names = output_names, - opset_version=13 - ) - - print("Export onnx model successfully! ") - -if __name__ == "__main__": - main() diff --git a/models/cv/classification/resnext50_32x4d/ixrt/inference.py b/models/cv/classification/resnext50_32x4d/ixrt/inference.py deleted file mode 100644 index e5a03525..00000000 --- a/models/cv/classification/resnext50_32x4d/ixrt/inference.py +++ /dev/null @@ -1,176 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -from cuda import cuda, cudart -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += batch_data.shape[0] - (err,) = 
cudart.cudaMemcpy( - inputs[0]["allocation"], - batch_data, - batch_data.nbytes, - cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, - ) - assert err == cudart.cudaError_t.cudaSuccess - # cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - (err,) = cudart.cudaMemcpy( - output, - outputs[0]["allocation"], - outputs[0]["nbytes"], - cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, - ) - assert err == cudart.cudaError_t.cudaSuccess - # cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - e2e_time = end_time - start_time - print(F"E2E time : {e2e_time:.3f} seconds") - err, = cudart.cudaFree(inputs[0]["allocation"]) - assert err == cudart.cudaError_t.cudaSuccess - err, = cudart.cudaFree(outputs[0]["allocation"]) - assert err == cudart.cudaError_t.cudaSuccess - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, 
help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/resnext50_32x4d/ixrt/modify_batchsize.py b/models/cv/classification/resnext50_32x4d/ixrt/modify_batchsize.py deleted file mode 100644 index 4ac42a30..00000000 --- a/models/cv/classification/resnext50_32x4d/ixrt/modify_batchsize.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. 
- # Add checks as needed. - dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/resnext50_32x4d/ixrt/requirements.txt b/models/cv/classification/resnext50_32x4d/ixrt/requirements.txt deleted file mode 100644 index 54599ec2..00000000 --- a/models/cv/classification/resnext50_32x4d/ixrt/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -onnx -tqdm -tqdm -onnx -onnxsim -tabulate -ppq -cuda-python \ No newline at end of file diff --git a/models/cv/classification/resnext50_32x4d/ixrt/scripts/infer_resnext50_32x4d_fp16_accuracy.sh b/models/cv/classification/resnext50_32x4d/ixrt/scripts/infer_resnext50_32x4d_fp16_accuracy.sh index 0bd3fab4..ba64b248 100644 --- a/models/cv/classification/resnext50_32x4d/ixrt/scripts/infer_resnext50_32x4d_fp16_accuracy.sh +++ b/models/cv/classification/resnext50_32x4d/ixrt/scripts/infer_resnext50_32x4d_fp16_accuracy.sh @@ -42,11 +42,6 @@ do esac done -PROJ_DIR=${PROJ_DIR:-"."} -DATASETS_DIR="${DATASETS_DIR}" -CHECKPOINTS_DIR="${PROJ_DIR}" -RUN_DIR="${PROJ_DIR}" -CONFIG_DIR="${PROJ_DIR}/config/RESNEXT50_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} diff --git a/models/cv/classification/resnext50_32x4d/ixrt/scripts/infer_resnext50_32x4d_fp16_performance.sh 
b/models/cv/classification/resnext50_32x4d/ixrt/scripts/infer_resnext50_32x4d_fp16_performance.sh index 511e9cee..dc2dde87 100644 --- a/models/cv/classification/resnext50_32x4d/ixrt/scripts/infer_resnext50_32x4d_fp16_performance.sh +++ b/models/cv/classification/resnext50_32x4d/ixrt/scripts/infer_resnext50_32x4d_fp16_performance.sh @@ -42,11 +42,6 @@ do esac done -PROJ_DIR=${PROJ_DIR:-"."} -DATASETS_DIR="${DATASETS_DIR}" -CHECKPOINTS_DIR="${PROJ_DIR}" -RUN_DIR="${PROJ_DIR}" -CONFIG_DIR="${PROJ_DIR}/config/RESNEXT50_CONFIG" source ${CONFIG_DIR} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} diff --git a/models/cv/classification/resnext50_32x4d/ixrt/simplify_model.py b/models/cv/classification/resnext50_32x4d/ixrt/simplify_model.py deleted file mode 100644 index 4d53a474..00000000 --- a/models/cv/classification/resnext50_32x4d/ixrt/simplify_model.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/shufflenet_v1/ixrt/README.md b/models/cv/classification/shufflenet_v1/ixrt/README.md index ea98fed9..efa4c981 100644 --- a/models/cv/classification/shufflenet_v1/ixrt/README.md +++ b/models/cv/classification/shufflenet_v1/ixrt/README.md @@ -28,7 +28,8 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt +pip3 install mmcls==0.24.0 mmcv==1.5.3 ``` ### Model Conversion @@ -49,9 +50,8 @@ python3 export_onnx.py \ export PROJ_DIR=./ export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints -export RUN_DIR=./ -export CONFIG_DIR=config/SHUFFLENET_V1_CONFIG - +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/SHUFFLENET_V1_CONFIG ``` ### FP16 diff --git a/models/cv/classification/shufflenet_v1/ixrt/build_engine.py b/models/cv/classification/shufflenet_v1/ixrt/build_engine.py deleted file mode 100644 index 32f549d8..00000000 --- a/models/cv/classification/shufflenet_v1/ixrt/build_engine.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - # print("precision : ", precision) - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/shufflenet_v1/ixrt/calibration_dataset.py b/models/cv/classification/shufflenet_v1/ixrt/calibration_dataset.py deleted file mode 100644 index b394c76b..00000000 --- a/models/cv/classification/shufflenet_v1/ixrt/calibration_dataset.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 
2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, 
num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/shufflenet_v1/ixrt/ci/prepare.sh b/models/cv/classification/shufflenet_v1/ixrt/ci/prepare.sh index bea7f22b..4f7016ae 100644 --- a/models/cv/classification/shufflenet_v1/ixrt/ci/prepare.sh +++ b/models/cv/classification/shufflenet_v1/ixrt/ci/prepare.sh @@ -25,7 +25,8 @@ else echo "Not Support Os" fi -pip install -r requirements.txt +pip install -r ../../ixrt_common/requirements.txt +pip3 install mmcls==0.24.0 mmcv==1.5.3 mkdir -p checkpoints unzip -q /root/data/repos/mmpretrain-0.24.0.zip -d ./ python3 export_onnx.py \ diff --git a/models/cv/classification/shufflenet_v1/ixrt/common.py b/models/cv/classification/shufflenet_v1/ixrt/common.py deleted file mode 100644 index abdc147c..00000000 --- a/models/cv/classification/shufflenet_v1/ixrt/common.py +++ /dev/null @@ -1,79 +0,0 
@@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -import pycuda.driver as cuda - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - allocation = cuda.mem_alloc(size) - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": 
list(shape), - "allocation": allocation, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/shufflenet_v1/ixrt/inference.py b/models/cv/classification/shufflenet_v1/ixrt/inference.py deleted file mode 100644 index 4e178df4..00000000 --- a/models/cv/classification/shufflenet_v1/ixrt/inference.py +++ /dev/null @@ -1,161 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -import pycuda.autoinit -import pycuda.driver as cuda -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += 
batch_data.shape[0] - - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - end2end_time = end_time - start_time - - print(F"E2E time : {end2end_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 
0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/shufflenet_v1/ixrt/modify_batchsize.py b/models/cv/classification/shufflenet_v1/ixrt/modify_batchsize.py deleted file mode 100644 index 4ac42a30..00000000 --- a/models/cv/classification/shufflenet_v1/ixrt/modify_batchsize.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. 
- dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/shufflenet_v1/ixrt/quant.py b/models/cv/classification/shufflenet_v1/ixrt/quant.py deleted file mode 100644 index 8006db24..00000000 --- a/models/cv/classification/shufflenet_v1/ixrt/quant.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import random -import argparse -import numpy as np -from random import shuffle -from tensorrt.deploy import static_quantize - -import torch -import torchvision.datasets -from calibration_dataset import getdataloader - -def setseed(seed=42): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=20) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=224) - args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - -args = parse_args() -setseed(args.seed) -calibration_dataloader = getdataloader(args.dataset_dir, args.step, args.bsz, img_sz=args.imgsz) -static_quantize(args.model, - calibration_dataloader=calibration_dataloader, - save_quant_onnx_path=os.path.join(args.save_dir, f"quantized_{args.model_name}.onnx"), - observer=args.observer, - data_preprocess=lambda x: x[0].to("cuda"), - quant_format="qdq", - disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/classification/shufflenet_v1/ixrt/refine_model.py b/models/cv/classification/shufflenet_v1/ixrt/refine_model.py deleted file mode 100644 index 000ee4dc..00000000 --- a/models/cv/classification/shufflenet_v1/ixrt/refine_model.py +++ /dev/null @@ -1,291 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., 
Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import argparse -import dataclasses - -import torch -import onnx - -from refine_utils.matmul_to_gemm_pass import FusedGemmPass -from refine_utils.linear_pass import FusedLinearPass - -from refine_utils.common import * - -def get_constant_input_name_of_operator(graph: Graph, operator: Operator): - const = None - for input in operator.inputs: - if not graph.containe_var(input): - continue - - if not graph.is_leaf_variable(input): - continue - - input_var = graph.get_variable(input) - if input_var.value is not None: - const = input - return const - -class FuseLayerNormPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - find_sequence_subgraph( - graph, - [OP.REDUCE_MEAN, OP.SUB, OP.POW, OP.REDUCE_MEAN, OP.ADD, OP.SQRT, OP.DIV, OP.MUL, OP.ADD], - self.fuse_layer_norm, - strict=False - ) - return graph - - def fuse_layer_norm(self, graph: Graph, pattern: PatternGraph): - # 检查 REDUCE_MEAN 的输入是否和 SUB 的输入是一致的 - if pattern.nodes[0].operator.inputs[0] != pattern.nodes[1].operator.inputs[0]: - return - - # 检查 POW 的输入是否和 DIV 的输入是一致的 - if pattern.nodes[2].operator.inputs[0] != pattern.nodes[6].operator.inputs[0]: - return - - # 检查部分算子的输出是否被多个算子使用 - nodes = pattern.nodes - for node in [nodes[0]] + nodes[2:-1]: - next_ops = graph.get_next_operators(node.operator) - if len(next_ops) > 1: - return - - eps = None - for input in 
nodes[4].operator.inputs: - input_var = graph.get_variable(input) - if input_var.value is not None and graph.is_leaf_variable(input): - eps = to_py_type(input_var.value) - - scale = get_constant_input_name_of_operator(graph, nodes[-2].operator) - bias = get_constant_input_name_of_operator(graph, nodes[-1].operator) - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - bias_var = graph.get_variable(bias) - print(bias_var) - - attributes = { - "axis": nodes[0].operator.attributes.axes, - "epsilon": eps, - } - - - layer_norm_op = self.transform.make_operator( - op_type="LayerNormalization", - inputs=[nodes[0].operator.inputs[0], scale, bias], - outputs=[nodes[-1].operator.outputs[0]], - **attributes - ) - - self.transform.add_operator(layer_norm_op) - -class FusedGeluPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True - ) - return graph - - def fuse_gelu(self, graph: Graph, pattern: PatternGraph): - nodes = pattern.nodes - prev_op = self.transform.get_previous_operators(nodes[0].operator)[0] - next_ops = self.transform.get_next_operators(prev_op) - if len(next_ops) != 2: - return - - if nodes[0].operator not in next_ops or nodes[3].operator not in next_ops: - return - - gelu_op_input = None - for input in nodes[3].operator.inputs: - if input in nodes[0].operator.inputs: - gelu_op_input = input - break - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - gelu_op = self.transform.make_operator( - op_type=OP.GELU, - inputs=[gelu_op_input], - outputs=[nodes[-1].operator.outputs[0]] - ) - self.transform.add_operator(gelu_op) - -@dataclasses.dataclass -class NormalizeAttr(BaseOperatorAttr): - p: float = 2.0 - epsilon: float = 1e-12 - axis: int = 1 - - -@registe_operator(OP.GELU) -class GeluOperator(BaseOperator): - - def 
call( - self, - executor, - operator: Operator, - inputs: List, - attr: NormalizeAttr, - ): - return F.gelu(inputs[0]) - - def convert_onnx_operator( - self, ir_graph: Graph, onnx_graph: onnx.GraphProto, node: onnx.NodeProto - ) -> Operator: - return default_converter(ir_graph, onnx_graph, node, attr_cls=attr.EmptyAttr) - - def quantize( - self, - graph: Graph, - op: Operator, - operator_observer_config: QuantOperatorObserverConfig, - quant_outputs: bool = False, - ): - return quant_single_input_operator(graph, op, operator_observer_config, quant_outputs=quant_outputs) - - - -class ClearUnsedVariables(BasePass): - - def process(self, graph: Graph) -> Graph: - vars = list(graph.variables) - - for var in vars: - if len(graph.get_dst_operators(var)) == 0 and graph.is_leaf_variable(var): - graph.delete_variable(var) - - quant_params = list(graph.quant_parameters.keys()) - for var in quant_params: - if not graph.containe_var(var): - graph.quant_parameters.pop(var) - - return graph - -class FormatLayerNorm(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if "LayerNorm" in op.op_type: - self.format_layer_norm(graph, op) - return graph - - def format_layer_norm(self, graph, operator): - if not hasattr(operator.attributes, "axis"): - return - if isinstance(operator.attributes.axis, (tuple, list)): - operator.attributes.axis = operator.attributes.axis[0] - -class FormatReshape(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if op.op_type == "Reshape": - self.format_reshape(graph, op) - - return graph - - def format_reshape(self, graph, operator): - shape = graph.get_variable(operator.inputs[1]) - shape.value = torch.tensor(shape.value, dtype=torch.int64) - -class FormatScalar(BasePass): - - def process(self, graph: Graph): - for var in graph.variables.values(): - var: Variable - use_ops = graph.get_dst_operators(var) - - if len(use_ops) == 0: - continue - - if use_ops[0].op_type 
not in [OP.MUL, OP.ADD, OP.GATHER]: - continue - - if var.value is not None and var.value.ndim == 0: - var.value = var.value.reshape(1) - print(f"Reshape scalar to tensor for {var.name}.") - - return graph - -class RenamePass(BasePass): - - def process(self, graph:Graph): - - names = [name for name in graph.operators.keys()] - for old_name in names: - new_name = old_name.replace("/", "#") - - graph.rename_operator(old_name, new_name) - - names = [name for name in graph.variables.keys()] - for name in names: - new_name = name.replace("/", ".").replace("Output", "out").replace("output", "out") - - graph.rename_vaiable(name, new_name, - with_variables=True, - with_operator_outputs=True) - - return graph - -def create_pipeline(example_inputs): - return PassSequence( - # FuseLayerNormPass(), - FusedGeluPass(), - - # ClearUnsedVariables(), - # FormatLayerNorm(), - # FormatReshape(), - # FormatScalar(), - # RenamePass() - ) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--onnx_path", type=str) - parser.add_argument("--dst_onnx_path", type=str) - - parser.add_argument("--bsz", type=int, default=8, - help="Batch size") - parser.add_argument("--imgsz", type=int, default=224, - help="Image size") - - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - - example_inputs = torch.randn(args.bsz, 3, args.imgsz, args.imgsz) - - refine_pipline = Pipeline( - create_source(f"{args.onnx_path}", example_inputs=example_inputs), - create_pipeline(example_inputs), - create_target( - f"{args.dst_onnx_path}", - example_inputs=example_inputs, - ) - ) - refine_pipline.run() - - print(f"refine the model, input shape={example_inputs.shape}") diff --git a/models/cv/classification/shufflenet_v1/ixrt/requirements.txt b/models/cv/classification/shufflenet_v1/ixrt/requirements.txt deleted file mode 100644 index 4546592b..00000000 --- a/models/cv/classification/shufflenet_v1/ixrt/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -pycuda 
-tqdm -tabulate -onnx -onnxsim -opencv-python==4.6.0.66 -mmcls==0.24.0 -mmcv==1.5.3 \ No newline at end of file diff --git a/models/cv/classification/shufflenet_v1/ixrt/simplify_model.py b/models/cv/classification/shufflenet_v1/ixrt/simplify_model.py deleted file mode 100644 index 4d53a474..00000000 --- a/models/cv/classification/shufflenet_v1/ixrt/simplify_model.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/shufflenetv2_x0_5/ixrt/README.md b/models/cv/classification/shufflenetv2_x0_5/ixrt/README.md index d6238d35..46cef7c1 100644 --- a/models/cv/classification/shufflenetv2_x0_5/ixrt/README.md +++ b/models/cv/classification/shufflenetv2_x0_5/ixrt/README.md @@ -10,7 +10,7 @@ convolutions, and efficient building blocks to further reduce computational comp | GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | |--------|-----------|---------| -| MR-V100 | 4.3.0 | 25.06 | +| MR-V100 | 4.2.0 | 25.06 | ## Model Preparation @@ -29,14 +29,14 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash mkdir checkpoints -python3 export.py --weight shufflenetv2_x0.5-f707e7126e.pth --output checkpoints/shufflenetv2_x0_5.onnx +python3 ../../ixrt_common/export.py --model-name shufflenet_v2_x0_5 --weight shufflenetv2_x0.5-f707e7126e.pth --output checkpoints/shufflenetv2_x0_5.onnx ``` ## Model Inference @@ -45,9 +45,8 @@ python3 export.py --weight shufflenetv2_x0.5-f707e7126e.pth --output checkpoints export PROJ_DIR=./ export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints -export 
RUN_DIR=./ -export CONFIG_DIR=config/SHUFFLENET_V2_X0_5_CONFIG - +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/SHUFFLENET_V2_X0_5_CONFIG ``` ### FP16 diff --git a/models/cv/classification/shufflenetv2_x0_5/ixrt/build_engine.py b/models/cv/classification/shufflenetv2_x0_5/ixrt/build_engine.py deleted file mode 100644 index e0a3bb4b..00000000 --- a/models/cv/classification/shufflenetv2_x0_5/ixrt/build_engine.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - # print("precision : ", precision) - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x0_5/ixrt/calibration_dataset.py b/models/cv/classification/shufflenetv2_x0_5/ixrt/calibration_dataset.py deleted file mode 100644 index 774ed840..00000000 --- a/models/cv/classification/shufflenetv2_x0_5/ixrt/calibration_dataset.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 
0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x0_5/ixrt/ci/prepare.sh b/models/cv/classification/shufflenetv2_x0_5/ixrt/ci/prepare.sh index 9cfc3f2c..319a1676 100644 --- a/models/cv/classification/shufflenetv2_x0_5/ixrt/ci/prepare.sh +++ b/models/cv/classification/shufflenetv2_x0_5/ixrt/ci/prepare.sh @@ -25,6 +25,6 @@ else echo "Not Support Os" fi -pip install -r requirements.txt +pip install -r ../../ixrt_common/requirements.txt mkdir -p checkpoints -python3 export.py --weight shufflenetv2_x0.5-f707e7126e.pth --output checkpoints/shufflenetv2_x0_5.onnx \ No newline at end of file +python3 ../../ixrt_common/export.py --model-name shufflenet_v2_x0_5 --weight shufflenetv2_x0.5-f707e7126e.pth --output checkpoints/shufflenetv2_x0_5.onnx \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x0_5/ixrt/common.py b/models/cv/classification/shufflenetv2_x0_5/ixrt/common.py deleted file mode 100644 index f7aed872..00000000 --- a/models/cv/classification/shufflenetv2_x0_5/ixrt/common.py +++ /dev/null @@ -1,79 
+0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -import pycuda.driver as cuda - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - allocation = cuda.mem_alloc(size) - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": 
list(shape), - "allocation": allocation, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/shufflenetv2_x0_5/ixrt/export.py b/models/cv/classification/shufflenetv2_x0_5/ixrt/export.py deleted file mode 100644 index 4733d330..00000000 --- a/models/cv/classification/shufflenetv2_x0_5/ixrt/export.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import torch -import torchvision -import argparse - -def parse_args(): - parser = argparse.ArgumentParser() - - parser.add_argument("--weight", - type=str, - required=True, - help="pytorch model weight.") - - parser.add_argument("--output", - type=str, - required=True, - help="export onnx model path.") - - args = parser.parse_args() - return args - -def main(): - args = parse_args() - - model = torchvision.models.shufflenet_v2_x0_5() - model.load_state_dict(torch.load(args.weight)) - model.eval() - - input_names = ['input'] - output_names = ['output'] - dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} - dummy_input = torch.randn(1, 3, 224, 224) - - torch.onnx.export( - model, - dummy_input, - args.output, - input_names = input_names, - dynamic_axes = dynamic_axes, - output_names = output_names, - opset_version=13 - ) - - print("Export onnx model successfully! ") - -if __name__ == "__main__": - main() diff --git a/models/cv/classification/shufflenetv2_x0_5/ixrt/inference.py b/models/cv/classification/shufflenetv2_x0_5/ixrt/inference.py deleted file mode 100644 index 78126cb0..00000000 --- a/models/cv/classification/shufflenetv2_x0_5/ixrt/inference.py +++ /dev/null @@ -1,161 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -import pycuda.autoinit -import pycuda.driver as cuda -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += 
batch_data.shape[0] - - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - end2end_time = end_time - start_time - - print(F"E2E time : {end2end_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 
0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/shufflenetv2_x0_5/ixrt/modify_batchsize.py b/models/cv/classification/shufflenetv2_x0_5/ixrt/modify_batchsize.py deleted file mode 100644 index 2e8d086b..00000000 --- a/models/cv/classification/shufflenetv2_x0_5/ixrt/modify_batchsize.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. 
- dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/shufflenetv2_x0_5/ixrt/quant.py b/models/cv/classification/shufflenetv2_x0_5/ixrt/quant.py deleted file mode 100644 index 6e974001..00000000 --- a/models/cv/classification/shufflenetv2_x0_5/ixrt/quant.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import random -import argparse -import numpy as np -from random import shuffle -from tensorrt.deploy import static_quantize - -import torch -import torchvision.datasets -from calibration_dataset import getdataloader - -def setseed(seed=42): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=20) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=224) - args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - -args = parse_args() -setseed(args.seed) -calibration_dataloader = getdataloader(args.dataset_dir, args.step, args.bsz, img_sz=args.imgsz) -static_quantize(args.model, - calibration_dataloader=calibration_dataloader, - save_quant_onnx_path=os.path.join(args.save_dir, f"quantized_{args.model_name}.onnx"), - observer=args.observer, - data_preprocess=lambda x: x[0].to("cuda"), - quant_format="qdq", - disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x0_5/ixrt/refine_model.py b/models/cv/classification/shufflenetv2_x0_5/ixrt/refine_model.py deleted file mode 100644 index d7d8ca29..00000000 --- a/models/cv/classification/shufflenetv2_x0_5/ixrt/refine_model.py +++ /dev/null @@ -1,291 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX 
Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import argparse -import dataclasses - -import torch -import onnx - -from refine_utils.matmul_to_gemm_pass import FusedGemmPass -from refine_utils.linear_pass import FusedLinearPass - -from refine_utils.common import * - -def get_constant_input_name_of_operator(graph: Graph, operator: Operator): - const = None - for input in operator.inputs: - if not graph.containe_var(input): - continue - - if not graph.is_leaf_variable(input): - continue - - input_var = graph.get_variable(input) - if input_var.value is not None: - const = input - return const - -class FuseLayerNormPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - find_sequence_subgraph( - graph, - [OP.REDUCE_MEAN, OP.SUB, OP.POW, OP.REDUCE_MEAN, OP.ADD, OP.SQRT, OP.DIV, OP.MUL, OP.ADD], - self.fuse_layer_norm, - strict=False - ) - return graph - - def fuse_layer_norm(self, graph: Graph, pattern: PatternGraph): - # 检查 REDUCE_MEAN 的输入是否和 SUB 的输入是一致的 - if pattern.nodes[0].operator.inputs[0] != pattern.nodes[1].operator.inputs[0]: - return - - # 检查 POW 的输入是否和 DIV 的输入是一致的 - if pattern.nodes[2].operator.inputs[0] != pattern.nodes[6].operator.inputs[0]: - return - - # 检查部分算子的输出是否被多个算子使用 - nodes = pattern.nodes - for node in [nodes[0]] + nodes[2:-1]: - next_ops = graph.get_next_operators(node.operator) - if len(next_ops) > 1: - return - - eps = None - for input in 
nodes[4].operator.inputs: - input_var = graph.get_variable(input) - if input_var.value is not None and graph.is_leaf_variable(input): - eps = to_py_type(input_var.value) - - scale = get_constant_input_name_of_operator(graph, nodes[-2].operator) - bias = get_constant_input_name_of_operator(graph, nodes[-1].operator) - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - bias_var = graph.get_variable(bias) - print(bias_var) - - attributes = { - "axis": nodes[0].operator.attributes.axes, - "epsilon": eps, - } - - - layer_norm_op = self.transform.make_operator( - op_type="LayerNormalization", - inputs=[nodes[0].operator.inputs[0], scale, bias], - outputs=[nodes[-1].operator.outputs[0]], - **attributes - ) - - self.transform.add_operator(layer_norm_op) - -class FusedGeluPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True - ) - return graph - - def fuse_gelu(self, graph: Graph, pattern: PatternGraph): - nodes = pattern.nodes - prev_op = self.transform.get_previous_operators(nodes[0].operator)[0] - next_ops = self.transform.get_next_operators(prev_op) - if len(next_ops) != 2: - return - - if nodes[0].operator not in next_ops or nodes[3].operator not in next_ops: - return - - gelu_op_input = None - for input in nodes[3].operator.inputs: - if input in nodes[0].operator.inputs: - gelu_op_input = input - break - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - gelu_op = self.transform.make_operator( - op_type=OP.GELU, - inputs=[gelu_op_input], - outputs=[nodes[-1].operator.outputs[0]] - ) - self.transform.add_operator(gelu_op) - -@dataclasses.dataclass -class NormalizeAttr(BaseOperatorAttr): - p: float = 2.0 - epsilon: float = 1e-12 - axis: int = 1 - - -@registe_operator(OP.GELU) -class GeluOperator(BaseOperator): - - def 
call( - self, - executor, - operator: Operator, - inputs: List, - attr: NormalizeAttr, - ): - return F.gelu(inputs[0]) - - def convert_onnx_operator( - self, ir_graph: Graph, onnx_graph: onnx.GraphProto, node: onnx.NodeProto - ) -> Operator: - return default_converter(ir_graph, onnx_graph, node, attr_cls=attr.EmptyAttr) - - def quantize( - self, - graph: Graph, - op: Operator, - operator_observer_config: QuantOperatorObserverConfig, - quant_outputs: bool = False, - ): - return quant_single_input_operator(graph, op, operator_observer_config, quant_outputs=quant_outputs) - - - -class ClearUnsedVariables(BasePass): - - def process(self, graph: Graph) -> Graph: - vars = list(graph.variables) - - for var in vars: - if len(graph.get_dst_operators(var)) == 0 and graph.is_leaf_variable(var): - graph.delete_variable(var) - - quant_params = list(graph.quant_parameters.keys()) - for var in quant_params: - if not graph.containe_var(var): - graph.quant_parameters.pop(var) - - return graph - -class FormatLayerNorm(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if "LayerNorm" in op.op_type: - self.format_layer_norm(graph, op) - return graph - - def format_layer_norm(self, graph, operator): - if not hasattr(operator.attributes, "axis"): - return - if isinstance(operator.attributes.axis, (tuple, list)): - operator.attributes.axis = operator.attributes.axis[0] - -class FormatReshape(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if op.op_type == "Reshape": - self.format_reshape(graph, op) - - return graph - - def format_reshape(self, graph, operator): - shape = graph.get_variable(operator.inputs[1]) - shape.value = torch.tensor(shape.value, dtype=torch.int64) - -class FormatScalar(BasePass): - - def process(self, graph: Graph): - for var in graph.variables.values(): - var: Variable - use_ops = graph.get_dst_operators(var) - - if len(use_ops) == 0: - continue - - if use_ops[0].op_type 
not in [OP.MUL, OP.ADD, OP.GATHER]: - continue - - if var.value is not None and var.value.ndim == 0: - var.value = var.value.reshape(1) - print(f"Reshape scalar to tensor for {var.name}.") - - return graph - -class RenamePass(BasePass): - - def process(self, graph:Graph): - - names = [name for name in graph.operators.keys()] - for old_name in names: - new_name = old_name.replace("/", "#") - - graph.rename_operator(old_name, new_name) - - names = [name for name in graph.variables.keys()] - for name in names: - new_name = name.replace("/", ".").replace("Output", "out").replace("output", "out") - - graph.rename_vaiable(name, new_name, - with_variables=True, - with_operator_outputs=True) - - return graph - -def create_pipeline(example_inputs): - return PassSequence( - # FuseLayerNormPass(), - FusedGeluPass(), - - # ClearUnsedVariables(), - # FormatLayerNorm(), - # FormatReshape(), - # FormatScalar(), - # RenamePass() - ) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--onnx_path", type=str) - parser.add_argument("--dst_onnx_path", type=str) - - parser.add_argument("--bsz", type=int, default=8, - help="Batch size") - parser.add_argument("--imgsz", type=int, default=224, - help="Image size") - - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - - example_inputs = torch.randn(args.bsz, 3, args.imgsz, args.imgsz) - - refine_pipline = Pipeline( - create_source(f"{args.onnx_path}", example_inputs=example_inputs), - create_pipeline(example_inputs), - create_target( - f"{args.dst_onnx_path}", - example_inputs=example_inputs, - ) - ) - refine_pipline.run() - - print(f"refine the model, input shape={example_inputs.shape}") diff --git a/models/cv/classification/shufflenetv2_x0_5/ixrt/requirements.txt b/models/cv/classification/shufflenetv2_x0_5/ixrt/requirements.txt deleted file mode 100644 index 4546592b..00000000 --- a/models/cv/classification/shufflenetv2_x0_5/ixrt/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ 
-pycuda -tqdm -tabulate -onnx -onnxsim -opencv-python==4.6.0.66 -mmcls==0.24.0 -mmcv==1.5.3 \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x0_5/ixrt/simplify_model.py b/models/cv/classification/shufflenetv2_x0_5/ixrt/simplify_model.py deleted file mode 100644 index bef33576..00000000 --- a/models/cv/classification/shufflenetv2_x0_5/ixrt/simplify_model.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/shufflenetv2_x1_0/ixrt/README.md b/models/cv/classification/shufflenetv2_x1_0/ixrt/README.md index e22634a7..e73a8c5e 100644 --- a/models/cv/classification/shufflenetv2_x1_0/ixrt/README.md +++ b/models/cv/classification/shufflenetv2_x1_0/ixrt/README.md @@ -27,14 +27,14 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash mkdir checkpoints -python3 export.py --weight shufflenetv2_x1-5666bf0f80.pth --output checkpoints/shufflenetv2_x1_0.onnx +python3 ../../ixrt_common/export.py --model-name shufflenet_v2_x1_0 --weight shufflenetv2_x1-5666bf0f80.pth --output checkpoints/shufflenetv2_x1_0.onnx ``` ## Model Inference @@ -43,8 +43,8 @@ python3 export.py --weight shufflenetv2_x1-5666bf0f80.pth --output checkpoints/s export PROJ_DIR=./ export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints -export RUN_DIR=./ -export CONFIG_DIR=config/SHUFFLENETV2_X1_0_CONFIG +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/SHUFFLENETV2_X1_0_CONFIG ``` ### FP16 diff --git a/models/cv/classification/shufflenetv2_x1_0/ixrt/build_engine.py b/models/cv/classification/shufflenetv2_x1_0/ixrt/build_engine.py deleted file mode 100644 index 
e0a3bb4b..00000000 --- a/models/cv/classification/shufflenetv2_x1_0/ixrt/build_engine.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - # print("precision : ", precision) - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - 
main(args) \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_0/ixrt/calibration_dataset.py b/models/cv/classification/shufflenetv2_x1_0/ixrt/calibration_dataset.py deleted file mode 100644 index 774ed840..00000000 --- a/models/cv/classification/shufflenetv2_x1_0/ixrt/calibration_dataset.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - 
batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_0/ixrt/ci/prepare.sh b/models/cv/classification/shufflenetv2_x1_0/ixrt/ci/prepare.sh index ef18142a..3e5f5484 100644 --- a/models/cv/classification/shufflenetv2_x1_0/ixrt/ci/prepare.sh +++ b/models/cv/classification/shufflenetv2_x1_0/ixrt/ci/prepare.sh @@ -25,6 +25,6 @@ else echo "Not Support Os" fi -pip install -r requirements.txt +pip install -r ../../ixrt_common/requirements.txt mkdir -p checkpoints -python3 export.py --weight shufflenetv2_x1-5666bf0f80.pth --output checkpoints/shufflenetv2_x1_0.onnx \ No newline at end of file +python3 ../../ixrt_common/export.py --model-name shufflenet_v2_x1_0 --weight shufflenetv2_x1-5666bf0f80.pth --output checkpoints/shufflenetv2_x1_0.onnx \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_0/ixrt/common.py b/models/cv/classification/shufflenetv2_x1_0/ixrt/common.py deleted file mode 100644 index f7aed872..00000000 --- a/models/cv/classification/shufflenetv2_x1_0/ixrt/common.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -import pycuda.driver as cuda - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - allocation = cuda.mem_alloc(size) - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - 
else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/shufflenetv2_x1_0/ixrt/export.py b/models/cv/classification/shufflenetv2_x1_0/ixrt/export.py deleted file mode 100644 index 6344afd7..00000000 --- a/models/cv/classification/shufflenetv2_x1_0/ixrt/export.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import torch -import torchvision -import argparse - -def parse_args(): - parser = argparse.ArgumentParser() - - parser.add_argument("--weight", - type=str, - required=True, - help="pytorch model weight.") - - parser.add_argument("--output", - type=str, - required=True, - help="export onnx model path.") - - args = parser.parse_args() - return args - -def main(): - args = parse_args() - - model = torchvision.models.shufflenet_v2_x1_0() - model.load_state_dict(torch.load(args.weight)) - model.eval() - - input_names = ['input'] - output_names = ['output'] - dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} - dummy_input = torch.randn(1, 3, 224, 224) - - torch.onnx.export( - model, - dummy_input, - args.output, - input_names = input_names, - dynamic_axes = dynamic_axes, - output_names = output_names, - opset_version=13 - ) - - print("Export onnx model successfully! 
") - -if __name__ == "__main__": - main() diff --git a/models/cv/classification/shufflenetv2_x1_0/ixrt/inference.py b/models/cv/classification/shufflenetv2_x1_0/ixrt/inference.py deleted file mode 100644 index 78126cb0..00000000 --- a/models/cv/classification/shufflenetv2_x1_0/ixrt/inference.py +++ /dev/null @@ -1,161 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -import pycuda.autoinit -import pycuda.driver as cuda -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += 
batch_data.shape[0] - - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - end2end_time = end_time - start_time - - print(F"E2E time : {end2end_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 
0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/shufflenetv2_x1_0/ixrt/modify_batchsize.py b/models/cv/classification/shufflenetv2_x1_0/ixrt/modify_batchsize.py deleted file mode 100644 index 2e8d086b..00000000 --- a/models/cv/classification/shufflenetv2_x1_0/ixrt/modify_batchsize.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. 
- dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/shufflenetv2_x1_0/ixrt/quant.py b/models/cv/classification/shufflenetv2_x1_0/ixrt/quant.py deleted file mode 100644 index 6e974001..00000000 --- a/models/cv/classification/shufflenetv2_x1_0/ixrt/quant.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import random -import argparse -import numpy as np -from random import shuffle -from tensorrt.deploy import static_quantize - -import torch -import torchvision.datasets -from calibration_dataset import getdataloader - -def setseed(seed=42): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=20) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=224) - args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - -args = parse_args() -setseed(args.seed) -calibration_dataloader = getdataloader(args.dataset_dir, args.step, args.bsz, img_sz=args.imgsz) -static_quantize(args.model, - calibration_dataloader=calibration_dataloader, - save_quant_onnx_path=os.path.join(args.save_dir, f"quantized_{args.model_name}.onnx"), - observer=args.observer, - data_preprocess=lambda x: x[0].to("cuda"), - quant_format="qdq", - disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_0/ixrt/requirements.txt b/models/cv/classification/shufflenetv2_x1_0/ixrt/requirements.txt deleted file mode 100644 index fa15a05d..00000000 --- a/models/cv/classification/shufflenetv2_x1_0/ixrt/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -onnx -tqdm -onnxsim -tabulate -ppq -pycuda \ No 
newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_0/ixrt/simplify_model.py b/models/cv/classification/shufflenetv2_x1_0/ixrt/simplify_model.py deleted file mode 100644 index bef33576..00000000 --- a/models/cv/classification/shufflenetv2_x1_0/ixrt/simplify_model.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/shufflenetv2_x1_5/ixrt/README.md b/models/cv/classification/shufflenetv2_x1_5/ixrt/README.md index 8b92dab9..0def9972 100644 --- a/models/cv/classification/shufflenetv2_x1_5/ixrt/README.md +++ b/models/cv/classification/shufflenetv2_x1_5/ixrt/README.md @@ -27,14 +27,14 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install 
-r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash mkdir checkpoints -python3 export.py --weight shufflenetv2_x1_5-3c479a10.pth --output checkpoints/shufflenetv2_x1_5.onnx +python3 ../../ixrt_common/export.py --model-name shufflenet_v2_x1_5 --weight shufflenetv2_x1_5-3c479a10.pth --output checkpoints/shufflenetv2_x1_5.onnx ``` ## Model Inference @@ -43,8 +43,8 @@ python3 export.py --weight shufflenetv2_x1_5-3c479a10.pth --output checkpoints/s export PROJ_DIR=./ export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints -export RUN_DIR=./ -export CONFIG_DIR=config/SHUFFLENETV2_X1_5_CONFIG +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/SHUFFLENETV2_X1_5_CONFIG ``` ### FP16 diff --git a/models/cv/classification/shufflenetv2_x1_5/ixrt/build_engine.py b/models/cv/classification/shufflenetv2_x1_5/ixrt/build_engine.py deleted file mode 100644 index e0a3bb4b..00000000 --- a/models/cv/classification/shufflenetv2_x1_5/ixrt/build_engine.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - # print("precision : ", precision) - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_5/ixrt/calibration_dataset.py b/models/cv/classification/shufflenetv2_x1_5/ixrt/calibration_dataset.py deleted file mode 100644 index 774ed840..00000000 --- a/models/cv/classification/shufflenetv2_x1_5/ixrt/calibration_dataset.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 
0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_5/ixrt/ci/prepare.sh b/models/cv/classification/shufflenetv2_x1_5/ixrt/ci/prepare.sh index b526c253..3deaa190 100644 --- a/models/cv/classification/shufflenetv2_x1_5/ixrt/ci/prepare.sh +++ b/models/cv/classification/shufflenetv2_x1_5/ixrt/ci/prepare.sh @@ -25,6 +25,6 @@ else echo "Not Support Os" fi -pip install -r requirements.txt +pip install -r ../../ixrt_common/requirements.txt mkdir -p checkpoints -python3 export.py --weight shufflenetv2_x1_5-3c479a10.pth --output checkpoints/shufflenetv2_x1_5.onnx \ No newline at end of file +python3 ../../ixrt_common/export.py --model-name shufflenet_v2_x1_5 --weight shufflenetv2_x1_5-3c479a10.pth --output checkpoints/shufflenetv2_x1_5.onnx \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_5/ixrt/common.py b/models/cv/classification/shufflenetv2_x1_5/ixrt/common.py deleted file mode 100644 index f7aed872..00000000 --- a/models/cv/classification/shufflenetv2_x1_5/ixrt/common.py +++ /dev/null @@ -1,79 +0,0 
@@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -import pycuda.driver as cuda - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - allocation = cuda.mem_alloc(size) - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": 
list(shape), - "allocation": allocation, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/shufflenetv2_x1_5/ixrt/export.py b/models/cv/classification/shufflenetv2_x1_5/ixrt/export.py deleted file mode 100644 index f68b01c4..00000000 --- a/models/cv/classification/shufflenetv2_x1_5/ixrt/export.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import torch -import torchvision -import argparse - -def parse_args(): - parser = argparse.ArgumentParser() - - parser.add_argument("--weight", - type=str, - required=True, - help="pytorch model weight.") - - parser.add_argument("--output", - type=str, - required=True, - help="export onnx model path.") - - args = parser.parse_args() - return args - -def main(): - args = parse_args() - - model = torchvision.models.shufflenet_v2_x1_5() - model.load_state_dict(torch.load(args.weight)) - model.eval() - - input_names = ['input'] - output_names = ['output'] - dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} - dummy_input = torch.randn(1, 3, 224, 224) - - torch.onnx.export( - model, - dummy_input, - args.output, - input_names = input_names, - dynamic_axes = dynamic_axes, - output_names = output_names, - opset_version=13 - ) - - print("Export onnx model successfully! ") - -if __name__ == "__main__": - main() diff --git a/models/cv/classification/shufflenetv2_x1_5/ixrt/inference.py b/models/cv/classification/shufflenetv2_x1_5/ixrt/inference.py deleted file mode 100644 index 78126cb0..00000000 --- a/models/cv/classification/shufflenetv2_x1_5/ixrt/inference.py +++ /dev/null @@ -1,161 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -import pycuda.autoinit -import pycuda.driver as cuda -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += 
batch_data.shape[0] - - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - end2end_time = end_time - start_time - - print(F"E2E time : {end2end_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 
0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/shufflenetv2_x1_5/ixrt/modify_batchsize.py b/models/cv/classification/shufflenetv2_x1_5/ixrt/modify_batchsize.py deleted file mode 100644 index 2e8d086b..00000000 --- a/models/cv/classification/shufflenetv2_x1_5/ixrt/modify_batchsize.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. 
- dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/shufflenetv2_x1_5/ixrt/quant.py b/models/cv/classification/shufflenetv2_x1_5/ixrt/quant.py deleted file mode 100644 index 6e974001..00000000 --- a/models/cv/classification/shufflenetv2_x1_5/ixrt/quant.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import random -import argparse -import numpy as np -from random import shuffle -from tensorrt.deploy import static_quantize - -import torch -import torchvision.datasets -from calibration_dataset import getdataloader - -def setseed(seed=42): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=20) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=224) - args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - -args = parse_args() -setseed(args.seed) -calibration_dataloader = getdataloader(args.dataset_dir, args.step, args.bsz, img_sz=args.imgsz) -static_quantize(args.model, - calibration_dataloader=calibration_dataloader, - save_quant_onnx_path=os.path.join(args.save_dir, f"quantized_{args.model_name}.onnx"), - observer=args.observer, - data_preprocess=lambda x: x[0].to("cuda"), - quant_format="qdq", - disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_5/ixrt/requirements.txt b/models/cv/classification/shufflenetv2_x1_5/ixrt/requirements.txt deleted file mode 100644 index fa15a05d..00000000 --- a/models/cv/classification/shufflenetv2_x1_5/ixrt/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -onnx -tqdm -onnxsim -tabulate -ppq -pycuda \ No 
newline at end of file diff --git a/models/cv/classification/shufflenetv2_x1_5/ixrt/simplify_model.py b/models/cv/classification/shufflenetv2_x1_5/ixrt/simplify_model.py deleted file mode 100644 index bef33576..00000000 --- a/models/cv/classification/shufflenetv2_x1_5/ixrt/simplify_model.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/shufflenetv2_x2_0/ixrt/README.md b/models/cv/classification/shufflenetv2_x2_0/ixrt/README.md index 186044d0..3bdafbf7 100644 --- a/models/cv/classification/shufflenetv2_x2_0/ixrt/README.md +++ b/models/cv/classification/shufflenetv2_x2_0/ixrt/README.md @@ -21,14 +21,14 @@ Dataset: to download the validation dat ### Install Dependencies ```bash -pip3 
install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash mkdir checkpoints -python3 export.py --weight shufflenetv2_x2_0-8be3c8ee.pth --output checkpoints/shufflenetv2_x2_0.onnx +python3 ../../ixrt_common/export.py --model-name shufflenet_v2_x2_0 --weight shufflenetv2_x2_0-8be3c8ee.pth --output checkpoints/shufflenetv2_x2_0.onnx ``` ## Model Inference @@ -37,8 +37,8 @@ python3 export.py --weight shufflenetv2_x2_0-8be3c8ee.pth --output checkpoints/s export PROJ_DIR=./ export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints -export RUN_DIR=./ -export CONFIG_DIR=config/SHUFFLENETV2_X2_0_CONFIG +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/SHUFFLENETV2_X2_0_CONFIG ``` ### FP16 diff --git a/models/cv/classification/shufflenetv2_x2_0/ixrt/build_engine.py b/models/cv/classification/shufflenetv2_x2_0/ixrt/build_engine.py deleted file mode 100644 index e0a3bb4b..00000000 --- a/models/cv/classification/shufflenetv2_x2_0/ixrt/build_engine.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - # print("precision : ", precision) - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x2_0/ixrt/calibration_dataset.py b/models/cv/classification/shufflenetv2_x2_0/ixrt/calibration_dataset.py deleted file mode 100644 index 774ed840..00000000 --- a/models/cv/classification/shufflenetv2_x2_0/ixrt/calibration_dataset.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 
0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x2_0/ixrt/ci/prepare.sh b/models/cv/classification/shufflenetv2_x2_0/ixrt/ci/prepare.sh index 4d413635..b272eef6 100644 --- a/models/cv/classification/shufflenetv2_x2_0/ixrt/ci/prepare.sh +++ b/models/cv/classification/shufflenetv2_x2_0/ixrt/ci/prepare.sh @@ -25,6 +25,6 @@ else echo "Not Support Os" fi -pip install -r requirements.txt +pip install -r ../../ixrt_common/requirements.txt mkdir -p checkpoints -python3 export.py --weight shufflenetv2_x2_0-8be3c8ee.pth --output checkpoints/shufflenetv2_x2_0.onnx \ No newline at end of file +python3 ../../ixrt_common/export.py --model-name shufflenet_v2_x2_0 --weight shufflenetv2_x2_0-8be3c8ee.pth --output checkpoints/shufflenetv2_x2_0.onnx \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x2_0/ixrt/common.py b/models/cv/classification/shufflenetv2_x2_0/ixrt/common.py deleted file mode 100644 index f7aed872..00000000 --- a/models/cv/classification/shufflenetv2_x2_0/ixrt/common.py +++ /dev/null @@ -1,79 +0,0 
@@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -import pycuda.driver as cuda - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - allocation = cuda.mem_alloc(size) - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": 
list(shape), - "allocation": allocation, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/shufflenetv2_x2_0/ixrt/inference.py b/models/cv/classification/shufflenetv2_x2_0/ixrt/inference.py deleted file mode 100644 index 78126cb0..00000000 --- a/models/cv/classification/shufflenetv2_x2_0/ixrt/inference.py +++ /dev/null @@ -1,161 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -import pycuda.autoinit -import pycuda.driver as cuda -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += 
batch_data.shape[0] - - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - end2end_time = end_time - start_time - - print(F"E2E time : {end2end_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 
0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/shufflenetv2_x2_0/ixrt/modify_batchsize.py b/models/cv/classification/shufflenetv2_x2_0/ixrt/modify_batchsize.py deleted file mode 100644 index 2e8d086b..00000000 --- a/models/cv/classification/shufflenetv2_x2_0/ixrt/modify_batchsize.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. 
- dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/shufflenetv2_x2_0/ixrt/quant.py b/models/cv/classification/shufflenetv2_x2_0/ixrt/quant.py deleted file mode 100644 index 6e974001..00000000 --- a/models/cv/classification/shufflenetv2_x2_0/ixrt/quant.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import random -import argparse -import numpy as np -from random import shuffle -from tensorrt.deploy import static_quantize - -import torch -import torchvision.datasets -from calibration_dataset import getdataloader - -def setseed(seed=42): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=20) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=224) - args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - -args = parse_args() -setseed(args.seed) -calibration_dataloader = getdataloader(args.dataset_dir, args.step, args.bsz, img_sz=args.imgsz) -static_quantize(args.model, - calibration_dataloader=calibration_dataloader, - save_quant_onnx_path=os.path.join(args.save_dir, f"quantized_{args.model_name}.onnx"), - observer=args.observer, - data_preprocess=lambda x: x[0].to("cuda"), - quant_format="qdq", - disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/classification/shufflenetv2_x2_0/ixrt/requirements.txt b/models/cv/classification/shufflenetv2_x2_0/ixrt/requirements.txt deleted file mode 100644 index fa15a05d..00000000 --- a/models/cv/classification/shufflenetv2_x2_0/ixrt/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -onnx -tqdm -onnxsim -tabulate -ppq -pycuda \ No 
newline at end of file diff --git a/models/cv/classification/shufflenetv2_x2_0/ixrt/simplify_model.py b/models/cv/classification/shufflenetv2_x2_0/ixrt/simplify_model.py deleted file mode 100644 index bef33576..00000000 --- a/models/cv/classification/shufflenetv2_x2_0/ixrt/simplify_model.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/squeezenet_v1_0/ixrt/README.md b/models/cv/classification/squeezenet_v1_0/ixrt/README.md index 9051714c..a5f94d76 100644 --- a/models/cv/classification/squeezenet_v1_0/ixrt/README.md +++ b/models/cv/classification/squeezenet_v1_0/ixrt/README.md @@ -29,14 +29,14 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r 
requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash -mkdir checkpoints -python3 export_onnx.py --origin_model /path/to/squeezenet1_0-b66bff10.pth --output_model checkpoints/squeezenetv10.onnx +mkdir checkpoints +python3 ../../ixrt_common/export.py --model-name squeezenet1_0 --weight squeezenet1_0-b66bff10.pth --output checkpoints/squeezenetv10.onnx ``` ## Model Inference @@ -45,8 +45,8 @@ python3 export_onnx.py --origin_model /path/to/squeezenet1_0-b66bff10.pth --out export PROJ_DIR=./ export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints -export RUN_DIR=./ -export CONFIG_DIR=config/SQUEEZENET_V1_0_CONFIG +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/SQUEEZENET_V1_0_CONFIG ``` ### FP16 diff --git a/models/cv/classification/squeezenet_v1_0/ixrt/build_engine.py b/models/cv/classification/squeezenet_v1_0/ixrt/build_engine.py deleted file mode 100644 index 32f549d8..00000000 --- a/models/cv/classification/squeezenet_v1_0/ixrt/build_engine.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - # print("precision : ", precision) - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/squeezenet_v1_0/ixrt/calibration_dataset.py b/models/cv/classification/squeezenet_v1_0/ixrt/calibration_dataset.py deleted file mode 100644 index b394c76b..00000000 --- a/models/cv/classification/squeezenet_v1_0/ixrt/calibration_dataset.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 
0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/squeezenet_v1_0/ixrt/ci/prepare.sh b/models/cv/classification/squeezenet_v1_0/ixrt/ci/prepare.sh index 063ff0ec..a0769a81 100644 --- a/models/cv/classification/squeezenet_v1_0/ixrt/ci/prepare.sh +++ b/models/cv/classification/squeezenet_v1_0/ixrt/ci/prepare.sh @@ -25,6 +25,6 @@ else echo "Not Support Os" fi -pip install -r requirements.txt +pip install -r ../../ixrt_common/requirements.txt mkdir checkpoints -python3 export_onnx.py --origin_model /root/data/checkpoints/squeezenet1_0-b66bff10.pth --output_model checkpoints/squeezenetv10.onnx \ No newline at end of file +python3 ../../ixrt_common/export.py --model-name squeezenet1_0 --weight squeezenet1_0-b66bff10.pth --output checkpoints/squeezenetv10.onnx \ No newline at end of file diff --git a/models/cv/classification/squeezenet_v1_0/ixrt/common.py b/models/cv/classification/squeezenet_v1_0/ixrt/common.py deleted file mode 100644 index abdc147c..00000000 --- a/models/cv/classification/squeezenet_v1_0/ixrt/common.py +++ /dev/null @@ -1,79 
+0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -import pycuda.driver as cuda - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - allocation = cuda.mem_alloc(size) - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": 
list(shape), - "allocation": allocation, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/squeezenet_v1_0/ixrt/export_onnx.py b/models/cv/classification/squeezenet_v1_0/ixrt/export_onnx.py deleted file mode 100644 index 0fc0e6e2..00000000 --- a/models/cv/classification/squeezenet_v1_0/ixrt/export_onnx.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import torch -import torchvision.models as models -import argparse - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = models.squeezenet1_0() -model.load_state_dict(torch.load(args.origin_model)) -model.cuda() -model.eval() -input = torch.randn(1, 3, 224, 224, device='cuda') -export_onnx_file = args.output_model - -torch.onnx.export(model, - input, - export_onnx_file, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names = ['input'], - output_names = ['output'],) -print(" ") -print('Model has been converted to ONNX') -print("exit") -exit() \ No newline at end of file diff --git a/models/cv/classification/squeezenet_v1_0/ixrt/inference.py b/models/cv/classification/squeezenet_v1_0/ixrt/inference.py deleted file mode 100644 index 11a90c79..00000000 --- a/models/cv/classification/squeezenet_v1_0/ixrt/inference.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -import pycuda.autoinit -import pycuda.driver as cuda -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += 
batch_data.shape[0] - - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - e2e_time = end_time - start_time - print(F"E2E time : {e2e_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 
0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/squeezenet_v1_0/ixrt/modify_batchsize.py b/models/cv/classification/squeezenet_v1_0/ixrt/modify_batchsize.py deleted file mode 100644 index 4ac42a30..00000000 --- a/models/cv/classification/squeezenet_v1_0/ixrt/modify_batchsize.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. 
- dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/squeezenet_v1_0/ixrt/quant.py b/models/cv/classification/squeezenet_v1_0/ixrt/quant.py deleted file mode 100644 index 8006db24..00000000 --- a/models/cv/classification/squeezenet_v1_0/ixrt/quant.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import random -import argparse -import numpy as np -from random import shuffle -from tensorrt.deploy import static_quantize - -import torch -import torchvision.datasets -from calibration_dataset import getdataloader - -def setseed(seed=42): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=20) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=224) - args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - -args = parse_args() -setseed(args.seed) -calibration_dataloader = getdataloader(args.dataset_dir, args.step, args.bsz, img_sz=args.imgsz) -static_quantize(args.model, - calibration_dataloader=calibration_dataloader, - save_quant_onnx_path=os.path.join(args.save_dir, f"quantized_{args.model_name}.onnx"), - observer=args.observer, - data_preprocess=lambda x: x[0].to("cuda"), - quant_format="qdq", - disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/classification/squeezenet_v1_0/ixrt/refine_model.py b/models/cv/classification/squeezenet_v1_0/ixrt/refine_model.py deleted file mode 100644 index 000ee4dc..00000000 --- a/models/cv/classification/squeezenet_v1_0/ixrt/refine_model.py +++ /dev/null @@ -1,291 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor 
Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import argparse -import dataclasses - -import torch -import onnx - -from refine_utils.matmul_to_gemm_pass import FusedGemmPass -from refine_utils.linear_pass import FusedLinearPass - -from refine_utils.common import * - -def get_constant_input_name_of_operator(graph: Graph, operator: Operator): - const = None - for input in operator.inputs: - if not graph.containe_var(input): - continue - - if not graph.is_leaf_variable(input): - continue - - input_var = graph.get_variable(input) - if input_var.value is not None: - const = input - return const - -class FuseLayerNormPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - find_sequence_subgraph( - graph, - [OP.REDUCE_MEAN, OP.SUB, OP.POW, OP.REDUCE_MEAN, OP.ADD, OP.SQRT, OP.DIV, OP.MUL, OP.ADD], - self.fuse_layer_norm, - strict=False - ) - return graph - - def fuse_layer_norm(self, graph: Graph, pattern: PatternGraph): - # 检查 REDUCE_MEAN 的输入是否和 SUB 的输入是一致的 - if pattern.nodes[0].operator.inputs[0] != pattern.nodes[1].operator.inputs[0]: - return - - # 检查 POW 的输入是否和 DIV 的输入是一致的 - if pattern.nodes[2].operator.inputs[0] != pattern.nodes[6].operator.inputs[0]: - return - - # 检查部分算子的输出是否被多个算子使用 - nodes = pattern.nodes - for node in [nodes[0]] + nodes[2:-1]: - next_ops = graph.get_next_operators(node.operator) - if len(next_ops) > 1: - return - - eps = None - for input in 
nodes[4].operator.inputs: - input_var = graph.get_variable(input) - if input_var.value is not None and graph.is_leaf_variable(input): - eps = to_py_type(input_var.value) - - scale = get_constant_input_name_of_operator(graph, nodes[-2].operator) - bias = get_constant_input_name_of_operator(graph, nodes[-1].operator) - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - bias_var = graph.get_variable(bias) - print(bias_var) - - attributes = { - "axis": nodes[0].operator.attributes.axes, - "epsilon": eps, - } - - - layer_norm_op = self.transform.make_operator( - op_type="LayerNormalization", - inputs=[nodes[0].operator.inputs[0], scale, bias], - outputs=[nodes[-1].operator.outputs[0]], - **attributes - ) - - self.transform.add_operator(layer_norm_op) - -class FusedGeluPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True - ) - return graph - - def fuse_gelu(self, graph: Graph, pattern: PatternGraph): - nodes = pattern.nodes - prev_op = self.transform.get_previous_operators(nodes[0].operator)[0] - next_ops = self.transform.get_next_operators(prev_op) - if len(next_ops) != 2: - return - - if nodes[0].operator not in next_ops or nodes[3].operator not in next_ops: - return - - gelu_op_input = None - for input in nodes[3].operator.inputs: - if input in nodes[0].operator.inputs: - gelu_op_input = input - break - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - gelu_op = self.transform.make_operator( - op_type=OP.GELU, - inputs=[gelu_op_input], - outputs=[nodes[-1].operator.outputs[0]] - ) - self.transform.add_operator(gelu_op) - -@dataclasses.dataclass -class NormalizeAttr(BaseOperatorAttr): - p: float = 2.0 - epsilon: float = 1e-12 - axis: int = 1 - - -@registe_operator(OP.GELU) -class GeluOperator(BaseOperator): - - def 
call( - self, - executor, - operator: Operator, - inputs: List, - attr: NormalizeAttr, - ): - return F.gelu(inputs[0]) - - def convert_onnx_operator( - self, ir_graph: Graph, onnx_graph: onnx.GraphProto, node: onnx.NodeProto - ) -> Operator: - return default_converter(ir_graph, onnx_graph, node, attr_cls=attr.EmptyAttr) - - def quantize( - self, - graph: Graph, - op: Operator, - operator_observer_config: QuantOperatorObserverConfig, - quant_outputs: bool = False, - ): - return quant_single_input_operator(graph, op, operator_observer_config, quant_outputs=quant_outputs) - - - -class ClearUnsedVariables(BasePass): - - def process(self, graph: Graph) -> Graph: - vars = list(graph.variables) - - for var in vars: - if len(graph.get_dst_operators(var)) == 0 and graph.is_leaf_variable(var): - graph.delete_variable(var) - - quant_params = list(graph.quant_parameters.keys()) - for var in quant_params: - if not graph.containe_var(var): - graph.quant_parameters.pop(var) - - return graph - -class FormatLayerNorm(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if "LayerNorm" in op.op_type: - self.format_layer_norm(graph, op) - return graph - - def format_layer_norm(self, graph, operator): - if not hasattr(operator.attributes, "axis"): - return - if isinstance(operator.attributes.axis, (tuple, list)): - operator.attributes.axis = operator.attributes.axis[0] - -class FormatReshape(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if op.op_type == "Reshape": - self.format_reshape(graph, op) - - return graph - - def format_reshape(self, graph, operator): - shape = graph.get_variable(operator.inputs[1]) - shape.value = torch.tensor(shape.value, dtype=torch.int64) - -class FormatScalar(BasePass): - - def process(self, graph: Graph): - for var in graph.variables.values(): - var: Variable - use_ops = graph.get_dst_operators(var) - - if len(use_ops) == 0: - continue - - if use_ops[0].op_type 
not in [OP.MUL, OP.ADD, OP.GATHER]: - continue - - if var.value is not None and var.value.ndim == 0: - var.value = var.value.reshape(1) - print(f"Reshape scalar to tensor for {var.name}.") - - return graph - -class RenamePass(BasePass): - - def process(self, graph:Graph): - - names = [name for name in graph.operators.keys()] - for old_name in names: - new_name = old_name.replace("/", "#") - - graph.rename_operator(old_name, new_name) - - names = [name for name in graph.variables.keys()] - for name in names: - new_name = name.replace("/", ".").replace("Output", "out").replace("output", "out") - - graph.rename_vaiable(name, new_name, - with_variables=True, - with_operator_outputs=True) - - return graph - -def create_pipeline(example_inputs): - return PassSequence( - # FuseLayerNormPass(), - FusedGeluPass(), - - # ClearUnsedVariables(), - # FormatLayerNorm(), - # FormatReshape(), - # FormatScalar(), - # RenamePass() - ) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--onnx_path", type=str) - parser.add_argument("--dst_onnx_path", type=str) - - parser.add_argument("--bsz", type=int, default=8, - help="Batch size") - parser.add_argument("--imgsz", type=int, default=224, - help="Image size") - - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - - example_inputs = torch.randn(args.bsz, 3, args.imgsz, args.imgsz) - - refine_pipline = Pipeline( - create_source(f"{args.onnx_path}", example_inputs=example_inputs), - create_pipeline(example_inputs), - create_target( - f"{args.dst_onnx_path}", - example_inputs=example_inputs, - ) - ) - refine_pipline.run() - - print(f"refine the model, input shape={example_inputs.shape}") diff --git a/models/cv/classification/squeezenet_v1_0/ixrt/requirements.txt b/models/cv/classification/squeezenet_v1_0/ixrt/requirements.txt deleted file mode 100644 index 8ea6ea90..00000000 --- a/models/cv/classification/squeezenet_v1_0/ixrt/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -tqdm 
-onnxsim -opencv-python==4.6.0.66 -pycuda \ No newline at end of file diff --git a/models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v10_fp16_accuracy.sh b/models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v10_fp16_accuracy.sh deleted file mode 100644 index 30890ad6..00000000 --- a/models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v10_fp16_accuracy.sh +++ /dev/null @@ -1,143 +0,0 @@ -#!/bin/bash -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -EXIT_STATUS=0 -check_status() -{ - if ((${PIPESTATUS[0]} != 0));then - EXIT_STATUS=1 - fi -} - -# Run paraments -BSZ=32 -TGT=-1 -WARM_UP=0 -LOOP_COUNT=-1 -RUN_MODE=ACC -PRECISION=float16 - -# Update arguments -index=0 -options=$@ -arguments=($options) -for argument in $options -do - index=`expr $index + 1` - case $argument in - --bs) BSZ=${arguments[index]};; - --tgt) TGT=${arguments[index]};; - esac -done - -source ${CONFIG_DIR} -ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} - -echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} -echo DATASETS_DIR : ${DATASETS_DIR} -echo RUN_DIR : ${RUN_DIR} -echo CONFIG_DIR : ${CONFIG_DIR} -echo ====================== Model Info ====================== -echo Model Name : ${MODEL_NAME} -echo Onnx Path : ${ORIGINE_MODEL} - -step=0 -SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx - -# Simplify Model -let step++ -echo; -echo [STEP ${step}] : Simplify Model -if [ -f ${SIM_MODEL} ];then - echo " "Simplify Model, ${SIM_MODEL} has been existed -else - python3 ${RUN_DIR}/simplify_model.py \ - --origin_model $ORIGINE_MODEL \ - --output_model ${SIM_MODEL} - echo " "Generate ${SIM_MODEL} -fi - -# Quant Model -if [ $PRECISION == "int8" ];then - let step++ - echo; - echo [STEP ${step}] : Quant Model - if [[ -z ${QUANT_EXIST_ONNX} ]];then - QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx - fi - if [[ -f ${QUANT_EXIST_ONNX} ]];then - SIM_MODEL=${QUANT_EXIST_ONNX} - echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed - else - python3 ${RUN_DIR}/quant.py \ - --model ${SIM_MODEL} \ - --model_name ${MODEL_NAME} \ - --dataset_dir ${DATASETS_DIR} \ - --observer ${QUANT_OBSERVER} \ - --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ - --save_dir $CHECKPOINTS_DIR \ - --bsz ${QUANT_BATCHSIZE} \ - --step ${QUANT_STEP} \ - --seed ${QUANT_SEED} \ - --imgsz ${IMGSIZE} - SIM_MODEL=${QUANT_EXIST_ONNX} - echo " "Generate ${SIM_MODEL} - fi -fi - -# Change Batchsize -let step++ -echo; -echo [STEP ${step}] : Change Batchsize 
-FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx -if [ -f $FINAL_MODEL ];then - echo " "Change Batchsize Skip, $FINAL_MODEL has been existed -else - python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ - --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} - echo " "Generate ${FINAL_MODEL} -fi - -# Build Engine -let step++ -echo; -echo [STEP ${step}] : Build Engine -ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine -if [ -f $ENGINE_FILE ];then - echo " "Build Engine Skip, $ENGINE_FILE has been existed -else - python3 ${RUN_DIR}/build_engine.py \ - --precision ${PRECISION} \ - --model ${FINAL_MODEL} \ - --engine ${ENGINE_FILE} - echo " "Generate Engine ${ENGINE_FILE} -fi - -# Inference -let step++ -echo; -echo [STEP ${step}] : Inference -python3 ${RUN_DIR}/inference.py \ - --engine_file=${ENGINE_FILE} \ - --datasets_dir=${DATASETS_DIR} \ - --imgsz=${IMGSIZE} \ - --warm_up=${WARM_UP} \ - --loop_count ${LOOP_COUNT} \ - --test_mode ${RUN_MODE} \ - --acc_target ${TGT} \ - --bsz ${BSZ}; check_status - -exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v10_fp16_performance.sh b/models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v10_fp16_performance.sh deleted file mode 100644 index dbd6c8c7..00000000 --- a/models/cv/classification/squeezenet_v1_0/ixrt/scripts/infer_squeezenet_v10_fp16_performance.sh +++ /dev/null @@ -1,143 +0,0 @@ -#!/bin/bash -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -EXIT_STATUS=0 -check_status() -{ - if ((${PIPESTATUS[0]} != 0));then - EXIT_STATUS=1 - fi -} - -# Run paraments -BSZ=32 -TGT=-1 -WARM_UP=3 -LOOP_COUNT=20 -RUN_MODE=FPS -PRECISION=float16 - -# Update arguments -index=0 -options=$@ -arguments=($options) -for argument in $options -do - index=`expr $index + 1` - case $argument in - --bs) BSZ=${arguments[index]};; - --tgt) TGT=${arguments[index]};; - esac -done - -source ${CONFIG_DIR} -ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} - -echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} -echo DATASETS_DIR : ${DATASETS_DIR} -echo RUN_DIR : ${RUN_DIR} -echo CONFIG_DIR : ${CONFIG_DIR} -echo ====================== Model Info ====================== -echo Model Name : ${MODEL_NAME} -echo Onnx Path : ${ORIGINE_MODEL} - -step=0 -SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx - -# Simplify Model -let step++ -echo; -echo [STEP ${step}] : Simplify Model -if [ -f ${SIM_MODEL} ];then - echo " "Simplify Model, ${SIM_MODEL} has been existed -else - python3 ${RUN_DIR}/simplify_model.py \ - --origin_model $ORIGINE_MODEL \ - --output_model ${SIM_MODEL} - echo " "Generate ${SIM_MODEL} -fi - -# Quant Model -if [ $PRECISION == "int8" ];then - let step++ - echo; - echo [STEP ${step}] : Quant Model - if [[ -z ${QUANT_EXIST_ONNX} ]];then - QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx - fi - if [[ -f ${QUANT_EXIST_ONNX} ]];then - SIM_MODEL=${QUANT_EXIST_ONNX} - echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed - else - python3 ${RUN_DIR}/quant.py \ - --model ${SIM_MODEL} \ - --model_name ${MODEL_NAME} \ - 
--dataset_dir ${DATASETS_DIR} \ - --observer ${QUANT_OBSERVER} \ - --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ - --save_dir $CHECKPOINTS_DIR \ - --bsz ${QUANT_BATCHSIZE} \ - --step ${QUANT_STEP} \ - --seed ${QUANT_SEED} \ - --imgsz ${IMGSIZE} - SIM_MODEL=${QUANT_EXIST_ONNX} - echo " "Generate ${SIM_MODEL} - fi -fi - -# Change Batchsize -let step++ -echo; -echo [STEP ${step}] : Change Batchsize -FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_${BSZ}.onnx -if [ -f $FINAL_MODEL ];then - echo " "Change Batchsize Skip, $FINAL_MODEL has been existed -else - python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ - --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} - echo " "Generate ${FINAL_MODEL} -fi - -# Build Engine -let step++ -echo; -echo [STEP ${step}] : Build Engine -ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine -if [ -f $ENGINE_FILE ];then - echo " "Build Engine Skip, $ENGINE_FILE has been existed -else - python3 ${RUN_DIR}/build_engine.py \ - --precision ${PRECISION} \ - --model ${FINAL_MODEL} \ - --engine ${ENGINE_FILE} - echo " "Generate Engine ${ENGINE_FILE} -fi - -# Inference -let step++ -echo; -echo [STEP ${step}] : Inference -python3 ${RUN_DIR}/inference.py \ - --engine_file=${ENGINE_FILE} \ - --datasets_dir=${DATASETS_DIR} \ - --imgsz=${IMGSIZE} \ - --warm_up=${WARM_UP} \ - --loop_count ${LOOP_COUNT} \ - --test_mode ${RUN_MODE} \ - --fps_target ${TGT} \ - --bsz ${BSZ}; check_status - -exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/squeezenet_v1_0/ixrt/simplify_model.py b/models/cv/classification/squeezenet_v1_0/ixrt/simplify_model.py deleted file mode 100644 index 4d53a474..00000000 --- a/models/cv/classification/squeezenet_v1_0/ixrt/simplify_model.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/squeezenet_v1_1/ixrt/README.md b/models/cv/classification/squeezenet_v1_1/ixrt/README.md index 953f8155..2efb0adb 100644 --- a/models/cv/classification/squeezenet_v1_1/ixrt/README.md +++ b/models/cv/classification/squeezenet_v1_1/ixrt/README.md @@ -29,14 +29,14 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash -mkdir checkpoints -python3 export_onnx.py --origin_model /path/to/squeezenet1_1-b8a52dc0.pth --output_model checkpoints/squeezenet_v1_1.onnx +mkdir checkpoints +python3 ../../ixrt_common/export.py --model-name squeezenet1_1 --weight squeezenet1_1-b8a52dc0.pth --output checkpoints/squeezenet_v1_1.onnx ``` ## Model 
Inference @@ -45,8 +45,8 @@ python3 export_onnx.py --origin_model /path/to/squeezenet1_1-b8a52dc0.pth --out export PROJ_DIR=./ export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints -export RUN_DIR=./ -export CONFIG_DIR=config/SQUEEZENET_V1_1_CONFIG +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/SQUEEZENET_V1_1_CONFIG ``` diff --git a/models/cv/classification/squeezenet_v1_1/ixrt/build_engine.py b/models/cv/classification/squeezenet_v1_1/ixrt/build_engine.py deleted file mode 100644 index 32f549d8..00000000 --- a/models/cv/classification/squeezenet_v1_1/ixrt/build_engine.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - # print("precision : ", precision) - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff --git a/models/cv/classification/squeezenet_v1_1/ixrt/build_i8_engine.py b/models/cv/classification/squeezenet_v1_1/ixrt/build_i8_engine.py deleted file mode 100644 index 6038b33f..00000000 --- a/models/cv/classification/squeezenet_v1_1/ixrt/build_i8_engine.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import argparse -import json -import os - -import tensorrt -import tensorrt as trt - -TRT_LOGGER = trt.Logger(tensorrt.Logger.VERBOSE) - -EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - - -def GiB(val): - return val * 1 << 30 - - -def json_load(filename): - with open(filename) as json_file: - data = json.load(json_file) - return data - - -def setDynamicRange(network, json_file): - """Sets ranges for network layers.""" - quant_param_json = json_load(json_file) - act_quant = quant_param_json["act_quant_info"] - - for i in range(network.num_inputs): - input_tensor = network.get_input(i) - if act_quant.__contains__(input_tensor.name): - print(input_tensor.name) - value = act_quant[input_tensor.name] - tensor_max = abs(value) - tensor_min = -abs(value) - input_tensor.dynamic_range = (tensor_min, tensor_max) - - for i in range(network.num_layers): - layer = network.get_layer(i) - - for output_index in range(layer.num_outputs): - tensor = layer.get_output(output_index) - - if act_quant.__contains__(tensor.name): - value = act_quant[tensor.name] - tensor_max = abs(value) - tensor_min = -abs(value) - tensor.dynamic_range = (tensor_min, tensor_max) - else: - print("\033[1;32m%s\033[0m" % tensor.name) - - -def build_engine(onnx_file, json_file, engine_file): - builder = trt.Builder(TRT_LOGGER) - network = builder.create_network(EXPLICIT_BATCH) - - config = builder.create_builder_config() - - # If it is a dynamic onnx model , you need to add the following. 
- # profile = builder.create_optimization_profile() - # profile.set_shape("input_name", (batch, channels, min_h, min_w), (batch, channels, opt_h, opt_w), (batch, channels, max_h, max_w)) - # config.add_optimization_profile(profile) - - parser = trt.OnnxParser(network, TRT_LOGGER) - # config.max_workspace_size = GiB(1) - if not os.path.exists(onnx_file): - quit("ONNX file {} not found".format(onnx_file)) - - with open(onnx_file, "rb") as model: - if not parser.parse(model.read()): - print("ERROR: Failed to parse the ONNX file.") - for error in range(parser.num_errors): - print(parser.get_error(error)) - return None - - config.set_flag(trt.BuilderFlag.INT8) - - setDynamicRange(network, json_file) - - engine = builder.build_engine(network, config) - - with open(engine_file, "wb") as f: - f.write(engine.serialize()) - - -if __name__ == "__main__": - # Add plugins if needed - # import ctypes - # ctypes.CDLL("libmmdeploy_tensorrt_ops.so") - parser = argparse.ArgumentParser( - description="Writing qparams to onnx to convert tensorrt engine." - ) - parser.add_argument("--onnx", type=str, default=None) - parser.add_argument("--qparam_json", type=str, default=None) - parser.add_argument("--engine", type=str, default=None) - arg = parser.parse_args() - - build_engine(arg.onnx, arg.qparam_json, arg.engine) - print("\033[1;32mgenerate %s\033[0m" % arg.engine) \ No newline at end of file diff --git a/models/cv/classification/squeezenet_v1_1/ixrt/calibration_dataset.py b/models/cv/classification/squeezenet_v1_1/ixrt/calibration_dataset.py deleted file mode 100644 index b394c76b..00000000 --- a/models/cv/classification/squeezenet_v1_1/ixrt/calibration_dataset.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 
0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/squeezenet_v1_1/ixrt/ci/prepare.sh b/models/cv/classification/squeezenet_v1_1/ixrt/ci/prepare.sh index b88bcb1f..ad9f7d98 100644 --- a/models/cv/classification/squeezenet_v1_1/ixrt/ci/prepare.sh +++ b/models/cv/classification/squeezenet_v1_1/ixrt/ci/prepare.sh @@ -25,6 +25,6 @@ else echo "Not Support Os" fi -pip install -r requirements.txt +pip install -r ../../ixrt_common/requirements.txt mkdir checkpoints -python3 export_onnx.py --origin_model /root/data/checkpoints/squeezenet_v1_1.pth --output_model checkpoints/squeezenet_v1_1.onnx \ No newline at end of file +python3 ../../ixrt_common/export.py --model-name squeezenet1_1 --weight squeezenet1_1-b8a52dc0.pth --output checkpoints/squeezenet_v1_1.onnx \ No newline at end of file diff --git a/models/cv/classification/squeezenet_v1_1/ixrt/common.py b/models/cv/classification/squeezenet_v1_1/ixrt/common.py deleted file mode 100644 index abdc147c..00000000 --- a/models/cv/classification/squeezenet_v1_1/ixrt/common.py +++ /dev/null @@ -1,79 +0,0 
@@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -import pycuda.driver as cuda - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - allocation = cuda.mem_alloc(size) - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": 
list(shape), - "allocation": allocation, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/squeezenet_v1_1/ixrt/export_onnx.py b/models/cv/classification/squeezenet_v1_1/ixrt/export_onnx.py deleted file mode 100644 index 2ff4fe09..00000000 --- a/models/cv/classification/squeezenet_v1_1/ixrt/export_onnx.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import torch -import torchvision.models as models -import argparse - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = models.squeezenet1_1() -model.load_state_dict(torch.load(args.origin_model)) -model.cuda() -model.eval() -input = torch.randn(1, 3, 224, 224, device='cuda') -export_onnx_file = args.output_model - -torch.onnx.export(model, - input, - export_onnx_file, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names = ['input'], - output_names = ['output'],) -print(" ") -print('Model has been converted to ONNX') -print("exit") -exit() \ No newline at end of file diff --git a/models/cv/classification/squeezenet_v1_1/ixrt/inference.py b/models/cv/classification/squeezenet_v1_1/ixrt/inference.py deleted file mode 100644 index 4e178df4..00000000 --- a/models/cv/classification/squeezenet_v1_1/ixrt/inference.py +++ /dev/null @@ -1,161 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -import pycuda.autoinit -import pycuda.driver as cuda -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += 
batch_data.shape[0] - - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - end2end_time = end_time - start_time - - print(F"E2E time : {end2end_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 
0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/squeezenet_v1_1/ixrt/modify_batchsize.py b/models/cv/classification/squeezenet_v1_1/ixrt/modify_batchsize.py deleted file mode 100644 index 4ac42a30..00000000 --- a/models/cv/classification/squeezenet_v1_1/ixrt/modify_batchsize.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. 
- dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/squeezenet_v1_1/ixrt/quant.py b/models/cv/classification/squeezenet_v1_1/ixrt/quant.py deleted file mode 100644 index 5d71c828..00000000 --- a/models/cv/classification/squeezenet_v1_1/ixrt/quant.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-"""这是一个高度自动化的 PPQ 量化的入口脚本,将你的模型和数据按要求进行打包: - -在自动化 API 中,我们使用 QuantizationSetting 对象传递量化参数。 - -This file will show you how to quantize your network with PPQ - You should prepare your model and calibration dataset as follow: - - ~/working/model.onnx <-- your model - ~/working/data/*.npy or ~/working/data/*.bin <-- your dataset - -if you are using caffe model: - ~/working/model.caffemdoel <-- your model - ~/working/model.prototext <-- your model - -### MAKE SURE YOUR INPUT LAYOUT IS [N, C, H, W] or [C, H, W] ### - -quantized model will be generated at: ~/working/quantized.onnx -""" -from ppq import * -from ppq.api import * -import os -from calibration_dataset import getdataloader -import argparse -import random -import numpy as np -import torch - - -def setseed(seed=42): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], - default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=20) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=224) - args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - - -config = parse_args() - -# modify configuration below: -WORKING_DIRECTORY = 'checkpoints' # choose your working directory -TARGET_PLATFORM = TargetPlatform.TRT_INT8 # choose your target platform -MODEL_TYPE = NetworkFramework.ONNX # or NetworkFramework.CAFFE -INPUT_LAYOUT = 'chw' # input data layout, 
chw or hwc -NETWORK_INPUTSHAPE = [1, 3, 224, 224] # input shape of your network -EXECUTING_DEVICE = 'cuda' # 'cuda' or 'cpu'. -REQUIRE_ANALYSE = False -TRAINING_YOUR_NETWORK = False # 是否需要 Finetuning 一下你的网络 -# ------------------------------------------------------------------- -# 加载你的模型文件,PPQ 将会把 onnx 或者 caffe 模型文件解析成自己的格式 -# 如果你正使用 pytorch, tensorflow 等框架,你可以先将模型导出成 onnx -# 使用 torch.onnx.export 即可,如果你在导出 torch 模型时发生错误,欢迎与我们联系。 -# ------------------------------------------------------------------- -graph = None -if MODEL_TYPE == NetworkFramework.ONNX: - graph = load_onnx_graph(onnx_import_file=config.model) -if MODEL_TYPE == NetworkFramework.CAFFE: - graph = load_caffe_graph( - caffemodel_path=os.path.join(WORKING_DIRECTORY, 'model.caffemodel'), - prototxt_path=os.path.join(WORKING_DIRECTORY, 'model.prototxt')) -assert graph is not None, 'Graph Loading Error, Check your input again.' - -# ------------------------------------------------------------------- -# SETTING 对象用于控制 PPQ 的量化逻辑,主要描述了图融合逻辑、调度方案、量化细节策略等 -# 当你的网络量化误差过高时,你需要修改 SETTING 对象中的属性来进行特定的优化 -# ------------------------------------------------------------------- -QS = QuantizationSettingFactory.default_setting() - -# ------------------------------------------------------------------- -# 下面向你展示了如何使用 finetuning 过程提升量化精度 -# 在 PPQ 中我们提供了十余种算法用来帮助你恢复精度 -# 开启他们的方式都是 QS.xxxx = True -# 按需使用,不要全部打开,容易起飞 -# ------------------------------------------------------------------- -if TRAINING_YOUR_NETWORK: - QS.lsq_optimization = True # 启动网络再训练过程,降低量化误差 - QS.lsq_optimization_setting.steps = 500 # 再训练步数,影响训练时间,500 步大概几分钟 - QS.lsq_optimization_setting.collecting_device = 'cuda' # 缓存数据放在那,cuda 就是放在gpu,如果显存超了你就换成 'cpu' - - -dataloader = getdataloader(config.dataset_dir, config.step, batch_size=config.bsz, img_sz=config.imgsz) -# ENABLE CUDA KERNEL 会加速量化效率 3x ~ 10x,但是你如果没有装相应编译环境的话是编译不了的 -# 你可以尝试安装编译环境,或者在不启动 CUDA KERNEL 的情况下完成量化:移除 with ENABLE_CUDA_KERNEL(): 即可 -with ENABLE_CUDA_KERNEL(): - print('网络正量化中,根据你的量化配置,这将需要一段时间:') - 
quantized = quantize_native_model( - setting=QS, # setting 对象用来控制标准量化逻辑 - model=graph, - calib_dataloader=dataloader, - calib_steps=config.step, - input_shape=NETWORK_INPUTSHAPE, # 如果你的网络只有一个输入,使用这个参数传参 - inputs=None, - # 如果你的网络有多个输入,使用这个参数传参,就是 input_shape=None, inputs=[torch.zeros(1,3,224,224), torch.zeros(1,3,224,224)] - collate_fn=lambda x: x[0].to(EXECUTING_DEVICE), # collate_fn 跟 torch dataloader 的 collate fn 是一样的,用于数据预处理, - # 你当然也可以用 torch dataloader 的那个,然后设置这个为 None - platform=TARGET_PLATFORM, - device=EXECUTING_DEVICE, - do_quantize=True) - - # ------------------------------------------------------------------- - # 如果你需要执行量化后的神经网络并得到结果,则需要创建一个 executor - # 这个 executor 的行为和 torch.Module 是类似的,你可以利用这个东西来获取执行结果 - # 请注意,必须在 export 之前执行此操作。 - # ------------------------------------------------------------------- - executor = TorchExecutor(graph=quantized, device=EXECUTING_DEVICE) - # output = executor.forward(input) - - # ------------------------------------------------------------------- - # PPQ 计算量化误差时,使用信噪比的倒数作为指标,即噪声能量 / 信号能量 - # 量化误差 0.1 表示在整体信号中,量化噪声的能量约为 10% - # 你应当注意,在 graphwise_error_analyse 分析中,我们衡量的是累计误差 - # 网络的最后一层往往都具有较大的累计误差,这些误差是其前面的所有层所共同造成的 - # 你需要使用 layerwise_error_analyse 逐层分析误差的来源 - # ------------------------------------------------------------------- - print('正计算网络量化误差(SNR),最后一层的误差应小于 0.1 以保证量化精度:') - reports = graphwise_error_analyse( - graph=quantized, running_device=EXECUTING_DEVICE, steps=32, - dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE)) - for op, snr in reports.items(): - if snr > 0.1: ppq_warning(f'层 {op} 的累计量化误差显著,请考虑进行优化') - - if REQUIRE_ANALYSE: - print('正计算逐层量化误差(SNR),每一层的独立量化误差应小于 0.1 以保证量化精度:') - layerwise_error_analyse(graph=quantized, running_device=EXECUTING_DEVICE, - interested_outputs=None, - dataloader=dataloader, collate_fn=lambda x: x.to(EXECUTING_DEVICE)) - - # ------------------------------------------------------------------- - # 使用 export_ppq_graph 函数来导出量化后的模型 - # PPQ 会根据你所选择的导出平台来修改模型格式 - # 
------------------------------------------------------------------- - print('网络量化结束,正在生成目标文件:') - export_ppq_graph( - graph=quantized, platform=TARGET_PLATFORM, - graph_save_to=os.path.join(config.save_dir, f"quantized_{config.model_name}.onnx"), - config_save_to=os.path.join(config.save_dir, 'quant_cfg.json')) diff --git a/models/cv/classification/squeezenet_v1_1/ixrt/refine_model.py b/models/cv/classification/squeezenet_v1_1/ixrt/refine_model.py deleted file mode 100644 index 000ee4dc..00000000 --- a/models/cv/classification/squeezenet_v1_1/ixrt/refine_model.py +++ /dev/null @@ -1,291 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import argparse -import dataclasses - -import torch -import onnx - -from refine_utils.matmul_to_gemm_pass import FusedGemmPass -from refine_utils.linear_pass import FusedLinearPass - -from refine_utils.common import * - -def get_constant_input_name_of_operator(graph: Graph, operator: Operator): - const = None - for input in operator.inputs: - if not graph.containe_var(input): - continue - - if not graph.is_leaf_variable(input): - continue - - input_var = graph.get_variable(input) - if input_var.value is not None: - const = input - return const - -class FuseLayerNormPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - find_sequence_subgraph( - graph, - [OP.REDUCE_MEAN, OP.SUB, OP.POW, OP.REDUCE_MEAN, OP.ADD, OP.SQRT, OP.DIV, OP.MUL, OP.ADD], - self.fuse_layer_norm, - strict=False - ) - return graph - - def fuse_layer_norm(self, graph: Graph, pattern: PatternGraph): - # 检查 REDUCE_MEAN 的输入是否和 SUB 的输入是一致的 - if pattern.nodes[0].operator.inputs[0] != pattern.nodes[1].operator.inputs[0]: - return - - # 检查 POW 的输入是否和 DIV 的输入是一致的 - if pattern.nodes[2].operator.inputs[0] != pattern.nodes[6].operator.inputs[0]: - return - - # 检查部分算子的输出是否被多个算子使用 - nodes = pattern.nodes - for node in [nodes[0]] + nodes[2:-1]: - next_ops = graph.get_next_operators(node.operator) - if len(next_ops) > 1: - return - - eps = None - for input in nodes[4].operator.inputs: - input_var = graph.get_variable(input) - if input_var.value is not None and graph.is_leaf_variable(input): - eps = to_py_type(input_var.value) - - scale = get_constant_input_name_of_operator(graph, nodes[-2].operator) - bias = get_constant_input_name_of_operator(graph, nodes[-1].operator) - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - bias_var = graph.get_variable(bias) - print(bias_var) - - attributes = { - "axis": nodes[0].operator.attributes.axes, - "epsilon": eps, - } - - - layer_norm_op = self.transform.make_operator( 
- op_type="LayerNormalization", - inputs=[nodes[0].operator.inputs[0], scale, bias], - outputs=[nodes[-1].operator.outputs[0]], - **attributes - ) - - self.transform.add_operator(layer_norm_op) - -class FusedGeluPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True - ) - return graph - - def fuse_gelu(self, graph: Graph, pattern: PatternGraph): - nodes = pattern.nodes - prev_op = self.transform.get_previous_operators(nodes[0].operator)[0] - next_ops = self.transform.get_next_operators(prev_op) - if len(next_ops) != 2: - return - - if nodes[0].operator not in next_ops or nodes[3].operator not in next_ops: - return - - gelu_op_input = None - for input in nodes[3].operator.inputs: - if input in nodes[0].operator.inputs: - gelu_op_input = input - break - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - gelu_op = self.transform.make_operator( - op_type=OP.GELU, - inputs=[gelu_op_input], - outputs=[nodes[-1].operator.outputs[0]] - ) - self.transform.add_operator(gelu_op) - -@dataclasses.dataclass -class NormalizeAttr(BaseOperatorAttr): - p: float = 2.0 - epsilon: float = 1e-12 - axis: int = 1 - - -@registe_operator(OP.GELU) -class GeluOperator(BaseOperator): - - def call( - self, - executor, - operator: Operator, - inputs: List, - attr: NormalizeAttr, - ): - return F.gelu(inputs[0]) - - def convert_onnx_operator( - self, ir_graph: Graph, onnx_graph: onnx.GraphProto, node: onnx.NodeProto - ) -> Operator: - return default_converter(ir_graph, onnx_graph, node, attr_cls=attr.EmptyAttr) - - def quantize( - self, - graph: Graph, - op: Operator, - operator_observer_config: QuantOperatorObserverConfig, - quant_outputs: bool = False, - ): - return quant_single_input_operator(graph, op, operator_observer_config, quant_outputs=quant_outputs) - - - -class 
ClearUnsedVariables(BasePass): - - def process(self, graph: Graph) -> Graph: - vars = list(graph.variables) - - for var in vars: - if len(graph.get_dst_operators(var)) == 0 and graph.is_leaf_variable(var): - graph.delete_variable(var) - - quant_params = list(graph.quant_parameters.keys()) - for var in quant_params: - if not graph.containe_var(var): - graph.quant_parameters.pop(var) - - return graph - -class FormatLayerNorm(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if "LayerNorm" in op.op_type: - self.format_layer_norm(graph, op) - return graph - - def format_layer_norm(self, graph, operator): - if not hasattr(operator.attributes, "axis"): - return - if isinstance(operator.attributes.axis, (tuple, list)): - operator.attributes.axis = operator.attributes.axis[0] - -class FormatReshape(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if op.op_type == "Reshape": - self.format_reshape(graph, op) - - return graph - - def format_reshape(self, graph, operator): - shape = graph.get_variable(operator.inputs[1]) - shape.value = torch.tensor(shape.value, dtype=torch.int64) - -class FormatScalar(BasePass): - - def process(self, graph: Graph): - for var in graph.variables.values(): - var: Variable - use_ops = graph.get_dst_operators(var) - - if len(use_ops) == 0: - continue - - if use_ops[0].op_type not in [OP.MUL, OP.ADD, OP.GATHER]: - continue - - if var.value is not None and var.value.ndim == 0: - var.value = var.value.reshape(1) - print(f"Reshape scalar to tensor for {var.name}.") - - return graph - -class RenamePass(BasePass): - - def process(self, graph:Graph): - - names = [name for name in graph.operators.keys()] - for old_name in names: - new_name = old_name.replace("/", "#") - - graph.rename_operator(old_name, new_name) - - names = [name for name in graph.variables.keys()] - for name in names: - new_name = name.replace("/", ".").replace("Output", 
"out").replace("output", "out") - - graph.rename_vaiable(name, new_name, - with_variables=True, - with_operator_outputs=True) - - return graph - -def create_pipeline(example_inputs): - return PassSequence( - # FuseLayerNormPass(), - FusedGeluPass(), - - # ClearUnsedVariables(), - # FormatLayerNorm(), - # FormatReshape(), - # FormatScalar(), - # RenamePass() - ) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--onnx_path", type=str) - parser.add_argument("--dst_onnx_path", type=str) - - parser.add_argument("--bsz", type=int, default=8, - help="Batch size") - parser.add_argument("--imgsz", type=int, default=224, - help="Image size") - - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - - example_inputs = torch.randn(args.bsz, 3, args.imgsz, args.imgsz) - - refine_pipline = Pipeline( - create_source(f"{args.onnx_path}", example_inputs=example_inputs), - create_pipeline(example_inputs), - create_target( - f"{args.dst_onnx_path}", - example_inputs=example_inputs, - ) - ) - refine_pipline.run() - - print(f"refine the model, input shape={example_inputs.shape}") diff --git a/models/cv/classification/squeezenet_v1_1/ixrt/scripts/infer_squeezenet_v1_1_fp16_accuracy.sh b/models/cv/classification/squeezenet_v1_1/ixrt/scripts/infer_squeezenet_v1_1_fp16_accuracy.sh index 30890ad6..e62cc5d7 100644 --- a/models/cv/classification/squeezenet_v1_1/ixrt/scripts/infer_squeezenet_v1_1_fp16_accuracy.sh +++ b/models/cv/classification/squeezenet_v1_1/ixrt/scripts/infer_squeezenet_v1_1_fp16_accuracy.sh @@ -52,6 +52,8 @@ echo RUN_DIR : ${RUN_DIR} echo CONFIG_DIR : ${CONFIG_DIR} echo ====================== Model Info ====================== echo Model Name : ${MODEL_NAME} +echo Model Input Name : ${MODEL_INPUT_NAME} +echo Model Output Name : ${MODEL_OUTPUT_NAME} echo Onnx Path : ${ORIGINE_MODEL} step=0 diff --git a/models/cv/classification/squeezenet_v1_1/ixrt/scripts/infer_squeezenet_v1_1_fp16_performance.sh 
b/models/cv/classification/squeezenet_v1_1/ixrt/scripts/infer_squeezenet_v1_1_fp16_performance.sh index dbd6c8c7..05c9986f 100644 --- a/models/cv/classification/squeezenet_v1_1/ixrt/scripts/infer_squeezenet_v1_1_fp16_performance.sh +++ b/models/cv/classification/squeezenet_v1_1/ixrt/scripts/infer_squeezenet_v1_1_fp16_performance.sh @@ -52,6 +52,8 @@ echo RUN_DIR : ${RUN_DIR} echo CONFIG_DIR : ${CONFIG_DIR} echo ====================== Model Info ====================== echo Model Name : ${MODEL_NAME} +echo Model Input Name : ${MODEL_INPUT_NAME} +echo Model Output Name : ${MODEL_OUTPUT_NAME} echo Onnx Path : ${ORIGINE_MODEL} step=0 diff --git a/models/cv/classification/squeezenet_v1_1/ixrt/scripts/infer_squeezenet_v1_1_int8_accuracy.sh b/models/cv/classification/squeezenet_v1_1/ixrt/scripts/infer_squeezenet_v1_1_int8_accuracy.sh index 2b2db01a..e3a8bb78 100644 --- a/models/cv/classification/squeezenet_v1_1/ixrt/scripts/infer_squeezenet_v1_1_int8_accuracy.sh +++ b/models/cv/classification/squeezenet_v1_1/ixrt/scripts/infer_squeezenet_v1_1_int8_accuracy.sh @@ -13,7 +13,7 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. 
-set -x + EXIT_STATUS=0 check_status() { @@ -29,7 +29,7 @@ WARM_UP=0 LOOP_COUNT=-1 RUN_MODE=ACC PRECISION=int8 -export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python + # Update arguments index=0 options=$@ @@ -44,7 +44,6 @@ do done source ${CONFIG_DIR} -echo ${QUANT_OBSERVER} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} @@ -62,15 +61,16 @@ SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx # Simplify Model let step++ - echo [STEP ${step}] : Simplify Model - if [ -f ${SIM_MODEL} ];then - echo " "Simplify Model, ${SIM_MODEL} has been existed - else - python3 ${RUN_DIR}/simplify_model.py \ - --origin_model $ORIGINE_MODEL \ - --output_model ${SIM_MODEL} - echo " "Generate ${SIM_MODEL} - fi +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi # Quant Model if [ $PRECISION == "int8" ];then @@ -100,36 +100,36 @@ if [ $PRECISION == "int8" ];then fi fi - # Change Batchsize - let step++ - echo; - echo [STEP ${step}] : Change Batchsize - FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx - if [ -f $FINAL_MODEL ];then - echo " "Change Batchsize Skip, $FINAL_MODEL has been existed - else - python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ - --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} - echo " "Generate ${FINAL_MODEL} - fi +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi - # Build Engine - let step++ - echo; - echo [STEP ${step}] 
: Build Engine - ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine - if [ -f $ENGINE_FILE ];then - echo " "Build Engine Skip, $ENGINE_FILE has been existed - else - python3 ${RUN_DIR}/build_i8_engine.py \ - --onnx ${FINAL_MODEL} \ - --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \ - --engine ${ENGINE_FILE} - echo " "Generate Engine ${ENGINE_FILE} - fi +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi # Inference -# let step++ +let step++ echo; echo [STEP ${step}] : Inference python3 ${RUN_DIR}/inference.py \ @@ -142,4 +142,4 @@ python3 ${RUN_DIR}/inference.py \ --acc_target ${TGT} \ --bsz ${BSZ}; check_status -exit ${EXIT_STATUS} +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/squeezenet_v1_1/ixrt/scripts/infer_squeezenet_v1_1_int8_performance.sh b/models/cv/classification/squeezenet_v1_1/ixrt/scripts/infer_squeezenet_v1_1_int8_performance.sh index bec51520..c526b81a 100644 --- a/models/cv/classification/squeezenet_v1_1/ixrt/scripts/infer_squeezenet_v1_1_int8_performance.sh +++ b/models/cv/classification/squeezenet_v1_1/ixrt/scripts/infer_squeezenet_v1_1_int8_performance.sh @@ -13,6 +13,7 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. 
+ EXIT_STATUS=0 check_status() { @@ -28,7 +29,7 @@ WARM_UP=3 LOOP_COUNT=20 RUN_MODE=FPS PRECISION=int8 -export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python + # Update arguments index=0 options=$@ @@ -43,7 +44,6 @@ do done source ${CONFIG_DIR} -echo ${QUANT_OBSERVER} ORIGINE_MODEL=${CHECKPOINTS_DIR}/${ORIGINE_MODEL} echo CHECKPOINTS_DIR : ${CHECKPOINTS_DIR} @@ -61,15 +61,16 @@ SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx # Simplify Model let step++ - echo [STEP ${step}] : Simplify Model - if [ -f ${SIM_MODEL} ];then - echo " "Simplify Model, ${SIM_MODEL} has been existed - else - python3 ${RUN_DIR}/simplify_model.py \ - --origin_model $ORIGINE_MODEL \ - --output_model ${SIM_MODEL} - echo " "Generate ${SIM_MODEL} - fi +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi # Quant Model if [ $PRECISION == "int8" ];then @@ -99,36 +100,36 @@ if [ $PRECISION == "int8" ];then fi fi - # Change Batchsize - let step++ - echo; - echo [STEP ${step}] : Change Batchsize - FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx - if [ -f $FINAL_MODEL ];then - echo " "Change Batchsize Skip, $FINAL_MODEL has been existed - else - python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ - --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} - echo " "Generate ${FINAL_MODEL} - fi +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi - # Build Engine - let step++ - echo; - echo [STEP ${step}] : Build 
Engine - ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine - if [ -f $ENGINE_FILE ];then - echo " "Build Engine Skip, $ENGINE_FILE has been existed - else - python3 ${RUN_DIR}/build_i8_engine.py \ - --onnx ${FINAL_MODEL} \ - --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \ - --engine ${ENGINE_FILE} - echo " "Generate Engine ${ENGINE_FILE} - fi +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi # Inference -# let step++ +let step++ echo; echo [STEP ${step}] : Inference python3 ${RUN_DIR}/inference.py \ @@ -138,7 +139,7 @@ python3 ${RUN_DIR}/inference.py \ --warm_up=${WARM_UP} \ --loop_count ${LOOP_COUNT} \ --test_mode ${RUN_MODE} \ - --acc_target ${TGT} \ + --fps_target ${TGT} \ --bsz ${BSZ}; check_status exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/squeezenet_v1_1/ixrt/simplify_model.py b/models/cv/classification/squeezenet_v1_1/ixrt/simplify_model.py deleted file mode 100644 index 4d53a474..00000000 --- a/models/cv/classification/squeezenet_v1_1/ixrt/simplify_model.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/vgg16/ixrt/README.md b/models/cv/classification/vgg16/ixrt/README.md index ca59cd19..920c6943 100644 --- a/models/cv/classification/vgg16/ixrt/README.md +++ b/models/cv/classification/vgg16/ixrt/README.md @@ -28,14 +28,14 @@ yum install -y mesa-libGL ## Ubuntu apt install -y libgl1-mesa-glx -pip3 install -r requirements.txt +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash mkdir checkpoints -python3 export_onnx.py --origin_model /path/to/vgg16-397923af.pth --output_model checkpoints/vgg16.onnx +python3 ../../ixrt_common/export.py --model-name vgg16 --weight vgg16-397923af.pth --output checkpoints/vgg16.onnx ``` ## Model Inference @@ -44,8 +44,8 @@ python3 export_onnx.py --origin_model /path/to/vgg16-397923af.pth --output_model export PROJ_DIR=./ export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints -export RUN_DIR=./ -export CONFIG_DIR=config/VGG16_CONFIG +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/VGG16_CONFIG ``` ### FP16 diff --git a/models/cv/classification/vgg16/ixrt/build_engine.py b/models/cv/classification/vgg16/ixrt/build_engine.py deleted file mode 100644 index 32f549d8..00000000 --- 
a/models/cv/classification/vgg16/ixrt/build_engine.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - # print("precision : ", precision) - build_config.set_flag(precision) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - args = parser.parse_args() - return args - -if __name__ == "__main__": - args = parse_args() - main(args) \ No newline at end of file diff 
--git a/models/cv/classification/vgg16/ixrt/calibration_dataset.py b/models/cv/classification/vgg16/ixrt/calibration_dataset.py deleted file mode 100644 index b394c76b..00000000 --- a/models/cv/classification/vgg16/ixrt/calibration_dataset.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if 
self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader \ No newline at end of file diff --git a/models/cv/classification/vgg16/ixrt/ci/prepare.sh b/models/cv/classification/vgg16/ixrt/ci/prepare.sh index 7492df8c..df13e456 100644 --- a/models/cv/classification/vgg16/ixrt/ci/prepare.sh +++ b/models/cv/classification/vgg16/ixrt/ci/prepare.sh @@ -25,6 +25,6 @@ else echo "Not Support Os" fi -pip install -r requirements.txt +pip install -r ../../ixrt_common/requirements.txt mkdir checkpoints -python3 export_onnx.py --origin_model /root/data/checkpoints/vgg16.pth --output_model checkpoints/vgg16.onnx \ No newline at 
end of file +python3 ../../ixrt_common/export.py --model-name vgg16 --weight vgg16-397923af.pth --output checkpoints/vgg16.onnx \ No newline at end of file diff --git a/models/cv/classification/vgg16/ixrt/common.py b/models/cv/classification/vgg16/ixrt/common.py deleted file mode 100644 index abdc147c..00000000 --- a/models/cv/classification/vgg16/ixrt/common.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -import pycuda.driver as cuda - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - allocation = cuda.mem_alloc(size) - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - } - print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/vgg16/ixrt/export_onnx.py b/models/cv/classification/vgg16/ixrt/export_onnx.py deleted file mode 100644 index 17d8bb55..00000000 --- a/models/cv/classification/vgg16/ixrt/export_onnx.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import torch -import torchvision.models as models -import argparse - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = models.vgg16() -model.load_state_dict(torch.load(args.origin_model)) -model.cuda() -model.eval() -input = torch.randn(1, 3, 224, 224, device='cuda') -export_onnx_file = args.output_model - -torch.onnx.export(model, - input, - export_onnx_file, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names = ['input'], - output_names = ['output'],) -print(" ") -print('Model has been converted to ONNX') -print("exit") -exit() \ No newline at end of file diff --git a/models/cv/classification/vgg16/ixrt/inference.py b/models/cv/classification/vgg16/ixrt/inference.py deleted file mode 100644 index 4e178df4..00000000 --- a/models/cv/classification/vgg16/ixrt/inference.py +++ /dev/null @@ -1,161 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -import pycuda.autoinit -import pycuda.driver as cuda -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], 
outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += batch_data.shape[0] - - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - end_time = time.time() - end2end_time = end_time - start_time - - print(F"E2E time : {end2end_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference 
size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/vgg16/ixrt/modify_batchsize.py b/models/cv/classification/vgg16/ixrt/modify_batchsize.py deleted file mode 100644 index 4ac42a30..00000000 --- a/models/cv/classification/vgg16/ixrt/modify_batchsize.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. 
- dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/vgg16/ixrt/quant.py b/models/cv/classification/vgg16/ixrt/quant.py deleted file mode 100644 index 8006db24..00000000 --- a/models/cv/classification/vgg16/ixrt/quant.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import os -import cv2 -import random -import argparse -import numpy as np -from random import shuffle -from tensorrt.deploy import static_quantize - -import torch -import torchvision.datasets -from calibration_dataset import getdataloader - -def setseed(seed=42): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=20) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=224) - args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - -args = parse_args() -setseed(args.seed) -calibration_dataloader = getdataloader(args.dataset_dir, args.step, args.bsz, img_sz=args.imgsz) -static_quantize(args.model, - calibration_dataloader=calibration_dataloader, - save_quant_onnx_path=os.path.join(args.save_dir, f"quantized_{args.model_name}.onnx"), - observer=args.observer, - data_preprocess=lambda x: x[0].to("cuda"), - quant_format="qdq", - disable_quant_names=args.disable_quant_names) \ No newline at end of file diff --git a/models/cv/classification/vgg16/ixrt/refine_model.py b/models/cv/classification/vgg16/ixrt/refine_model.py deleted file mode 100644 index 000ee4dc..00000000 --- a/models/cv/classification/vgg16/ixrt/refine_model.py +++ /dev/null @@ -1,291 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. 
-# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import os -import argparse -import dataclasses - -import torch -import onnx - -from refine_utils.matmul_to_gemm_pass import FusedGemmPass -from refine_utils.linear_pass import FusedLinearPass - -from refine_utils.common import * - -def get_constant_input_name_of_operator(graph: Graph, operator: Operator): - const = None - for input in operator.inputs: - if not graph.containe_var(input): - continue - - if not graph.is_leaf_variable(input): - continue - - input_var = graph.get_variable(input) - if input_var.value is not None: - const = input - return const - -class FuseLayerNormPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - find_sequence_subgraph( - graph, - [OP.REDUCE_MEAN, OP.SUB, OP.POW, OP.REDUCE_MEAN, OP.ADD, OP.SQRT, OP.DIV, OP.MUL, OP.ADD], - self.fuse_layer_norm, - strict=False - ) - return graph - - def fuse_layer_norm(self, graph: Graph, pattern: PatternGraph): - # 检查 REDUCE_MEAN 的输入是否和 SUB 的输入是一致的 - if pattern.nodes[0].operator.inputs[0] != pattern.nodes[1].operator.inputs[0]: - return - - # 检查 POW 的输入是否和 DIV 的输入是一致的 - if pattern.nodes[2].operator.inputs[0] != pattern.nodes[6].operator.inputs[0]: - return - - # 检查部分算子的输出是否被多个算子使用 - nodes = pattern.nodes - for node in [nodes[0]] + nodes[2:-1]: - next_ops = graph.get_next_operators(node.operator) - if len(next_ops) > 1: - return - - eps = None - for input in 
nodes[4].operator.inputs: - input_var = graph.get_variable(input) - if input_var.value is not None and graph.is_leaf_variable(input): - eps = to_py_type(input_var.value) - - scale = get_constant_input_name_of_operator(graph, nodes[-2].operator) - bias = get_constant_input_name_of_operator(graph, nodes[-1].operator) - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - bias_var = graph.get_variable(bias) - print(bias_var) - - attributes = { - "axis": nodes[0].operator.attributes.axes, - "epsilon": eps, - } - - - layer_norm_op = self.transform.make_operator( - op_type="LayerNormalization", - inputs=[nodes[0].operator.inputs[0], scale, bias], - outputs=[nodes[-1].operator.outputs[0]], - **attributes - ) - - self.transform.add_operator(layer_norm_op) - -class FusedGeluPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True - ) - return graph - - def fuse_gelu(self, graph: Graph, pattern: PatternGraph): - nodes = pattern.nodes - prev_op = self.transform.get_previous_operators(nodes[0].operator)[0] - next_ops = self.transform.get_next_operators(prev_op) - if len(next_ops) != 2: - return - - if nodes[0].operator not in next_ops or nodes[3].operator not in next_ops: - return - - gelu_op_input = None - for input in nodes[3].operator.inputs: - if input in nodes[0].operator.inputs: - gelu_op_input = input - break - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - gelu_op = self.transform.make_operator( - op_type=OP.GELU, - inputs=[gelu_op_input], - outputs=[nodes[-1].operator.outputs[0]] - ) - self.transform.add_operator(gelu_op) - -@dataclasses.dataclass -class NormalizeAttr(BaseOperatorAttr): - p: float = 2.0 - epsilon: float = 1e-12 - axis: int = 1 - - -@registe_operator(OP.GELU) -class GeluOperator(BaseOperator): - - def 
call( - self, - executor, - operator: Operator, - inputs: List, - attr: NormalizeAttr, - ): - return F.gelu(inputs[0]) - - def convert_onnx_operator( - self, ir_graph: Graph, onnx_graph: onnx.GraphProto, node: onnx.NodeProto - ) -> Operator: - return default_converter(ir_graph, onnx_graph, node, attr_cls=attr.EmptyAttr) - - def quantize( - self, - graph: Graph, - op: Operator, - operator_observer_config: QuantOperatorObserverConfig, - quant_outputs: bool = False, - ): - return quant_single_input_operator(graph, op, operator_observer_config, quant_outputs=quant_outputs) - - - -class ClearUnsedVariables(BasePass): - - def process(self, graph: Graph) -> Graph: - vars = list(graph.variables) - - for var in vars: - if len(graph.get_dst_operators(var)) == 0 and graph.is_leaf_variable(var): - graph.delete_variable(var) - - quant_params = list(graph.quant_parameters.keys()) - for var in quant_params: - if not graph.containe_var(var): - graph.quant_parameters.pop(var) - - return graph - -class FormatLayerNorm(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if "LayerNorm" in op.op_type: - self.format_layer_norm(graph, op) - return graph - - def format_layer_norm(self, graph, operator): - if not hasattr(operator.attributes, "axis"): - return - if isinstance(operator.attributes.axis, (tuple, list)): - operator.attributes.axis = operator.attributes.axis[0] - -class FormatReshape(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if op.op_type == "Reshape": - self.format_reshape(graph, op) - - return graph - - def format_reshape(self, graph, operator): - shape = graph.get_variable(operator.inputs[1]) - shape.value = torch.tensor(shape.value, dtype=torch.int64) - -class FormatScalar(BasePass): - - def process(self, graph: Graph): - for var in graph.variables.values(): - var: Variable - use_ops = graph.get_dst_operators(var) - - if len(use_ops) == 0: - continue - - if use_ops[0].op_type 
not in [OP.MUL, OP.ADD, OP.GATHER]: - continue - - if var.value is not None and var.value.ndim == 0: - var.value = var.value.reshape(1) - print(f"Reshape scalar to tensor for {var.name}.") - - return graph - -class RenamePass(BasePass): - - def process(self, graph:Graph): - - names = [name for name in graph.operators.keys()] - for old_name in names: - new_name = old_name.replace("/", "#") - - graph.rename_operator(old_name, new_name) - - names = [name for name in graph.variables.keys()] - for name in names: - new_name = name.replace("/", ".").replace("Output", "out").replace("output", "out") - - graph.rename_vaiable(name, new_name, - with_variables=True, - with_operator_outputs=True) - - return graph - -def create_pipeline(example_inputs): - return PassSequence( - # FuseLayerNormPass(), - FusedGeluPass(), - - # ClearUnsedVariables(), - # FormatLayerNorm(), - # FormatReshape(), - # FormatScalar(), - # RenamePass() - ) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--onnx_path", type=str) - parser.add_argument("--dst_onnx_path", type=str) - - parser.add_argument("--bsz", type=int, default=8, - help="Batch size") - parser.add_argument("--imgsz", type=int, default=224, - help="Image size") - - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - - example_inputs = torch.randn(args.bsz, 3, args.imgsz, args.imgsz) - - refine_pipline = Pipeline( - create_source(f"{args.onnx_path}", example_inputs=example_inputs), - create_pipeline(example_inputs), - create_target( - f"{args.dst_onnx_path}", - example_inputs=example_inputs, - ) - ) - refine_pipline.run() - - print(f"refine the model, input shape={example_inputs.shape}") diff --git a/models/cv/classification/vgg16/ixrt/requirements.txt b/models/cv/classification/vgg16/ixrt/requirements.txt deleted file mode 100644 index ee7cf050..00000000 --- a/models/cv/classification/vgg16/ixrt/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -pycuda -tqdm -onnxsim 
-opencv-python==4.6.0.66 \ No newline at end of file diff --git a/models/cv/classification/vgg16/ixrt/simplify_model.py b/models/cv/classification/vgg16/ixrt/simplify_model.py deleted file mode 100644 index 4d53a474..00000000 --- a/models/cv/classification/vgg16/ixrt/simplify_model.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/models/cv/classification/wide_resnet50/ixrt/README.md b/models/cv/classification/wide_resnet50/ixrt/README.md index 06cb9edd..d8f85cc5 100644 --- a/models/cv/classification/wide_resnet50/ixrt/README.md +++ b/models/cv/classification/wide_resnet50/ixrt/README.md @@ -21,23 +21,30 @@ Dataset: to download the validation dat ### Install Dependencies ```bash -pip3 install -r 
requirements.txt +# Install libGL +## CentOS +yum install -y mesa-libGL +## Ubuntu +apt install -y libgl1-mesa-glx + +pip3 install -r ../../ixrt_common/requirements.txt ``` ### Model Conversion ```bash mkdir -p checkpoints/ -python3 export.py --weight wide_resnet50_2-95faca4d.pth --output checkpoints/wide_resnet50.onnx +python3 ../../ixrt_common/export.py --model-name wide_resnet50_2 --weight wide_resnet50_2-95faca4d.pth --output checkpoints/wide_resnet50.onnx ``` ## Model Inference ```bash -export DATASETS_DIR=/Path/to/imagenet_val/ +export PROJ_DIR=./ +export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints -export RUN_DIR=./ -export CONFIG_DIR=config/WIDE_RESNET50_CONFIG +export RUN_DIR=../../ixrt_common/ +export CONFIG_DIR=../../ixrt_common/config/WIDE_RESNET50_CONFIG ``` ### FP16 diff --git a/models/cv/classification/wide_resnet50/ixrt/build_engine.py b/models/cv/classification/wide_resnet50/ixrt/build_engine.py deleted file mode 100644 index 01e126bc..00000000 --- a/models/cv/classification/wide_resnet50/ixrt/build_engine.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import os -import cv2 -import argparse -import numpy as np - -import torch -import tensorrt -from calibration_dataset import getdataloader -import cuda.cudart as cudart - -def assertSuccess(err): - assert(err == cudart.cudaError_t.cudaSuccess) - -class EngineCalibrator(tensorrt.IInt8EntropyCalibrator2): - - def __init__(self, cache_file, datasets_dir, loop_count=10, bsz=1, img_sz=224): - super().__init__() - self.cache_file = cache_file - self.image_batcher = getdataloader(datasets_dir, loop_count, batch_size=bsz, img_sz=img_sz) - self.batch_generator = iter(self.image_batcher) - size = img_sz*img_sz*3*bsz - __import__('pdb').set_trace() - err, self.batch_allocation = cudart.cudaMalloc(size) - assertSuccess(err) - - def __del__(self): - err,= cudart.cudaFree(self.batch_allocation) - assertSuccess(err) - - def get_batch_size(self): - return self.image_batcher.batch_size - - def get_batch(self, names): - try: - batch, _ = next(self.batch_generator) - batch = batch.numpy() - __import__('pdb').set_trace() - cudart.cudaMemcpy(self.batch_allocation, - np.ascontiguousarray(batch), - batch.nbytes, - cudart.cudaMemcpyKind.cudaMemcpyHostToDevice) - return [int(self.batch_allocation)] - except StopIteration: - return None - - def read_calibration_cache(self): - if os.path.exists(self.cache_file): - with open(self.cache_file, "rb") as f: - return f.read() - - def write_calibration_cache(self, cache): - with open(self.cache_file, "wb") as f: - f.write(cache) - -def main(config): - IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.VERBOSE) - builder = tensorrt.Builder(IXRT_LOGGER) - EXPLICIT_BATCH = 1 << (int)(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - network = builder.create_network(EXPLICIT_BATCH) - build_config = builder.create_builder_config() - parser = tensorrt.OnnxParser(network, IXRT_LOGGER) - parser.parse_from_file(config.model) - - precision = tensorrt.BuilderFlag.INT8 if config.precision == "int8" else tensorrt.BuilderFlag.FP16 - print("precision : ", 
precision) - build_config.set_flag(precision) - if config.precision == "int8": - build_config.int8_calibrator = EngineCalibrator("int8_cache", config.datasets_dir) - - plan = builder.build_serialized_network(network, build_config) - engine_file_path = config.engine - with open(engine_file_path, "wb") as f: - f.write(plan) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str) - parser.add_argument("--precision", type=str, choices=["float16", "int8", "float32"], default="int8", - help="The precision of datatype") - parser.add_argument("--engine", type=str, default=None) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - args = parser.parse_args() - return args - -if __name__ == "__main__": - # cali = EngineCalibrator("tmp", "/home/qiang.zhang/data/imagenet_val/") - # print(cali.get_batch_size()) - # print(cali.get_batch("hello")) - args = parse_args() - main(args) diff --git a/models/cv/classification/wide_resnet50/ixrt/build_i8_engine.py b/models/cv/classification/wide_resnet50/ixrt/build_i8_engine.py deleted file mode 100644 index 6038b33f..00000000 --- a/models/cv/classification/wide_resnet50/ixrt/build_i8_engine.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import argparse -import json -import os - -import tensorrt -import tensorrt as trt - -TRT_LOGGER = trt.Logger(tensorrt.Logger.VERBOSE) - -EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - - -def GiB(val): - return val * 1 << 30 - - -def json_load(filename): - with open(filename) as json_file: - data = json.load(json_file) - return data - - -def setDynamicRange(network, json_file): - """Sets ranges for network layers.""" - quant_param_json = json_load(json_file) - act_quant = quant_param_json["act_quant_info"] - - for i in range(network.num_inputs): - input_tensor = network.get_input(i) - if act_quant.__contains__(input_tensor.name): - print(input_tensor.name) - value = act_quant[input_tensor.name] - tensor_max = abs(value) - tensor_min = -abs(value) - input_tensor.dynamic_range = (tensor_min, tensor_max) - - for i in range(network.num_layers): - layer = network.get_layer(i) - - for output_index in range(layer.num_outputs): - tensor = layer.get_output(output_index) - - if act_quant.__contains__(tensor.name): - value = act_quant[tensor.name] - tensor_max = abs(value) - tensor_min = -abs(value) - tensor.dynamic_range = (tensor_min, tensor_max) - else: - print("\033[1;32m%s\033[0m" % tensor.name) - - -def build_engine(onnx_file, json_file, engine_file): - builder = trt.Builder(TRT_LOGGER) - network = builder.create_network(EXPLICIT_BATCH) - - config = builder.create_builder_config() - - # If it is a dynamic onnx model , you need to add the following. 
- # profile = builder.create_optimization_profile() - # profile.set_shape("input_name", (batch, channels, min_h, min_w), (batch, channels, opt_h, opt_w), (batch, channels, max_h, max_w)) - # config.add_optimization_profile(profile) - - parser = trt.OnnxParser(network, TRT_LOGGER) - # config.max_workspace_size = GiB(1) - if not os.path.exists(onnx_file): - quit("ONNX file {} not found".format(onnx_file)) - - with open(onnx_file, "rb") as model: - if not parser.parse(model.read()): - print("ERROR: Failed to parse the ONNX file.") - for error in range(parser.num_errors): - print(parser.get_error(error)) - return None - - config.set_flag(trt.BuilderFlag.INT8) - - setDynamicRange(network, json_file) - - engine = builder.build_engine(network, config) - - with open(engine_file, "wb") as f: - f.write(engine.serialize()) - - -if __name__ == "__main__": - # Add plugins if needed - # import ctypes - # ctypes.CDLL("libmmdeploy_tensorrt_ops.so") - parser = argparse.ArgumentParser( - description="Writing qparams to onnx to convert tensorrt engine." - ) - parser.add_argument("--onnx", type=str, default=None) - parser.add_argument("--qparam_json", type=str, default=None) - parser.add_argument("--engine", type=str, default=None) - arg = parser.parse_args() - - build_engine(arg.onnx, arg.qparam_json, arg.engine) - print("\033[1;32mgenerate %s\033[0m" % arg.engine) \ No newline at end of file diff --git a/models/cv/classification/wide_resnet50/ixrt/calibration_dataset.py b/models/cv/classification/wide_resnet50/ixrt/calibration_dataset.py deleted file mode 100644 index ec931c65..00000000 --- a/models/cv/classification/wide_resnet50/ixrt/calibration_dataset.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os - -import torch -import torchvision.datasets -from torch.utils.data import DataLoader -from torchvision import models -from torchvision import transforms as T - - -class CalibrationImageNet(torchvision.datasets.ImageFolder): - def __init__(self, *args, **kwargs): - super(CalibrationImageNet, self).__init__(*args, **kwargs) - img2label_path = os.path.join(self.root, "val_map.txt") - if not os.path.exists(img2label_path): - raise FileNotFoundError(f"Not found label file `{img2label_path}`.") - - self.img2label_map = self.make_img2label_map(img2label_path) - - def make_img2label_map(self, path): - with open(path) as f: - lines = f.readlines() - - img2lable_map = dict() - for line in lines: - line = line.lstrip().rstrip().split("\t") - if len(line) != 2: - continue - img_name, label = line - img_name = img_name.strip() - if img_name in [None, ""]: - continue - label = int(label.strip()) - img2lable_map[img_name] = label - return img2lable_map - - def __getitem__(self, index): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - # if self.target_transform is not None: - # target = self.target_transform(target) - img_name = os.path.basename(path) - target = self.img2label_map[img_name] - - return sample, target - - -def create_dataloaders(data_path, num_samples=1024, img_sz=224, batch_size=2, workers=0): - dataset = CalibrationImageNet( - data_path, - transform=T.Compose( - [ - T.Resize(256), - T.CenterCrop(img_sz), - T.ToTensor(), - T.Normalize(mean=[0.485, 0.456, 
0.406], std=[0.229, 0.224, 0.225]), - ] - ), - ) - - calibration_dataset = dataset - if num_samples is not None: - calibration_dataset = torch.utils.data.Subset( - dataset, indices=range(num_samples) - ) - - calibration_dataloader = DataLoader( - calibration_dataset, - shuffle=True, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - verify_dataloader = DataLoader( - dataset, - shuffle=False, - batch_size=batch_size, - drop_last=False, - num_workers=workers, - ) - - return calibration_dataloader, verify_dataloader - - -def getdataloader(dataset_dir, step=20, batch_size=32, workers=2, img_sz=224, total_sample=50000): - num_samples = min(total_sample, step * batch_size) - if step < 0: - num_samples = None - calibration_dataloader, _ = create_dataloaders( - dataset_dir, - img_sz=img_sz, - batch_size=batch_size, - workers=workers, - num_samples=num_samples, - ) - return calibration_dataloader diff --git a/models/cv/classification/wide_resnet50/ixrt/ci/prepare.sh b/models/cv/classification/wide_resnet50/ixrt/ci/prepare.sh index 8f0dd69b..27b108a6 100644 --- a/models/cv/classification/wide_resnet50/ixrt/ci/prepare.sh +++ b/models/cv/classification/wide_resnet50/ixrt/ci/prepare.sh @@ -25,6 +25,6 @@ else echo "Not Support Os" fi -pip install -r requirements.txt +pip install -r ../../ixrt_common/requirements.txt mkdir -p checkpoints/ -python3 export.py --weight /root/data/checkpoints/wide_resnet50_2-95faca4d.pth --output checkpoints/wide_resnet50.onnx \ No newline at end of file +python3 ../../ixrt_common/export.py --model-name wide_resnet50_2 --weight wide_resnet50_2-95faca4d.pth --output checkpoints/wide_resnet50.onnx \ No newline at end of file diff --git a/models/cv/classification/wide_resnet50/ixrt/common.py b/models/cv/classification/wide_resnet50/ixrt/common.py deleted file mode 100644 index 0458195e..00000000 --- a/models/cv/classification/wide_resnet50/ixrt/common.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar 
CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import os -import cv2 -import glob -import torch -import tensorrt -import numpy as np -import pycuda.driver as cuda - -def eval_batch(batch_score, batch_label): - batch_score = torch.tensor(torch.from_numpy(batch_score), dtype=torch.float32) - values, indices = batch_score.topk(5) - top1, top5 = 0, 0 - for idx, label in enumerate(batch_label): - - if label == indices[idx][0]: - top1 += 1 - if label in indices[idx]: - top5 += 1 - return top1, top5 - -def create_engine_context(engine_path, logger): - with open(engine_path, "rb") as f: - runtime = tensorrt.Runtime(logger) - assert runtime - engine = runtime.deserialize_cuda_engine(f.read()) - assert engine - context = engine.create_execution_context() - assert context - - return engine, context - -def get_io_bindings(engine): - # Setup I/O bindings - inputs = [] - outputs = [] - allocations = [] - - for i in range(engine.num_bindings): - is_input = False - if engine.binding_is_input(i): - is_input = True - name = engine.get_binding_name(i) - dtype = engine.get_binding_dtype(i) - shape = engine.get_binding_shape(i) - if is_input: - batch_size = shape[0] - size = np.dtype(tensorrt.nptype(dtype)).itemsize - for s in shape: - size *= s - allocation = cuda.mem_alloc(size) - binding = { - "index": i, - "name": name, - "dtype": np.dtype(tensorrt.nptype(dtype)), - "shape": list(shape), - "allocation": allocation, - } - 
print(f"binding {i}, name : {name} dtype : {np.dtype(tensorrt.nptype(dtype))} shape : {list(shape)}") - allocations.append(allocation) - if engine.binding_is_input(i): - inputs.append(binding) - else: - outputs.append(binding) - return inputs, outputs, allocations diff --git a/models/cv/classification/wide_resnet50/ixrt/inference.py b/models/cv/classification/wide_resnet50/ixrt/inference.py deleted file mode 100644 index 77a1888d..00000000 --- a/models/cv/classification/wide_resnet50/ixrt/inference.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import argparse -import json -import os -import re -import time -from tqdm import tqdm - -import cv2 -import numpy as np -import pycuda.autoinit -import pycuda.driver as cuda -import torch -import tensorrt - -from calibration_dataset import getdataloader -from common import eval_batch, create_engine_context, get_io_bindings - -def main(config): - dataloader = getdataloader(config.datasets_dir, config.loop_count, config.bsz, img_sz=config.imgsz) - - host_mem = tensorrt.IHostMemory - logger = tensorrt.Logger(tensorrt.Logger.ERROR) - - # Load Engine && I/O bindings - engine, context = create_engine_context(config.engine_file, logger) - inputs, outputs, allocations = get_io_bindings(engine) - - # Warm up - if config.warm_up > 0: - print("\nWarm Start.") - for i in range(config.warm_up): - context.execute_v2(allocations) - print("Warm Done.") - - # Inference - if config.test_mode == "FPS": - torch.cuda.synchronize() - start_time = time.time() - - for i in range(config.loop_count): - context.execute_v2(allocations) - - torch.cuda.synchronize() - end_time = time.time() - forward_time = end_time - start_time - - num_samples = 50000 - if config.loop_count * config.bsz < num_samples: - num_samples = config.loop_count * config.bsz - fps = num_samples / forward_time - - print("FPS : ", fps) - print(f"Performance Check : Test {fps} >= target {config.fps_target}") - if fps >= config.fps_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - - elif config.test_mode == "ACC": - - ## Prepare the output data - output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"]) - print(f"output shape : {output.shape} output type : {output.dtype}") - - total_sample = 0 - acc_top1, acc_top5 = 0, 0 - - start_time = time.time() - with tqdm(total= len(dataloader)) as _tqdm: - for idx, (batch_data, batch_label) in enumerate(dataloader): - batch_data = batch_data.numpy().astype(inputs[0]["dtype"]) - batch_data = np.ascontiguousarray(batch_data) - total_sample += 
batch_data.shape[0] - - cuda.memcpy_htod(inputs[0]["allocation"], batch_data) - context.execute_v2(allocations) - cuda.memcpy_dtoh(output, outputs[0]["allocation"]) - - # squeeze output shape [32,1000,1,1] to [32,1000] for mobilenet_v2 model - if len(output.shape) == 4: - output = output.squeeze(axis=(2,3)) - - batch_top1, batch_top5 = eval_batch(output, batch_label) - acc_top1 += batch_top1 - acc_top5 += batch_top5 - - _tqdm.set_postfix(acc_1='{:.4f}'.format(acc_top1/total_sample), - acc_5='{:.4f}'.format(acc_top5/total_sample)) - _tqdm.update(1) - - end_time = time.time() - e2e_time = end_time - start_time - print(F"E2E time : {e2e_time:.3f} seconds") - print(F"Acc@1 : {acc_top1/total_sample} = {acc_top1}/{total_sample}") - print(F"Acc@5 : {acc_top5/total_sample} = {acc_top5}/{total_sample}") - acc1 = acc_top1/total_sample - print(f"Accuracy Check : Test {acc1} >= target {config.acc_target}") - if acc1 >= config.acc_target: - print("pass!") - exit() - else: - print("failed!") - exit(1) - -def parse_config(): - parser = argparse.ArgumentParser() - parser.add_argument("--test_mode", type=str, default="FPS", help="FPS MAP") - parser.add_argument( - "--engine_file", - type=str, - help="engine file path" - ) - parser.add_argument( - "--datasets_dir", - type=str, - default="", - help="ImageNet dir", - ) - parser.add_argument("--warm_up", type=int, default=-1, help="warm_up times") - parser.add_argument("--bsz", type=int, default=32, help="test batch size") - parser.add_argument( - "--imgsz", - "--img", - "--img-size", - type=int, - default=224, - help="inference size h,w", - ) - parser.add_argument("--use_async", action="store_true") - parser.add_argument( - "--device", type=int, default=0, help="cuda device, i.e. 
0 or 0,1,2,3,4" - ) - parser.add_argument("--fps_target", type=float, default=-1.0) - parser.add_argument("--acc_target", type=float, default=-1.0) - parser.add_argument("--loop_count", type=int, default=-1) - - config = parser.parse_args() - return config - -if __name__ == "__main__": - config = parse_config() - main(config) diff --git a/models/cv/classification/wide_resnet50/ixrt/modify_batchsize.py b/models/cv/classification/wide_resnet50/ixrt/modify_batchsize.py deleted file mode 100644 index 689b7a97..00000000 --- a/models/cv/classification/wide_resnet50/ixrt/modify_batchsize.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import onnx -import argparse - -def change_input_dim(model, bsz): - batch_size = bsz - - # The following code changes the first dimension of every input to be batch_size - # Modify as appropriate ... note that this requires all inputs to - # have the same batch_size - inputs = model.graph.input - for input in inputs: - # Checks omitted.This assumes that all inputs are tensors and have a shape with first dim. - # Add checks as needed. 
- dim1 = input.type.tensor_type.shape.dim[0] - # update dim to be a symbolic value - if isinstance(batch_size, str): - # set dynamic batch size - dim1.dim_param = batch_size - elif (isinstance(batch_size, str) and batch_size.isdigit()) or isinstance(batch_size, int): - # set given batch size - dim1.dim_value = int(batch_size) - else: - # set batch size of 1 - dim1.dim_value = 1 - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--batch_size", type=int) - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = onnx.load(args.origin_model) -change_input_dim(model, args.batch_size) -onnx.save(model, args.output_model) - - - - - diff --git a/models/cv/classification/wide_resnet50/ixrt/quant.py b/models/cv/classification/wide_resnet50/ixrt/quant.py deleted file mode 100644 index 5d71c828..00000000 --- a/models/cv/classification/wide_resnet50/ixrt/quant.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-"""这是一个高度自动化的 PPQ 量化的入口脚本,将你的模型和数据按要求进行打包: - -在自动化 API 中,我们使用 QuantizationSetting 对象传递量化参数。 - -This file will show you how to quantize your network with PPQ - You should prepare your model and calibration dataset as follow: - - ~/working/model.onnx <-- your model - ~/working/data/*.npy or ~/working/data/*.bin <-- your dataset - -if you are using caffe model: - ~/working/model.caffemdoel <-- your model - ~/working/model.prototext <-- your model - -### MAKE SURE YOUR INPUT LAYOUT IS [N, C, H, W] or [C, H, W] ### - -quantized model will be generated at: ~/working/quantized.onnx -""" -from ppq import * -from ppq.api import * -import os -from calibration_dataset import getdataloader -import argparse -import random -import numpy as np -import torch - - -def setseed(seed=42): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--model_name", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--dataset_dir", type=str, default="imagenet_val") - parser.add_argument("--observer", type=str, choices=["hist_percentile", "percentile", "minmax", "entropy", "ema"], - default="hist_percentile") - parser.add_argument("--disable_quant_names", nargs='*', type=str) - parser.add_argument("--save_dir", type=str, help="save path", default=None) - parser.add_argument("--bsz", type=int, default=32) - parser.add_argument("--step", type=int, default=20) - parser.add_argument("--seed", type=int, default=42) - parser.add_argument("--imgsz", type=int, default=224) - args = parser.parse_args() - print("Quant config:", args) - print(args.disable_quant_names) - return args - - -config = parse_args() - -# modify configuration below: -WORKING_DIRECTORY = 'checkpoints' # choose your working directory -TARGET_PLATFORM = TargetPlatform.TRT_INT8 # choose your target platform -MODEL_TYPE = NetworkFramework.ONNX # or NetworkFramework.CAFFE -INPUT_LAYOUT = 'chw' # input data layout, 
chw or hwc -NETWORK_INPUTSHAPE = [1, 3, 224, 224] # input shape of your network -EXECUTING_DEVICE = 'cuda' # 'cuda' or 'cpu'. -REQUIRE_ANALYSE = False -TRAINING_YOUR_NETWORK = False # 是否需要 Finetuning 一下你的网络 -# ------------------------------------------------------------------- -# 加载你的模型文件,PPQ 将会把 onnx 或者 caffe 模型文件解析成自己的格式 -# 如果你正使用 pytorch, tensorflow 等框架,你可以先将模型导出成 onnx -# 使用 torch.onnx.export 即可,如果你在导出 torch 模型时发生错误,欢迎与我们联系。 -# ------------------------------------------------------------------- -graph = None -if MODEL_TYPE == NetworkFramework.ONNX: - graph = load_onnx_graph(onnx_import_file=config.model) -if MODEL_TYPE == NetworkFramework.CAFFE: - graph = load_caffe_graph( - caffemodel_path=os.path.join(WORKING_DIRECTORY, 'model.caffemodel'), - prototxt_path=os.path.join(WORKING_DIRECTORY, 'model.prototxt')) -assert graph is not None, 'Graph Loading Error, Check your input again.' - -# ------------------------------------------------------------------- -# SETTING 对象用于控制 PPQ 的量化逻辑,主要描述了图融合逻辑、调度方案、量化细节策略等 -# 当你的网络量化误差过高时,你需要修改 SETTING 对象中的属性来进行特定的优化 -# ------------------------------------------------------------------- -QS = QuantizationSettingFactory.default_setting() - -# ------------------------------------------------------------------- -# 下面向你展示了如何使用 finetuning 过程提升量化精度 -# 在 PPQ 中我们提供了十余种算法用来帮助你恢复精度 -# 开启他们的方式都是 QS.xxxx = True -# 按需使用,不要全部打开,容易起飞 -# ------------------------------------------------------------------- -if TRAINING_YOUR_NETWORK: - QS.lsq_optimization = True # 启动网络再训练过程,降低量化误差 - QS.lsq_optimization_setting.steps = 500 # 再训练步数,影响训练时间,500 步大概几分钟 - QS.lsq_optimization_setting.collecting_device = 'cuda' # 缓存数据放在那,cuda 就是放在gpu,如果显存超了你就换成 'cpu' - - -dataloader = getdataloader(config.dataset_dir, config.step, batch_size=config.bsz, img_sz=config.imgsz) -# ENABLE CUDA KERNEL 会加速量化效率 3x ~ 10x,但是你如果没有装相应编译环境的话是编译不了的 -# 你可以尝试安装编译环境,或者在不启动 CUDA KERNEL 的情况下完成量化:移除 with ENABLE_CUDA_KERNEL(): 即可 -with ENABLE_CUDA_KERNEL(): - print('网络正量化中,根据你的量化配置,这将需要一段时间:') - 
quantized = quantize_native_model( - setting=QS, # setting 对象用来控制标准量化逻辑 - model=graph, - calib_dataloader=dataloader, - calib_steps=config.step, - input_shape=NETWORK_INPUTSHAPE, # 如果你的网络只有一个输入,使用这个参数传参 - inputs=None, - # 如果你的网络有多个输入,使用这个参数传参,就是 input_shape=None, inputs=[torch.zeros(1,3,224,224), torch.zeros(1,3,224,224)] - collate_fn=lambda x: x[0].to(EXECUTING_DEVICE), # collate_fn 跟 torch dataloader 的 collate fn 是一样的,用于数据预处理, - # 你当然也可以用 torch dataloader 的那个,然后设置这个为 None - platform=TARGET_PLATFORM, - device=EXECUTING_DEVICE, - do_quantize=True) - - # ------------------------------------------------------------------- - # 如果你需要执行量化后的神经网络并得到结果,则需要创建一个 executor - # 这个 executor 的行为和 torch.Module 是类似的,你可以利用这个东西来获取执行结果 - # 请注意,必须在 export 之前执行此操作。 - # ------------------------------------------------------------------- - executor = TorchExecutor(graph=quantized, device=EXECUTING_DEVICE) - # output = executor.forward(input) - - # ------------------------------------------------------------------- - # PPQ 计算量化误差时,使用信噪比的倒数作为指标,即噪声能量 / 信号能量 - # 量化误差 0.1 表示在整体信号中,量化噪声的能量约为 10% - # 你应当注意,在 graphwise_error_analyse 分析中,我们衡量的是累计误差 - # 网络的最后一层往往都具有较大的累计误差,这些误差是其前面的所有层所共同造成的 - # 你需要使用 layerwise_error_analyse 逐层分析误差的来源 - # ------------------------------------------------------------------- - print('正计算网络量化误差(SNR),最后一层的误差应小于 0.1 以保证量化精度:') - reports = graphwise_error_analyse( - graph=quantized, running_device=EXECUTING_DEVICE, steps=32, - dataloader=dataloader, collate_fn=lambda x: x[0].to(EXECUTING_DEVICE)) - for op, snr in reports.items(): - if snr > 0.1: ppq_warning(f'层 {op} 的累计量化误差显著,请考虑进行优化') - - if REQUIRE_ANALYSE: - print('正计算逐层量化误差(SNR),每一层的独立量化误差应小于 0.1 以保证量化精度:') - layerwise_error_analyse(graph=quantized, running_device=EXECUTING_DEVICE, - interested_outputs=None, - dataloader=dataloader, collate_fn=lambda x: x.to(EXECUTING_DEVICE)) - - # ------------------------------------------------------------------- - # 使用 export_ppq_graph 函数来导出量化后的模型 - # PPQ 会根据你所选择的导出平台来修改模型格式 - # 
------------------------------------------------------------------- - print('网络量化结束,正在生成目标文件:') - export_ppq_graph( - graph=quantized, platform=TARGET_PLATFORM, - graph_save_to=os.path.join(config.save_dir, f"quantized_{config.model_name}.onnx"), - config_save_to=os.path.join(config.save_dir, 'quant_cfg.json')) diff --git a/models/cv/classification/wide_resnet50/ixrt/refine_model.py b/models/cv/classification/wide_resnet50/ixrt/refine_model.py deleted file mode 100644 index 6f1e6c2f..00000000 --- a/models/cv/classification/wide_resnet50/ixrt/refine_model.py +++ /dev/null @@ -1,290 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-import os -import argparse -import dataclasses - -import torch -import onnx - -from refine_utils.matmul_to_gemm_pass import FusedGemmPass -from refine_utils.linear_pass import FusedLinearPass - -from refine_utils.common import * - -def get_constant_input_name_of_operator(graph: Graph, operator: Operator): - const = None - for input in operator.inputs: - if not graph.containe_var(input): - continue - - if not graph.is_leaf_variable(input): - continue - - input_var = graph.get_variable(input) - if input_var.value is not None: - const = input - return const - -class FuseLayerNormPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - find_sequence_subgraph( - graph, - [OP.REDUCE_MEAN, OP.SUB, OP.POW, OP.REDUCE_MEAN, OP.ADD, OP.SQRT, OP.DIV, OP.MUL, OP.ADD], - self.fuse_layer_norm, - strict=False - ) - return graph - - def fuse_layer_norm(self, graph: Graph, pattern: PatternGraph): - # 检查 REDUCE_MEAN 的输入是否和 SUB 的输入是一致的 - if pattern.nodes[0].operator.inputs[0] != pattern.nodes[1].operator.inputs[0]: - return - - # 检查 POW 的输入是否和 DIV 的输入是一致的 - if pattern.nodes[2].operator.inputs[0] != pattern.nodes[6].operator.inputs[0]: - return - - # 检查部分算子的输出是否被多个算子使用 - nodes = pattern.nodes - for node in [nodes[0]] + nodes[2:-1]: - next_ops = graph.get_next_operators(node.operator) - if len(next_ops) > 1: - return - - eps = None - for input in nodes[4].operator.inputs: - input_var = graph.get_variable(input) - if input_var.value is not None and graph.is_leaf_variable(input): - eps = to_py_type(input_var.value) - - scale = get_constant_input_name_of_operator(graph, nodes[-2].operator) - bias = get_constant_input_name_of_operator(graph, nodes[-1].operator) - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - bias_var = graph.get_variable(bias) - print(bias_var) - - attributes = { - "axis": nodes[0].operator.attributes.axes, - "epsilon": eps, - } - - - layer_norm_op = self.transform.make_operator( - 
op_type="LayerNormalization", - inputs=[nodes[0].operator.inputs[0], scale, bias], - outputs=[nodes[-1].operator.outputs[0]], - **attributes - ) - - self.transform.add_operator(layer_norm_op) - -class FusedGeluPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.DIV, OP.ERF, OP.ADD, OP.MUL, OP.MUL], callback=self.fuse_gelu, strict=True - ) - return graph - - def fuse_gelu(self, graph: Graph, pattern: PatternGraph): - nodes = pattern.nodes - prev_op = self.transform.get_previous_operators(nodes[0].operator)[0] - next_ops = self.transform.get_next_operators(prev_op) - if len(next_ops) != 2: - return - - if nodes[0].operator not in next_ops or nodes[3].operator not in next_ops: - return - - gelu_op_input = None - for input in nodes[3].operator.inputs: - if input in nodes[0].operator.inputs: - gelu_op_input = input - break - - self.transform.delete_operators_between_op_op(nodes[0].operator, nodes[-1].operator) - - gelu_op = self.transform.make_operator( - op_type=OP.GELU, - inputs=[gelu_op_input], - outputs=[nodes[-1].operator.outputs[0]] - ) - self.transform.add_operator(gelu_op) - -@dataclasses.dataclass -class NormalizeAttr(BaseOperatorAttr): - p: float = 2.0 - epsilon: float = 1e-12 - axis: int = 1 - - -@registe_operator(OP.GELU) -class GeluOperator(BaseOperator): - - def call( - self, - executor, - operator: Operator, - inputs: List, - attr: NormalizeAttr, - ): - return F.gelu(inputs[0]) - - def convert_onnx_operator( - self, ir_graph: Graph, onnx_graph: onnx.GraphProto, node: onnx.NodeProto - ) -> Operator: - return default_converter(ir_graph, onnx_graph, node, attr_cls=attr.EmptyAttr) - - def quantize( - self, - graph: Graph, - op: Operator, - operator_observer_config: QuantOperatorObserverConfig, - quant_outputs: bool = False, - ): - return quant_single_input_operator(graph, op, operator_observer_config, quant_outputs=quant_outputs) - - - -class 
ClearUnsedVariables(BasePass): - - def process(self, graph: Graph) -> Graph: - vars = list(graph.variables) - - for var in vars: - if len(graph.get_dst_operators(var)) == 0 and graph.is_leaf_variable(var): - graph.delete_variable(var) - - quant_params = list(graph.quant_parameters.keys()) - for var in quant_params: - if not graph.containe_var(var): - graph.quant_parameters.pop(var) - - return graph - -class FormatLayerNorm(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if "LayerNorm" in op.op_type: - self.format_layer_norm(graph, op) - return graph - - def format_layer_norm(self, graph, operator): - if not hasattr(operator.attributes, "axis"): - return - if isinstance(operator.attributes.axis, (tuple, list)): - operator.attributes.axis = operator.attributes.axis[0] - -class FormatReshape(BasePass): - - def process(self, graph: Graph) -> Graph: - for op in graph.operators.values(): - if op.op_type == "Reshape": - self.format_reshape(graph, op) - - return graph - - def format_reshape(self, graph, operator): - shape = graph.get_variable(operator.inputs[1]) - shape.value = torch.tensor(shape.value, dtype=torch.int64) - -class FormatScalar(BasePass): - - def process(self, graph: Graph): - for var in graph.variables.values(): - var: Variable - use_ops = graph.get_dst_operators(var) - - if len(use_ops) == 0: - continue - - if use_ops[0].op_type not in [OP.MUL, OP.ADD, OP.GATHER]: - continue - - if var.value is not None and var.value.ndim == 0: - var.value = var.value.reshape(1) - print(f"Reshape scalar to tensor for {var.name}.") - - return graph - -class RenamePass(BasePass): - - def process(self, graph:Graph): - - names = [name for name in graph.operators.keys()] - for old_name in names: - new_name = old_name.replace("/", "#") - - graph.rename_operator(old_name, new_name) - - names = [name for name in graph.variables.keys()] - for name in names: - new_name = name.replace("/", ".").replace("Output", 
"out").replace("output", "out") - - graph.rename_vaiable(name, new_name, - with_variables=True, - with_operator_outputs=True) - - return graph - -def create_pipeline(example_inputs): - return PassSequence( - # FuseLayerNormPass(), - FusedGeluPass(), - - # ClearUnsedVariables(), - # FormatLayerNorm(), - # FormatReshape(), - # FormatScalar(), - # RenamePass() - ) - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--onnx_path", type=str) - parser.add_argument("--dst_onnx_path", type=str) - - parser.add_argument("--bsz", type=int, default=8, - help="Batch size") - parser.add_argument("--imgsz", type=int, default=224, - help="Image size") - - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - - example_inputs = torch.randn(args.bsz, 3, args.imgsz, args.imgsz) - - refine_pipline = Pipeline( - create_source(f"{args.onnx_path}", example_inputs=example_inputs), - create_pipeline(example_inputs), - create_target( - f"{args.dst_onnx_path}", - example_inputs=example_inputs, - ) - ) - refine_pipline.run() - - print(f"refine the model, input shape={example_inputs.shape}") diff --git a/models/cv/classification/wide_resnet50/ixrt/refine_utils/__init__.py b/models/cv/classification/wide_resnet50/ixrt/refine_utils/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/models/cv/classification/wide_resnet50/ixrt/refine_utils/common.py b/models/cv/classification/wide_resnet50/ixrt/refine_utils/common.py deleted file mode 100644 index 2af19a14..00000000 --- a/models/cv/classification/wide_resnet50/ixrt/refine_utils/common.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -from typing import Union, Callable, List - -from tensorrt.deploy.api import * -from tensorrt.deploy.backend.onnx.converter import default_converter -from tensorrt.deploy.backend.torch.executor.operators._operators import to_py_type -from tensorrt.deploy.ir.operator_attr import BaseOperatorAttr, EmptyAttr -from tensorrt.deploy.ir.operator_type import OperatorType as OP -from tensorrt.deploy.ir import operator_attr as attr, Operator, generate_operator_name -from tensorrt.deploy.fusion import BasePass, PatternGraph, build_sequence_graph, GraphMatcher, PassSequence -from tensorrt.deploy.ir import Graph -from tensorrt.deploy.quantizer.quant_operator.base import quant_single_input_operator -from tensorrt.deploy.backend.onnx.converter import convert_onnx_operator - -def find_sequence_subgraph(graph, - pattern: Union[List[str], PatternGraph], - callback: Callable[[Graph, PatternGraph], None], - strict=True): - if isinstance(pattern, List): - pattern = build_sequence_graph(pattern) - - matcher = GraphMatcher(pattern, strict=strict) - return matcher.findall(graph, callback) \ No newline at end of file diff --git a/models/cv/classification/wide_resnet50/ixrt/refine_utils/linear_pass.py b/models/cv/classification/wide_resnet50/ixrt/refine_utils/linear_pass.py deleted file mode 100644 index 29b5e4a9..00000000 --- a/models/cv/classification/wide_resnet50/ixrt/refine_utils/linear_pass.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import dataclasses - -from refine_utils.common import * - -# AXB=C, Only for B is initializer - -class FusedLinearPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.MATMUL, OP.ADD], callback=self.to_linear_with_bias, strict=True - ) - find_sequence_subgraph( - graph, pattern=[OP.MATMUL], callback=self.to_linear, strict=True - ) - return graph - - def to_linear_with_bias(self, graph, pattern: PatternGraph): - matmul = pattern.nodes[0] - add = pattern.nodes[1] - if len(add.operator.inputs) != 2: - return - - b_var = graph.get_variable(matmul.operator.inputs[1]) - if not graph.is_leaf_variable(b_var) or b_var.value is None: - return - - if b_var.value.ndim != 2: - return - - bias_var = None - for input in add.operator.inputs: - if input not in matmul.operator.outputs: - bias_var = input - - inputs = matmul.operator.inputs - inputs.append(bias_var) - outputs = add.operator.outputs - - b_var.value = b_var.value.transpose(1, 0) - b_var.shape[0],b_var.shape[1] = b_var.shape[1],b_var.shape[0] - - hidden_size = b_var.shape[1] - linear_dim = b_var.shape[0] - - attributes = { - "hidden_size": hidden_size, - "linear_dim": linear_dim, - "has_bias": 1, - "act_type":"none" - } - - self.transform.make_operator( - "LinearFP16", - inputs=inputs, - outputs=outputs, - **attributes - ) - - self.transform.delete_operator(add.operator) - 
self.transform.delete_operator(matmul.operator) - - def to_linear(self, graph, pattern: PatternGraph): - matmul = pattern.nodes[0] - if len(matmul.operator.inputs) != 2: - return - - b_var = graph.get_variable(matmul.operator.inputs[1]) - if not graph.is_leaf_variable(b_var) or b_var.value is None: - return - - if b_var.value.ndim != 2: - return - - attributes = { - "hidden_size": hidden_size, - "linear_dim": linear_dim, - "has_bias": 0, - "act_type": "none" - } - - b_var.value = b_var.value.transpose(1, 0) - b_var.shape[0],b_var.shape[1] = b_var.shape[1], b_var.shape[0] - - hidden_size = b_var.shape[1] - linear_dim = b_var.shape[0] - - op = self.transform.make_operator( - op_type = "LinearFP16", - inputs = pattern.nodes[0].operator.inputs, - outputs=[pattern.nodes[-1].operator.outputs[0]], - **attributes - ) - - self.transform.add_operator(op) - - self.transform.delete_operator(matmul.operator) \ No newline at end of file diff --git a/models/cv/classification/wide_resnet50/ixrt/refine_utils/matmul_to_gemm_pass.py b/models/cv/classification/wide_resnet50/ixrt/refine_utils/matmul_to_gemm_pass.py deleted file mode 100644 index 4ebfac4d..00000000 --- a/models/cv/classification/wide_resnet50/ixrt/refine_utils/matmul_to_gemm_pass.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-from refine_utils.common import * - -# -# Common pattern Matmul to Gemm -# -class FusedGemmPass(BasePass): - - def process(self, graph: Graph) -> Graph: - self.transform = GraphTransform(graph) - - find_sequence_subgraph( - graph, pattern=[OP.MATMUL], callback=self.to_gemm, strict=True - ) - return graph - - def to_gemm(self, graph, pattern: PatternGraph): - matmul_op = pattern.nodes[0] - inputs = matmul_op.operator.inputs - outputs = matmul_op.operator.outputs - - if len(inputs)!=2 and len(outputs)!=1: - return - - for input in inputs: - if self.transform.is_leaf_variable(input): - return - - print(f"{self.transform.get_variable(inputs[0]).shape} {self.transform.get_variable(inputs[1]).shape}") - self.transform.delete_operator(matmul_op.operator) - - op = self.transform.make_operator( - op_type = "Gemm", - inputs = inputs, - outputs = outputs, - alpha = 1, - beta = 1, - transB = 1 - ) - - self.transform.add_operator(op) \ No newline at end of file diff --git a/models/cv/classification/wide_resnet50/ixrt/requirements.txt b/models/cv/classification/wide_resnet50/ixrt/requirements.txt deleted file mode 100644 index 424e6007..00000000 --- a/models/cv/classification/wide_resnet50/ixrt/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -onnx -tqdm -pycuda -ppq==0.6.6 \ No newline at end of file diff --git a/models/cv/classification/wide_resnet50/ixrt/scripts/infer_wide_resnet50_fp16_accuracy.sh b/models/cv/classification/wide_resnet50/ixrt/scripts/infer_wide_resnet50_fp16_accuracy.sh index b743d708..792c25ea 100644 --- a/models/cv/classification/wide_resnet50/ixrt/scripts/infer_wide_resnet50_fp16_accuracy.sh +++ b/models/cv/classification/wide_resnet50/ixrt/scripts/infer_wide_resnet50_fp16_accuracy.sh @@ -69,6 +69,34 @@ else echo " "Generate ${SIM_MODEL} fi +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if 
[[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate ${SIM_MODEL} + fi +fi + # Change Batchsize let step++ echo; @@ -111,4 +139,4 @@ python3 ${RUN_DIR}/inference.py \ --acc_target ${TGT} \ --bsz ${BSZ}; check_status -exit ${EXIT_STATUS} +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/wide_resnet50/ixrt/scripts/infer_wide_resnet50_fp16_performance.sh b/models/cv/classification/wide_resnet50/ixrt/scripts/infer_wide_resnet50_fp16_performance.sh index e7a4f1a7..61051638 100644 --- a/models/cv/classification/wide_resnet50/ixrt/scripts/infer_wide_resnet50_fp16_performance.sh +++ b/models/cv/classification/wide_resnet50/ixrt/scripts/infer_wide_resnet50_fp16_performance.sh @@ -69,6 +69,34 @@ else echo " "Generate ${SIM_MODEL} fi +# Quant Model +if [ $PRECISION == "int8" ];then + let step++ + echo; + echo [STEP ${step}] : Quant Model + if [[ -z ${QUANT_EXIST_ONNX} ]];then + QUANT_EXIST_ONNX=$CHECKPOINTS_DIR/quantized_${MODEL_NAME}.onnx + fi + if [[ -f ${QUANT_EXIST_ONNX} ]];then + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Quant Model Skip, ${QUANT_EXIST_ONNX} has been existed + else + python3 ${RUN_DIR}/quant.py \ + --model ${SIM_MODEL} \ + --model_name ${MODEL_NAME} \ + --dataset_dir ${DATASETS_DIR} \ + --observer ${QUANT_OBSERVER} \ + --disable_quant_names ${DISABLE_QUANT_LIST[@]} \ + --save_dir $CHECKPOINTS_DIR \ + --bsz ${QUANT_BATCHSIZE} \ + --step ${QUANT_STEP} \ + --seed ${QUANT_SEED} \ + --imgsz ${IMGSIZE} + SIM_MODEL=${QUANT_EXIST_ONNX} + echo " "Generate 
${SIM_MODEL} + fi +fi + # Change Batchsize let step++ echo; @@ -111,4 +139,4 @@ python3 ${RUN_DIR}/inference.py \ --fps_target ${TGT} \ --bsz ${BSZ}; check_status -exit ${EXIT_STATUS} +exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/wide_resnet50/ixrt/scripts/infer_wide_resnet50_int8_accuracy.sh b/models/cv/classification/wide_resnet50/ixrt/scripts/infer_wide_resnet50_int8_accuracy.sh index 367bdd4b..30c048f5 100644 --- a/models/cv/classification/wide_resnet50/ixrt/scripts/infer_wide_resnet50_int8_accuracy.sh +++ b/models/cv/classification/wide_resnet50/ixrt/scripts/infer_wide_resnet50_int8_accuracy.sh @@ -13,7 +13,6 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. -set -x EXIT_STATUS=0 check_status() { @@ -29,7 +28,7 @@ WARM_UP=0 LOOP_COUNT=-1 RUN_MODE=ACC PRECISION=int8 -export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python + # Update arguments index=0 options=$@ @@ -52,8 +51,6 @@ echo RUN_DIR : ${RUN_DIR} echo CONFIG_DIR : ${CONFIG_DIR} echo ====================== Model Info ====================== echo Model Name : ${MODEL_NAME} -echo Model Input Name : ${MODEL_INPUT_NAME} -echo Model Output Name : ${MODEL_OUTPUT_NAME} echo Onnx Path : ${ORIGINE_MODEL} step=0 @@ -61,15 +58,16 @@ SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx # Simplify Model let step++ - echo [STEP ${step}] : Simplify Model - if [ -f ${SIM_MODEL} ];then - echo " "Simplify Model, ${SIM_MODEL} has been existed - else - python3 ${RUN_DIR}/simplify_model.py \ - --origin_model $ORIGINE_MODEL \ - --output_model ${SIM_MODEL} - echo " "Generate ${SIM_MODEL} - fi +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi # 
Quant Model if [ $PRECISION == "int8" ];then @@ -99,36 +97,36 @@ if [ $PRECISION == "int8" ];then fi fi - # Change Batchsize - let step++ - echo; - echo [STEP ${step}] : Change Batchsize - FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx - if [ -f $FINAL_MODEL ];then - echo " "Change Batchsize Skip, $FINAL_MODEL has been existed - else - python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ - --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} - echo " "Generate ${FINAL_MODEL} - fi +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi - # Build Engine - let step++ - echo; - echo [STEP ${step}] : Build Engine - ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine - if [ -f $ENGINE_FILE ];then - echo " "Build Engine Skip, $ENGINE_FILE has been existed - else - python3 ${RUN_DIR}/build_i8_engine.py \ - --onnx ${FINAL_MODEL} \ - --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \ - --engine ${ENGINE_FILE} - echo " "Generate Engine ${ENGINE_FILE} - fi +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi # Inference -# let step++ +let step++ echo; echo [STEP ${step}] : Inference python3 ${RUN_DIR}/inference.py \ @@ -141,4 +139,4 @@ python3 ${RUN_DIR}/inference.py \ --acc_target ${TGT} \ --bsz ${BSZ}; check_status -exit ${EXIT_STATUS} 
+exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/wide_resnet50/ixrt/scripts/infer_wide_resnet50_int8_performance.sh b/models/cv/classification/wide_resnet50/ixrt/scripts/infer_wide_resnet50_int8_performance.sh index 468c557d..9e246604 100644 --- a/models/cv/classification/wide_resnet50/ixrt/scripts/infer_wide_resnet50_int8_performance.sh +++ b/models/cv/classification/wide_resnet50/ixrt/scripts/infer_wide_resnet50_int8_performance.sh @@ -28,7 +28,7 @@ WARM_UP=3 LOOP_COUNT=20 RUN_MODE=FPS PRECISION=int8 -export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python + # Update arguments index=0 options=$@ @@ -51,8 +51,6 @@ echo RUN_DIR : ${RUN_DIR} echo CONFIG_DIR : ${CONFIG_DIR} echo ====================== Model Info ====================== echo Model Name : ${MODEL_NAME} -echo Model Input Name : ${MODEL_INPUT_NAME} -echo Model Output Name : ${MODEL_OUTPUT_NAME} echo Onnx Path : ${ORIGINE_MODEL} step=0 @@ -60,15 +58,16 @@ SIM_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_sim.onnx # Simplify Model let step++ - echo [STEP ${step}] : Simplify Model - if [ -f ${SIM_MODEL} ];then - echo " "Simplify Model, ${SIM_MODEL} has been existed - else - python3 ${RUN_DIR}/simplify_model.py \ - --origin_model $ORIGINE_MODEL \ - --output_model ${SIM_MODEL} - echo " "Generate ${SIM_MODEL} - fi +echo; +echo [STEP ${step}] : Simplify Model +if [ -f ${SIM_MODEL} ];then + echo " "Simplify Model, ${SIM_MODEL} has been existed +else + python3 ${RUN_DIR}/simplify_model.py \ + --origin_model $ORIGINE_MODEL \ + --output_model ${SIM_MODEL} + echo " "Generate ${SIM_MODEL} +fi # Quant Model if [ $PRECISION == "int8" ];then @@ -98,36 +97,36 @@ if [ $PRECISION == "int8" ];then fi fi - # Change Batchsize - let step++ - echo; - echo [STEP ${step}] : Change Batchsize - FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx - if [ -f $FINAL_MODEL ];then - echo " "Change Batchsize Skip, $FINAL_MODEL has been existed - else - python3 ${RUN_DIR}/modify_batchsize.py 
--batch_size ${BSZ} \ - --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} - echo " "Generate ${FINAL_MODEL} - fi +# Change Batchsize +let step++ +echo; +echo [STEP ${step}] : Change Batchsize +FINAL_MODEL=${CHECKPOINTS_DIR}/${MODEL_NAME}_quant_${BSZ}.onnx +if [ -f $FINAL_MODEL ];then + echo " "Change Batchsize Skip, $FINAL_MODEL has been existed +else + python3 ${RUN_DIR}/modify_batchsize.py --batch_size ${BSZ} \ + --origin_model ${SIM_MODEL} --output_model ${FINAL_MODEL} + echo " "Generate ${FINAL_MODEL} +fi - # Build Engine - let step++ - echo; - echo [STEP ${step}] : Build Engine - ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine - if [ -f $ENGINE_FILE ];then - echo " "Build Engine Skip, $ENGINE_FILE has been existed - else - python3 ${RUN_DIR}/build_i8_engine.py \ - --onnx ${FINAL_MODEL} \ - --qparam_json ${CHECKPOINTS_DIR}/quant_cfg.json \ - --engine ${ENGINE_FILE} - echo " "Generate Engine ${ENGINE_FILE} - fi +# Build Engine +let step++ +echo; +echo [STEP ${step}] : Build Engine +ENGINE_FILE=${CHECKPOINTS_DIR}/${MODEL_NAME}_${PRECISION}_bs${BSZ}.engine +if [ -f $ENGINE_FILE ];then + echo " "Build Engine Skip, $ENGINE_FILE has been existed +else + python3 ${RUN_DIR}/build_engine.py \ + --precision ${PRECISION} \ + --model ${FINAL_MODEL} \ + --engine ${ENGINE_FILE} + echo " "Generate Engine ${ENGINE_FILE} +fi # Inference -# let step++ +let step++ echo; echo [STEP ${step}] : Inference python3 ${RUN_DIR}/inference.py \ @@ -137,7 +136,7 @@ python3 ${RUN_DIR}/inference.py \ --warm_up=${WARM_UP} \ --loop_count ${LOOP_COUNT} \ --test_mode ${RUN_MODE} \ - --acc_target ${TGT} \ + --fps_target ${TGT} \ --bsz ${BSZ}; check_status exit ${EXIT_STATUS} \ No newline at end of file diff --git a/models/cv/classification/wide_resnet50/ixrt/simplify_model.py b/models/cv/classification/wide_resnet50/ixrt/simplify_model.py deleted file mode 100644 index 9948a9fa..00000000 --- a/models/cv/classification/wide_resnet50/ixrt/simplify_model.py +++ 
/dev/null @@ -1,40 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import onnx -import argparse -from onnxsim import simplify - -# Simplify -def simplify_model(args): - onnx_model = onnx.load(args.origin_model) - model_simp, check = simplify(onnx_model) - model_simp = onnx.shape_inference.infer_shapes(model_simp) - onnx.save(model_simp, args.output_model) - print(" Simplify onnx Done.") - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--origin_model", type=str) - parser.add_argument("--output_model", type=str) - parser.add_argument("--reshape", action="store_true") - args = parser.parse_args() - return args - -args = parse_args() -simplify_model(args) - - - - diff --git a/tests/model_info.json b/tests/model_info.json index 3f90540e..7972ff1a 100644 --- a/tests/model_info.json +++ b/tests/model_info.json @@ -278,10 +278,12 @@ "github_repo": "", "github_branch": "", "github_path": "", - "datasets": "", - "download_url": "", + "datasets": "https://www.image-net.org/download.php", + "download_url": "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -566,10 +568,12 @@ "github_repo": "", "github_branch": "", "github_path": "", - "datasets": "", - "download_url": "", + "datasets": 
"https://www.image-net.org/download.php", + "download_url": "https://download.openmmlab.com/mmclassification/v0/deit/deit-tiny_pt-4xb256_in1k_20220218-13b382a0.pth", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -629,7 +633,7 @@ "github_branch": "", "github_path": "", "datasets": "https://www.image-net.org/download.php", - "download_url": "https://local/densenet121.pth", + "download_url": "https://download.pytorch.org/models/densenet121-a639ec97.pth", "need_third_part": false, "precisions": [ "fp16" @@ -820,10 +824,12 @@ "github_repo": "", "github_branch": "", "github_path": "", - "datasets": "", - "download_url": "", + "datasets": "https://www.image-net.org/download.php", + "download_url": "https://download.pytorch.org/models/densenet201-c1103571.pth", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -882,10 +888,12 @@ "github_repo": "", "github_branch": "", "github_path": "", - "datasets": "", - "download_url": "", + "datasets": "https://www.image-net.org/download.php", + "download_url": "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnetv2_t_agc-3620981a.pth", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -1138,10 +1146,12 @@ "github_repo": "", "github_branch": "", "github_path": "", - "datasets": "", - "download_url": "", + "datasets": "https://www.image-net.org/download.php", + "download_url": "https://download.pytorch.org/models/efficientnet_b3_rwightman-b3899882.pth", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -1233,7 +1243,7 @@ "github_branch": "", "github_path": "", "datasets": "https://www.image-net.org/download.php", - "download_url": "https://local/efficientnet_v2.pth", + 
"download_url": "https://download.pytorch.org/models/efficientnet_v2_m-dc08266a.pth", "need_third_part": false, "precisions": [ "fp16", @@ -1297,10 +1307,12 @@ "github_repo": "", "github_branch": "", "github_path": "", - "datasets": "", - "download_url": "", + "datasets": "https://www.image-net.org/download.php", + "download_url": "https://download.pytorch.org/models/efficientnet_v2_s-dd5fe13b.pth", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -1461,7 +1473,10 @@ "datasets": "", "download_url": "", "need_third_part": "", - "precisions": "", + "precisions": [ + "fp16", + "int8" + ], "type": "inference", "hasDemo": false, "demoType": "" @@ -2135,7 +2150,7 @@ "github_branch": "", "github_path": "", "datasets": "https://www.image-net.org/download.php", - "download_url": "https://download.pytorch.org/models/resnet101.pth", + "download_url": "https://download.pytorch.org/models/resnet101-63fe2227.pth", "need_third_part": false, "precisions": [ "fp16", @@ -2600,6 +2615,38 @@ "hasDemo": false, "demoType": "" }, + { + "model_name": "shufflenetv2_x0_5", + "framework": "ixrt", + "release_version": "25.06", + "release_sdk": "4.2.0", + "release_gpgpu": "BI-V150", + "latest_sdk": "4.2.0", + "latest_gpgpu": "BI-V150", + "category": "cv/classification", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "models/cv/classification/shufflenetv2_x0_5/ixrt/", + "readme_file": "models/cv/classification/shufflenetv2_x0_5/ixrt/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + "bitbucket_path": "", + "develop_owner": "", + "github_repo": "", + "github_branch": "", + "github_path": "", + "datasets": "https://www.image-net.org/download.php", + "download_url": "https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth", + "need_third_part": false, + "precisions": [ + "fp16" + ], + "type": "inference", + "hasDemo": false, + "demoType": "" + }, { 
"model_name": "shufflenetv2_x1_0", "framework": "igie", diff --git a/tests/run_ixrt.py b/tests/run_ixrt.py index f2d94fe8..a19223ad 100644 --- a/tests/run_ixrt.py +++ b/tests/run_ixrt.py @@ -132,6 +132,7 @@ def run_clf_testcase(model): } d_url = model["download_url"] checkpoint_n = d_url.split("/")[-1] + #TODO: need update mount path because mdb use /root/data prepare_script = f""" cd ../{model['model_path']} ln -s /root/data/checkpoints/{checkpoint_n} ./ @@ -160,10 +161,10 @@ def run_clf_testcase(model): script = f""" cd ../{model['model_path']} export DATASETS_DIR=/root/data/datasets/imagenet-val - export PROJ_DIR=./ + export PROJ_DIR=../../ixrt_common/ export CHECKPOINTS_DIR=./checkpoints - export RUN_DIR=./ - export CONFIG_DIR=config/{config_name}_CONFIG + export RUN_DIR=../../ixrt_common/ + export CONFIG_DIR=../../ixrt_common/config/{config_name}_CONFIG bash scripts/infer_{model_name}_{prec}_accuracy.sh bash scripts/infer_{model_name}_{prec}_performance.sh """ -- Gitee From f6e91b644cf139d7ecfac73c3a155ebe3e96965a Mon Sep 17 00:00:00 2001 From: "hongliang.yuan" Date: Wed, 30 Apr 2025 15:36:59 +0800 Subject: [PATCH 2/2] move other export into ixrt common --- .../cspdarknet53/ixrt/README.md | 2 +- .../cspdarknet53/ixrt/ci/prepare.sh | 2 +- .../classification/cspresnet50/ixrt/README.md | 10 ++- .../cspresnet50/ixrt/ci/prepare.sh | 9 ++- .../cspresnet50/ixrt/export_onnx.py | 76 ------------------ .../classification/deit_tiny/ixrt/README.md | 2 +- .../deit_tiny/ixrt/ci/prepare.sh | 2 +- .../classification/deit_tiny/ixrt/export.py | 78 ------------------- .../classification/densenet161/ixrt/README.md | 2 +- .../densenet161/ixrt/ci/prepare.sh | 2 +- .../classification/densenet161/ixrt/export.py | 74 ------------------ .../classification/densenet169/ixrt/README.md | 2 +- .../densenet169/ixrt/ci/prepare.sh | 2 +- .../classification/densenet169/ixrt/export.py | 74 ------------------ .../classification/densenet201/ixrt/README.md | 2 +- .../densenet201/ixrt/ci/prepare.sh 
| 2 +- .../classification/densenet201/ixrt/export.py | 74 ------------------ .../efficientnet_v2/ixrt/README.md | 2 +- .../efficientnetv2_rw_t/ixrt/README.md | 2 +- .../efficientnetv2_rw_t/ixrt/ci/prepare.sh | 2 +- .../classification/hrnet_w18/ixrt/README.md | 8 +- .../hrnet_w18/ixrt/ci/prepare.sh | 7 +- .../hrnet_w18/ixrt/export_onnx.py | 50 ------------ .../inception_v3/ixrt/README.md | 2 +- .../inception_v3/ixrt/ci/prepare.sh | 2 +- .../inception_v3/ixrt/export_onnx.py | 59 -------------- .../ixrt_common/config/DEIT_TINY_CONFIG | 2 +- .../ixrt_common/config/RESNETV1D50_CONFIG | 2 +- .../cv/classification/ixrt_common/export.py | 15 +++- .../export.py => ixrt_common/export_mmcls.py} | 20 ++++- .../export.py => ixrt_common/export_timm.py} | 8 +- .../cv/classification/repvgg/ixrt/README.md | 10 ++- .../classification/repvgg/ixrt/ci/prepare.sh | 8 +- .../classification/repvgg/ixrt/export_onnx.py | 76 ------------------ .../classification/resnetv1d50/ixrt/README.md | 11 ++- .../resnetv1d50/ixrt/ci/prepare.sh | 7 +- .../resnetv1d50/ixrt/export_onnx.py | 43 ---------- .../shufflenet_v1/ixrt/README.md | 8 +- .../shufflenet_v1/ixrt/ci/prepare.sh | 8 +- .../shufflenet_v1/ixrt/export_onnx.py | 76 ------------------ .../shufflenetv2_x2_0/ixrt/export.py | 61 --------------- 41 files changed, 106 insertions(+), 798 deletions(-) delete mode 100644 models/cv/classification/cspresnet50/ixrt/export_onnx.py delete mode 100644 models/cv/classification/deit_tiny/ixrt/export.py delete mode 100644 models/cv/classification/densenet161/ixrt/export.py delete mode 100644 models/cv/classification/densenet169/ixrt/export.py delete mode 100644 models/cv/classification/densenet201/ixrt/export.py delete mode 100644 models/cv/classification/hrnet_w18/ixrt/export_onnx.py delete mode 100644 models/cv/classification/inception_v3/ixrt/export_onnx.py rename models/cv/classification/{cspdarknet53/ixrt/export.py => ixrt_common/export_mmcls.py} (73%) rename 
models/cv/classification/{efficientnetv2_rw_t/ixrt/export.py => ixrt_common/export_timm.py} (85%) delete mode 100644 models/cv/classification/repvgg/ixrt/export_onnx.py delete mode 100644 models/cv/classification/resnetv1d50/ixrt/export_onnx.py delete mode 100644 models/cv/classification/shufflenet_v1/ixrt/export_onnx.py delete mode 100644 models/cv/classification/shufflenetv2_x2_0/ixrt/export.py diff --git a/models/cv/classification/cspdarknet53/ixrt/README.md b/models/cv/classification/cspdarknet53/ixrt/README.md index caf39486..1b19bf0b 100644 --- a/models/cv/classification/cspdarknet53/ixrt/README.md +++ b/models/cv/classification/cspdarknet53/ixrt/README.md @@ -41,7 +41,7 @@ git clone -b v0.24.0 https://github.com/open-mmlab/mmpretrain.git ## cspdarknet50 is actually cspdarknet53 wget -O cspdarknet53_3rdparty_8xb32_in1k_20220329-bd275287.pth https://download.openmmlab.com/mmclassification/v0/cspnet/cspdarknet50_3rdparty_8xb32_in1k_20220329-bd275287.pth -python3 export.py --cfg mmpretrain/configs/cspnet/cspdarknet50_8xb32_in1k.py --weight cspdarknet53_3rdparty_8xb32_in1k_20220329-bd275287.pth --output cspdarknet53.onnx +python3 ../../ixrt_common/export_mmcls.py --cfg mmpretrain/configs/cspnet/cspdarknet50_8xb32_in1k.py --weight cspdarknet53_3rdparty_8xb32_in1k_20220329-bd275287.pth --output cspdarknet53.onnx # Use onnxsim optimize onnx model mkdir -p checkpoints diff --git a/models/cv/classification/cspdarknet53/ixrt/ci/prepare.sh b/models/cv/classification/cspdarknet53/ixrt/ci/prepare.sh index 933dcae6..0461b3a1 100644 --- a/models/cv/classification/cspdarknet53/ixrt/ci/prepare.sh +++ b/models/cv/classification/cspdarknet53/ixrt/ci/prepare.sh @@ -29,7 +29,7 @@ pip install -r ../../ixrt_common/requirements.txt pip3 install mmcv==1.5.3 mmcls==0.24.0 unzip -q /root/data/repos/mmpretrain-0.24.0.zip -d ./ -python3 export.py --cfg mmpretrain/configs/cspnet/cspdarknet50_8xb32_in1k.py --weight 
/root/data/checkpoints/cspdarknet53_3rdparty_8xb32_in1k_20220329-bd275287.pth --output cspdarknet53.onnx +python3 ../../ixrt_common/export_mmcls.py --cfg mmpretrain/configs/cspnet/cspdarknet50_8xb32_in1k.py --weight /root/data/checkpoints/cspdarknet53_3rdparty_8xb32_in1k_20220329-bd275287.pth --output cspdarknet53.onnx mkdir -p checkpoints onnxsim cspdarknet53.onnx checkpoints/cspdarknet53_sim.onnx \ No newline at end of file diff --git a/models/cv/classification/cspresnet50/ixrt/README.md b/models/cv/classification/cspresnet50/ixrt/README.md index 73a67292..868a9b2c 100644 --- a/models/cv/classification/cspresnet50/ixrt/README.md +++ b/models/cv/classification/cspresnet50/ixrt/README.md @@ -15,6 +15,8 @@ CSPResNet50 is the one of best models. ### Prepare Resources +Pretrained model: + Dataset: to download the validation dataset. ### Install Dependencies @@ -38,9 +40,11 @@ mkdir checkpoints git clone -b v0.24.0 https://github.com/open-mmlab/mmpretrain.git python3 export_onnx.py \ - --config_file ./mmpretrain/configs/cspnet/cspresnet50_8xb32_in1k.py \ - --checkpoint_file https://download.openmmlab.com/mmclassification/v0/cspnet/cspresnet50_3rdparty_8xb32_in1k_20220329-dd6dddfb.pth \ - --output_model ./checkpoints/cspresnet50.onnx + --cfg ./mmpretrain/configs/cspnet/cspresnet50_8xb32_in1k.py \ + --weight cspresnet50_3rdparty_8xb32_in1k_20220329-dd6dddfb.pth \ + --output cspresnet50.onnx + +onnxsim cspresnet50.onnx checkpoints/cspresnet50.onnx ``` ## Model Inference diff --git a/models/cv/classification/cspresnet50/ixrt/ci/prepare.sh b/models/cv/classification/cspresnet50/ixrt/ci/prepare.sh index 64dbabb3..b8846f98 100644 --- a/models/cv/classification/cspresnet50/ixrt/ci/prepare.sh +++ b/models/cv/classification/cspresnet50/ixrt/ci/prepare.sh @@ -29,7 +29,8 @@ pip3 install mmcls==0.24.0 mmcv==1.5.3 pip install -r ../../ixrt_common/requirements.txt unzip -q /root/data/repos/mmpretrain-0.24.0.zip -d ./ mkdir -p checkpoints -python3 export_onnx.py \ - --config_file 
./mmpretrain/configs/cspnet/cspresnet50_8xb32_in1k.py \ - --checkpoint_file /root/data/checkpoints/cspresnet50_3rdparty_8xb32_in1k_20220329-dd6dddfb.pth \ - --output_model ./checkpoints/cspresnet50.onnx +python3 ../../ixrt_common/export_mmcls.py \ + --cfg ./mmpretrain/configs/cspnet/cspresnet50_8xb32_in1k.py \ + --weight /root/data/checkpoints/cspresnet50_3rdparty_8xb32_in1k_20220329-dd6dddfb.pth \ + --output cspresnet50.onnx +onnxsim cspresnet50.onnx checkpoints/cspresnet50.onnx \ No newline at end of file diff --git a/models/cv/classification/cspresnet50/ixrt/export_onnx.py b/models/cv/classification/cspresnet50/ixrt/export_onnx.py deleted file mode 100644 index 9ba381cc..00000000 --- a/models/cv/classification/cspresnet50/ixrt/export_onnx.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -from mmcls.apis import init_model -import argparse -import torch -import onnx - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--config_file", type=str) - parser.add_argument("--checkpoint_file", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -device='cuda:0' -class Model(torch.nn.Module): - def __init__(self): - super().__init__() - self.model = init_model(config_file, checkpoint_file, device='cuda:0') #.switch_to_deploy() - - def forward(self, x): - feat = self.model.backbone(x) - feat = self.model.neck(feat[0]) - out_head = self.model.head.fc(feat) - return out_head - -args = parse_args() -config_file = args.config_file -checkpoint_file = args.checkpoint_file -model = Model().eval() -x = torch.zeros(32, 3, 224, 224).to(device) -with torch.no_grad(): - output = model(x) - -################ pytorch onnx 模型导出 -print ("start transfer model to onnx") -torch.onnx.export(model, - x, - args.output_model, - input_names=["input"], - output_names=["output"], - do_constant_folding=True, - opset_version=12, -) - -print ("end transfer model to onnx") - -import onnx -import onnxsim -from mmcv import digit_version - -min_required_version = '0.4.0' -assert digit_version(onnxsim.__version__) >= digit_version( - min_required_version -), f'Requires to install onnxsim>={min_required_version}' - -model_opt, check_ok = onnxsim.simplify(args.output_model) -if check_ok: - onnx.save(model_opt, args.output_model) - print(f'Successfully simplified ONNX model: {args.output_model}') -else: - print('Failed to simplify ONNX model.') diff --git a/models/cv/classification/deit_tiny/ixrt/README.md b/models/cv/classification/deit_tiny/ixrt/README.md index 73362c8b..d47ad850 100644 --- a/models/cv/classification/deit_tiny/ixrt/README.md +++ b/models/cv/classification/deit_tiny/ixrt/README.md @@ -39,7 +39,7 @@ git clone -b v0.24.0 https://github.com/open-mmlab/mmpretrain.git mkdir checkpoints # export onnx 
model -python3 export.py --cfg mmpretrain/configs/deit/deit-tiny_pt-4xb256_in1k.py --weight deit-tiny_pt-4xb256_in1k_20220218-13b382a0.pth --output checkpoints/deit_tiny.onnx +python3 ../../ixrt_common/export_mmcls.py --cfg mmpretrain/configs/deit/deit-tiny_pt-4xb256_in1k.py --weight deit-tiny_pt-4xb256_in1k_20220218-13b382a0.pth --output checkpoints/deit_tiny.onnx # Use onnxsim optimize onnx model onnxsim checkpoints/deit_tiny.onnx checkpoints/deit_tiny_opt.onnx diff --git a/models/cv/classification/deit_tiny/ixrt/ci/prepare.sh b/models/cv/classification/deit_tiny/ixrt/ci/prepare.sh index 0cb06a89..31c78d8f 100644 --- a/models/cv/classification/deit_tiny/ixrt/ci/prepare.sh +++ b/models/cv/classification/deit_tiny/ixrt/ci/prepare.sh @@ -29,5 +29,5 @@ pip install -r ../../ixrt_common/requirements.txt pip install mmcv==1.5.3 mmcls==0.24.0 unzip -q /root/data/repos/mmpretrain-0.24.0.zip -d ./ mkdir -p checkpoints -python3 export.py --cfg mmpretrain/configs/deit/deit-tiny_pt-4xb256_in1k.py --weight deit-tiny_pt-4xb256_in1k_20220218-13b382a0.pth --output checkpoints/deit_tiny.onnx +python3 ../../ixrt_common/export_mmcls.py --cfg mmpretrain/configs/deit/deit-tiny_pt-4xb256_in1k.py --weight deit-tiny_pt-4xb256_in1k_20220218-13b382a0.pth --output checkpoints/deit_tiny.onnx onnxsim checkpoints/deit_tiny.onnx checkpoints/deit_tiny_opt.onnx \ No newline at end of file diff --git a/models/cv/classification/deit_tiny/ixrt/export.py b/models/cv/classification/deit_tiny/ixrt/export.py deleted file mode 100644 index 0078670f..00000000 --- a/models/cv/classification/deit_tiny/ixrt/export.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import argparse - -import torch -from mmcls.apis import init_model - -class Model(torch.nn.Module): - def __init__(self, config_file, checkpoint_file): - super().__init__() - self.model = init_model(config_file, checkpoint_file, device="cpu") - - def forward(self, x): - feat = self.model.backbone(x) - head = self.model.head.pre_logits(feat) - out_head = self.model.head.layers.head(head) - return out_head - -def parse_args(): - parser = argparse.ArgumentParser() - - parser.add_argument("--weight", - type=str, - required=True, - help="pytorch model weight.") - - parser.add_argument("--cfg", - type=str, - required=True, - help="model config file.") - - parser.add_argument("--output", - type=str, - required=True, - help="export onnx model path.") - - args = parser.parse_args() - return args - -def main(): - args = parse_args() - - config_file = args.cfg - checkpoint_file = args.weight - model = Model(config_file, checkpoint_file).eval() - - input_names = ['input'] - output_names = ['output'] - dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} - dummy_input = torch.randn(1, 3, 224, 224) - - torch.onnx.export( - model, - dummy_input, - args.output, - input_names = input_names, - dynamic_axes = dynamic_axes, - output_names = output_names, - opset_version=13 - ) - - print("Export onnx model successfully! 
") - -if __name__ == '__main__': - main() - diff --git a/models/cv/classification/densenet161/ixrt/README.md b/models/cv/classification/densenet161/ixrt/README.md index 084aaf1c..8ed37f4c 100644 --- a/models/cv/classification/densenet161/ixrt/README.md +++ b/models/cv/classification/densenet161/ixrt/README.md @@ -33,7 +33,7 @@ pip3 install -r ../../ixrt_common/requirements.txt ```bash mkdir checkpoints -python3 export.py --weight densenet161-8d451a50.pth --output checkpoints/densenet161.onnx +python3 ../../ixrt_common/export.py --model-name densenet161 --weight densenet161-8d451a50.pth --output checkpoints/densenet161.onnx ``` ## Model Inference diff --git a/models/cv/classification/densenet161/ixrt/ci/prepare.sh b/models/cv/classification/densenet161/ixrt/ci/prepare.sh index 0c715e25..b87edd94 100644 --- a/models/cv/classification/densenet161/ixrt/ci/prepare.sh +++ b/models/cv/classification/densenet161/ixrt/ci/prepare.sh @@ -27,4 +27,4 @@ fi pip install -r ../../ixrt_common/requirements.txt mkdir checkpoints -python3 export.py --weight /root/data/checkpoints/densenet161-8d451a50.pth --output checkpoints/densenet161.onnx \ No newline at end of file +python3 ../../ixrt_common/export.py --model-name densenet161 --weight /root/data/checkpoints/densenet161-8d451a50.pth --output checkpoints/densenet161.onnx \ No newline at end of file diff --git a/models/cv/classification/densenet161/ixrt/export.py b/models/cv/classification/densenet161/ixrt/export.py deleted file mode 100644 index 2e696e96..00000000 --- a/models/cv/classification/densenet161/ixrt/export.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import torch -import torchvision -import argparse -import re - -def parse_args(): - parser = argparse.ArgumentParser() - - parser.add_argument("--weight", - type=str, - required=True, - help="pytorch model weight.") - - parser.add_argument("--output", - type=str, - required=True, - help="export onnx model path.") - - args = parser.parse_args() - return args - -def main(): - args = parse_args() - - model = torchvision.models.densenet161(weights=False) - - state_dict = torch.load(args.weight) - - pattern = re.compile(r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$' - ) - for key in list(state_dict.keys()): - res = pattern.match(key) - if res: - new_key = res.group(1) + res.group(2) - state_dict[new_key] = state_dict[key] - del state_dict[key] - - model.load_state_dict(state_dict) - model.eval() - - input_names = ['input'] - output_names = ['output'] - dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} - dummy_input = torch.randn(1, 3, 224, 224) - - torch.onnx.export( - model, - dummy_input, - args.output, - input_names = input_names, - dynamic_axes = None, - output_names = output_names, - opset_version=13 - ) - - print("Export onnx model successfully! 
") - -if __name__ == "__main__": - main() diff --git a/models/cv/classification/densenet169/ixrt/README.md b/models/cv/classification/densenet169/ixrt/README.md index 079b3eab..05ae5247 100644 --- a/models/cv/classification/densenet169/ixrt/README.md +++ b/models/cv/classification/densenet169/ixrt/README.md @@ -34,7 +34,7 @@ pip3 install -r ../../ixrt_common/requirements.txt ```bash mkdir checkpoints -python3 export.py --weight densenet169-b2777c0a.pth --output checkpoints/densenet169.onnx +python3 ../../ixrt_common/export.py --model-name densenet169 --weight densenet169-b2777c0a.pth --output checkpoints/densenet169.onnx ``` ## Model Inference diff --git a/models/cv/classification/densenet169/ixrt/ci/prepare.sh b/models/cv/classification/densenet169/ixrt/ci/prepare.sh index 8e35d120..83b33dc3 100644 --- a/models/cv/classification/densenet169/ixrt/ci/prepare.sh +++ b/models/cv/classification/densenet169/ixrt/ci/prepare.sh @@ -27,4 +27,4 @@ fi pip install -r ../../ixrt_common/requirements.txt mkdir checkpoints -python3 export.py --weight /root/data/checkpoints/densenet169-b2777c0a.pth --output checkpoints/densenet169.onnx \ No newline at end of file +python3 ../../ixrt_common/export.py --model-name densenet169 --weight /root/data/checkpoints/densenet169-b2777c0a.pth --output checkpoints/densenet169.onnx \ No newline at end of file diff --git a/models/cv/classification/densenet169/ixrt/export.py b/models/cv/classification/densenet169/ixrt/export.py deleted file mode 100644 index dd3743c9..00000000 --- a/models/cv/classification/densenet169/ixrt/export.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import torch -import torchvision -import argparse -import re - -def parse_args(): - parser = argparse.ArgumentParser() - - parser.add_argument("--weight", - type=str, - required=True, - help="pytorch model weight.") - - parser.add_argument("--output", - type=str, - required=True, - help="export onnx model path.") - - args = parser.parse_args() - return args - -def main(): - args = parse_args() - - model = torchvision.models.densenet169(weights=False) - - state_dict = torch.load(args.weight) - - pattern = re.compile(r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$' - ) - for key in list(state_dict.keys()): - res = pattern.match(key) - if res: - new_key = res.group(1) + res.group(2) - state_dict[new_key] = state_dict[key] - del state_dict[key] - - model.load_state_dict(state_dict) - model.eval() - - input_names = ['input'] - output_names = ['output'] - dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} - dummy_input = torch.randn(1, 3, 224, 224) - - torch.onnx.export( - model, - dummy_input, - args.output, - input_names = input_names, - dynamic_axes = None, - output_names = output_names, - opset_version=13 - ) - - print("Export onnx model successfully! 
") - -if __name__ == "__main__": - main() diff --git a/models/cv/classification/densenet201/ixrt/README.md b/models/cv/classification/densenet201/ixrt/README.md index fa772090..30729598 100644 --- a/models/cv/classification/densenet201/ixrt/README.md +++ b/models/cv/classification/densenet201/ixrt/README.md @@ -34,7 +34,7 @@ pip3 install -r ../../ixrt_common/requirements.txt ```bash mkdir checkpoints -python3 export.py --weight densenet201-c1103571.pth --output checkpoints/densenet201.onnx +python3 ../../ixrt_common/export.py --model-name densenet201 --weight densenet201-c1103571.pth --output checkpoints/densenet201.onnx ``` ## Model Inference diff --git a/models/cv/classification/densenet201/ixrt/ci/prepare.sh b/models/cv/classification/densenet201/ixrt/ci/prepare.sh index 41824bd3..01ba4c64 100644 --- a/models/cv/classification/densenet201/ixrt/ci/prepare.sh +++ b/models/cv/classification/densenet201/ixrt/ci/prepare.sh @@ -27,4 +27,4 @@ fi pip install -r ../../ixrt_common/requirements.txt mkdir checkpoints -python3 export.py --weight densenet201-c1103571.pth --output checkpoints/densenet201.onnx \ No newline at end of file +python3 ../../ixrt_common/export.py --model-name densenet201 --weight densenet201-c1103571.pth --output checkpoints/densenet201.onnx \ No newline at end of file diff --git a/models/cv/classification/densenet201/ixrt/export.py b/models/cv/classification/densenet201/ixrt/export.py deleted file mode 100644 index 66019547..00000000 --- a/models/cv/classification/densenet201/ixrt/export.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import torch -import torchvision -import argparse -import re - -def parse_args(): - parser = argparse.ArgumentParser() - - parser.add_argument("--weight", - type=str, - required=True, - help="pytorch model weight.") - - parser.add_argument("--output", - type=str, - required=True, - help="export onnx model path.") - - args = parser.parse_args() - return args - -def main(): - args = parse_args() - - model = torchvision.models.densenet201(weights=False) - - state_dict = torch.load(args.weight) - - pattern = re.compile(r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$' - ) - for key in list(state_dict.keys()): - res = pattern.match(key) - if res: - new_key = res.group(1) + res.group(2) - state_dict[new_key] = state_dict[key] - del state_dict[key] - - model.load_state_dict(state_dict) - model.eval() - - input_names = ['input'] - output_names = ['output'] - dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} - dummy_input = torch.randn(1, 3, 224, 224) - - torch.onnx.export( - model, - dummy_input, - args.output, - input_names = input_names, - dynamic_axes = dynamic_axes, - output_names = output_names, - opset_version=13 - ) - - print("Export onnx model successfully! 
") - -if __name__ == "__main__": - main() diff --git a/models/cv/classification/efficientnet_v2/ixrt/README.md b/models/cv/classification/efficientnet_v2/ixrt/README.md index f92fce16..f86bbaff 100755 --- a/models/cv/classification/efficientnet_v2/ixrt/README.md +++ b/models/cv/classification/efficientnet_v2/ixrt/README.md @@ -16,7 +16,7 @@ incorporates a series of enhancement strategies to further boost performance. ### Prepare Resources -Pretrained model: +Pretrained model: Dataset: to download the validation dataset. diff --git a/models/cv/classification/efficientnetv2_rw_t/ixrt/README.md b/models/cv/classification/efficientnetv2_rw_t/ixrt/README.md index b65145f4..2318347a 100644 --- a/models/cv/classification/efficientnetv2_rw_t/ixrt/README.md +++ b/models/cv/classification/efficientnetv2_rw_t/ixrt/README.md @@ -35,7 +35,7 @@ pip install timm ```bash mkdir checkpoints -python3 export.py --weight efficientnetv2_t_agc-3620981a.pth --output checkpoints/efficientnetv2_rw_t.onnx +python3 ../../ixrt_common/export_timm.py --model-name efficientnetv2_rw_t --weight efficientnetv2_t_agc-3620981a.pth --output checkpoints/efficientnetv2_rw_t.onnx ``` ## Model Inference diff --git a/models/cv/classification/efficientnetv2_rw_t/ixrt/ci/prepare.sh b/models/cv/classification/efficientnetv2_rw_t/ixrt/ci/prepare.sh index 1c968041..b2f76a22 100644 --- a/models/cv/classification/efficientnetv2_rw_t/ixrt/ci/prepare.sh +++ b/models/cv/classification/efficientnetv2_rw_t/ixrt/ci/prepare.sh @@ -28,4 +28,4 @@ fi pip install -r ../../ixrt_common/requirements.txt pip install timm mkdir checkpoints -python3 export.py --weight efficientnetv2_t_agc-3620981a.pth --output checkpoints/efficientnetv2_rw_t.onnx \ No newline at end of file +python3 ../../ixrt_common/export_timm.py --model-name efficientnetv2_rw_t --weight efficientnetv2_t_agc-3620981a.pth --output checkpoints/efficientnetv2_rw_t.onnx \ No newline at end of file diff --git a/models/cv/classification/hrnet_w18/ixrt/README.md 
b/models/cv/classification/hrnet_w18/ixrt/README.md index f03a9e47..92aeaa06 100644 --- a/models/cv/classification/hrnet_w18/ixrt/README.md +++ b/models/cv/classification/hrnet_w18/ixrt/README.md @@ -28,14 +28,18 @@ yum install -y mesa-libGL apt install -y libgl1-mesa-glx pip3 install -r ../../ixrt_common/requirements.txt -pip3 install mmpretrain +pip3 install mmcv==1.5.3 mmcls==0.24.0 ``` ### Model Conversion ```bash +# git clone mmpretrain +git clone -b v0.24.0 https://github.com/open-mmlab/mmpretrain.git + mkdir checkpoints -python3 export_onnx.py --output_model checkpoints/hrnet_w18.onnx +# export onnx model +python3 ../../ixrt_common/export_mmcls.py --cfg mmpretrain/configs/hrnet/hrnet-w18_4xb32_in1k.py --weight hrnet-w18_3rdparty_8xb32_in1k_20220120-0c10b180.pth --output checkpoints/hrnet_w18.onnx ``` ## Model Inference diff --git a/models/cv/classification/hrnet_w18/ixrt/ci/prepare.sh b/models/cv/classification/hrnet_w18/ixrt/ci/prepare.sh index 07795f2a..f60ae0b4 100644 --- a/models/cv/classification/hrnet_w18/ixrt/ci/prepare.sh +++ b/models/cv/classification/hrnet_w18/ixrt/ci/prepare.sh @@ -26,8 +26,7 @@ else fi pip install -r ../../ixrt_common/requirements.txt -pip3 install mmpretrain +pip install mmcv==1.5.3 mmcls==0.24.0 +unzip -q /root/data/repos/mmpretrain-0.24.0.zip -d ./ mkdir checkpoints -mkdir -p /root/.cache/torch/hub/checkpoints/ -ln -s /root/data/checkpoints/hrnet-w18_3rdparty_8xb32_in1k_20220120-0c10b180.pth /root/.cache/torch/hub/checkpoints/hrnet-w18_3rdparty_8xb32_in1k_20220120-0c10b180.pth -python3 export_onnx.py --output_model checkpoints/hrnet_w18.onnx \ No newline at end of file +python3 ../../ixrt_common/export_mmcls.py --cfg mmpretrain/configs/hrnet/hrnet-w18_4xb32_in1k.py --weight hrnet-w18_3rdparty_8xb32_in1k_20220120-0c10b180.pth --output checkpoints/hrnet_w18.onnx \ No newline at end of file diff --git a/models/cv/classification/hrnet_w18/ixrt/export_onnx.py b/models/cv/classification/hrnet_w18/ixrt/export_onnx.py deleted file 
mode 100644 index ceb3e7e1..00000000 --- a/models/cv/classification/hrnet_w18/ixrt/export_onnx.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# ------------------------------------------------------------------------------ -# Copyright (c) Microsoft -# Licensed under the MIT License. -# Written by Bin Xiao (leoxiaobin@gmail.com) -# Modified by Bowen Cheng (bcheng9@illinois.edu) -# ------------------------------------------------------------------------------ - -import torch -from mmpretrain import get_model -import argparse - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = get_model('hrnet-w18_3rdparty_8xb32_in1k', pretrained=True) -model.cuda() -model.eval() -input = torch.randn(32, 3, 224, 224, device='cuda') -export_onnx_file = args.output_model - -torch.onnx.export(model, - input, - export_onnx_file, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names = ['input'], - output_names = ['output'],) -print(" ") -print('Model has been converted to ONNX') -print("exit") -exit() \ No newline at end of file diff --git a/models/cv/classification/inception_v3/ixrt/README.md b/models/cv/classification/inception_v3/ixrt/README.md index 7beb9105..3f3f3977 100755 --- 
a/models/cv/classification/inception_v3/ixrt/README.md +++ b/models/cv/classification/inception_v3/ixrt/README.md @@ -35,7 +35,7 @@ pip3 install -r ../../ixrt_common/requirements.txt ```bash mkdir checkpoints -python3 export_onnx.py --origin_model inception_v3_google-0cc3c7bd.pth --output_model checkpoints/inception_v3.onnx +python3 ../../ixrt_common/export.py --model-name inception_v3 --weight inception_v3_google-0cc3c7bd.pth --output checkpoints/inception_v3.onnx ``` ## Model Inference diff --git a/models/cv/classification/inception_v3/ixrt/ci/prepare.sh b/models/cv/classification/inception_v3/ixrt/ci/prepare.sh index 39ed126c..532c333f 100644 --- a/models/cv/classification/inception_v3/ixrt/ci/prepare.sh +++ b/models/cv/classification/inception_v3/ixrt/ci/prepare.sh @@ -27,4 +27,4 @@ fi pip install -r ../../ixrt_common/requirements.txt mkdir checkpoints -python3 export_onnx.py --origin_model inception_v3_google-0cc3c7bd.pth --output_model checkpoints/inception_v3.onnx \ No newline at end of file +python3 ../../ixrt_common/export.py --model-name inception_v3 --weight inception_v3_google-0cc3c7bd.pth --output checkpoints/inception_v3.onnx \ No newline at end of file diff --git a/models/cv/classification/inception_v3/ixrt/export_onnx.py b/models/cv/classification/inception_v3/ixrt/export_onnx.py deleted file mode 100644 index c5c188ea..00000000 --- a/models/cv/classification/inception_v3/ixrt/export_onnx.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import torch -import torchvision -import argparse - -def parse_args(): - parser = argparse.ArgumentParser() - - parser.add_argument("--origin_model", - type=str, - required=True, - help="pytorch model weight.") - - parser.add_argument("--output_model", - type=str, - required=True, - help="export onnx model path.") - - args = parser.parse_args() - return args - -def main(): - args = parse_args() - - model = torchvision.models.inception_v3() - model.load_state_dict(torch.load(args.origin_model)) - model.eval() - - input_names = ['input'] - output_names = ['output'] - dummy_input = torch.randn(32, 3, 224, 224) - - torch.onnx.export( - model, - dummy_input, - args.output_model, - input_names = input_names, - output_names = output_names, - opset_version=13 - ) - - print("Export onnx model successfully! 
") - -if __name__ == "__main__": - main() diff --git a/models/cv/classification/ixrt_common/config/DEIT_TINY_CONFIG b/models/cv/classification/ixrt_common/config/DEIT_TINY_CONFIG index 2464e4f5..b4137668 100644 --- a/models/cv/classification/ixrt_common/config/DEIT_TINY_CONFIG +++ b/models/cv/classification/ixrt_common/config/DEIT_TINY_CONFIG @@ -26,7 +26,7 @@ ORIGINE_MODEL=deit_tiny_opt.onnx # QUANT_SEED : 随机种子 保证量化结果可复现 # QUANT_EXIST_ONNX : 如果有其他来源的量化模型则填写 QUANT_OBSERVER=minmax -QUANT_BATCHSIZE=1 +QUANT_BATCHSIZE=32 QUANT_STEP=32 QUANT_SEED=42 DISABLE_QUANT_LIST= diff --git a/models/cv/classification/ixrt_common/config/RESNETV1D50_CONFIG b/models/cv/classification/ixrt_common/config/RESNETV1D50_CONFIG index 2c87e1d7..0bf91b04 100644 --- a/models/cv/classification/ixrt_common/config/RESNETV1D50_CONFIG +++ b/models/cv/classification/ixrt_common/config/RESNETV1D50_CONFIG @@ -26,7 +26,7 @@ ORIGINE_MODEL=resnet_v1_d50.onnx # QUANT_SEED : 随机种子 保证量化结果可复现 # QUANT_EXIST_ONNX : 如果有其他来源的量化模型则填写 QUANT_OBSERVER=minmax -QUANT_BATCHSIZE=32 +QUANT_BATCHSIZE=1 QUANT_STEP=32 QUANT_SEED=42 DISABLE_QUANT_LIST= diff --git a/models/cv/classification/ixrt_common/export.py b/models/cv/classification/ixrt_common/export.py index 506ecac2..dd10b13a 100644 --- a/models/cv/classification/ixrt_common/export.py +++ b/models/cv/classification/ixrt_common/export.py @@ -16,6 +16,7 @@ import torch import torchvision import argparse +import re def parse_args(): parser = argparse.ArgumentParser() @@ -47,7 +48,19 @@ def main(): # Fallback for models that do not accept 'pretrained' parameter model = getattr(torchvision.models, args.model_name)() - model.load_state_dict(torch.load(args.weight)) + state_dict = torch.load(args.weight) + + if args.model_name in ["densenet201", "densenet161", "densenet169"]: + pattern = re.compile(r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$' + ) + for key in list(state_dict.keys()): + res = pattern.match(key) + if res: + 
new_key = res.group(1) + res.group(2) + state_dict[new_key] = state_dict[key] + del state_dict[key] + + model.load_state_dict(state_dict) model.eval() input_names = ['input'] diff --git a/models/cv/classification/cspdarknet53/ixrt/export.py b/models/cv/classification/ixrt_common/export_mmcls.py similarity index 73% rename from models/cv/classification/cspdarknet53/ixrt/export.py rename to models/cv/classification/ixrt_common/export_mmcls.py index 9f5514d6..044df41d 100644 --- a/models/cv/classification/cspdarknet53/ixrt/export.py +++ b/models/cv/classification/ixrt_common/export_mmcls.py @@ -24,8 +24,24 @@ class Model(torch.nn.Module): def forward(self, x): feat = self.model.backbone(x) - feat = self.model.neck(feat) - out_head = self.model.head.fc(feat[0]) + + if hasattr(self.model, 'neck') and callable(getattr(self.model, 'neck', None)): + feat = self.model.neck(feat) + + if hasattr(self.model.head, 'fc') and isinstance(self.model.head.fc, torch.nn.Module): + out_head = self.model.head.fc(feat[0]) + elif ( + hasattr(self.model.head, 'pre_logits') + and hasattr(self.model.head, 'layers') + and hasattr(self.model.head.layers, 'head') + ): + head = self.model.head.pre_logits(feat) + out_head = self.model.head.layers.head(head) + else: + raise NotImplementedError( + "当前模型 head 结构不被支持,请确保其包含 'fc' 或 'pre_logits + layers.head'" + ) + return out_head def parse_args(): diff --git a/models/cv/classification/efficientnetv2_rw_t/ixrt/export.py b/models/cv/classification/ixrt_common/export_timm.py similarity index 85% rename from models/cv/classification/efficientnetv2_rw_t/ixrt/export.py rename to models/cv/classification/ixrt_common/export_timm.py index 3c3f579c..25e995a8 100644 --- a/models/cv/classification/efficientnetv2_rw_t/ixrt/export.py +++ b/models/cv/classification/ixrt_common/export_timm.py @@ -20,6 +20,11 @@ import argparse def parse_args(): parser = argparse.ArgumentParser() + parser.add_argument("--model-name", + type=str, + required=True, + help="Name of the 
model.") + parser.add_argument("--weight", type=str, required=True, @@ -35,8 +40,9 @@ def parse_args(): def main(): args = parse_args() + print(f"Loading model: {args.model_name}...") - model = timm.create_model('efficientnetv2_rw_t', checkpoint_path=args.weight) + model = timm.create_model(args.model_name, checkpoint_path=args.weight) model.eval() dummy_input = torch.randn([32, 3, 288, 288]) diff --git a/models/cv/classification/repvgg/ixrt/README.md b/models/cv/classification/repvgg/ixrt/README.md index 335ac98f..cd35ad32 100644 --- a/models/cv/classification/repvgg/ixrt/README.md +++ b/models/cv/classification/repvgg/ixrt/README.md @@ -36,10 +36,12 @@ pip3 install mmcls==0.24.0 mmcv==1.5.3 mkdir checkpoints git clone -b v0.24.0 https://github.com/open-mmlab/mmpretrain.git -python3 export_onnx.py \ - --config_file ./mmpretrain/configs/repvgg/repvgg-A0_4xb64-coslr-120e_in1k.py \ - --checkpoint_file https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-A0_3rdparty_4xb64-coslr-120e_in1k_20210909-883ab98c.pth \ - --output_model ./checkpoints/repvgg_A0.onnx +python3 ../../ixrt_common/export_mmcls.py \ + --cfg ./mmpretrain/configs/repvgg/repvgg-A0_4xb64-coslr-120e_in1k.py \ + --weight repvgg-A0_3rdparty_4xb64-coslr-120e_in1k_20210909-883ab98c.pth \ + --output repvgg_A0.onnx + +onnxsim repvgg_A0.onnx checkpoints/repvgg_A0.onnx ``` ## Model Inference diff --git a/models/cv/classification/repvgg/ixrt/ci/prepare.sh b/models/cv/classification/repvgg/ixrt/ci/prepare.sh index 55ad046b..3925cf31 100644 --- a/models/cv/classification/repvgg/ixrt/ci/prepare.sh +++ b/models/cv/classification/repvgg/ixrt/ci/prepare.sh @@ -29,7 +29,7 @@ pip install -r ../../ixrt_common/requirements.txt pip3 install mmcls==0.24.0 mmcv==1.5.3 unzip -q /root/data/repos/mmpretrain-0.24.0.zip -d ./ mkdir -p checkpoints -python3 export_onnx.py \ - --config_file ./mmpretrain/configs/repvgg/repvgg-A0_4xb64-coslr-120e_in1k.py \ - --checkpoint_file 
/root/data/checkpoints/repvgg-A0_3rdparty_4xb64-coslr-120e_in1k_20210909-883ab98c.pth \ - --output_model ./checkpoints/repvgg_A0.onnx \ No newline at end of file +python3 ../../ixrt_common/export_mmcls.py \ + --cfg ./mmpretrain/configs/repvgg/repvgg-A0_4xb64-coslr-120e_in1k.py \ + --weight /root/data/checkpoints/repvgg-A0_3rdparty_4xb64-coslr-120e_in1k_20210909-883ab98c.pth \ + --output ./checkpoints/repvgg_A0.onnx \ No newline at end of file diff --git a/models/cv/classification/repvgg/ixrt/export_onnx.py b/models/cv/classification/repvgg/ixrt/export_onnx.py deleted file mode 100644 index 9ba381cc..00000000 --- a/models/cv/classification/repvgg/ixrt/export_onnx.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -from mmcls.apis import init_model -import argparse -import torch -import onnx - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--config_file", type=str) - parser.add_argument("--checkpoint_file", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -device='cuda:0' -class Model(torch.nn.Module): - def __init__(self): - super().__init__() - self.model = init_model(config_file, checkpoint_file, device='cuda:0') #.switch_to_deploy() - - def forward(self, x): - feat = self.model.backbone(x) - feat = self.model.neck(feat[0]) - out_head = self.model.head.fc(feat) - return out_head - -args = parse_args() -config_file = args.config_file -checkpoint_file = args.checkpoint_file -model = Model().eval() -x = torch.zeros(32, 3, 224, 224).to(device) -with torch.no_grad(): - output = model(x) - -################ pytorch onnx 模型导出 -print ("start transfer model to onnx") -torch.onnx.export(model, - x, - args.output_model, - input_names=["input"], - output_names=["output"], - do_constant_folding=True, - opset_version=12, -) - -print ("end transfer model to onnx") - -import onnx -import onnxsim -from mmcv import digit_version - -min_required_version = '0.4.0' -assert digit_version(onnxsim.__version__) >= digit_version( - min_required_version -), f'Requires to install onnxsim>={min_required_version}' - -model_opt, check_ok = onnxsim.simplify(args.output_model) -if check_ok: - onnx.save(model_opt, args.output_model) - print(f'Successfully simplified ONNX model: {args.output_model}') -else: - print('Failed to simplify ONNX model.') diff --git a/models/cv/classification/resnetv1d50/ixrt/README.md b/models/cv/classification/resnetv1d50/ixrt/README.md index b81fadc6..529ba5ff 100644 --- a/models/cv/classification/resnetv1d50/ixrt/README.md +++ b/models/cv/classification/resnetv1d50/ixrt/README.md @@ -28,14 +28,19 @@ yum install -y mesa-libGL apt install -y libgl1-mesa-glx pip3 install -r 
../../ixrt_common/requirments.txt -pip3 install mmpretrain +pip3 install mmcv==1.5.3 mmcls==0.24.0 ``` ### Model Conversion ```bash +# git clone mmpretrain +git clone -b v0.24.0 https://github.com/open-mmlab/mmpretrain.git + mkdir checkpoints -python3 export_onnx.py --output_model checkpoints/resnet_v1_d50.onnx + +# export onnx model +python3 ../../ixrt_common/export_mmcls.py --cfg mmpretrain/configs/resnet/resnetv1d50_b32x8_imagenet.py --weight resnetv1d50_b32x8_imagenet_20210531-db14775a.pth --output checkpoints/resnet_v1_d50.onnx ``` ## Model Inference @@ -45,7 +50,7 @@ export PROJ_DIR=./ export DATASETS_DIR=/path/to/imagenet_val/ export CHECKPOINTS_DIR=./checkpoints export RUN_DIR=../../ixrt_common/ -export CONFIG_DIR=../../ixrt_common/config/config/RESNETV1D50_CONFIG +export CONFIG_DIR=../../ixrt_common/config/RESNETV1D50_CONFIG ``` ### FP16 diff --git a/models/cv/classification/resnetv1d50/ixrt/ci/prepare.sh b/models/cv/classification/resnetv1d50/ixrt/ci/prepare.sh index 30873aff..22566b28 100644 --- a/models/cv/classification/resnetv1d50/ixrt/ci/prepare.sh +++ b/models/cv/classification/resnetv1d50/ixrt/ci/prepare.sh @@ -26,8 +26,7 @@ else fi pip install -r ../../ixrt_common/requirements.txt -pip install mmpretrain +pip install mmcv==1.5.3 mmcls==0.24.0 +unzip -q /root/data/repos/mmpretrain-0.24.0.zip -d ./ mkdir checkpoints -mkdir -p /root/.cache/torch/hub/checkpoints/ -ln -s /root/data/checkpoints/resnetv1d50_b32x8_imagenet_20210531-db14775a.pth /root/.cache/torch/hub/checkpoints/resnetv1d50_b32x8_imagenet_20210531-db14775a.pth -python3 export_onnx.py --output_model checkpoints/resnet_v1_d50.onnx \ No newline at end of file +python3 ../../ixrt_common/export_mmcls.py --cfg mmpretrain/configs/resnet/resnetv1d50_b32x8_imagenet.py --weight resnetv1d50_b32x8_imagenet_20210531-db14775a.pth --output checkpoints/resnet_v1_d50.onnx \ No newline at end of file diff --git a/models/cv/classification/resnetv1d50/ixrt/export_onnx.py 
b/models/cv/classification/resnetv1d50/ixrt/export_onnx.py deleted file mode 100644 index e1ef8d61..00000000 --- a/models/cv/classification/resnetv1d50/ixrt/export_onnx.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -import torch -from mmpretrain import get_model -import argparse - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -args = parse_args() -model = get_model('resnetv1d50_8xb32_in1k', pretrained=True) -model.cuda() -model.eval() -input = torch.randn(32, 3, 224, 224, device='cuda') -export_onnx_file = args.output_model - -torch.onnx.export(model, - input, - export_onnx_file, - export_params=True, - opset_version=11, - do_constant_folding=True, - input_names = ['input'], - output_names = ['output'],) -print(" ") -print('Model has been converted to ONNX') -print("exit") -exit() diff --git a/models/cv/classification/shufflenet_v1/ixrt/README.md b/models/cv/classification/shufflenet_v1/ixrt/README.md index efa4c981..9c97af0d 100644 --- a/models/cv/classification/shufflenet_v1/ixrt/README.md +++ b/models/cv/classification/shufflenet_v1/ixrt/README.md @@ -38,10 +38,10 @@ pip3 install mmcls==0.24.0 mmcv==1.5.3 mkdir checkpoints git clone -b v0.24.0 https://github.com/open-mmlab/mmpretrain.git -python3 export_onnx.py \ - --config_file 
./mmpretrain/configs/shufflenet_v1/shufflenet-v1-1x_16xb64_in1k.py \ - --checkpoint_file ./shufflenet_v1_batch1024_imagenet_20200804-5d6cec73.pth \ - --output_model ./checkpoints/shufflenet_v1.onnx +python3 ../../ixrt_common/export_mmcls.py \ + --cfg ./mmpretrain/configs/shufflenet_v1/shufflenet-v1-1x_16xb64_in1k.py \ + --weight ./shufflenet_v1_batch1024_imagenet_20200804-5d6cec73.pth \ + --output ./checkpoints/shufflenet_v1.onnx ``` ## Model Inference diff --git a/models/cv/classification/shufflenet_v1/ixrt/ci/prepare.sh b/models/cv/classification/shufflenet_v1/ixrt/ci/prepare.sh index 4f7016ae..b68ada23 100644 --- a/models/cv/classification/shufflenet_v1/ixrt/ci/prepare.sh +++ b/models/cv/classification/shufflenet_v1/ixrt/ci/prepare.sh @@ -29,7 +29,7 @@ pip install -r ../../ixrt_common/requirements.txt pip3 install mmcls==0.24.0 mmcv==1.5.3 mkdir -p checkpoints unzip -q /root/data/repos/mmpretrain-0.24.0.zip -d ./ -python3 export_onnx.py \ ---config_file ./mmpretrain/configs/shufflenet_v1/shufflenet-v1-1x_16xb64_in1k.py \ ---checkpoint_file /root/data/checkpoints/shufflenet_v1.pth \ ---output_model ./checkpoints/shufflenet_v1.onnx \ No newline at end of file +python3 ../../ixrt_common/export_mmcls.py \ +--cfg ./mmpretrain/configs/shufflenet_v1/shufflenet-v1-1x_16xb64_in1k.py \ +--weight /root/data/checkpoints/shufflenet_v1.pth \ +--output ./checkpoints/shufflenet_v1.onnx \ No newline at end of file diff --git a/models/cv/classification/shufflenet_v1/ixrt/export_onnx.py b/models/cv/classification/shufflenet_v1/ixrt/export_onnx.py deleted file mode 100644 index 9ba381cc..00000000 --- a/models/cv/classification/shufflenet_v1/ixrt/export_onnx.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -from mmcls.apis import init_model -import argparse -import torch -import onnx - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--config_file", type=str) - parser.add_argument("--checkpoint_file", type=str) - parser.add_argument("--output_model", type=str) - args = parser.parse_args() - return args - -device='cuda:0' -class Model(torch.nn.Module): - def __init__(self): - super().__init__() - self.model = init_model(config_file, checkpoint_file, device='cuda:0') #.switch_to_deploy() - - def forward(self, x): - feat = self.model.backbone(x) - feat = self.model.neck(feat[0]) - out_head = self.model.head.fc(feat) - return out_head - -args = parse_args() -config_file = args.config_file -checkpoint_file = args.checkpoint_file -model = Model().eval() -x = torch.zeros(32, 3, 224, 224).to(device) -with torch.no_grad(): - output = model(x) - -################ pytorch onnx 模型导出 -print ("start transfer model to onnx") -torch.onnx.export(model, - x, - args.output_model, - input_names=["input"], - output_names=["output"], - do_constant_folding=True, - opset_version=12, -) - -print ("end transfer model to onnx") - -import onnx -import onnxsim -from mmcv import digit_version - -min_required_version = '0.4.0' -assert digit_version(onnxsim.__version__) >= digit_version( - min_required_version -), f'Requires to install onnxsim>={min_required_version}' - -model_opt, check_ok = onnxsim.simplify(args.output_model) -if check_ok: - onnx.save(model_opt, args.output_model) - print(f'Successfully simplified ONNX model: {args.output_model}') -else: - 
print('Failed to simplify ONNX model.') diff --git a/models/cv/classification/shufflenetv2_x2_0/ixrt/export.py b/models/cv/classification/shufflenetv2_x2_0/ixrt/export.py deleted file mode 100644 index 3a3c15a8..00000000 --- a/models/cv/classification/shufflenetv2_x2_0/ixrt/export.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import torch -import torchvision -import argparse - -def parse_args(): - parser = argparse.ArgumentParser() - - parser.add_argument("--weight", - type=str, - required=True, - help="pytorch model weight.") - - parser.add_argument("--output", - type=str, - required=True, - help="export onnx model path.") - - args = parser.parse_args() - return args - -def main(): - args = parse_args() - - model = torchvision.models.shufflenet_v2_x2_0() - model.load_state_dict(torch.load(args.weight)) - model.eval() - - input_names = ['input'] - output_names = ['output'] - dynamic_axes = {'input': {0: '-1'}, 'output': {0: '-1'}} - dummy_input = torch.randn(1, 3, 224, 224) - - torch.onnx.export( - model, - dummy_input, - args.output, - input_names = input_names, - dynamic_axes = dynamic_axes, - output_names = output_names, - opset_version=13 - ) - - print("Export onnx model successfully! ") - -if __name__ == "__main__": - main() -- Gitee