From 445cf59d947dd4233ca52dc9bd077c3a5b7390d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E5=85=83=E6=9D=B0?= Date: Wed, 27 Aug 2025 03:43:01 +0000 Subject: [PATCH] =?UTF-8?q?K=E8=BD=B4=E9=94=99=E5=B3=B0=E6=A0=B7=E4=BE=8B?= =?UTF-8?q?=E9=80=82=E9=85=8D=E4=BB=A5=E5=8F=8A=E5=B8=B8=E9=87=8F=E5=8C=96?= =?UTF-8?q?=E6=A0=B7=E4=BE=8B=E6=95=B4=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../kernel_launch_method_by_direct/run.sh | 5 ++ .../kernel_launch_method_by_direct/run.sh | 5 ++ .../matrix/batch_matmul_bias_no_batch/run.sh | 5 ++ examples/matrix/batch_matmul_tscm/run.sh | 5 ++ .../kernel_launch_method_by_direct/run.sh | 5 ++ examples/matrix/matmul_a2b2share/run.sh | 5 ++ examples/matrix/matmul_async_iterate/run.sh | 5 ++ .../matrix/matmul_async_iterate_all/run.sh | 5 ++ examples/matrix/matmul_callback/run.sh | 5 ++ examples/matrix/matmul_channelsplit/run.sh | 5 ++ examples/matrix/matmul_constant/README.md | 8 ++- .../op_kernel/matmul_constant_impl.h | 23 +++++- examples/matrix/matmul_constant/run.sh | 5 ++ .../matrix/matmul_constant/testcase/case.csv | 2 +- examples/matrix/matmul_gemv/run.sh | 5 ++ examples/matrix/matmul_ibshareAB/run.sh | 5 ++ examples/matrix/matmul_ibshareB/run.sh | 5 ++ examples/matrix/matmul_int4/run.sh | 5 ++ .../matmul_k_reorder_load/CMakeLists.txt | 1 + .../matrix/matmul_k_reorder_load/README.md | 7 +- .../matmul_k_reorder_load/cmake/cpu_lib.cmake | 4 ++ .../matmul_k_reorder_load/cmake/npu_lib.cmake | 1 + .../matmul_k_reorder_load_custom_kernel.h | 5 ++ examples/matrix/matmul_k_reorder_load/run.sh | 18 ++++- .../matmul_k_reorder_load/testcase/case.csv | 2 +- examples/matrix/matmul_l0c_extend/run.sh | 5 ++ examples/matrix/matmul_l0cache/run.sh | 5 ++ examples/matrix/matmul_l2cache/run.sh | 5 ++ .../matmul_mixdualmaster_custom_kernel.h | 2 +- examples/matrix/matmul_mixdualmaster/run.sh | 5 ++ examples/matrix/matmul_mndb/run.sh | 5 ++ examples/matrix/matmul_nbuffer33/run.sh | 5 ++ 
examples/matrix/matmul_nd_align/run.sh | 5 ++ examples/matrix/matmul_nz/run.sh | 5 ++ examples/matrix/matmul_partial_output/run.sh | 5 ++ examples/matrix/matmul_perf/CMakeLists.txt | 6 -- examples/matrix/matmul_perf/README.md | 72 ++----------------- .../matrix/matmul_perf/cmake/cpu_lib.cmake | 6 -- .../matrix/matmul_perf/cmake/npu_lib.cmake | 6 -- .../op_kernel/matmul_perf_custom_kernel.cpp | 7 -- .../op_kernel/matmul_perf_custom_kernel.h | 38 ---------- examples/matrix/matmul_perf/run.sh | 9 ++- examples/matrix/matmul_preload/run.sh | 5 ++ examples/matrix/matmul_quant/run.sh | 5 ++ examples/matrix/matmul_sparse/run.sh | 5 ++ examples/matrix/matmul_splitk/run.sh | 5 ++ examples/matrix/matmul_triangle/run.sh | 5 ++ examples/matrix/matmul_tscm/run.sh | 5 ++ examples/matrix/matmul_unaligned/run.sh | 5 ++ examples/matrix/matmul_unitflag/run.sh | 5 ++ 50 files changed, 232 insertions(+), 145 deletions(-) diff --git a/examples/matrix/basic_block_matmul/kernel_launch_method_by_direct/run.sh b/examples/matrix/basic_block_matmul/kernel_launch_method_by_direct/run.sh index 9f3c58bb..e4396318 100644 --- a/examples/matrix/basic_block_matmul/kernel_launch_method_by_direct/run.sh +++ b/examples/matrix/basic_block_matmul/kernel_launch_method_by_direct/run.sh @@ -27,6 +27,11 @@ if [[ ! "${SOC_VERSION}" =~ ^Ascend910 ]]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." + exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/batch_matmul/kernel_launch_method_by_direct/run.sh b/examples/matrix/batch_matmul/kernel_launch_method_by_direct/run.sh index f37c3548..a95e5d1b 100644 --- a/examples/matrix/batch_matmul/kernel_launch_method_by_direct/run.sh +++ b/examples/matrix/batch_matmul/kernel_launch_method_by_direct/run.sh @@ -35,6 +35,11 @@ if [[ ! 
"${SOC_VERSION}" =~ ^Ascend910 ]]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." + exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/batch_matmul_bias_no_batch/run.sh b/examples/matrix/batch_matmul_bias_no_batch/run.sh index b23451b8..4652cec2 100644 --- a/examples/matrix/batch_matmul_bias_no_batch/run.sh +++ b/examples/matrix/batch_matmul_bias_no_batch/run.sh @@ -57,6 +57,11 @@ if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." + exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/batch_matmul_tscm/run.sh b/examples/matrix/batch_matmul_tscm/run.sh index 21e667d5..aec9d4ce 100644 --- a/examples/matrix/batch_matmul_tscm/run.sh +++ b/examples/matrix/batch_matmul_tscm/run.sh @@ -57,6 +57,11 @@ if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." + exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/matmul/kernel_launch_method_by_direct/run.sh b/examples/matrix/matmul/kernel_launch_method_by_direct/run.sh index 58864042..d7c77e32 100644 --- a/examples/matrix/matmul/kernel_launch_method_by_direct/run.sh +++ b/examples/matrix/matmul/kernel_launch_method_by_direct/run.sh @@ -35,6 +35,11 @@ if [[ ! "${SOC_VERSION}" =~ ^Ascend ]]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." 
+ exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/matmul_a2b2share/run.sh b/examples/matrix/matmul_a2b2share/run.sh index ff29b6de..37b063b4 100644 --- a/examples/matrix/matmul_a2b2share/run.sh +++ b/examples/matrix/matmul_a2b2share/run.sh @@ -57,6 +57,11 @@ if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." + exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/matmul_async_iterate/run.sh b/examples/matrix/matmul_async_iterate/run.sh index 34f794f7..11ff84f6 100644 --- a/examples/matrix/matmul_async_iterate/run.sh +++ b/examples/matrix/matmul_async_iterate/run.sh @@ -57,6 +57,11 @@ if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." + exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/matmul_async_iterate_all/run.sh b/examples/matrix/matmul_async_iterate_all/run.sh index c7e80377..e42cc839 100644 --- a/examples/matrix/matmul_async_iterate_all/run.sh +++ b/examples/matrix/matmul_async_iterate_all/run.sh @@ -57,6 +57,11 @@ if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." 
+ exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/matmul_callback/run.sh b/examples/matrix/matmul_callback/run.sh index 3d4ef189..1e716556 100644 --- a/examples/matrix/matmul_callback/run.sh +++ b/examples/matrix/matmul_callback/run.sh @@ -57,6 +57,11 @@ if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." + exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/matmul_channelsplit/run.sh b/examples/matrix/matmul_channelsplit/run.sh index 1153cb01..29eb47af 100644 --- a/examples/matrix/matmul_channelsplit/run.sh +++ b/examples/matrix/matmul_channelsplit/run.sh @@ -57,6 +57,11 @@ if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." 
+ exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/matmul_constant/README.md b/examples/matrix/matmul_constant/README.md index 30106c66..e24cd7b6 100644 --- a/examples/matrix/matmul_constant/README.md +++ b/examples/matrix/matmul_constant/README.md @@ -63,13 +63,17 @@ MatmulConstantCustom单算子,对输入的A、B矩阵做矩阵乘和加bias偏 BASE_K }; constexpr MatmulConfig CUSTOM_CFG = GetMMConfig(shapeParams); ``` - - 通过GetMatmulApiTiling接口获取常量化Tiling信息,使用自定义MatmulConfig模板创建Matmul对象。 + - 通过GetMatmulApiTiling接口获取常量化Tiling信息。 + ``` + auto constantCFG = AscendC::GetMatmulApiTiling(mmCFG); + ``` + - 使用自定义MatmulConfig模板创建Matmul对象。 ``` using A_TYPE = AscendC::MatmulType; using B_TYPE = AscendC::MatmulType; using C_TYPE = AscendC::MatmulType; using BIAS_TYPE = AscendC::MatmulType; - constexpr static auto CONSTANT_CFG = AscendC::GetMatmulApiTiling(CUSTOM_CFG); + constexpr static auto CONSTANT_CFG = GetCustomConstantCFG(); AscendC::Matmul matmulObj; ``` - 初始化操作,传入常量化Tiling信息。 diff --git a/examples/matrix/matmul_constant/op_kernel/matmul_constant_impl.h b/examples/matrix/matmul_constant/op_kernel/matmul_constant_impl.h index 4cdc8285..3c71febb 100644 --- a/examples/matrix/matmul_constant/op_kernel/matmul_constant_impl.h +++ b/examples/matrix/matmul_constant/op_kernel/matmul_constant_impl.h @@ -26,7 +26,6 @@ constexpr MatmulShapeParams shapeParams = { MAX_M, BASE_M, BASE_N, BASE_K }; -constexpr MatmulConfig CUSTOM_CFG = GetMMConfig(shapeParams); struct MatmulProblemShape { int32_t useCoreNum; int32_t m; @@ -38,6 +37,26 @@ struct MatmulProblemShape { int32_t isBias; }; +constexpr int32_t TILING_DEPTH_PARAM = 8; +constexpr int32_t TILING_STEPK_PARAM = 4; +constexpr int32_t TILING_STEPMN_PARAM = 1; + +template +__aicore__ inline constexpr MatmulApiStaticTiling GetCustomConstantCFG() +{ + MatmulConfig mmCFG = GetMMConfig(shapeParams); + // enable unitflag for performance comparison + mmCFG.enUnitFlag = true; + auto constantCFG = AscendC::GetMatmulApiTiling(mmCFG); + 
constantCFG.depthA1 = TILING_DEPTH_PARAM; + constantCFG.depthB1 = TILING_DEPTH_PARAM; + constantCFG.stepKa = TILING_STEPK_PARAM; + constantCFG.stepKb = TILING_STEPK_PARAM; + constantCFG.stepM = TILING_STEPMN_PARAM; + constantCFG.stepN = TILING_STEPMN_PARAM; + return constantCFG; +} + __aicore__ inline void CopyTiling(MatmulProblemShape *tiling, GM_ADDR tilingGM) { int32_t *ptr = reinterpret_cast(tiling); @@ -58,7 +77,7 @@ public: using B_TYPE = AscendC::MatmulType; using C_TYPE = AscendC::MatmulType; using BIAS_TYPE = AscendC::MatmulType; - constexpr static auto CONSTANT_CFG = AscendC::GetMatmulApiTiling(CUSTOM_CFG); + constexpr static auto CONSTANT_CFG = GetCustomConstantCFG(); AscendC::Matmul matmulObj; MatmulProblemShape shapes; diff --git a/examples/matrix/matmul_constant/run.sh b/examples/matrix/matmul_constant/run.sh index 631f7ba3..5c05c655 100644 --- a/examples/matrix/matmul_constant/run.sh +++ b/examples/matrix/matmul_constant/run.sh @@ -57,6 +57,11 @@ if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." 
+ exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/matmul_constant/testcase/case.csv b/examples/matrix/matmul_constant/testcase/case.csv index e8f8491a..d5adba85 100644 --- a/examples/matrix/matmul_constant/testcase/case.csv +++ b/examples/matrix/matmul_constant/testcase/case.csv @@ -1 +1 @@ -1, case001, 1024, 4096, 1024 \ No newline at end of file +1, case001, 128, 30720, 64 \ No newline at end of file diff --git a/examples/matrix/matmul_gemv/run.sh b/examples/matrix/matmul_gemv/run.sh index a74f957e..75d2dd48 100644 --- a/examples/matrix/matmul_gemv/run.sh +++ b/examples/matrix/matmul_gemv/run.sh @@ -57,6 +57,11 @@ if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." + exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/matmul_ibshareAB/run.sh b/examples/matrix/matmul_ibshareAB/run.sh index 04595c3c..746f8655 100644 --- a/examples/matrix/matmul_ibshareAB/run.sh +++ b/examples/matrix/matmul_ibshareAB/run.sh @@ -57,6 +57,11 @@ if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." + exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/matmul_ibshareB/run.sh b/examples/matrix/matmul_ibshareB/run.sh index 28d0e0ce..f38c0f87 100644 --- a/examples/matrix/matmul_ibshareB/run.sh +++ b/examples/matrix/matmul_ibshareB/run.sh @@ -68,6 +68,11 @@ if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." 
+ exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/matmul_int4/run.sh b/examples/matrix/matmul_int4/run.sh index 00217c87..fb73e320 100644 --- a/examples/matrix/matmul_int4/run.sh +++ b/examples/matrix/matmul_int4/run.sh @@ -57,6 +57,11 @@ if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." + exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/matmul_k_reorder_load/CMakeLists.txt b/examples/matrix/matmul_k_reorder_load/CMakeLists.txt index b519751f..4ebe04fe 100644 --- a/examples/matrix/matmul_k_reorder_load/CMakeLists.txt +++ b/examples/matrix/matmul_k_reorder_load/CMakeLists.txt @@ -50,6 +50,7 @@ target_compile_options(ascendc_matmul_k_reorder_load_bbit PRIVATE ) target_compile_definitions(ascendc_matmul_k_reorder_load_bbit PRIVATE + $<$:ENABLE_SPLITK_FEATURE> SOC_VERSION="${SOC_VERSION}" ) diff --git a/examples/matrix/matmul_k_reorder_load/README.md b/examples/matrix/matmul_k_reorder_load/README.md index f45ca325..f7f9d631 100644 --- a/examples/matrix/matmul_k_reorder_load/README.md +++ b/examples/matrix/matmul_k_reorder_load/README.md @@ -83,15 +83,16 @@ enableKdimReorderLoad用于配置是否使能K轴错峰加载数据。基于相 - 编译执行 ``` - bash run.sh -r [RUN_MODE] -v [SOC_VERSION] -p [IS_PERF] + bash run.sh -r [RUN_MODE] -v [SOC_VERSION] -p [IS_PERF] -e [ENABLE_FEATURE] ``` 其中脚本参数说明如下: - RUN_MODE :编译执行方式,可选择CPU调试,NPU仿真,NPU上板,对应参数分别为[cpu / sim / npu]。若需要详细了解NPU仿真相关内容,请参考[《算子开发工具msProf》](https://hiascend.com/document/redirect/CannCommunityToolMsProf)中的“工具使用”章节。 - SOC_VERSION :昇腾AI处理器型号,如果无法确定具体的[SOC_VERSION],则在安装昇腾AI处理器的服务器执行npu-smi info命令进行查询,在查询到的“Name”前增加Ascend信息,例如“Name”对应取值为xxxyy,实际配置的[SOC_VERSION]值为Ascendxxxyy。支持以下产品型号: - Atlas A2训练系列产品/Atlas 800I A2推理产品 - - IS_PERF : 是否获取执行性能数据,当前只在NPU执行时生效,打开后性能数据显示在执行结果中,可选择关闭和开启该功能,对应参数分别为[0 / 1]。 + - 
IS_PERF :是否获取执行性能数据,当前只在NPU执行时生效,打开后性能数据显示在执行结果中,可选择关闭和开启该功能,对应参数分别为[0 / 1]。 + - ENABLE_FEATURE :是否使能K轴错峰加载数据,可选择关闭和开启该功能,对应参数分别为[0 / 1],默认开启。 示例如下,Ascendxxxyy请替换为实际的AI处理器型号。 ``` - bash run.sh -r cpu -v Ascendxxxyy -p 0 + bash run.sh -r cpu -v Ascendxxxyy -p 0 -e 1 ``` \ No newline at end of file diff --git a/examples/matrix/matmul_k_reorder_load/cmake/cpu_lib.cmake b/examples/matrix/matmul_k_reorder_load/cmake/cpu_lib.cmake index 244469fb..687497c4 100644 --- a/examples/matrix/matmul_k_reorder_load/cmake/cpu_lib.cmake +++ b/examples/matrix/matmul_k_reorder_load/cmake/cpu_lib.cmake @@ -20,6 +20,10 @@ target_link_libraries(ascendc_kernels_${RUN_MODE} PRIVATE tikicpulib::${SOC_VERSION} ) +target_compile_definitions(ascendc_kernels_${RUN_MODE} PRIVATE + $<$:ENABLE_K_REORDER_LOAD_FEATURE> +) + target_compile_options(ascendc_kernels_${RUN_MODE} PRIVATE -g -O0 diff --git a/examples/matrix/matmul_k_reorder_load/cmake/npu_lib.cmake b/examples/matrix/matmul_k_reorder_load/cmake/npu_lib.cmake index 927d2490..456a0640 100644 --- a/examples/matrix/matmul_k_reorder_load/cmake/npu_lib.cmake +++ b/examples/matrix/matmul_k_reorder_load/cmake/npu_lib.cmake @@ -21,6 +21,7 @@ ascendc_library(ascendc_kernels_${RUN_MODE} STATIC ) ascendc_compile_definitions(ascendc_kernels_${RUN_MODE} PRIVATE + $<$:ENABLE_K_REORDER_LOAD_FEATURE> -DASCENDC_DUMP -DHAVE_WORKSPACE -DHAVE_TILING diff --git a/examples/matrix/matmul_k_reorder_load/op_kernel/matmul_k_reorder_load_custom_kernel.h b/examples/matrix/matmul_k_reorder_load/op_kernel/matmul_k_reorder_load_custom_kernel.h index 74e962a1..ad5a1489 100644 --- a/examples/matrix/matmul_k_reorder_load/op_kernel/matmul_k_reorder_load_custom_kernel.h +++ b/examples/matrix/matmul_k_reorder_load/op_kernel/matmul_k_reorder_load_custom_kernel.h @@ -40,8 +40,13 @@ public: */ __aicore__ inline void Process(); // Set the penultimate parameter enableKdimReorderLoad to true to create a Matmul object +#ifdef ENABLE_K_REORDER_LOAD_FEATURE static constexpr auto 
CFG_MDL_REORDER = GetMDLConfig(false, false, 0, false, false, false, false, true, true, false, false, true); +#else + static constexpr auto CFG_MDL_REORDER = GetMDLConfig(false, false, 0, false, false, false, + false, true, true, false, false, false); +#endif // Enable IBSHARE parameters for matrix A and matrix B AscendC::Matmul< AscendC::MatmulType, diff --git a/examples/matrix/matmul_k_reorder_load/run.sh b/examples/matrix/matmul_k_reorder_load/run.sh index 0531859a..a53e37c1 100644 --- a/examples/matrix/matmul_k_reorder_load/run.sh +++ b/examples/matrix/matmul_k_reorder_load/run.sh @@ -9,8 +9,9 @@ # ====================================================================================================================== export IS_PERF="0" +export ENABLE_FEATURE="1" -SHORT=r:,v:,p:, +SHORT=r:,v:,p:,e:, LONG=run-mode:,soc-version:,perf:, OPTS=$(getopt -a --options $SHORT --longoptions $LONG -- "$@") eval set -- "$OPTS" @@ -26,6 +27,9 @@ do (-p | --perf ) IS_PERF="$2" shift 2;; + (-e | --enable-feature ) + ENABLE_FEATURE="$2" + shift 2;; (--) shift; break;; @@ -51,12 +55,22 @@ if [ "${IS_PERF}" != "0" ] && [ "${IS_PERF}" != "1" ]; then exit 1 fi +if [ "${ENABLE_FEATURE}" != "0" ] && [ "${ENABLE_FEATURE}" != "1" ]; then + echo "[ERROR] Unsupported ENABLE_FEATURE: ${ENABLE_FEATURE}, which can only be 0 or 1." + exit 1 +fi + # only npu mode support is_perf = 1 if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then echo "[ERROR] Unsupported IS_PERF: ${IS_PERF} while RUN_MODE is ${RUN_MODE}." exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." 
+ exit 1 +fi + rm -rf build mkdir build cd build @@ -65,7 +79,7 @@ source $ASCEND_HOME_DIR/bin/setenv.bash export LD_LIBRARY_PATH=${ASCEND_HOME_DIR}/tools/simulator/${SOC_VERSION}/lib:$LD_LIBRARY_PATH set -euo pipefail -cmake -DRUN_MODE=${RUN_MODE} -DSOC_VERSION=${SOC_VERSION} -DASCEND_CANN_PACKAGE_PATH=${ASCEND_HOME_DIR} .. +cmake -DRUN_MODE=${RUN_MODE} -DSOC_VERSION=${SOC_VERSION} -DENABLE_FEATURE=${ENABLE_FEATURE} -DASCEND_CANN_PACKAGE_PATH=${ASCEND_HOME_DIR} .. make -j16 cd ../ diff --git a/examples/matrix/matmul_k_reorder_load/testcase/case.csv b/examples/matrix/matmul_k_reorder_load/testcase/case.csv index eb7a49f2..a1c1bf3f 100644 --- a/examples/matrix/matmul_k_reorder_load/testcase/case.csv +++ b/examples/matrix/matmul_k_reorder_load/testcase/case.csv @@ -1 +1 @@ -1, case001, 512, 2048, 6144 \ No newline at end of file +1, case001, 768, 2048, 6144 \ No newline at end of file diff --git a/examples/matrix/matmul_l0c_extend/run.sh b/examples/matrix/matmul_l0c_extend/run.sh index 4fceabf6..85a36eb1 100644 --- a/examples/matrix/matmul_l0c_extend/run.sh +++ b/examples/matrix/matmul_l0c_extend/run.sh @@ -57,6 +57,11 @@ if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." + exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/matmul_l0cache/run.sh b/examples/matrix/matmul_l0cache/run.sh index e049b003..331d8a0b 100644 --- a/examples/matrix/matmul_l0cache/run.sh +++ b/examples/matrix/matmul_l0cache/run.sh @@ -35,6 +35,11 @@ if [[ ! "${SOC_VERSION}" =~ ^Ascend910 ]]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." 
+ exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/matmul_l2cache/run.sh b/examples/matrix/matmul_l2cache/run.sh index 631f7ba3..5c05c655 100644 --- a/examples/matrix/matmul_l2cache/run.sh +++ b/examples/matrix/matmul_l2cache/run.sh @@ -57,6 +57,11 @@ if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." + exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/matmul_mixdualmaster/op_kernel/matmul_mixdualmaster_custom_kernel.h b/examples/matrix/matmul_mixdualmaster/op_kernel/matmul_mixdualmaster_custom_kernel.h index 0ef050d4..918f3763 100644 --- a/examples/matrix/matmul_mixdualmaster/op_kernel/matmul_mixdualmaster_custom_kernel.h +++ b/examples/matrix/matmul_mixdualmaster/op_kernel/matmul_mixdualmaster_custom_kernel.h @@ -41,7 +41,7 @@ public: __aicore__ inline void Process(); // Set the penultimate parameter enableMixDualMaster to true to create a Matmul object static constexpr auto MM_CFG = GetNormalConfig(false, false, false, BatchMode::BATCH_LESS_THAN_L1, - true, IterateOrder::UNDEF, ScheduleType::INNER_PRODUCT, true, true, false); + true, IterateOrder::UNDEF, ScheduleType::INNER_PRODUCT, true, true); // Enable IBSHARE parameters for matrix A and matrix B AscendC::Matmul< AscendC::MatmulType, diff --git a/examples/matrix/matmul_mixdualmaster/run.sh b/examples/matrix/matmul_mixdualmaster/run.sh index 78e91376..53d52d17 100644 --- a/examples/matrix/matmul_mixdualmaster/run.sh +++ b/examples/matrix/matmul_mixdualmaster/run.sh @@ -57,6 +57,11 @@ if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." 
+ exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/matmul_mndb/run.sh b/examples/matrix/matmul_mndb/run.sh index dca86b58..c378e7c6 100644 --- a/examples/matrix/matmul_mndb/run.sh +++ b/examples/matrix/matmul_mndb/run.sh @@ -57,6 +57,11 @@ if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." + exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/matmul_nbuffer33/run.sh b/examples/matrix/matmul_nbuffer33/run.sh index 6443aff0..5b5887f5 100644 --- a/examples/matrix/matmul_nbuffer33/run.sh +++ b/examples/matrix/matmul_nbuffer33/run.sh @@ -66,6 +66,11 @@ if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." + exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/matmul_nd_align/run.sh b/examples/matrix/matmul_nd_align/run.sh index 0b18b5fd..9f9ec6fa 100644 --- a/examples/matrix/matmul_nd_align/run.sh +++ b/examples/matrix/matmul_nd_align/run.sh @@ -57,6 +57,11 @@ if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." 
+ exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/matmul_nz/run.sh b/examples/matrix/matmul_nz/run.sh index 631f7ba3..5c05c655 100644 --- a/examples/matrix/matmul_nz/run.sh +++ b/examples/matrix/matmul_nz/run.sh @@ -57,6 +57,11 @@ if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." + exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/matmul_partial_output/run.sh b/examples/matrix/matmul_partial_output/run.sh index 2b109b98..68be88e5 100644 --- a/examples/matrix/matmul_partial_output/run.sh +++ b/examples/matrix/matmul_partial_output/run.sh @@ -57,6 +57,11 @@ if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." 
+ exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/matmul_perf/CMakeLists.txt b/examples/matrix/matmul_perf/CMakeLists.txt index 456cb328..f353d86a 100644 --- a/examples/matrix/matmul_perf/CMakeLists.txt +++ b/examples/matrix/matmul_perf/CMakeLists.txt @@ -72,12 +72,6 @@ if (PERF_MODE GREATER 2) ) message(STATUS "[INFO] Enable MDL UNITFLAG") endif() -if (PERF_MODE GREATER 3) - target_compile_definitions(ascendc_matmul_perf_bbit PRIVATE - -DENABLE_CONSTANT - ) - message(STATUS "[INFO] Enable CONSTANT") -endif() target_include_directories(ascendc_matmul_perf_bbit PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} diff --git a/examples/matrix/matmul_perf/README.md b/examples/matrix/matmul_perf/README.md index c65b0e4e..551a6afa 100644 --- a/examples/matrix/matmul_perf/README.md +++ b/examples/matrix/matmul_perf/README.md @@ -1,7 +1,7 @@ ## 概述 -本样例介绍了调用Matmul API实现四种性能优化特性(纯Cube模式、MDL模板、UnitFlag、Tiling全量常量化)的单算子。 +本样例介绍了调用Matmul API实现三种性能优化特性(纯Cube模式、MDL模板、UnitFlag)的单算子。 本样例以直调的方式调用算子核函数。 直调:核函数的基础调用方式,开发者完成算子核函数的开发和Tiling实现后,即可通过AscendCL运行时接口,完成算子的调用。包含CPU侧、NPU侧和仿真三种运行验证方法。 @@ -26,10 +26,8 @@ - 纯Cube模式:在只有矩阵计算,没有矢量计算的场景下,通过使能纯Cube模式,可以优化Matmul计算中的消息通信性能开销,提升算子性能。 - MDL模板:在MTE2循环搬运次数多的大Shape场景下,使能MDL模板后,可以实现MTE2从Global Memory一次性搬入多个基本块到A1/B1,提升带宽利用率,减少MTE2的搬运次数,提升算子性能。 - UnitFlag:在算子的CUBE计算流水和FIXPIPE数据搬出流水串行且未被其他流水掩盖时,通过使能UnitFlag功能,可以实现CUBE计算流水和FIXPIPE数据搬出流水之间的流水并行,提升算子性能。 - - Tiling常量化:分为两种场景,1)全量常量化:在获取Matmul模板时,可以确定常量的singleCore Shape(singleCoreM/singleCoreN/singleCoreK)和常量的Base Shape(basicM/basicN/basicK,也称为baseM/baseN/baseK);2)部分常量化:在获取Matmul模板时,仅能确定常量的Base Shape(basicM/basicN/basicK)。 - 若Matmul初始化的Scalar计算较多,影响指令头开销,或Matmul迭代间的Scalar计算较多,阻塞MTE2流水。这两种场景下,在满足上述Tiling常量化条件时,可以使能Tiling常量化功能,减少Scalar计算开销,提升算子性能。 - 以上四个特性的编码是相互独立的,本样例支持特性逐个叠加。具体叠加方式可参考下述“编译运行样例 > PERF_MODE参数说明”。 + 以上三个特性的编码是相互独立的,本样例支持特性逐个叠加。具体叠加方式可参考下述“编译运行样例 > PERF_MODE参数说明”。 - 算子规格 @@ -47,7 +45,7 @@
## 算子实现介绍 -本样例的四个性能优化特性复用公共的算子Kernel代码和Host代码,通过编译宏的方式,隔离各个性能优化特性的差异化代码实现。 +本样例的三个性能优化特性复用公共的算子Kernel代码和Host代码,通过编译宏的方式,隔离各个性能优化特性的差异化代码实现。 - 算子Kernel实现 - 计算逻辑:C = A * B + Bias。 - A、B为源操作数,A为左矩阵,形状为[M, K];B为右矩阵,形状为[K, N]。 @@ -110,52 +108,6 @@ AscendC::Matmul matmulObj; // 使用自定义的MatmulConfig参数CFG_MDL_UNITFLAG创建Matmul对象 ``` - - 使能Tiling全量常量化功能实现,首先使用MAX_M、MAX_N、MAX_K、BASE_M、BASE_N、BASE_K的常数值设置MatmulShapeParams,调用GetMMConfig得到自定义的MatmulConfig模板参数mmCFG;再通过GetMatmulApiTiling接口将Tiling信息常量化,得到常量化模板参数CUSTOM_CFG_CONSTANT,基于CUSTOM_CFG_CONSTANT创建Matmul对象。 - ``` - #define ASCNEND_CUBE_ONLY // 设置ASCEND_CUBE_ONLY宏 - #include "lib/matmul_intf.h" - - constexpr int32_t MAX_M = 10000; // custom matmul kernel support max value of M Dim shape - constexpr int32_t MAX_N = 10000; // custom matmul kernel support max value of N Dim shape - constexpr int32_t MAX_K = 10000; // custom matmul kernel support max value of K Dim shape - constexpr int32_t BASE_M = 128; // BASE_M * BASE_K * sizeof(typeA) <=L0A size - constexpr int32_t BASE_N = 256; // BASE_N * BASE_K * sizeof(typeB) <=L0B size - constexpr int32_t BASE_K = 64; // BASE_M * BASE_N * sizeof(typeC) <=L0C size - constexpr MatmulShapeParams shapeParams = { MAX_M, - MAX_N, - MAX_K, - BASE_M, - BASE_N, - BASE_K }; - - constexpr int32_t TILING_DEPTH_PARAM = 8; - constexpr int32_t TILING_STEPK_PARAM = 4; - constexpr int32_t TILING_STEPMN_PARAM = 1; - - template - __aicore__ inline constexpr MatmulApiStaticTiling GetCustomConstantCFG() - { - MatmulConfig mmCFG = GetMMConfig(shapeParams); - // enable unitflag for performance comparison - mmCFG.enUnitFlag = true; - auto constantCFG = AscendC::GetMatmulApiTiling(mmCFG); - constantCFG.depthA1 = TILING_DEPTH_PARAM; - constantCFG.depthB1 = TILING_DEPTH_PARAM; - constantCFG.stepKa = TILING_STEPK_PARAM; - constantCFG.stepKb = TILING_STEPK_PARAM; - constantCFG.stepM = TILING_STEPMN_PARAM; - constantCFG.stepN = TILING_STEPMN_PARAM; - return constantCFG; - } - - using A_TYPE = AscendC::MatmulType; - 
using B_TYPE = AscendC::MatmulType; - using C_TYPE = AscendC::MatmulType; - using BIAS_TYPE = AscendC::MatmulType; - - constexpr static auto CUSTOM_CFG_CONSTANT = GetCustomConstantCFG(); - AscendC::Matmul matmulObj; // 使用自定义的MatmulConfig参数CUSTOM_CFG_CONSTANT创建Matmul对象 - ``` - 初始化操作。 - 默认实现 ``` @@ -164,14 +116,6 @@ - 使能纯Cube模式实现,同默认实现。 - 使能MDL模板实现,同默认实现。 - 使能UnitFlag功能实现,同默认实现。 - - 使能Tiling全量常量化功能实现 - ``` - // 全量常量化场景,调用REGIST_MATMUL_OBJ接口初始化Matmul对象时,可以在入参传递Tiling参数的位置,使用空指针替代。 - REGIST_MATMUL_OBJ(&pipe, GetSysWorkSpacePtr(), matmulObj, (TCubeTiling*)nullptr); - - // 部分常量化场景,调用REGIST_MATMUL_OBJ接口初始化Matmul对象时,仍需要使用Tiling。 - REGIST_MATMUL_OBJ(&pipe, GetSysWorkSpacePtr(), matmulObj, &tiling); - ``` - 设置左矩阵A、右矩阵B、Bias。 - 默认实现 ``` @@ -182,13 +126,6 @@ - 使能纯Cube模式实现,同默认实现。 - 使能MDL模板实现,同默认实现。 - 使能UnitFlag功能实现,同默认实现。 - - 使能Tiling全量常量化功能实现 - ``` - matmulObj.SetOrgShape(tiling.M, tiling.N, tiling.Ka); // 调用SetOrgShape设置Matmul计算原始Shape - matmulObj.SetTensorA(aGlobal); // 设置左矩阵A - matmulObj.SetTensorB(bGlobal); // 设置右矩阵B - matmulObj.SetBias(biasGlobal); // 设置Bias - ``` - 完成矩阵乘操作。 ``` matmulObj.IterateAll(cGlobal); @@ -250,12 +187,11 @@ - SOC_VERSION :昇腾AI处理器型号,如果无法确定具体的[SOC_VERSION],则在安装昇腾AI处理器的服务器执行npu-smi info命令进行查询,在查询到的“Name”前增加Ascend信息,例如“Name”对应取值为xxxyy,实际配置的[SOC_VERSION]值为Ascendxxxyy。支持以下产品型号: - Atlas A2训练系列产品/Atlas 800I A2推理产品 - IS_PERF :是否获取执行性能数据,当前只在NPU执行时生效,打开后性能数据显示在执行结果中,可选择关闭和开启该功能,对应参数分别为[0 / 1]。 - - PERF_MODE :配置性能优化的方式,支持的取值为0,1,2,3,4,各取值的含义如下。该参数的默认值为0。 + - PERF_MODE :配置性能优化的方式,支持的取值为0,1,2,3,各取值的含义如下。该参数的默认值为0。 - PERF_MODE = 0:默认实现。使能MIX模式 + NORM模板(未使能UnitFlag功能和Tiling全量常量化功能); - PERF_MODE = 1:使能纯Cube模式; - PERF_MODE = 2:使能纯Cube模式 + MDL模板; - PERF_MODE = 3:使能纯Cube模式 + MDL模板 + UnitFlag功能; - - PERF_MODE = 4:使能纯Cube模式 + MDL模板 + UnitFlag功能 + Tiling全量常量化功能; 示例如下,Ascendxxxyy请替换为实际的AI处理器型号。 ``` diff --git a/examples/matrix/matmul_perf/cmake/cpu_lib.cmake b/examples/matrix/matmul_perf/cmake/cpu_lib.cmake index ffb9281d..393dca05 100644 --- 
a/examples/matrix/matmul_perf/cmake/cpu_lib.cmake +++ b/examples/matrix/matmul_perf/cmake/cpu_lib.cmake @@ -45,12 +45,6 @@ if (PERF_MODE GREATER 2) ) message(STATUS "[INFO] CPU Target Enable MDL UNITFLAG") endif() -if (PERF_MODE GREATER 3) - target_compile_definitions(ascendc_kernels_${RUN_MODE} PRIVATE - -DENABLE_CONSTANT - ) - message(STATUS "[INFO] CPU Target Enable CONSTANT") -endif() install(TARGETS ascendc_kernels_${RUN_MODE} DESTINATION ${CMAKE_INSTALL_LIBDIR} diff --git a/examples/matrix/matmul_perf/cmake/npu_lib.cmake b/examples/matrix/matmul_perf/cmake/npu_lib.cmake index 49541829..0b336bf0 100644 --- a/examples/matrix/matmul_perf/cmake/npu_lib.cmake +++ b/examples/matrix/matmul_perf/cmake/npu_lib.cmake @@ -45,9 +45,3 @@ if (PERF_MODE GREATER 2) ) message(STATUS "[INFO] NPU/SIM Target Enable MDL UNITFLAG") endif() -if (PERF_MODE GREATER 3) - ascendc_compile_definitions(ascendc_kernels_${RUN_MODE} PRIVATE - -DENABLE_CONSTANT - ) - message(STATUS "[INFO] NPU/SIM Target Enable CONSTANT") -endif() diff --git a/examples/matrix/matmul_perf/op_kernel/matmul_perf_custom_kernel.cpp b/examples/matrix/matmul_perf/op_kernel/matmul_perf_custom_kernel.cpp index 5218d599..e0a62aee 100644 --- a/examples/matrix/matmul_perf/op_kernel/matmul_perf_custom_kernel.cpp +++ b/examples/matrix/matmul_perf/op_kernel/matmul_perf_custom_kernel.cpp @@ -68,9 +68,6 @@ __aicore__ inline void MatmulKernel::Process(Asce tailM = tailM < tiling.singleCoreM ? (tailM > 0 ? tailM : tiling.singleCoreM) : tiling.singleCoreM; int tailN = tiling.N - nCoreIndex * tiling.singleCoreN; tailN = tailN < tiling.singleCoreN ? (tailN > 0 ? 
tailN : tiling.singleCoreN) : tiling.singleCoreN; -#ifdef ENABLE_CONSTANT - matmulObj.SetOrgShape(tiling.M, tiling.N, tiling.Ka); -#endif matmulObj.SetSingleShape(tailM, tailN, tiling.Ka); matmulObj.SetTensorA(aGlobal, isTransA); matmulObj.SetTensorB(bGlobal, isTransB); @@ -134,11 +131,7 @@ extern "C" __global__ __aicore__ void matmul_perf_custom(GM_ADDR a, GM_ADDR b, AscendC::TPipe pipe; // init matmul kernel, isTransA=false, isTransB=false matmulKernel.Init(a, b, bias, c, tiling, false, false); -#ifdef ENABLE_CONSTANT - REGIST_MATMUL_OBJ(&pipe, GetSysWorkSpacePtr(), matmulKernel.matmulObj, (TCubeTiling*)nullptr); -#else REGIST_MATMUL_OBJ(&pipe, GetSysWorkSpacePtr(), matmulKernel.matmulObj, &tiling); -#endif // matmul kernel process matmulKernel.Process(&pipe); } diff --git a/examples/matrix/matmul_perf/op_kernel/matmul_perf_custom_kernel.h b/examples/matrix/matmul_perf/op_kernel/matmul_perf_custom_kernel.h index 6441e48d..07ff8487 100644 --- a/examples/matrix/matmul_perf/op_kernel/matmul_perf_custom_kernel.h +++ b/examples/matrix/matmul_perf/op_kernel/matmul_perf_custom_kernel.h @@ -35,39 +35,6 @@ __aicore__ inline constexpr MatmulConfig GetCustomMDLCFG() return mmCfg; } -constexpr int32_t MAX_M = 10000; // custom matmul kernel support max value of M Dim shape -constexpr int32_t MAX_N = 10000; // custom matmul kernel support max value of N Dim shape -constexpr int32_t MAX_K = 10000; // custom matmul kernel support max value of K Dim shape -constexpr int32_t BASE_M = 128; // BASE_M * BASE_K * sizeof(typeA) <=L0A size -constexpr int32_t BASE_N = 256; // BASE_N * BASE_K * sizeof(typeB) <=L0B size -constexpr int32_t BASE_K = 64; // BASE_M * BASE_N * sizeof(typeC) <=L0C size -constexpr MatmulShapeParams shapeParams = { MAX_M, - MAX_N, - MAX_K, - BASE_M, - BASE_N, - BASE_K }; - -constexpr int32_t TILING_DEPTH_PARAM = 8; -constexpr int32_t TILING_STEPK_PARAM = 4; -constexpr int32_t TILING_STEPMN_PARAM = 1; - -template -__aicore__ inline constexpr MatmulApiStaticTiling 
GetCustomConstantCFG() -{ - MatmulConfig mmCFG = GetMMConfig(shapeParams); - // enable unitflag for performance comparison - mmCFG.enUnitFlag = true; - auto constantCFG = AscendC::GetMatmulApiTiling(mmCFG); - constantCFG.depthA1 = TILING_DEPTH_PARAM; - constantCFG.depthB1 = TILING_DEPTH_PARAM; - constantCFG.stepKa = TILING_STEPK_PARAM; - constantCFG.stepKb = TILING_STEPK_PARAM; - constantCFG.stepM = TILING_STEPMN_PARAM; - constantCFG.stepN = TILING_STEPMN_PARAM; - return constantCFG; -} - template class MatmulKernel { public: @@ -96,10 +63,6 @@ public: using B_TYPE = AscendC::MatmulType; using C_TYPE = AscendC::MatmulType; using BIAS_TYPE = AscendC::MatmulType; -#ifdef ENABLE_CONSTANT - constexpr static auto CUSTOM_CFG_CONSTANT = GetCustomConstantCFG(); - AscendC::Matmul matmulObj; -#else #ifdef ENABLE_MDL constexpr static MatmulConfig CUSTOM_CFG_MDL = GetCustomMDLCFG(); AscendC::Matmul matmulObj; @@ -107,7 +70,6 @@ public: constexpr static MatmulConfig CUSTOM_CFG_NORM = GetCustomNormCFG(); AscendC::Matmul matmulObj; #endif -#endif private: /** diff --git a/examples/matrix/matmul_perf/run.sh b/examples/matrix/matmul_perf/run.sh index 41e6d46c..c090d308 100644 --- a/examples/matrix/matmul_perf/run.sh +++ b/examples/matrix/matmul_perf/run.sh @@ -66,8 +66,13 @@ if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then exit 1 fi -if ! [[ "${PERF_MODE}" =~ ^[0-4]+$ ]]; then - echo "[ERROR] Unsupported PERF_MODE: ${PERF_MODE}, which can only be 0, 1, 2, 3, 4." +if ! [[ "${PERF_MODE}" =~ ^[0-3]$ ]]; then + echo "[ERROR] Unsupported PERF_MODE: ${PERF_MODE}, which can only be 0, 1, 2, 3." + exit 1 +fi + +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4."
exit 1 fi diff --git a/examples/matrix/matmul_preload/run.sh b/examples/matrix/matmul_preload/run.sh index 87d9d1a9..66d3a53f 100644 --- a/examples/matrix/matmul_preload/run.sh +++ b/examples/matrix/matmul_preload/run.sh @@ -73,6 +73,11 @@ if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." + exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/matmul_quant/run.sh b/examples/matrix/matmul_quant/run.sh index 34ed330f..6c7e0950 100644 --- a/examples/matrix/matmul_quant/run.sh +++ b/examples/matrix/matmul_quant/run.sh @@ -57,6 +57,11 @@ if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." + exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/matmul_sparse/run.sh b/examples/matrix/matmul_sparse/run.sh index d634b732..53a29cf4 100644 --- a/examples/matrix/matmul_sparse/run.sh +++ b/examples/matrix/matmul_sparse/run.sh @@ -57,6 +57,11 @@ if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." 
+ exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/matmul_splitk/run.sh b/examples/matrix/matmul_splitk/run.sh index 72e4ffaa..168c410b 100644 --- a/examples/matrix/matmul_splitk/run.sh +++ b/examples/matrix/matmul_splitk/run.sh @@ -66,6 +66,11 @@ if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." + exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/matmul_triangle/run.sh b/examples/matrix/matmul_triangle/run.sh index d81a852f..e9c582da 100644 --- a/examples/matrix/matmul_triangle/run.sh +++ b/examples/matrix/matmul_triangle/run.sh @@ -65,6 +65,11 @@ if [ "${TRIANGLE_MODE}" != "upper" ] && [ "${TRIANGLE_MODE}" != "lower" ]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." + exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/matmul_tscm/run.sh b/examples/matrix/matmul_tscm/run.sh index 266e12d0..b6f86c96 100644 --- a/examples/matrix/matmul_tscm/run.sh +++ b/examples/matrix/matmul_tscm/run.sh @@ -57,6 +57,11 @@ if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." 
+ exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/matmul_unaligned/run.sh b/examples/matrix/matmul_unaligned/run.sh index 525ef0a2..3b607bf8 100644 --- a/examples/matrix/matmul_unaligned/run.sh +++ b/examples/matrix/matmul_unaligned/run.sh @@ -55,6 +55,11 @@ if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." + exit 1 +fi + rm -rf build mkdir build cd build diff --git a/examples/matrix/matmul_unitflag/run.sh b/examples/matrix/matmul_unitflag/run.sh index c53da86e..23cf0e1d 100644 --- a/examples/matrix/matmul_unitflag/run.sh +++ b/examples/matrix/matmul_unitflag/run.sh @@ -66,6 +66,11 @@ if [ "${IS_PERF}" == "1" ] && [ "${RUN_MODE}" != "npu" ]; then exit 1 fi +if [[ "${SOC_VERSION}" =~ ^Ascend910B4-1 ]] && [ "${RUN_MODE}" == "sim" ]; then + echo "[ERROR] SocVersion: ${SOC_VERSION} can not support sim mode, please use Ascend910B4." + exit 1 +fi + rm -rf build mkdir build cd build -- Gitee