From 02e832b56a4c7710e94c6f0c594ac265f96ed6c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E6=B5=A9=E5=8D=97?= Date: Tue, 19 Mar 2024 16:24:45 +0800 Subject: [PATCH] =?UTF-8?q?[ADDED]=20=E6=96=B0=E5=A2=9E=E5=86=85=E5=AD=98?= =?UTF-8?q?=E6=B3=84=E6=BC=8F=E3=80=81=E9=9D=9E=E6=B3=95=E9=87=8A=E6=94=BE?= =?UTF-8?q?=E5=92=8C=E5=86=85=E5=AD=98=E6=9C=AA=E4=BD=BF=E7=94=A8=E6=A3=80?= =?UTF-8?q?=E6=B5=8B=E6=A0=B7=E4=BE=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- sample/README.md | 43 ++++++++++++----- sample/build/build.sh | 15 ++++++ sample/sanitizer_sample/README.md | 22 +++++++++ .../memcheck/illegal_free/Makefile | 29 ++++++++++++ .../illegal_free/illegal_free_kernel.cpp | 12 +++++ .../memcheck/illegal_free/main.cpp | 42 +++++++++++++++++ .../memcheck/memory_leak/Makefile | 29 ++++++++++++ .../memcheck/memory_leak/main.cpp | 41 +++++++++++++++++ .../memory_leak/memory_leak_kernel.cpp | 12 +++++ .../memcheck/memory_unused/Makefile | 29 ++++++++++++ .../memcheck/memory_unused/main.cpp | 46 +++++++++++++++++++ .../memory_unused/memory_unused_kernel.cpp | 12 +++++ 12 files changed, 320 insertions(+), 12 deletions(-) create mode 100644 sample/sanitizer_sample/README.md create mode 100644 sample/sanitizer_sample/memcheck/illegal_free/Makefile create mode 100644 sample/sanitizer_sample/memcheck/illegal_free/illegal_free_kernel.cpp create mode 100644 sample/sanitizer_sample/memcheck/illegal_free/main.cpp create mode 100644 sample/sanitizer_sample/memcheck/memory_leak/Makefile create mode 100644 sample/sanitizer_sample/memcheck/memory_leak/main.cpp create mode 100644 sample/sanitizer_sample/memcheck/memory_leak/memory_leak_kernel.cpp create mode 100644 sample/sanitizer_sample/memcheck/memory_unused/Makefile create mode 100644 sample/sanitizer_sample/memcheck/memory_unused/main.cpp create mode 100644 sample/sanitizer_sample/memcheck/memory_unused/memory_unused_kernel.cpp diff --git a/sample/README.md b/sample/README.md index 6bd55a2f83..41c2a74749 100644 --- a/sample/README.md +++ b/sample/README.md @@ -25,9 +25,7 @@ source set_env.sh |- pytorch_adapter # 适配pytorch的AscendC单算子极简工程,可配合msdebug和msprof工具 |- jit_compile # jit模式,运行时编译使用 |- with_setuptools # 编译成wheel包安装使用 - |- sanitizer_sample # 异常样例,用于配合mssanitizer工具 - |- racecheck # 含竞争问题的样例 - |- xx # 其他异常样例 + |- sanitizer_sample # 用于异常检测的异常算子样例,详见[异常检测样例介绍](./sanitizer_sample/README.md) ``` 如果你关注自定义算子的pytorch框架适配,详见[此处](./pytorch_adapter/README.md) @@ -48,17 +46,38 @@ source set_env.sh msdebug ./*.fatbin ``` -## 内存检测 sanitizer -1. 编译阶段:在编译过程中添加```--cce-enable-sanitizer -g```参数, 在链接过程中添加```--cce-enable-sanitizer```参数。(现样例中已在Makefile中添加),执行如下命令: -``` -make -``` +## 异常检测 mssanitizer -2. 运行阶段: -``` -mssanitizer ./*.fatbin # 默认进行memcheck检查 -``` +### 使用前准备 +若使用 mssanitizer 对算子进行异常检测,还需要加入额外编译选项,具体如下: + +在 `sample\normal_sample\vec_only` 相对路径下的 `Makefile` 文件中修改如下内容: + +1. 编译阶段增加 `-g --cce-enable-sanitizer` 选项启用检测功能 + ``` makefile + COMPILER_FLAG := -xcce -O2 -std=c++17 + # 修改为: + COMPILER_FLAG := -xcce -O2 -std=c++17 --cce-enable-sanitizer -g + ``` +2. 链接阶段增加 `--cce-enable-sanitizer` 选项启用检测功能 + ``` makefile + LINK_FLAG := --cce-fatobj-link + # 修改为: + LINK_FLAG := --cce-fatobj-link --cce-enable-sanitizer + ``` + +`sample\sanitizer_sample` 目录下的样例均已支持检测功能,也可参考此目录下的样例进行修改。 +### 运行异常检测工具 + +1. 内存检测 + ``` bash + mssanitizer -t memcheck ./illegal_align.fatbin + ``` +2. 竞争检测 + ``` bash + mssanitizer -t racecheck ./raw_error.fatbin + ``` ## 算子调优 算子调优工具可以支持上板和仿真算子的调优,下面将以vec_only中的算子为例,进行工具使用的实战命令讲解 diff --git a/sample/build/build.sh b/sample/build/build.sh index bbb2915b04..7c1c31b51a 100644 --- a/sample/build/build.sh +++ b/sample/build/build.sh @@ -33,6 +33,21 @@ cd ${TOP_DIR}/sanitizer_sample/memcheck/illegal_align make mv *.fatbin ${TOP_DIR}/build +# memory leak sample +cd ${TOP_DIR}/sanitizer_sample/memcheck/memory_leak +make +mv *.fatbin ${TOP_DIR}/build + +# illegal free sample +cd ${TOP_DIR}/sanitizer_sample/memcheck/illegal_free +make +mv *.fatbin ${TOP_DIR}/build + +# memory ununsed sample +cd ${TOP_DIR}/sanitizer_sample/memcheck/memory_unused +make +mv *.fatbin ${TOP_DIR}/build + cd ${TOP_DIR}/sanitizer_sample/Racecheck make mv *.fatbin ${TOP_DIR}/build \ No newline at end of file diff --git a/sample/sanitizer_sample/README.md b/sample/sanitizer_sample/README.md new file mode 100644 index 0000000000..624782e207 --- /dev/null +++ b/sample/sanitizer_sample/README.md @@ -0,0 +1,22 @@ +# 异常检测样例介绍 + +## 内存检测样例 + +用于展示内存检测的异常用例在 `memcheck` 目录下,各样例的功能说明如下: + +| 样例 | 说明 | +|---|---| +| illegal_align | 用于展示非法对齐访问异常检测的样例 | +| illegal_read_and_write | 用于展示非法读写异常检测的样例 | +| out_of_bound | 用于展示多核内存踩踏异常检测的样例 | +| memory_leak | 用于展示内存泄漏异常检测的样例 | +| illegal_free | 用于展示内存非法释放异常检测的样例 | +| memory_unused | 用于展示内存未使用异常检测的样例 | + +## 竞争检测样例 + +用于展示竞争检测的异常用例在 `racecheck` 目录下,各样例的功能说明如下: + +| 样例 | 说明 | +|---|---| +| raw_error | 用于展示 read-after-write 竞争检测的样例 | \ No newline at end of file diff --git a/sample/sanitizer_sample/memcheck/illegal_free/Makefile b/sample/sanitizer_sample/memcheck/illegal_free/Makefile new file mode 100644 index 0000000000..88da6c456d --- /dev/null +++ b/sample/sanitizer_sample/memcheck/illegal_free/Makefile @@ -0,0 +1,29 @@ +ASCEND_HOME_PATH ?= /usr/local/Ascend/ascend-toolkit/latest + +COMPILER := ${ASCEND_HOME_PATH}/compiler/ccec_compiler/bin/ccec +HOST_COMPILER := g++ +COMPILER_FLAG := -xcce -O2 -std=c++17 --cce-enable-sanitizer -g +HOST_COMPILER_FLAG := -O2 -std=c++17 +LINK_FLAG := --cce-fatobj-link --cce-enable-sanitizer +DAV_FLAG := --cce-aicore-arch=dav-c220-vec +ASCENDC_INC_FLAG := -I${ASCEND_HOME_PATH}/compiler/tikcpp/tikcfw -I${ASCEND_HOME_PATH}/compiler/tikcpp/tikcfw/impl -I${ASCEND_HOME_PATH}/compiler/tikcpp/tikcfw/interface -I${ASCEND_HOME_PATH}/include # 参考device_intf.cmake的配置简化 +HOST_INC_FLAG := -I${ASCEND_HOME_PATH}/include -I${ASCEND_HOME_PATH}/tools/mssanitizer/include/acl +LINK_LIBS := -L${ASCEND_HOME_PATH}/lib64 -lruntime -lascendcl -lstdc++ +LINK_LIBS += -L${ASCEND_HOME_PATH}/tools/mssanitizer/lib64 -lascend_acl_hook + +all: build + +build: illegal_free_kernel.o main.o illegal_free.fatbin + +illegal_free_kernel.o: illegal_free_kernel.cpp + $(COMPILER) $(COMPILER_FLAG) $(DAV_FLAG) $(ASCENDC_INC_FLAG) -o $@ -c $^ + +main.o: main.cpp + $(HOST_COMPILER) $(HOST_COMPILER_FLAG) $(HOST_INC_FLAG) -o $@ -c $^ + +illegal_free.fatbin: illegal_free_kernel.o main.o + $(COMPILER) $(LINK_FLAG) $(DAV_FLAG) -o $@ $^ ${LINK_LIBS} + +.PHONY: clean +clean: + rm *.o illegal_free.fatbin diff --git a/sample/sanitizer_sample/memcheck/illegal_free/illegal_free_kernel.cpp b/sample/sanitizer_sample/memcheck/illegal_free/illegal_free_kernel.cpp new file mode 100644 index 0000000000..4eaa2ef12d --- /dev/null +++ b/sample/sanitizer_sample/memcheck/illegal_free/illegal_free_kernel.cpp @@ -0,0 +1,12 @@ +#include "kernel_operator.h" +#include "acl/acl.h" +using namespace AscendC; + +extern "C" __global__ __aicore__ void illegal_free_kernel(__gm__ uint8_t *gm) { + *gm = 1; +} + +extern "C" void illegal_free_kernel_do(uint32_t blockDim, void *l2ctrl, void *stream, uint8_t *gm) +{ + illegal_free_kernel<<>>(gm); +} diff --git a/sample/sanitizer_sample/memcheck/illegal_free/main.cpp b/sample/sanitizer_sample/memcheck/illegal_free/main.cpp new file mode 100644 index 0000000000..c399dca094 --- /dev/null +++ b/sample/sanitizer_sample/memcheck/illegal_free/main.cpp @@ -0,0 +1,42 @@ +#include +#include "acl.h" + +#define ACL_ERROR_NONE 0 + +#define CHECK_ACL(x) \ + do { \ + aclError __ret = x; \ + if (__ret != ACL_ERROR_NONE) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ + } \ + } while (0); + +extern "C" void illegal_free_kernel_do(uint32_t blockDim, void *l2ctrl, void *stream, uint8_t *gm); + +int main(void) +{ + CHECK_ACL(aclInit(nullptr)); + aclrtContext context; + int32_t deviceId = 0; + CHECK_ACL(aclrtSetDevice(deviceId)); + CHECK_ACL(aclrtCreateContext(&context, deviceId)); + aclrtStream stream = nullptr; + CHECK_ACL(aclrtCreateStream(&stream)); + + uint8_t *gm = nullptr; + CHECK_ACL(aclrtMalloc((void**)&gm, 256, ACL_MEM_MALLOC_HUGE_FIRST)); + + uint64_t blockDim = 1UL; + illegal_free_kernel_do(blockDim, nullptr, stream, gm); + CHECK_ACL(aclrtSynchronizeStream(stream)); + + // 代码 27 行分配了 GM 内存并返回指针 `gm',但此处对非法的内存地址进行了释放,导致非法释放异常 + CHECK_ACL(aclrtFree(gm + 0x10)); + // 以下是正确写法: + CHECK_ACL(aclrtFree(gm)); + CHECK_ACL(aclrtDestroyStream(stream)); + CHECK_ACL(aclrtDestroyContext(context)); + CHECK_ACL(aclrtResetDevice(deviceId)); + CHECK_ACL(aclFinalize()); + return 0; +} \ No newline at end of file diff --git a/sample/sanitizer_sample/memcheck/memory_leak/Makefile b/sample/sanitizer_sample/memcheck/memory_leak/Makefile new file mode 100644 index 0000000000..a97ab708bf --- /dev/null +++ b/sample/sanitizer_sample/memcheck/memory_leak/Makefile @@ -0,0 +1,29 @@ +ASCEND_HOME_PATH ?= /usr/local/Ascend/ascend-toolkit/latest + +COMPILER := ${ASCEND_HOME_PATH}/compiler/ccec_compiler/bin/ccec +HOST_COMPILER := g++ +COMPILER_FLAG := -xcce -O2 -std=c++17 --cce-enable-sanitizer -g +HOST_COMPILER_FLAG := -O2 -std=c++17 +LINK_FLAG := --cce-fatobj-link --cce-enable-sanitizer +DAV_FLAG := --cce-aicore-arch=dav-c220-vec +ASCENDC_INC_FLAG := -I${ASCEND_HOME_PATH}/compiler/tikcpp/tikcfw -I${ASCEND_HOME_PATH}/compiler/tikcpp/tikcfw/impl -I${ASCEND_HOME_PATH}/compiler/tikcpp/tikcfw/interface -I${ASCEND_HOME_PATH}/include # 参考device_intf.cmake的配置简化 +HOST_INC_FLAG := -I${ASCEND_HOME_PATH}/include -I${ASCEND_HOME_PATH}/tools/mssanitizer/include/acl +LINK_LIBS := -L${ASCEND_HOME_PATH}/lib64 -lruntime -lascendcl -lstdc++ +LINK_LIBS += -L${ASCEND_HOME_PATH}/tools/mssanitizer/lib64 -lascend_acl_hook + +all: build + +build: memory_leak_kernel.o main.o memory_leak.fatbin + +memory_leak_kernel.o: memory_leak_kernel.cpp + $(COMPILER) $(COMPILER_FLAG) $(DAV_FLAG) $(ASCENDC_INC_FLAG) -o $@ -c $^ + +main.o: main.cpp + $(HOST_COMPILER) $(HOST_COMPILER_FLAG) $(HOST_INC_FLAG) -o $@ -c $^ + +memory_leak.fatbin: memory_leak_kernel.o main.o + $(COMPILER) $(LINK_FLAG) $(DAV_FLAG) -o $@ $^ ${LINK_LIBS} + +.PHONY: clean +clean: + rm *.o memory_leak.fatbin diff --git a/sample/sanitizer_sample/memcheck/memory_leak/main.cpp b/sample/sanitizer_sample/memcheck/memory_leak/main.cpp new file mode 100644 index 0000000000..1021100d64 --- /dev/null +++ b/sample/sanitizer_sample/memcheck/memory_leak/main.cpp @@ -0,0 +1,41 @@ +#include +#include "acl.h" + +#define ACL_ERROR_NONE 0 + +#define CHECK_ACL(x) \ + do { \ + aclError __ret = x; \ + if (__ret != ACL_ERROR_NONE) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ + } \ + } while (0); + +extern "C" void memory_leak_kernel_do(uint32_t blockDim, void *l2ctrl, void *stream, uint8_t *gm); + +int main(void) +{ + CHECK_ACL(aclInit(nullptr)); + aclrtContext context; + int32_t deviceId = 0; + CHECK_ACL(aclrtSetDevice(deviceId)); + CHECK_ACL(aclrtCreateContext(&context, deviceId)); + aclrtStream stream = nullptr; + CHECK_ACL(aclrtCreateStream(&stream)); + + uint8_t *gm = nullptr; + CHECK_ACL(aclrtMalloc((void**)&gm, 256, ACL_MEM_MALLOC_HUGE_FIRST)); + + uint64_t blockDim = 1UL; + memory_leak_kernel_do(blockDim, nullptr, stream, gm); + CHECK_ACL(aclrtSynchronizeStream(stream)); + + // 代码 27 行分配了 GM 内存,此处忘记对内存进行释放,导致了内存泄漏异常 + // 以下是正确写法: + // CHECK_ACL(aclrtFree(gm)); + CHECK_ACL(aclrtDestroyStream(stream)); + CHECK_ACL(aclrtDestroyContext(context)); + CHECK_ACL(aclrtResetDevice(deviceId)); + CHECK_ACL(aclFinalize()); + return 0; +} \ No newline at end of file diff --git a/sample/sanitizer_sample/memcheck/memory_leak/memory_leak_kernel.cpp b/sample/sanitizer_sample/memcheck/memory_leak/memory_leak_kernel.cpp new file mode 100644 index 0000000000..98ea6b3d7d --- /dev/null +++ b/sample/sanitizer_sample/memcheck/memory_leak/memory_leak_kernel.cpp @@ -0,0 +1,12 @@ +#include "kernel_operator.h" +#include "acl/acl.h" +using namespace AscendC; + +extern "C" __global__ __aicore__ void memory_leak_kernel(__gm__ uint8_t *gm) { + *gm = 1; +} + +extern "C" void memory_leak_kernel_do(uint32_t blockDim, void *l2ctrl, void *stream, uint8_t *gm) +{ + memory_leak_kernel<<>>(gm); +} diff --git a/sample/sanitizer_sample/memcheck/memory_unused/Makefile b/sample/sanitizer_sample/memcheck/memory_unused/Makefile new file mode 100644 index 0000000000..3938d9f3f1 --- /dev/null +++ b/sample/sanitizer_sample/memcheck/memory_unused/Makefile @@ -0,0 +1,29 @@ +ASCEND_HOME_PATH ?= /usr/local/Ascend/ascend-toolkit/latest + +COMPILER := ${ASCEND_HOME_PATH}/compiler/ccec_compiler/bin/ccec +HOST_COMPILER := g++ +COMPILER_FLAG := -xcce -O2 -std=c++17 --cce-enable-sanitizer -g +HOST_COMPILER_FLAG := -O2 -std=c++17 +LINK_FLAG := --cce-fatobj-link --cce-enable-sanitizer +DAV_FLAG := --cce-aicore-arch=dav-c220-vec +ASCENDC_INC_FLAG := -I${ASCEND_HOME_PATH}/compiler/tikcpp/tikcfw -I${ASCEND_HOME_PATH}/compiler/tikcpp/tikcfw/impl -I${ASCEND_HOME_PATH}/compiler/tikcpp/tikcfw/interface -I${ASCEND_HOME_PATH}/include # 参考device_intf.cmake的配置简化 +HOST_INC_FLAG := -I${ASCEND_HOME_PATH}/include -I${ASCEND_HOME_PATH}/tools/mssanitizer/include/acl +LINK_LIBS := -L${ASCEND_HOME_PATH}/lib64 -lruntime -lascendcl -lstdc++ +LINK_LIBS += -L${ASCEND_HOME_PATH}/tools/mssanitizer/lib64 -lascend_acl_hook + +all: build + +build: memory_unused_kernel.o main.o memory_unused.fatbin + +memory_unused_kernel.o: memory_unused_kernel.cpp + $(COMPILER) $(COMPILER_FLAG) $(DAV_FLAG) $(ASCENDC_INC_FLAG) -o $@ -c $^ + +main.o: main.cpp + $(HOST_COMPILER) $(HOST_COMPILER_FLAG) $(HOST_INC_FLAG) -o $@ -c $^ + +memory_unused.fatbin: memory_unused_kernel.o main.o + $(COMPILER) $(LINK_FLAG) $(DAV_FLAG) -o $@ $^ ${LINK_LIBS} + +.PHONY: clean +clean: + rm *.o memory_unused.fatbin diff --git a/sample/sanitizer_sample/memcheck/memory_unused/main.cpp b/sample/sanitizer_sample/memcheck/memory_unused/main.cpp new file mode 100644 index 0000000000..5b4eef007b --- /dev/null +++ b/sample/sanitizer_sample/memcheck/memory_unused/main.cpp @@ -0,0 +1,46 @@ +#include +#include "acl.h" + +#define ACL_ERROR_NONE 0 + +#define CHECK_ACL(x) \ + do { \ + aclError __ret = x; \ + if (__ret != ACL_ERROR_NONE) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ + } \ + } while (0); + +extern "C" void memory_unused_kernel_do(uint32_t blockDim, void *l2ctrl, void *stream, uint8_t *gm); + +int main(void) +{ + CHECK_ACL(aclInit(nullptr)); + aclrtContext context; + int32_t deviceId = 0; + CHECK_ACL(aclrtSetDevice(deviceId)); + CHECK_ACL(aclrtCreateContext(&context, deviceId)); + aclrtStream stream = nullptr; + CHECK_ACL(aclrtCreateStream(&stream)); + + uint8_t *gm = nullptr; + uint8_t *host = nullptr; + CHECK_ACL(aclrtMalloc((void**)&gm, 256, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMallocHost((void**)&host, 256)); + // 代码的第 28 行在 GM 上分配了 256 个字节的内存,但是算子只使用了其中 128 个 + // 字节,会产生内存未使用的异常报告 + CHECK_ACL(aclrtMemcpy(gm, 256, host, 128, ACL_MEMCPY_HOST_TO_DEVICE)); + // 以下是正确写法: + // CHECK_ACL(aclrtMemcpy(gm, 256, host, 256, ACL_MEMCPY_HOST_TO_DEVICE)); + + uint64_t blockDim = 1UL; + memory_unused_kernel_do(blockDim, nullptr, stream, gm); + CHECK_ACL(aclrtSynchronizeStream(stream)); + + CHECK_ACL(aclrtFree(gm)); + CHECK_ACL(aclrtDestroyStream(stream)); + CHECK_ACL(aclrtDestroyContext(context)); + CHECK_ACL(aclrtResetDevice(deviceId)); + CHECK_ACL(aclFinalize()); + return 0; +} \ No newline at end of file diff --git a/sample/sanitizer_sample/memcheck/memory_unused/memory_unused_kernel.cpp b/sample/sanitizer_sample/memcheck/memory_unused/memory_unused_kernel.cpp new file mode 100644 index 0000000000..7d837c7464 --- /dev/null +++ b/sample/sanitizer_sample/memcheck/memory_unused/memory_unused_kernel.cpp @@ -0,0 +1,12 @@ +#include "kernel_operator.h" +#include "acl/acl.h" +using namespace AscendC; + +extern "C" __global__ __aicore__ void memory_unused_kernel(__gm__ uint8_t *gm) { + *gm = 1; +} + +extern "C" void memory_unused_kernel_do(uint32_t blockDim, void *l2ctrl, void *stream, uint8_t *gm) +{ + memory_unused_kernel<<>>(gm); +} -- Gitee