diff --git a/sample/README.md b/sample/README.md index 6bd55a2f83422b2f0c8424c9687a38f1698aa6fb..41c2a74749117d30bfb7b4684ff467945e347e9f 100644 --- a/sample/README.md +++ b/sample/README.md @@ -25,9 +25,7 @@ source set_env.sh |- pytorch_adapter # 适配pytorch的AscendC单算子极简工程,可配合msdebug和msprof工具 |- jit_compile # jit模式,运行时编译使用 |- with_setuptools # 编译成wheel包安装使用 - |- sanitizer_sample # 异常样例,用于配合mssanitizer工具 - |- racecheck # 含竞争问题的样例 - |- xx # 其他异常样例 + |- sanitizer_sample # 用于异常检测的异常算子样例,详见[异常检测样例介绍](./sanitizer_sample/README.md) ``` 如果你关注自定义算子的pytorch框架适配,详见[此处](./pytorch_adapter/README.md) @@ -48,17 +46,38 @@ source set_env.sh msdebug ./*.fatbin ``` -## 内存检测 sanitizer -1. 编译阶段:在编译过程中添加```--cce-enable-sanitizer -g```参数, 在链接过程中添加```--cce-enable-sanitizer```参数。(现样例中已在Makefile中添加),执行如下命令: -``` -make -``` +## 异常检测 mssanitizer -2. 运行阶段: -``` -mssanitizer ./*.fatbin # 默认进行memcheck检查 -``` +### 使用前准备 +若使用 mssanitizer 对算子进行异常检测,还需要加入额外编译选项,具体如下: + +在 `sample\normal_sample\vec_only` 相对路径下的 `Makefile` 文件中修改如下内容: + +1. 编译阶段增加 `-g --cce-enable-sanitizer` 选项启用检测功能 + ``` makefile + COMPILER_FLAG := -xcce -O2 -std=c++17 + # 修改为: + COMPILER_FLAG := -xcce -O2 -std=c++17 --cce-enable-sanitizer -g + ``` +2. 链接阶段增加 `--cce-enable-sanitizer` 选项启用检测功能 + ``` makefile + LINK_FLAG := --cce-fatobj-link + # 修改为: + LINK_FLAG := --cce-fatobj-link --cce-enable-sanitizer + ``` + +`sample\sanitizer_sample` 目录下的样例均已支持检测功能,也可参考此目录下的样例进行修改。 +### 运行异常检测工具 + +1. 内存检测 + ``` bash + mssanitizer -t memcheck ./illegal_align.fatbin + ``` +2. 竞争检测 + ``` bash + mssanitizer -t racecheck ./raw_error.fatbin + ``` ## 算子调优 算子调优工具可以支持上板和仿真算子的调优,下面将以vec_only中的算子为例,进行工具使用的实战命令讲解 diff --git a/sample/build/build.sh b/sample/build/build.sh index bbb2915b04c1c28828dbb7de6f00c28ca8bbe45f..7c1c31b51a170e3a8d468bdc1e0d9dade33e61bf 100644 --- a/sample/build/build.sh +++ b/sample/build/build.sh @@ -33,6 +33,21 @@ cd ${TOP_DIR}/sanitizer_sample/memcheck/illegal_align make mv *.fatbin ${TOP_DIR}/build +# memory leak sample +cd ${TOP_DIR}/sanitizer_sample/memcheck/memory_leak +make +mv *.fatbin ${TOP_DIR}/build + +# illegal free sample +cd ${TOP_DIR}/sanitizer_sample/memcheck/illegal_free +make +mv *.fatbin ${TOP_DIR}/build + +# memory ununsed sample +cd ${TOP_DIR}/sanitizer_sample/memcheck/memory_unused +make +mv *.fatbin ${TOP_DIR}/build + cd ${TOP_DIR}/sanitizer_sample/Racecheck make mv *.fatbin ${TOP_DIR}/build \ No newline at end of file diff --git a/sample/sanitizer_sample/README.md b/sample/sanitizer_sample/README.md new file mode 100644 index 0000000000000000000000000000000000000000..624782e207f619f32c33a8473eee4aaaaf3bac0d --- /dev/null +++ b/sample/sanitizer_sample/README.md @@ -0,0 +1,22 @@ +# 异常检测样例介绍 + +## 内存检测样例 + +用于展示内存检测的异常用例在 `memcheck` 目录下,各样例的功能说明如下: + +| 样例 | 说明 | +|---|---| +| illegal_align | 用于展示非法对齐访问异常检测的样例 | +| illegal_read_and_write | 用于展示非法读写异常检测的样例 | +| out_of_bound | 用于展示多核内存踩踏异常检测的样例 | +| memory_leak | 用于展示内存泄漏异常检测的样例 | +| illegal_free | 用于展示内存非法释放异常检测的样例 | +| memory_unused | 用于展示内存未使用异常检测的样例 | + +## 竞争检测样例 + +用于展示竞争检测的异常用例在 `racecheck` 目录下,各样例的功能说明如下: + +| 样例 | 说明 | +|---|---| +| raw_error | 用于展示 read-after-write 竞争检测的样例 | \ No newline at end of file diff --git a/sample/sanitizer_sample/memcheck/illegal_free/Makefile b/sample/sanitizer_sample/memcheck/illegal_free/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..88da6c456d10486f6d958a3d8ba8fa883d49b940 --- /dev/null +++ b/sample/sanitizer_sample/memcheck/illegal_free/Makefile @@ -0,0 +1,29 @@ +ASCEND_HOME_PATH ?= /usr/local/Ascend/ascend-toolkit/latest + +COMPILER := ${ASCEND_HOME_PATH}/compiler/ccec_compiler/bin/ccec +HOST_COMPILER := g++ +COMPILER_FLAG := -xcce -O2 -std=c++17 --cce-enable-sanitizer -g +HOST_COMPILER_FLAG := -O2 -std=c++17 +LINK_FLAG := --cce-fatobj-link --cce-enable-sanitizer +DAV_FLAG := --cce-aicore-arch=dav-c220-vec +ASCENDC_INC_FLAG := -I${ASCEND_HOME_PATH}/compiler/tikcpp/tikcfw -I${ASCEND_HOME_PATH}/compiler/tikcpp/tikcfw/impl -I${ASCEND_HOME_PATH}/compiler/tikcpp/tikcfw/interface -I${ASCEND_HOME_PATH}/include # 参考device_intf.cmake的配置简化 +HOST_INC_FLAG := -I${ASCEND_HOME_PATH}/include -I${ASCEND_HOME_PATH}/tools/mssanitizer/include/acl +LINK_LIBS := -L${ASCEND_HOME_PATH}/lib64 -lruntime -lascendcl -lstdc++ +LINK_LIBS += -L${ASCEND_HOME_PATH}/tools/mssanitizer/lib64 -lascend_acl_hook + +all: build + +build: illegal_free_kernel.o main.o illegal_free.fatbin + +illegal_free_kernel.o: illegal_free_kernel.cpp + $(COMPILER) $(COMPILER_FLAG) $(DAV_FLAG) $(ASCENDC_INC_FLAG) -o $@ -c $^ + +main.o: main.cpp + $(HOST_COMPILER) $(HOST_COMPILER_FLAG) $(HOST_INC_FLAG) -o $@ -c $^ + +illegal_free.fatbin: illegal_free_kernel.o main.o + $(COMPILER) $(LINK_FLAG) $(DAV_FLAG) -o $@ $^ ${LINK_LIBS} + +.PHONY: clean +clean: + rm *.o illegal_free.fatbin diff --git a/sample/sanitizer_sample/memcheck/illegal_free/illegal_free_kernel.cpp b/sample/sanitizer_sample/memcheck/illegal_free/illegal_free_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4eaa2ef12d0582cc7100bfd66532f12d87a7e3f4 --- /dev/null +++ b/sample/sanitizer_sample/memcheck/illegal_free/illegal_free_kernel.cpp @@ -0,0 +1,12 @@ +#include "kernel_operator.h" +#include "acl/acl.h" +using namespace AscendC; + +extern "C" __global__ __aicore__ void illegal_free_kernel(__gm__ uint8_t *gm) { + *gm = 1; +} + +extern "C" void illegal_free_kernel_do(uint32_t blockDim, void *l2ctrl, void *stream, uint8_t *gm) +{ + illegal_free_kernel<<>>(gm); +} diff --git a/sample/sanitizer_sample/memcheck/illegal_free/main.cpp b/sample/sanitizer_sample/memcheck/illegal_free/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c399dca094eb9af7686a8d63b75b44b5b997921c --- /dev/null +++ b/sample/sanitizer_sample/memcheck/illegal_free/main.cpp @@ -0,0 +1,42 @@ +#include +#include "acl.h" + +#define ACL_ERROR_NONE 0 + +#define CHECK_ACL(x) \ + do { \ + aclError __ret = x; \ + if (__ret != ACL_ERROR_NONE) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ + } \ + } while (0); + +extern "C" void illegal_free_kernel_do(uint32_t blockDim, void *l2ctrl, void *stream, uint8_t *gm); + +int main(void) +{ + CHECK_ACL(aclInit(nullptr)); + aclrtContext context; + int32_t deviceId = 0; + CHECK_ACL(aclrtSetDevice(deviceId)); + CHECK_ACL(aclrtCreateContext(&context, deviceId)); + aclrtStream stream = nullptr; + CHECK_ACL(aclrtCreateStream(&stream)); + + uint8_t *gm = nullptr; + CHECK_ACL(aclrtMalloc((void**)&gm, 256, ACL_MEM_MALLOC_HUGE_FIRST)); + + uint64_t blockDim = 1UL; + illegal_free_kernel_do(blockDim, nullptr, stream, gm); + CHECK_ACL(aclrtSynchronizeStream(stream)); + + // 代码 27 行分配了 GM 内存并返回指针 `gm',但此处对非法的内存地址进行了释放,导致非法释放异常 + CHECK_ACL(aclrtFree(gm + 0x10)); + // 以下是正确写法: + CHECK_ACL(aclrtFree(gm)); + CHECK_ACL(aclrtDestroyStream(stream)); + CHECK_ACL(aclrtDestroyContext(context)); + CHECK_ACL(aclrtResetDevice(deviceId)); + CHECK_ACL(aclFinalize()); + return 0; +} \ No newline at end of file diff --git a/sample/sanitizer_sample/memcheck/memory_leak/Makefile b/sample/sanitizer_sample/memcheck/memory_leak/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..a97ab708bfd509f4acfdd2c7946d4d4db2f6458d --- /dev/null +++ b/sample/sanitizer_sample/memcheck/memory_leak/Makefile @@ -0,0 +1,29 @@ +ASCEND_HOME_PATH ?= /usr/local/Ascend/ascend-toolkit/latest + +COMPILER := ${ASCEND_HOME_PATH}/compiler/ccec_compiler/bin/ccec +HOST_COMPILER := g++ +COMPILER_FLAG := -xcce -O2 -std=c++17 --cce-enable-sanitizer -g +HOST_COMPILER_FLAG := -O2 -std=c++17 +LINK_FLAG := --cce-fatobj-link --cce-enable-sanitizer +DAV_FLAG := --cce-aicore-arch=dav-c220-vec +ASCENDC_INC_FLAG := -I${ASCEND_HOME_PATH}/compiler/tikcpp/tikcfw -I${ASCEND_HOME_PATH}/compiler/tikcpp/tikcfw/impl -I${ASCEND_HOME_PATH}/compiler/tikcpp/tikcfw/interface -I${ASCEND_HOME_PATH}/include # 参考device_intf.cmake的配置简化 +HOST_INC_FLAG := -I${ASCEND_HOME_PATH}/include -I${ASCEND_HOME_PATH}/tools/mssanitizer/include/acl +LINK_LIBS := -L${ASCEND_HOME_PATH}/lib64 -lruntime -lascendcl -lstdc++ +LINK_LIBS += -L${ASCEND_HOME_PATH}/tools/mssanitizer/lib64 -lascend_acl_hook + +all: build + +build: memory_leak_kernel.o main.o memory_leak.fatbin + +memory_leak_kernel.o: memory_leak_kernel.cpp + $(COMPILER) $(COMPILER_FLAG) $(DAV_FLAG) $(ASCENDC_INC_FLAG) -o $@ -c $^ + +main.o: main.cpp + $(HOST_COMPILER) $(HOST_COMPILER_FLAG) $(HOST_INC_FLAG) -o $@ -c $^ + +memory_leak.fatbin: memory_leak_kernel.o main.o + $(COMPILER) $(LINK_FLAG) $(DAV_FLAG) -o $@ $^ ${LINK_LIBS} + +.PHONY: clean +clean: + rm *.o memory_leak.fatbin diff --git a/sample/sanitizer_sample/memcheck/memory_leak/main.cpp b/sample/sanitizer_sample/memcheck/memory_leak/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1021100d64aedb07ad111559172db7d6b2fa9935 --- /dev/null +++ b/sample/sanitizer_sample/memcheck/memory_leak/main.cpp @@ -0,0 +1,41 @@ +#include +#include "acl.h" + +#define ACL_ERROR_NONE 0 + +#define CHECK_ACL(x) \ + do { \ + aclError __ret = x; \ + if (__ret != ACL_ERROR_NONE) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ + } \ + } while (0); + +extern "C" void memory_leak_kernel_do(uint32_t blockDim, void *l2ctrl, void *stream, uint8_t *gm); + +int main(void) +{ + CHECK_ACL(aclInit(nullptr)); + aclrtContext context; + int32_t deviceId = 0; + CHECK_ACL(aclrtSetDevice(deviceId)); + CHECK_ACL(aclrtCreateContext(&context, deviceId)); + aclrtStream stream = nullptr; + CHECK_ACL(aclrtCreateStream(&stream)); + + uint8_t *gm = nullptr; + CHECK_ACL(aclrtMalloc((void**)&gm, 256, ACL_MEM_MALLOC_HUGE_FIRST)); + + uint64_t blockDim = 1UL; + memory_leak_kernel_do(blockDim, nullptr, stream, gm); + CHECK_ACL(aclrtSynchronizeStream(stream)); + + // 代码 27 行分配了 GM 内存,此处忘记对内存进行释放,导致了内存泄漏异常 + // 以下是正确写法: + // CHECK_ACL(aclrtFree(gm)); + CHECK_ACL(aclrtDestroyStream(stream)); + CHECK_ACL(aclrtDestroyContext(context)); + CHECK_ACL(aclrtResetDevice(deviceId)); + CHECK_ACL(aclFinalize()); + return 0; +} \ No newline at end of file diff --git a/sample/sanitizer_sample/memcheck/memory_leak/memory_leak_kernel.cpp b/sample/sanitizer_sample/memcheck/memory_leak/memory_leak_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..98ea6b3d7d3bfa92b29f154c5ba531442d7a72af --- /dev/null +++ b/sample/sanitizer_sample/memcheck/memory_leak/memory_leak_kernel.cpp @@ -0,0 +1,12 @@ +#include "kernel_operator.h" +#include "acl/acl.h" +using namespace AscendC; + +extern "C" __global__ __aicore__ void memory_leak_kernel(__gm__ uint8_t *gm) { + *gm = 1; +} + +extern "C" void memory_leak_kernel_do(uint32_t blockDim, void *l2ctrl, void *stream, uint8_t *gm) +{ + memory_leak_kernel<<>>(gm); +} diff --git a/sample/sanitizer_sample/memcheck/memory_unused/Makefile b/sample/sanitizer_sample/memcheck/memory_unused/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..3938d9f3f1bd04d5fa933feffde28142314ed1c3 --- /dev/null +++ b/sample/sanitizer_sample/memcheck/memory_unused/Makefile @@ -0,0 +1,29 @@ +ASCEND_HOME_PATH ?= /usr/local/Ascend/ascend-toolkit/latest + +COMPILER := ${ASCEND_HOME_PATH}/compiler/ccec_compiler/bin/ccec +HOST_COMPILER := g++ +COMPILER_FLAG := -xcce -O2 -std=c++17 --cce-enable-sanitizer -g +HOST_COMPILER_FLAG := -O2 -std=c++17 +LINK_FLAG := --cce-fatobj-link --cce-enable-sanitizer +DAV_FLAG := --cce-aicore-arch=dav-c220-vec +ASCENDC_INC_FLAG := -I${ASCEND_HOME_PATH}/compiler/tikcpp/tikcfw -I${ASCEND_HOME_PATH}/compiler/tikcpp/tikcfw/impl -I${ASCEND_HOME_PATH}/compiler/tikcpp/tikcfw/interface -I${ASCEND_HOME_PATH}/include # 参考device_intf.cmake的配置简化 +HOST_INC_FLAG := -I${ASCEND_HOME_PATH}/include -I${ASCEND_HOME_PATH}/tools/mssanitizer/include/acl +LINK_LIBS := -L${ASCEND_HOME_PATH}/lib64 -lruntime -lascendcl -lstdc++ +LINK_LIBS += -L${ASCEND_HOME_PATH}/tools/mssanitizer/lib64 -lascend_acl_hook + +all: build + +build: memory_unused_kernel.o main.o memory_unused.fatbin + +memory_unused_kernel.o: memory_unused_kernel.cpp + $(COMPILER) $(COMPILER_FLAG) $(DAV_FLAG) $(ASCENDC_INC_FLAG) -o $@ -c $^ + +main.o: main.cpp + $(HOST_COMPILER) $(HOST_COMPILER_FLAG) $(HOST_INC_FLAG) -o $@ -c $^ + +memory_unused.fatbin: memory_unused_kernel.o main.o + $(COMPILER) $(LINK_FLAG) $(DAV_FLAG) -o $@ $^ ${LINK_LIBS} + +.PHONY: clean +clean: + rm *.o memory_unused.fatbin diff --git a/sample/sanitizer_sample/memcheck/memory_unused/main.cpp b/sample/sanitizer_sample/memcheck/memory_unused/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5b4eef007bf5e1c7ebfd2323e3d4332c4e1f67d8 --- /dev/null +++ b/sample/sanitizer_sample/memcheck/memory_unused/main.cpp @@ -0,0 +1,46 @@ +#include +#include "acl.h" + +#define ACL_ERROR_NONE 0 + +#define CHECK_ACL(x) \ + do { \ + aclError __ret = x; \ + if (__ret != ACL_ERROR_NONE) { \ + std::cerr << __FILE__ << ":" << __LINE__ << " aclError:" << __ret << std::endl; \ + } \ + } while (0); + +extern "C" void memory_unused_kernel_do(uint32_t blockDim, void *l2ctrl, void *stream, uint8_t *gm); + +int main(void) +{ + CHECK_ACL(aclInit(nullptr)); + aclrtContext context; + int32_t deviceId = 0; + CHECK_ACL(aclrtSetDevice(deviceId)); + CHECK_ACL(aclrtCreateContext(&context, deviceId)); + aclrtStream stream = nullptr; + CHECK_ACL(aclrtCreateStream(&stream)); + + uint8_t *gm = nullptr; + uint8_t *host = nullptr; + CHECK_ACL(aclrtMalloc((void**)&gm, 256, ACL_MEM_MALLOC_HUGE_FIRST)); + CHECK_ACL(aclrtMallocHost((void**)&host, 256)); + // 代码的第 28 行在 GM 上分配了 256 个字节的内存,但是算子只使用了其中 128 个 + // 字节,会产生内存未使用的异常报告 + CHECK_ACL(aclrtMemcpy(gm, 256, host, 128, ACL_MEMCPY_HOST_TO_DEVICE)); + // 以下是正确写法: + // CHECK_ACL(aclrtMemcpy(gm, 256, host, 256, ACL_MEMCPY_HOST_TO_DEVICE)); + + uint64_t blockDim = 1UL; + memory_unused_kernel_do(blockDim, nullptr, stream, gm); + CHECK_ACL(aclrtSynchronizeStream(stream)); + + CHECK_ACL(aclrtFree(gm)); + CHECK_ACL(aclrtDestroyStream(stream)); + CHECK_ACL(aclrtDestroyContext(context)); + CHECK_ACL(aclrtResetDevice(deviceId)); + CHECK_ACL(aclFinalize()); + return 0; +} \ No newline at end of file diff --git a/sample/sanitizer_sample/memcheck/memory_unused/memory_unused_kernel.cpp b/sample/sanitizer_sample/memcheck/memory_unused/memory_unused_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7d837c746455672510208bc00b02834a601a9076 --- /dev/null +++ b/sample/sanitizer_sample/memcheck/memory_unused/memory_unused_kernel.cpp @@ -0,0 +1,12 @@ +#include "kernel_operator.h" +#include "acl/acl.h" +using namespace AscendC; + +extern "C" __global__ __aicore__ void memory_unused_kernel(__gm__ uint8_t *gm) { + *gm = 1; +} + +extern "C" void memory_unused_kernel_do(uint32_t blockDim, void *l2ctrl, void *stream, uint8_t *gm) +{ + memory_unused_kernel<<>>(gm); +}