diff --git a/debug/accuracy_tools/CMakeLists.txt b/debug/accuracy_tools/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b73df6f420415cc8edb42c77d3356654c41aab77
--- /dev/null
+++ b/debug/accuracy_tools/CMakeLists.txt
@@ -0,0 +1,20 @@
+# Top-level build description for the accuracy tools; the C++ sources are added through msprobe/.
+cmake_minimum_required(VERSION 3.14)
+project(accuracy_tools)
+
+# Host architecture as reported by `uname -m`, used to reject cross-compilation requests.
+execute_process(
+    COMMAND uname -m
+    OUTPUT_VARIABLE machine_arch
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+)
+
+if (DEFINED ARCH_TYPE AND NOT "${ARCH_TYPE}" STREQUAL "${machine_arch}")
+    message(FATAL_ERROR "Cross-compilation is not supported currently. (compile ${ARCH_TYPE} on ${machine_arch})")
+endif()
+
+# The Find*.cmake modules and utils.cmake live in cmake/ and read the project root from PROJECT_ROOT_PATH.
+set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
+set(ENV{PROJECT_ROOT_PATH} "${CMAKE_SOURCE_DIR}")
+include(utils)
+add_subdirectory(msprobe)
diff --git a/debug/accuracy_tools/MANIFEST.in b/debug/accuracy_tools/MANIFEST.in
index 7997215ffdb2071277645bf47c520db304b1bd98..2afe7f3d2a54437b44b2d9f91505234f4c611740 100644
--- a/debug/accuracy_tools/MANIFEST.in
+++ b/debug/accuracy_tools/MANIFEST.in
@@ -2,4 +2,5 @@ include README.md
 include LICENSE
 recursive-include msprobe *
 recursive-exclude msprobe/test *
+recursive-exclude msprobe/ccsrc *
 
diff --git a/debug/accuracy_tools/build.sh b/debug/accuracy_tools/build.sh
new file mode 100644
index 0000000000000000000000000000000000000000..a21d11e05f7e7bd7e9bbf28a0fd70ad3d4835fda
--- /dev/null
+++ b/debug/accuracy_tools/build.sh
@@ -0,0 +1,88 @@
+#!/bin/bash
+# Build the C++ part of MsProbe and copy the resulting library to msprobe/lib/_msprobe_c.so.
+
+set -e
+
+BUILD_PATH=$(pwd)
+
+# Long options are comma-separated; a trailing ':' marks an option that takes a value.
+BUILD_ARGS=$(getopt -o ha:v:j:ft --long help,release,debug,arch:,python-version:,CANN-path:,jobs:,force-rebuild,local,test-cases -- "$@")
+eval set -- "${BUILD_ARGS}"
+
+ARCH_TYPE=$(uname -m)
+BUILD_TYPE=release
+CANN_PATH=""
+CONCURRENT_JOBS=16
+BUILD_TEST_CASE=False
+USE_LOCAL_FIRST=False
+PYTHON_VERSION=""
+
+# Printed with an unquoted "echo -e": real newlines collapse to spaces, so each line carries an explicit \n.
+HELP_DOC=$(cat << EOF
+Usage: build.sh [OPTION]...\n
+Build the C++ part of MsProbe.\n
+\n
+Arguments:\n
+    -a, --arch                   Specify the schema, which generally does not need to be set up.\n
+        --CANN-path              Specify the CANN path. When set, the build script will find the dependent files in\n
+                                 the specified path.\n
+    -j, --jobs                   Specify the number of compilation jobs(default 16).\n
+    -f, --force-rebuild          Clean up the cache before building.\n
+    -t, --test-cases             Build test cases.\n
+        --local                  Prioritize the use of on-premises, third-party resources as dependencies.\n
+        --release                Build the release version(default).\n
+        --debug                  Build the debug version.\n
+    -v, --python-version         Specify version of python.
+EOF
+)
+
+while true; do
+    case "$1" in
+        -h | --help)
+            echo -e ${HELP_DOC}
+            exit 0 ;;
+        -a | --arch)
+            ARCH_TYPE="$2" ; shift 2 ;;
+        -v | --python-version)
+            PYTHON_VERSION="$2" ; shift 2 ;;
+        --release)
+            BUILD_TYPE=release ; shift ;;
+        --debug)
+            BUILD_TYPE=debug ; shift ;;
+        --CANN-path)
+            CANN_PATH="$2" ; shift 2 ;;
+        -j | --jobs)
+            CONCURRENT_JOBS="$2" ; shift 2 ;;
+        --local)
+            USE_LOCAL_FIRST=True ; shift ;;
+        -f | --force-rebuild)
+            rm -rf "${BUILD_PATH}/build_dependency" "${BUILD_PATH}/lib" "${BUILD_PATH}/output" "${BUILD_PATH}/third_party" \
+                   "${BUILD_PATH}/msprobe/lib/_msprobe_c.so"
+            shift ;;
+        -t | --test-cases)
+            BUILD_TEST_CASE=True ; shift ;;
+        --)
+            shift ; break ;;
+        *)
+            echo "Unknown argument $1"
+            exit 1 ;;
+    esac
+done
+
+# Configure and build out of tree under output/BUILD_TYPE; arguments are quoted so paths with spaces survive.
+BUILD_OUTPUT_PATH=${BUILD_PATH}/output/${BUILD_TYPE}
+
+cmake -B "${BUILD_OUTPUT_PATH}" -S . "-DARCH_TYPE=${ARCH_TYPE}" "-DBUILD_TYPE=${BUILD_TYPE}" "-DCANN_PATH=${CANN_PATH}" \
+      "-DUSE_LOCAL_FIRST=${USE_LOCAL_FIRST}" "-DBUILD_TEST_CASE=${BUILD_TEST_CASE}" \
+      "-DPYTHON_VERSION=${PYTHON_VERSION}"
+cd "${BUILD_OUTPUT_PATH}"
+make -j"${CONCURRENT_JOBS}"
+
+if [[ ! -e "${BUILD_OUTPUT_PATH}/msprobe/ccsrc/lib_msprobe_c.so" ]]; then
+    echo "Failed to build lib_msprobe_c.so."
+    exit 1
+fi
+
+mkdir -p "${BUILD_PATH}/msprobe/lib"
+
+cp "${BUILD_OUTPUT_PATH}/msprobe/ccsrc/lib_msprobe_c.so" "${BUILD_PATH}/msprobe/lib/_msprobe_c.so"
diff --git a/debug/accuracy_tools/cmake/Findcpython.cmake b/debug/accuracy_tools/cmake/Findcpython.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..815fbc638de824fb91f2e7183781a6415007868b
--- /dev/null
+++ b/debug/accuracy_tools/cmake/Findcpython.cmake
@@ -0,0 +1,22 @@
+# Locate the Python 3 development files and expose them as cpython_LIBRARIES / cpython_FOUND.
+set(PKG_NAME cpython)
+
+if (NOT ${PKG_NAME}_FOUND)
+
+# EXACT only makes sense together with a version; build.sh passes an empty PYTHON_VERSION unless -v is given.
+if (NOT "${PYTHON_VERSION}" STREQUAL "")
+    find_package(Python3 ${PYTHON_VERSION} EXACT COMPONENTS Development)
+else()
+    find_package(Python3 COMPONENTS Development)
+endif()
+if (NOT Python3_FOUND)
+    message(FATAL_ERROR "Python3 development files are not found (requested version: '${PYTHON_VERSION}').")
+endif()
+
+set(PACKAGE_VERSION ${Python3_VERSION})
+
+include_directories(${Python3_INCLUDE_DIRS})
+set(${PKG_NAME}_LIBRARIES ${Python3_LIBRARIES})
+set(${PKG_NAME}_FOUND TRUE)
+
+endif()
diff --git a/debug/accuracy_tools/cmake/Findgtest.cmake b/debug/accuracy_tools/cmake/Findgtest.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..dbfe76abcc9b5d3c2f61642cc8c6e270fc441a0f
--- /dev/null
+++ b/debug/accuracy_tools/cmake/Findgtest.cmake
@@ -0,0 +1,49 @@
+set(PACKAGE_VERSION 1.12.1)
+
+set(PKG_NAME gtest)
+set(URL "https://gitee.com/mirrors/googletest/repository/archive/release-1.12.1.tar.gz")
+set(SHA256_VALUE "81964fe578e9bd7c94dfdb09c8e4d6e6759e19967e397dbea48d1c10e45d0df2")
+set(DOWNLOAD_PATH "$ENV{PROJECT_ROOT_PATH}/third_party")
+set(DIR_NAME "${DOWNLOAD_PATH}/googletest-release-1.12.1")
+
+if (NOT ${PKG_NAME}_FOUND)
+
+download_opensource_pkg(${PKG_NAME}
+    URL ${URL}
+    SHA256 ${SHA256_VALUE}
+    DOWNLOAD_PATH ${DOWNLOAD_PATH}
+)
+
+include_directories(${DIR_NAME}/googletest/include)
+include_directories(${DIR_NAME}/googlemock/include)
+
+set(BUILD_DEPENDENCY_PATH "$ENV{PROJECT_ROOT_PATH}/build_dependency")
+execute_process(
+    WORKING_DIRECTORY ${DIR_NAME}
+    COMMAND "${CMAKE_COMMAND}" .
-DBUILD_SHARED_LIBS=ON
+    RESULT_VARIABLE RESULT
+)
+if (NOT RESULT EQUAL 0)
+    message(FATAL_ERROR "Failed to build gtest. ${RESULT}")
+endif()
+execute_process(
+    WORKING_DIRECTORY ${DIR_NAME}
+    COMMAND make -j16
+    RESULT_VARIABLE RESULT
+)
+if (NOT RESULT EQUAL 0)
+    message(FATAL_ERROR "Failed to build gtest. ${RESULT}")
+endif()
+
+file(GLOB GTEST_SO "${DIR_NAME}/lib/libgtest.so")
+file(GLOB GMOCK_SO "${DIR_NAME}/lib/libgmock.so")
+file(GLOB GTEST_MAIN_SO "${DIR_NAME}/lib/libgtest_main.so")
+file(GLOB GMOCK_MAIN_SO "${DIR_NAME}/lib/libgmock_main.so")
+if (NOT GTEST_SO OR NOT GMOCK_SO OR NOT GTEST_MAIN_SO OR NOT GMOCK_MAIN_SO)
+    message(FATAL_ERROR "Failed to build gtest.")
+endif()
+
+set(${PKG_NAME}_LIBRARIES "${GTEST_SO};${GMOCK_SO};${GTEST_MAIN_SO};${GMOCK_MAIN_SO}")
+set(${PKG_NAME}_FOUND TRUE)
+
+endif()
diff --git a/debug/accuracy_tools/cmake/Findmockcpp.cmake b/debug/accuracy_tools/cmake/Findmockcpp.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..c360702c187bfdef553a6b67344ea132a18373f6
--- /dev/null
+++ b/debug/accuracy_tools/cmake/Findmockcpp.cmake
@@ -0,0 +1,46 @@
+# Download mockcpp 2.7 and build its static library inside the unpacked source tree at configure time.
+set(PACKAGE_VERSION 2.7)
+
+set(PKG_NAME mockcpp)
+set(URL "https://gitee.com/sinojelly/mockcpp/repository/archive/v2.7.zip")
+set(SHA256_VALUE "0dc7111c5be9785d0550ed3b68db7e12fd5d7802b7bc6548c52ac7b9e727fcc1")
+set(DOWNLOAD_PATH "$ENV{PROJECT_ROOT_PATH}/third_party")
+set(DIR_NAME "${DOWNLOAD_PATH}/mockcpp-v2.7")
+
+if (NOT ${PKG_NAME}_FOUND)
+
+download_opensource_pkg(${PKG_NAME}
+    URL ${URL}
+    SHA256 ${SHA256_VALUE}
+    DOWNLOAD_PATH ${DOWNLOAD_PATH}
+)
+
+include_directories(${DIR_NAME}/include)
+include_directories(${DIR_NAME}/3rdparty)
+
+execute_process(
+    WORKING_DIRECTORY ${DIR_NAME}
+    COMMAND "${CMAKE_COMMAND}" .
+    RESULT_VARIABLE RESULT
+)
+if (NOT RESULT EQUAL 0)
+    message(FATAL_ERROR "Failed to build mockcpp. ${RESULT}")
+endif()
+execute_process(
+    WORKING_DIRECTORY ${DIR_NAME}
+    COMMAND make -j16
+    RESULT_VARIABLE RESULT
+)
+if (NOT RESULT EQUAL 0)
+    message(FATAL_ERROR "Failed to build mockcpp. ${RESULT}")
+endif()
+
+file(GLOB MOCKCPP_LIB "${DIR_NAME}/src/libmockcpp.a")
+if (NOT MOCKCPP_LIB)
+    message(FATAL_ERROR "Failed to build mockcpp.")
+endif()
+
+set(${PKG_NAME}_LIBRARIES "${MOCKCPP_LIB}")
+set(${PKG_NAME}_FOUND TRUE)
+
+endif()
diff --git a/debug/accuracy_tools/cmake/Findnlohmannjson.cmake b/debug/accuracy_tools/cmake/Findnlohmannjson.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..0f85cc00a0d30a3896a8f47cac95911929070e33
--- /dev/null
+++ b/debug/accuracy_tools/cmake/Findnlohmannjson.cmake
@@ -0,0 +1,21 @@
+# Download nlohmann/json 3.10.1; only its include directory is registered, nothing is built.
+set(PACKAGE_VERSION 3.10.1)
+
+set(PKG_NAME nlohmannjson)
+set(URL "https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.10.1.zip")
+set(SHA256_VALUE "5c7d0a0542431fef628f8dc4c34fd022fe8747ccb577012d58f38672d8747e0d")
+set(DOWNLOAD_PATH "$ENV{PROJECT_ROOT_PATH}/third_party")
+set(DIR_NAME "${DOWNLOAD_PATH}/JSON-for-Modern-CPP-v3.10.1")
+
+if (NOT ${PKG_NAME}_FOUND)
+
+download_opensource_pkg(${PKG_NAME}
+    URL ${URL}
+    SHA256 ${SHA256_VALUE}
+    DOWNLOAD_PATH ${DOWNLOAD_PATH}
+)
+
+include_directories(${DIR_NAME}/include)
+set(${PKG_NAME}_FOUND TRUE)
+
+endif()
diff --git a/debug/accuracy_tools/cmake/Findopenssl.cmake b/debug/accuracy_tools/cmake/Findopenssl.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..d361095242917df8accbb81a51de65c5ca5ac980
--- /dev/null
+++ b/debug/accuracy_tools/cmake/Findopenssl.cmake
@@ -0,0 +1,73 @@
+set(PACKAGE_VERSION 1.1.1)
+
+set(PKG_NAME openssl)
+set(URL "https://gitee.com/mirrors/openssl/repository/archive/OpenSSL_1_1_1k.tar.gz")
+set(SHA256_VALUE "b92f9d3d12043c02860e5e602e50a73ed21a69947bcc74d391f41148e9f6aa95")
+set(DOWNLOAD_PATH "$ENV{PROJECT_ROOT_PATH}/third_party")
+set(DIR_NAME "${DOWNLOAD_PATH}/openssl-OpenSSL_1_1_1k")
+
+if (NOT ${PKG_NAME}_FOUND)
+
+if (DEFINED USE_LOCAL_FIRST AND "${USE_LOCAL_FIRST}" STREQUAL "True")  # prefer an OpenSSL already installed locally
+find_package(OpenSSL)
+if (OpenSSL_FOUND AND OPENSSL_INCLUDE_DIR AND OPENSSL_LIBRARIES)
+    if ("${OPENSSL_VERSION}" VERSION_GREATER_EQUAL "${PACKAGE_VERSION}")
+        message("Found openssl ${OPENSSL_VERSION}, which is equal or greater than the minimum required version ${PACKAGE_VERSION}. Use it instead.")
+        set(PACKAGE_VERSION ${OPENSSL_VERSION})  # record the version actually in use, as Findprotobuf.cmake does
+        set(${PKG_NAME}_FOUND TRUE)
+        include_directories(${OPENSSL_INCLUDE_DIR})
+        set(${PKG_NAME}_LIBRARIES ${OPENSSL_LIBRARIES})
+        return()
+    endif()
+endif()
+endif()
+
+download_opensource_pkg(${PKG_NAME}
+    URL ${URL}
+    SHA256 ${SHA256_VALUE}
+    DOWNLOAD_PATH ${DOWNLOAD_PATH}
+)
+
+include_directories(${DIR_NAME}/include)
+set(BUILD_DEPENDENCY_PATH "$ENV{PROJECT_ROOT_PATH}/build_dependency")
+file(GLOB OPENSSL_LIB "${BUILD_DEPENDENCY_PATH}/lib/libssl.a")
+file(GLOB CRYPTO_LIB "${BUILD_DEPENDENCY_PATH}/lib/libcrypto.a")
+if (OPENSSL_LIB AND CRYPTO_LIB)  # both static libraries already exist under build_dependency/: skip the build
+    set(${PKG_NAME}_FOUND TRUE)
+    set(${PKG_NAME}_LIBRARIES "${OPENSSL_LIB};${CRYPTO_LIB}")
+    return()
+endif()
+
+execute_process(
+    WORKING_DIRECTORY ${DIR_NAME}
+    COMMAND ./config -fPIC no-shared --prefix=${BUILD_DEPENDENCY_PATH}
+    RESULT_VARIABLE RESULT
+)
+if (NOT RESULT EQUAL 0)
+    message(FATAL_ERROR "Failed to build openssl. ${RESULT}")
+endif()
+
+execute_process(
+    WORKING_DIRECTORY ${DIR_NAME}
+    COMMAND make -j16
+    RESULT_VARIABLE RESULT
+)
+if (NOT RESULT EQUAL 0)
+    message(FATAL_ERROR "Failed to build openssl. ${RESULT}")
+endif()
+
+execute_process(
+    WORKING_DIRECTORY ${DIR_NAME}
+    COMMAND make install
+)
+
+file(GLOB OPENSSL_LIB "${BUILD_DEPENDENCY_PATH}/lib/libssl.a")
+file(GLOB CRYPTO_LIB "${BUILD_DEPENDENCY_PATH}/lib/libcrypto.a")
+if (NOT OPENSSL_LIB OR NOT CRYPTO_LIB)  # also catches a failed "make install", whose result is not captured
+    message(FATAL_ERROR "Failed to build openssl.")
+endif()
+
+set(${PKG_NAME}_LIBRARIES "${OPENSSL_LIB};${CRYPTO_LIB}")
+set(${PKG_NAME}_FOUND TRUE)
+
+endif()
diff --git a/debug/accuracy_tools/cmake/Findprotobuf.cmake b/debug/accuracy_tools/cmake/Findprotobuf.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..4d70515e980f7a921447250fe58400f600419e4c
--- /dev/null
+++ b/debug/accuracy_tools/cmake/Findprotobuf.cmake
@@ -0,0 +1,93 @@
+set(PACKAGE_VERSION 3.13.0)
+
+set(PKG_NAME protobuf)
+set(URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.13.0.tar.gz")
+set(SHA256_VALUE "ab9b39e7053a6fb06b01bf75fb6ec6a71a1ada5a5f8e2446f927336e97b9e7bb")
+set(DOWNLOAD_PATH "$ENV{PROJECT_ROOT_PATH}/third_party")
+set(DIR_NAME "${DOWNLOAD_PATH}/protobuf_source-v3.13.0")
+
+if (NOT ${PKG_NAME}_FOUND)
+
+if (DEFINED USE_LOCAL_FIRST AND "${USE_LOCAL_FIRST}" STREQUAL "True")  # prefer a protobuf already installed locally
+find_program(PROTOC_EXECUTABLE protoc)
+find_package(Protobuf)
+if (PROTOC_EXECUTABLE AND Protobuf_FOUND)
+execute_process(
+    COMMAND ${PROTOC_EXECUTABLE} --version
+    OUTPUT_VARIABLE PROTOC_VERSION_OUTPUT
+    ERROR_VARIABLE PROTOC_VERSION_OUTPUT
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+)
+string(REGEX MATCH "[0-9]+(\\.[0-9]+)+" PROTOC_VERSION "${PROTOC_VERSION_OUTPUT}")  # quoted: the output may be empty
+if("${PROTOC_VERSION}" VERSION_GREATER_EQUAL "${PACKAGE_VERSION}")
+    message("Found protoc ${PROTOC_VERSION}, which is equal or greater than the minimum required version ${PACKAGE_VERSION}. Use it instead.")
+    set(PACKAGE_VERSION ${PROTOC_VERSION})
+    set(${PKG_NAME}_FOUND TRUE)
+    set(${PKG_NAME}_LIBRARIES ${Protobuf_LIBRARIES})
+    set(PROTOC_EXECUTABLE ${PROTOC_EXECUTABLE})
+    include_directories(${Protobuf_INCLUDE_DIRS})
+    return()
+endif()
+endif()
+endif()
+
+download_opensource_pkg(${PKG_NAME}
+    URL ${URL}
+    SHA256 ${SHA256_VALUE}
+    DOWNLOAD_PATH ${DOWNLOAD_PATH}
+)
+
+include_directories(${DIR_NAME}/src)
+set(BUILD_DEPENDENCY_PATH "$ENV{PROJECT_ROOT_PATH}/build_dependency")
+file(GLOB PROTOC_EXECUTABLE "${BUILD_DEPENDENCY_PATH}/bin/protoc")
+file(GLOB ${PKG_NAME}_LIBRARIES "${BUILD_DEPENDENCY_PATH}/lib/libprotobuf.a")
+if (PROTOC_EXECUTABLE AND ${PKG_NAME}_LIBRARIES)  # protoc and libprotobuf.a already exist under build_dependency/
+    set(${PKG_NAME}_FOUND TRUE)
+    set(PROTOC_EXECUTABLE ${PROTOC_EXECUTABLE})
+    set(${PKG_NAME}_LIBRARIES ${${PKG_NAME}_LIBRARIES})
+    return()
+endif()
+
+execute_process(
+    WORKING_DIRECTORY ${DIR_NAME}
+    COMMAND ./autogen.sh
+    RESULT_VARIABLE RESULT
+)
+if (NOT RESULT EQUAL 0)
+    message(FATAL_ERROR "Failed to build protobuf. ${RESULT}")
+endif()
+
+execute_process(
+    WORKING_DIRECTORY ${DIR_NAME}
+    COMMAND ./configure CFLAGS=-fPIC CXXFLAGS=-fPIC --prefix=${BUILD_DEPENDENCY_PATH} --enable-cpp
+    RESULT_VARIABLE RESULT
+)
+if (NOT RESULT EQUAL 0)
+    message(FATAL_ERROR "Failed to build protobuf. ${RESULT}")
+endif()
+
+execute_process(
+    WORKING_DIRECTORY ${DIR_NAME}
+    COMMAND make -j16
+    RESULT_VARIABLE RESULT
+)
+if (NOT RESULT EQUAL 0)
+    message(FATAL_ERROR "Failed to build protobuf.
${RESULT}")
+endif()
+
+execute_process(
+    WORKING_DIRECTORY ${DIR_NAME}
+    COMMAND make install
+)
+
+file(GLOB PROTOC_EXECUTABLE "${BUILD_DEPENDENCY_PATH}/bin/protoc")
+file(GLOB ${PKG_NAME}_LIBRARIES "${BUILD_DEPENDENCY_PATH}/lib/libprotobuf.a")
+if (NOT PROTOC_EXECUTABLE OR NOT ${PKG_NAME}_LIBRARIES)
+    message(FATAL_ERROR "Failed to build protobuf.")
+endif()
+
+set(PROTOC_EXECUTABLE ${PROTOC_EXECUTABLE})
+set(${PKG_NAME}_LIBRARIES ${${PKG_NAME}_LIBRARIES})
+set(${PKG_NAME}_FOUND TRUE)
+
+endif()
diff --git a/debug/accuracy_tools/cmake/download_opensource.sh b/debug/accuracy_tools/cmake/download_opensource.sh
new file mode 100644
index 0000000000000000000000000000000000000000..725e971621434c32d9954c80b9efe234502eefcc
--- /dev/null
+++ b/debug/accuracy_tools/cmake/download_opensource.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+# Fetch a third-party source package into a directory: archives (.gz/.zip) are downloaded,
+# optionally checked against a sha256 value and unpacked; .git URLs are cloned.
+
+if [ "$#" -lt 2 ]; then
+    echo "Usage: $0 URL PATH [SHA256] [TAG]"
+    exit 1
+fi
+
+url=$1
+path=$2
+
+if [ "$#" -ge 3 ]; then
+    sha256_value=$3
+fi
+if [ "$#" -ge 4 ]; then
+    tag=$4
+fi
+
+echo "Start to download ${url}..."
+
+if [ ! -d "$path" ]; then
+    echo "The specified path does not exist: $path"
+    exit 1
+fi
+cd "${path}" || exit 1
+
+extension=$(echo "${url}" | awk -F'[./]' '{print $NF}')
+if [[ "${extension}" == "gz" || "${extension}" == "zip" ]]; then
+    fullname="${path}/$(basename "${url}")"
+    if [[ -e "${fullname}" ]]; then
+        echo "Source ${fullname} is exists, will not download again."
+    else
+        # Download under a temporary name so an interrupted transfer is never mistaken for a cached archive.
+        # NOTE(review): -k disables TLS certificate verification; integrity then rests on the sha256 check below.
+        if curl -L --fail "${url}" -o "${fullname}.part" -k && mv "${fullname}.part" "${fullname}"; then
+            echo "Download successful: ${url}"
+        else
+            echo "Download failed: ${url}"
+            rm -f "${fullname}.part"
+            exit 1
+        fi
+    fi
+
+    if [[ -n "${sha256_value}" ]]; then
+        sha256data=$(sha256sum "${fullname}" | cut -d' ' -f1)
+        if [[ "${sha256data}" != "${sha256_value}" ]]; then
+            echo "Failed to verify sha256: ${url}"
+            # Remove the bad archive; otherwise the cache check above would reuse it on every later run.
+            rm -f "${fullname}"
+            exit 1
+        fi
+    fi
+
+    if [[ "${extension}" == "gz" ]]; then
+        tar -zxvf "${fullname}" -C ./ -n > /dev/null
+    elif [[ "${extension}" == "zip" ]]; then
+        unzip -n "${fullname}" -d ./ > /dev/null
+    fi
+elif [[ "${extension}" == "git" ]]; then
+    if [[ -z "${tag}" ]]; then
+        git clone "${url}"
+    else
+        git clone "${url}" -b "${tag}"
+    fi
+    if [ $? -eq 0 ]; then
+        echo "Download successful: ${url}"
+    else
+        echo "Download failed: ${url}"
+        exit 1
+    fi
+else
+    echo "Unknown url ${url}"
+    exit 1
+fi
diff --git a/debug/accuracy_tools/cmake/utils.cmake b/debug/accuracy_tools/cmake/utils.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..e3e963d63e99da4e0bb1fd2973051278feb04435
--- /dev/null
+++ b/debug/accuracy_tools/cmake/utils.cmake
@@ -0,0 +1,60 @@
+# Helper functions shared by the Find*.cmake modules and the msprobe build.
+
+# download_opensource_pkg(NAME URL url [SHA256 hash] [GIT_TAG tag] [DOWNLOAD_PATH dir] [DIR_NAME dir] [BUILD_CMD cmd])
+#   Runs cmake/download_opensource.sh to fetch the package into DOWNLOAD_PATH
+#   (default: ${CMAKE_SOURCE_DIR}/../third_party); when BUILD_CMD is given it is run inside DOWNLOAD_PATH/DIR_NAME.
+function(download_opensource_pkg pkg_name)
+    message("start to download ${pkg_name}...")
+    set(options)
+    set(oneValueArgs URL SHA256 GIT_TAG DOWNLOAD_PATH DIR_NAME BUILD_CMD)
+    set(multiValueArgs PATCHES)
+    cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+    if (NOT PKG_URL)
+        message(FATAL_ERROR "${pkg_name} need URL.")
+    endif()
+    if (NOT PKG_DOWNLOAD_PATH)
+        set(PKG_DOWNLOAD_PATH "${CMAKE_SOURCE_DIR}/../third_party")
+    endif()
+    file(MAKE_DIRECTORY "${PKG_DOWNLOAD_PATH}")
+
+    # The optional values are quoted so an empty SHA256 keeps its positional slot in front of GIT_TAG.
+    execute_process(
+        WORKING_DIRECTORY $ENV{PROJECT_ROOT_PATH}/cmake
+        COMMAND bash download_opensource.sh "${PKG_URL}" "${PKG_DOWNLOAD_PATH}" "${PKG_SHA256}" "${PKG_GIT_TAG}"
+        RESULT_VARIABLE RESULT
+    )
+    if (NOT RESULT EQUAL 0)
+        message(FATAL_ERROR "Failed to download ${pkg_name}(${RESULT}).")
+    endif()
+    if (PKG_BUILD_CMD)
+        execute_process(
+            COMMAND bash -c "cd \"${PKG_DOWNLOAD_PATH}/${PKG_DIR_NAME}\" && ${PKG_BUILD_CMD}"
+            RESULT_VARIABLE RESULT
+        )
+        if (NOT RESULT EQUAL 0)
+            message(FATAL_ERROR "Failed to build ${pkg_name}(${RESULT}).")
+        endif()
+    endif()
+endfunction()
+
+# compile_protobuf_file(OUTPUT_PATH proto...)
+#   Runs protoc with --cpp_out=OUTPUT_PATH on every listed .proto file at configure time; a protoc failure is fatal.
+function(compile_protobuf_file output_path)
+    if (NOT PROTOC_EXECUTABLE)
+        message(FATAL_ERROR "You shall install protobuf first.")
+    endif()
+    file(MAKE_DIRECTORY "${output_path}")
+    foreach(file ${ARGN})
+        get_filename_component(abs_file_path "${file}" ABSOLUTE)
+        get_filename_component(file_dir "${abs_file_path}" DIRECTORY)
+        execute_process(
+            COMMAND "${PROTOC_EXECUTABLE}" "-I${file_dir}" "--cpp_out=${output_path}" "${abs_file_path}"
+            RESULT_VARIABLE RESULT
+        )
+        if (NOT RESULT EQUAL 0)
+            message(FATAL_ERROR "Failed to compile protobuf file ${file}(${RESULT}).")
+        endif()
+        message("Compile protobuf file ${file}")
+    endforeach()
+endfunction()
diff --git a/debug/accuracy_tools/msprobe/CMakeLists.txt b/debug/accuracy_tools/msprobe/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..66085a4b0bdb0589f2d90e6f8b23e4c1d6b27c13
--- /dev/null
+++ b/debug/accuracy_tools/msprobe/CMakeLists.txt
@@ -0,0 +1,6 @@
+# The test directory is only configured when BUILD_TEST_CASE is "True" (build.sh -t/--test-cases).
+add_subdirectory(ccsrc)
+
+if (DEFINED BUILD_TEST_CASE AND "${BUILD_TEST_CASE}" STREQUAL "True")
+    add_subdirectory(test)
+endif()
diff --git a/debug/accuracy_tools/msprobe/ccsrc/CMakeLists.txt b/debug/accuracy_tools/msprobe/ccsrc/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2579a3a0e785c0e0ca384b4d52118a5d828249f8
--- /dev/null
+++ b/debug/accuracy_tools/msprobe/ccsrc/CMakeLists.txt
@@ -0,0 +1,64 @@
+cmake_minimum_required(VERSION 3.14)
+project(msprobe VERSION 1.0.0 LANGUAGES CXX C)
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+find_package(cpython MODULE REQUIRED)
+find_package(openssl MODULE REQUIRED)
+find_package(nlohmannjson MODULE REQUIRED)
+find_package(protobuf MODULE REQUIRED)
+
+# When a CANN path is given, copy its dump_data.proto to third_party/ACL/AclDumpMsg.proto.
+if (DEFINED CANN_PATH AND NOT "${CANN_PATH}" STREQUAL "")
+    file(GLOB_RECURSE DUMP_DATA_PROTOS "${CANN_PATH}/**/dump_data.proto")
+    if (DUMP_DATA_PROTOS)
+        list(GET DUMP_DATA_PROTOS 0 DUMP_DATA_PROTO)
+        # configure_file(COPYONLY) copies to the exact file name; file(COPY) treats DESTINATION as a directory.
+        configure_file("${DUMP_DATA_PROTO}" "${CMAKE_CURRENT_SOURCE_DIR}/third_party/ACL/AclDumpMsg.proto" COPYONLY)
+    else()
+        message("Warning: File dump_data.proto not found.")
+    endif()
+endif()
+
+# Run protoc into proto/ at configure time, before the source glob below is evaluated.
+set(PROTO_PATH ${CMAKE_CURRENT_SOURCE_DIR}/proto)
+file(GLOB_RECURSE PROTO_SRC "*.proto")
+compile_protobuf_file(
+    ${PROTO_PATH}
+    ${PROTO_SRC}
+)
+
+add_library(_msprobe_c SHARED)
+
+target_compile_options(_msprobe_c PRIVATE "-Wall")
+target_compile_options(_msprobe_c PRIVATE "-fPIC")
+target_compile_options(_msprobe_c PRIVATE "-fstack-protector-all")
+target_compile_options(_msprobe_c PRIVATE "-ftrapv")
+target_compile_options(_msprobe_c PRIVATE "-fstack-check")
+
+# Linker hardening: read-only relocations (relro), immediate binding (now), non-executable stack.
+target_link_options(_msprobe_c PRIVATE "-Wl,-z,relro")
+target_link_options(_msprobe_c PRIVATE "-Wl,-z,now")
+target_link_options(_msprobe_c PRIVATE "-Wl,-z,noexecstack")
+
+target_link_libraries(_msprobe_c PUBLIC dl)
+target_link_libraries(_msprobe_c PUBLIC pthread)
+target_link_libraries(_msprobe_c PUBLIC ${cpython_LIBRARIES})
+target_link_libraries(_msprobe_c PUBLIC ${openssl_LIBRARIES})
+target_link_libraries(_msprobe_c PUBLIC ${protobuf_LIBRARIES})
+
+if(DEFINED BUILD_TYPE AND "${BUILD_TYPE}" STREQUAL "debug")
+    target_compile_options(_msprobe_c PRIVATE "-O0")
+    target_compile_options(_msprobe_c PRIVATE "-g")
+    target_compile_definitions(_msprobe_c PRIVATE __DEBUG__)
+else()
+    target_compile_options(_msprobe_c PRIVATE "-O2")
+endif()
+
+target_include_directories(_msprobe_c PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
+
+file(GLOB_RECURSE SOURCES "*.cpp" "*.cc")
+target_sources(_msprobe_c PRIVATE ${SOURCES})
+
+install(TARGETS _msprobe_c LIBRARY DESTINATION lib)
diff --git a/debug/accuracy_tools/msprobe/ccsrc/base/DebuggerConfig.cpp b/debug/accuracy_tools/msprobe/ccsrc/base/DebuggerConfig.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..99d8128bb8de945ca5989c45a6987885e8b50b9f
--- /dev/null
+++ b/debug/accuracy_tools/msprobe/ccsrc/base/DebuggerConfig.cpp
@@ -0,0 +1,488 @@
+/*
+ * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include "include/ErrorCode.hpp" +#include "include/Macro.hpp" +#include "utils/FileUtils.hpp" +#include "base/ErrorInfos.hpp" +#include "DebuggerConfigFieldMap.hpp" +#include "DebuggerConfig.hpp" + +namespace MindStudioDebugger { + +template +DebuggerErrno ParseJsonBaseObj2Var(const nlohmann::json& content, const std::string& field, T& output, + bool mandatory=false) +{ + nlohmann::json::const_iterator iter = content.find(field); + if (iter == content.end()) { + if (mandatory) { + return DebuggerErrno::ERROR_FIELD_NOT_EXISTS; + } else { + return DebuggerErrno::OK; + } + } + + try { + output = iter->get(); + return DebuggerErrno::OK; + } catch (const nlohmann::detail::type_error& e) { + /* 数据类型不匹配异常 */ + return DebuggerErrno::ERROR_INVALID_FORMAT; + } +} + +template +DebuggerErrno ParseJsonStringAndTrans(const nlohmann::json& content, const std::string& field, + const std::map& enum2name, T& output, bool mandatory=false) { + DebuggerErrno ret; + std::string value; + + ret = ParseJsonBaseObj2Var(content, field, value, true); + if (ret == DebuggerErrno::ERROR_FIELD_NOT_EXISTS && !mandatory) { + return DebuggerErrno::OK; + } + + if (ret != DebuggerErrno::OK) { + return ret; + } + + int32_t enumId = GetEnumIdFromName(enum2name, value); + if (enumId == debuggerInvalidEnum) { + return DebuggerErrno::ERROR_UNKNOWN_VALUE; + } + + output = static_cast(enumId); + return 
DebuggerErrno::OK;
+}
+
+#define PARSE_OPTIONAL_FIELD_CHECK_RET(content, field, output) \
+    { \
+        if (ParseJsonBaseObj2Var(content, field, output) != DebuggerErrno::OK) { \
+            LOG_ERROR(DebuggerErrno::ERROR_UNKNOWN_VALUE, \
+                      "Field " + std::string(field) + " cannot be parsed."); \
+        } \
+    }
+
+#define PARSE_OPTIONAL_FIELD_TRANS_CHECK_RET(content, field, transMap, output) \
+    { \
+        if (ParseJsonStringAndTrans(content, field, transMap, output) != DebuggerErrno::OK) { \
+            LOG_ERROR(DebuggerErrno::ERROR_UNKNOWN_VALUE, \
+                      "Value of field " + std::string(field) + " is unknown."); \
+        } \
+    }
+
+static bool DebuggerCfgParseUIntRangeGetBorder(const std::string& exp, uint32_t& left, uint32_t& right)
+{
+    if (std::count(exp.begin(), exp.end(), '-') != 1) {
+        LOG_ERROR(DebuggerErrno::ERROR_INVALID_FORMAT, "When using a range expression, it should be formatted as \"a-b\".");
+        return false;
+    }
+    std::istringstream iss(exp);
+    char dash;
+    iss >> left >> dash >> right;
+    if (iss.fail() || dash != '-') {
+        LOG_ERROR(DebuggerErrno::ERROR_INVALID_FORMAT, "When using a range expression, it should be formatted as \"a-b\".");
+        return false;
+    }
+    if (left >= right) {
+        LOG_ERROR(DebuggerErrno::ERROR_INVALID_FORMAT,
+                  "When using a range expression, the left border should be smaller than the right.");
+        return false;
+    }
+    return true;
+}
+/* Fill `range` from array field `name`: numbers are taken as-is, "a-b" strings expand to a..b; at most 65536 elements. */
+void DebuggerCfgParseUIntRange(const nlohmann::json& content, const std::string& name, std::vector<uint32_t>& range)
+{
+    if (!content.contains(name)) {
+        return;
+    }
+
+    const nlohmann::json& array = content[name];
+    if (!array.is_array()) {
+        LOG_ERROR(DebuggerErrno::ERROR_INVALID_FORMAT, name + " should be empty or an array.");
+        return;
+    }
+
+    range.clear();
+    range.reserve(array.size());
+    std::vector<std::pair<uint32_t, uint32_t>> buf;
+    buf.reserve(array.size());
+    uint64_t realLen = 0; /* 64-bit so that a span such as 0-4294967295 cannot wrap the counter back to 0 */
+    /* An a-b range may be large; to avoid repeated reallocation, a-b expressions are buffered and expanded after reserving. */
+    for (const auto& element : array) {
+        if (element.is_number()) {
+            range.emplace_back(element.get<uint32_t>());
+            realLen++;
+        } else if (element.is_string()) {
+            std::string exp = element.get<std::string>();
+            uint32_t begin, end;
+            if (!DebuggerCfgParseUIntRangeGetBorder(exp, begin, end)) {
+                LOG_ERROR(DebuggerErrno::ERROR_INVALID_FORMAT, "Failed to parse " + name + ".");
+                return;
+            }
+            realLen += static_cast<uint64_t>(end) - begin + 1;
+            buf.emplace_back(std::make_pair(begin, end));
+        }
+    }
+
+    constexpr uint32_t maxEleNum = 65536;
+    if (realLen > maxEleNum) {
+        LOG_ERROR(DebuggerErrno::ERROR_INVALID_FORMAT,
+                  "When using a range expression in " + name + ", maximum of 65536 elements can be expressed.");
+        return;
+    }
+
+    if (!buf.empty()) {
+        range.reserve(realLen);
+        for (const auto& border : buf) {
+            for (uint64_t i = border.first; i <= border.second; ++i) { /* 64-bit index: ends even if second is UINT32_MAX */
+                range.emplace_back(static_cast<uint32_t>(i));
+            }
+        }
+    }
+    return;
+}
+
+/* The old rule allowed a single task here; the new rule allows a task list. For compatibility both string and list are accepted. */
+void CommonCfgParseTasks(const nlohmann::json& content, std::vector<DebuggerTaskType>& tasks)
+{
+    std::vector<std::string> taskNameList;
+    std::string taskName;
+    DebuggerErrno ret;
+
+    ret = ParseJsonBaseObj2Var(content, kTask, taskName, true);
+    if (ret == DebuggerErrno::ERROR_FIELD_NOT_EXISTS) {
+        ret = ParseJsonBaseObj2Var<std::vector<std::string>>(content, kTasks, taskNameList, true);
+    } else {
+        taskNameList.emplace_back(taskName);
+    }
+
+    if (ret != DebuggerErrno::OK) {
+        LOG_ERROR(ret, "Value of field task(s) should be string or list.");
+        return;
+    }
+
+    for (auto& ele : taskNameList) {
+        int32_t enumId = GetEnumIdFromName(TaskTypeEnum2Name, ele);
+        if (enumId == debuggerInvalidEnum) {
+            LOG_ERROR(DebuggerErrno::ERROR_UNKNOWN_VALUE, "Task " + ele + " is unknown.");
+            return;
+        }
+        if (!ELE_IN_VECTOR(tasks, static_cast<DebuggerTaskType>(enumId))) {
+            tasks.emplace_back(static_cast<DebuggerTaskType>(enumId));
+        }
+    }
+    return;
+}
+
+constexpr char kRegexPrefix[] = "name-regex(";
+constexpr char kRegexSuffix[] = ")";
+constexpr size_t kRegexPrefixLen = sizeof(kRegexPrefix) - 1;
+constexpr size_t kRegexSuffixLen = sizeof(kRegexSuffix) - 1;
+
+void KernelListMatcher::Parse(const std::vector<std::string>& expressions)
+{
+    for (auto& expression :
expressions) { + size_t len = expression.size(); + if (strncmp(expression.c_str(), kRegexPrefix, kRegexPrefixLen) == 0 && + strncmp(expression.c_str() + (len - kRegexSuffixLen), kRegexSuffix, kRegexSuffixLen) == 0) { + /* name-regex(xxx)表示正则表达式*/ + regexList.emplace_back(expression.substr(kRegexPrefixLen, len - kRegexPrefixLen - kRegexSuffixLen)); + } else { + /* 否则认为是full scope name */ + fullNameList.emplace_back(expression); + } + } +} + +std::vector KernelListMatcher::GenRealKernelList(const char** fullKernelList) const +{ + std::vector output; + /* 返回空列表表示全部dump,返回一个空字符串表示没有匹配上的,都不dump */ + if (this->empty() || fullKernelList == nullptr) { + return output; + } + output = fullNameList; + + for (auto& reg : regexList) { + for (const char** ss = fullKernelList; *ss != nullptr; ++ss) { + if (std::regex_search(*ss, reg)) { + output.emplace_back(*ss); + } + } + } + + if (output.empty()) { + output.emplace_back(""); + LOG_INFO("No kernel matches, so nothing will be dumped."); + } + + return output; +} + +void CommonCfg::Parse(const nlohmann::json& content) +{ + CommonCfgParseTasks(content, tasks); + if (tasks.empty()) { + return; + } + + PARSE_OPTIONAL_FIELD_CHECK_RET(content, kOutputPath, outputPath); + outputPath = FileUtils::GetAbsPath(outputPath); + DebuggerCfgParseUIntRange(content, kRank, rank); + DebuggerCfgParseUIntRange(content, kStep, step); + PARSE_OPTIONAL_FIELD_TRANS_CHECK_RET(content, kLevel, DebuggerLevelEnum2Name, level); + PARSE_OPTIONAL_FIELD_CHECK_RET(content, kSeed, seed); + PARSE_OPTIONAL_FIELD_CHECK_RET(content, kIsDeterministic, isDeterministic); + PARSE_OPTIONAL_FIELD_CHECK_RET(content, kEnableDataloader, enableDataloader); + PARSE_OPTIONAL_FIELD_CHECK_RET(content, kAclConfig, aclConfig); +} + +void DebuggerCfgParseDataMode(const nlohmann::json& content, DebuggerDataDirection& direction, DebuggerDataInOut& inout) +{ + std::vector buf; + bool fw, bw, in, out, all; + + direction = DebuggerDataDirection::DIRECTION_BOTH; + inout = 
DebuggerDataInOut::INOUT_BOTH; + PARSE_OPTIONAL_FIELD_CHECK_RET(content, kDataMode, buf); + all = static_cast(std::find(buf.begin(), buf.end(), kDataModeAll) != buf.end()); + if (buf.empty() || all) { + return; + } + + fw = static_cast(std::find(buf.begin(), buf.end(), kDirectionForward) != buf.end()); + bw = static_cast(std::find(buf.begin(), buf.end(), kDirectionBackward) != buf.end()); + in = static_cast(std::find(buf.begin(), buf.end(), kInOutInput) != buf.end()); + out = static_cast(std::find(buf.begin(), buf.end(), kInOutOutput) != buf.end()); + + /* 互补项都配或都不配都表示both,因此关注不同的场景就行 */ + if (fw != bw) { + if (fw) { + direction = DebuggerDataDirection::DIRECTION_FORWARD; + } else { + direction = DebuggerDataDirection::DIRECTION_BACKWARD; + } + } + if (in != out) { + if (in) { + inout = DebuggerDataInOut::INOUT_INPUT; + } else { + inout = DebuggerDataInOut::INOUT_OUTPUT; + } + } + return; +} + +void StatisticsCfgParseSummary(const nlohmann::json& content, std::vector& summaryOption) +{ + /* 老规则支持"statistics"或"md5",新规则支持"max"/"min"/"l2norm"/"md5"组合,此处兼容 */ + DebuggerErrno ret; + std::string mode = kStatistics; + std::vector modeListName; + + /* 若无该字段,认为是statistic,因此这里给mode设个默认值 */ + ret = ParseJsonBaseObj2Var(content, kSummaryMode, mode); + if (ret == DebuggerErrno::OK) { + if (mode == kStatistics) { + summaryOption.push_back(DebuggerSummaryOption::MAX); + summaryOption.push_back(DebuggerSummaryOption::MIN); + summaryOption.push_back(DebuggerSummaryOption::MEAN); + summaryOption.push_back(DebuggerSummaryOption::L2NORM); + } else if (mode == kMd5) { + summaryOption.push_back(DebuggerSummaryOption::MD5); + } else { + LOG_ERROR(DebuggerErrno::ERROR_UNKNOWN_VALUE, "Summary mode " + mode + " is unknown."); + } + return; + } + + ret = ParseJsonBaseObj2Var>(content, kSummaryMode, modeListName); + if (ret != DebuggerErrno::OK) { + LOG_ERROR(ret, "Value of field summary_mode should be string or list."); + return; + } + + /* 若有该字段但值为空,认为是statistic */ + if 
(modeListName.empty()) { + summaryOption.push_back(DebuggerSummaryOption::MAX); + summaryOption.push_back(DebuggerSummaryOption::MIN); + summaryOption.push_back(DebuggerSummaryOption::MEAN); + summaryOption.push_back(DebuggerSummaryOption::L2NORM); + return; + } + + for (auto& ele : modeListName) { + int32_t enumId = GetEnumIdFromName(SummaryOptionEnum2Name, ele); + if (enumId == debuggerInvalidEnum) { + LOG_ERROR(DebuggerErrno::ERROR_UNKNOWN_VALUE, "Summary mode " + ele + " is unknown."); + return; + } + summaryOption.push_back(static_cast(enumId)); + } + + return; +} + +void StatisticsCfg::Parse(const nlohmann::json& content) +{ + std::vector filter; + PARSE_OPTIONAL_FIELD_CHECK_RET(content, kScope, scope); + PARSE_OPTIONAL_FIELD_CHECK_RET(content, kList, filter); + filter.erase(std::remove_if(filter.begin(), filter.end(), + [](const std::string& s) { return s.find_first_not_of(' ') == std::string::npos; }), + filter.end()); + list = std::move(filter); + if (DebuggerConfig::GetInstance().GetDebugLevel() == DebuggerLevel::L2) { + matcher.Parse(list); + } + DebuggerCfgParseDataMode(content, direction, inout); + StatisticsCfgParseSummary(content, summaryOption); +} + +void DumpTensorCfg::Parse(const nlohmann::json& content) +{ + std::vector filter; + PARSE_OPTIONAL_FIELD_CHECK_RET(content, kScope, scope); + PARSE_OPTIONAL_FIELD_CHECK_RET(content, kList, filter); + filter.erase(std::remove_if(filter.begin(), filter.end(), + [](const std::string& s) { return s.find_first_not_of(' ') == std::string::npos; }), + filter.end()); + list = std::move(filter); + if (DebuggerConfig::GetInstance().GetDebugLevel() == DebuggerLevel::L2) { + matcher.Parse(list); + } + DebuggerCfgParseDataMode(content, direction, inout); + PARSE_OPTIONAL_FIELD_TRANS_CHECK_RET(content, kFileFormat, DumpFileFormatEnum2Name, fileFormat); + PARSE_OPTIONAL_FIELD_CHECK_RET(content, kBackwardInput, backwardInput); +} + +void OverflowCheckCfg::Parse(const nlohmann::json& content) +{ + 
PARSE_OPTIONAL_FIELD_CHECK_RET(content, kOverflowNums, overflowNums); + PARSE_OPTIONAL_FIELD_TRANS_CHECK_RET(content, kCheckMode, OpCheckLevelEnum2Name, checkMode); +} + +void DebuggerConfig::Reset() +{ + LOG_INFO("Reset configuration."); + commonCfg = CommonCfg(); + statisticCfg.reset(); + dumpTensorCfg.reset(); + overflowCheckCfg.reset(); + loaded = false; +} + +void DebuggerConfig::Parse() +{ + std::ifstream cfgFile; + DebuggerErrno ret = FileUtils::OpenFile(cfgFilePath_, cfgFile); + if (ret != DebuggerErrno::OK) { + LOG_ERROR(ret, "Failed to open file " + cfgFilePath_ + "."); + return; + } + + nlohmann::json content; + nlohmann::json::const_iterator iter; + try { + cfgFile >> content; + } catch (const nlohmann::json::parse_error& e) { + LOG_ERROR(DebuggerErrno::ERROR_INVALID_FORMAT, "Failed to parse json file " + cfgFilePath_ + "."); + return; + } + + commonCfg.Parse(content); + +#define PARSE_SUBTASK_CONFIG(enumeration, name, member, basetype) \ + do { \ + if (ELE_IN_VECTOR(commonCfg.tasks, enumeration)) { \ + iter = content.find(name); \ + if (iter != content.end()) { \ + member = std::make_shared(); \ + member->Parse(*iter); \ + } \ + } \ + } while (0) + + PARSE_SUBTASK_CONFIG(DebuggerTaskType::TASK_DUMP_STATISTICS, kTaskStatistics, statisticCfg, StatisticsCfg); + PARSE_SUBTASK_CONFIG(DebuggerTaskType::TASK_DUMP_TENSOR, kTaskDumpTensor, dumpTensorCfg, DumpTensorCfg); + PARSE_SUBTASK_CONFIG(DebuggerTaskType::TASK_OVERFLOW_CHECK, kTaskOverflowCheck, overflowCheckCfg, OverflowCheckCfg); + +#undef PARSE_SUBTASK_CONFIG + return; +} + +int32_t DebuggerConfig::LoadConfig(const std::string& framework, const std::string& cfgFilePath) +{ + if (loaded) { + LOG_WARNING(DebuggerErrno::ERROR, "Repeated initialization, which may lead to errors."); + Reset(); + } + + cfgFilePath_ = FileUtils::GetAbsPath(cfgFilePath); + if (cfgFilePath_ == "") { + LOG_ERROR(DebuggerErrno::ERROR_CANNOT_PARSE_PATH, "Cannot parse path " + cfgFilePath + "."); + return -1; + } + + DebuggerErrno 
ret = FileUtils::CheckFileBeforeRead(cfgFilePath_, "r", FileType::JSON); + if (ret != DebuggerErrno::OK) { + LOG_ERROR(ret, "Config file " + cfgFilePath + " is invalid."); + return -1; + } + + int32_t enumId = GetEnumIdFromName(FrameworkEnum2Name, framework); + if (enumId == debuggerInvalidEnum) { + LOG_ERROR(DebuggerErrno::ERROR_UNKNOWN_VALUE, "Unknown framework " + framework + "."); + return -1; + } + framework_ = static_cast(enumId); + + Parse(); + if (ErrorInfosManager::GetTopErrLevelInDuration() >= DebuggerErrLevel::LEVEL_ERROR) { + LOG_ERROR(DebuggerErrno::ERROR, "Failed to parse config file " + cfgFilePath + "."); + return -1; + } + + CheckConfigValidity(); + if (ErrorInfosManager::GetTopErrLevelInDuration() >= DebuggerErrLevel::LEVEL_ERROR) { + LOG_ERROR(DebuggerErrno::ERROR, "Config file " + cfgFilePath + " is invalid."); + return -1; + } + + loaded = true; + return 0; +} + +bool DebuggerConfig::CheckConfigValidity() +{ + if (commonCfg.tasks.empty()) { + LOG_WARNING(DebuggerErrno::ERROR, "No task configured. MsProbe will do nothing."); + return true; + } + + /* 解析时已做格式有效性校验,数值有效性放在python前端校验 */ + return true; +} + +} diff --git a/debug/accuracy_tools/msprobe/ccsrc/base/DebuggerConfig.hpp b/debug/accuracy_tools/msprobe/ccsrc/base/DebuggerConfig.hpp new file mode 100644 index 0000000000000000000000000000000000000000..15ea9e6fda47c0380d9718f135a1baf0658788eb --- /dev/null +++ b/debug/accuracy_tools/msprobe/ccsrc/base/DebuggerConfig.hpp @@ -0,0 +1,265 @@ +/* + * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "include/Macro.hpp"
+
+namespace MindStudioDebugger {
+/* Value shared by the *_BUTT sentinels below and returned by name lookups that fail. */
+constexpr int debuggerInvalidEnum = -1;
+/* Supported frameworks.
+ * NOTE(review): FRAMEWORK_BUTT is not pinned to debuggerInvalidEnum, unlike the *_BUTT
+ * members of the other enums in this header - confirm this is intentional. */
+enum class DebuggerFramework {
+    FRAMEWORK_PYTORCH,
+    FRAMEWORK_MINDSPORE,
+
+    FRAMEWORK_BUTT,
+};
+/* Task kinds that can be listed in the configuration. */
+enum class DebuggerTaskType {
+    TASK_DUMP_TENSOR,
+    TASK_DUMP_STATISTICS,
+    TASK_OVERFLOW_CHECK,
+    TASK_FREE_BENCHMARK,
+    TASK_RUN_UT,
+    TASK_GRAD_PROBE,
+
+    TASK_BUTT = debuggerInvalidEnum,
+};
+
+enum class DebuggerDevType {
+    DEVICE_TYPE_NPU,
+    DEVICE_TYPE_GPU,
+    DEVICE_TYPE_CPU,
+
+    DEVICE_TYPE_BUTT = debuggerInvalidEnum,
+};
+
+enum class DebuggerLevel {
+    L0,
+    L1,
+    L2,
+    MIX,
+
+    LEVEL_BUTT = debuggerInvalidEnum,
+};
+
+enum class DebuggerDataDirection {
+    DIRECTION_FORWARD,
+    DIRECTION_BACKWARD,
+    DIRECTION_BOTH,
+
+    DIRECTION_BUTT = debuggerInvalidEnum,
+};
+
+enum class DebuggerDataInOut {
+    INOUT_INPUT,
+    INOUT_OUTPUT,
+    INOUT_BOTH,
+
+    INOUT_BUTT = debuggerInvalidEnum,
+};
+
+enum class DebuggerDumpFileFormat {
+    FILE_FORMAT_BIN,
+    FILE_FORMAT_NPY,
+
+    FILE_FORMAT_BUTT = debuggerInvalidEnum,
+};
+
+enum class DebuggerOpCheckLevel {
+    CHECK_LEVEL_AICORE,
+    CHECK_LEVEL_ATOMIC,
+    CHECK_LEVEL_ALL,
+
+    CHECK_LEVEL_BUTT = debuggerInvalidEnum,
+};
+/* Individual statistics that a summary can contain. */
+enum class DebuggerSummaryOption {
+    MAX,
+    MIN,
+    MEAN,
+    L2NORM,
+    NAN_CNT,
+    NEG_INF_CNT,
+    POS_INF_CNT,
+    MD5,
+
+    SUMMARY_BUTT = debuggerInvalidEnum,
+};
+/* Holds the kernel selection built from a list of expressions, split into a list of
+ * full names and a list of regular expressions (see the two private members). */
+class KernelListMatcher {
+public:
+    KernelListMatcher() = default;
+    ~KernelListMatcher() = default;
+
+    void Parse(const std::vector& expressions);
+    std::vector GenRealKernelList(const char** fullKernelList) const;
+    /* empty(): true when neither full names nor regular expressions were configured.
+     * needAllKernels(): true when at least one regular expression was configured. */
+    inline bool empty() const {return fullNameList.empty() && regexList.empty();}
+    inline bool needAllKernels() const {return !regexList.empty();}
+
+private:
+    std::vector fullNameList;
+    std::vector regexList;
+};
+
+/* Note: the config classes are the basic configuration parsing/query classes and should be
+ * read-only to external code. External code may only parse the configuration file through
+ * the Parse interface and must not modify configuration fields directly. The following
+ * measures guard against accidental misuse:
+ * 1. External code always parses the configuration file through the singleton class
+ *    DebuggerConfig; it cannot create a sub-config class and call its Parse function.
+ * 2. Each sub-config class declares DebuggerConfig as a friend so that DebuggerConfig can
+ *    call the sub-config's Parse.
+ * 3. DebuggerConfig exposes getters for the sub-config classes that return pointers to
+ *    const, which gives external code read-only access (instead of making every member
+ *    private and providing get functions).
+ */
+class DebuggerConfig;
+/* Settings shared by all tasks; the in-class initializers are the defaults. */
+class CommonCfg {
+public:
+    friend class DebuggerConfig;
+    CommonCfg() = default;
+    ~CommonCfg() = default;
+
+    std::vector tasks;
+    std::string outputPath{"./output"};
+    std::vector rank;
+    std::vector step;
+    DebuggerLevel level{DebuggerLevel::L1};
+    int32_t seed{1234};
+    bool isDeterministic{false};
+    bool enableDataloader{false};
+    std::string aclConfig;
+
+private:
+    void Parse(const nlohmann::json &content);
+};
+/* Settings of the statistics task. */
+class StatisticsCfg {
+public:
+    friend class DebuggerConfig;
+    StatisticsCfg() = default;
+    ~StatisticsCfg() = default;
+
+    std::vector scope;
+    std::vector list;
+    KernelListMatcher matcher;
+    DebuggerDataDirection direction{DebuggerDataDirection::DIRECTION_BOTH};
+    DebuggerDataInOut inout{DebuggerDataInOut::INOUT_BOTH};
+    std::vector summaryOption;
+
+private:
+    void Parse(const nlohmann::json &content);
+};
+/* Settings of the tensor dump task. */
+class DumpTensorCfg {
+public:
+    friend class DebuggerConfig;
+    DumpTensorCfg() = default;
+    ~DumpTensorCfg() = default;
+
+    std::vector scope;
+    std::vector list;
+    KernelListMatcher matcher;
+    DebuggerDataDirection direction{DebuggerDataDirection::DIRECTION_BOTH};
+    DebuggerDataInOut inout{DebuggerDataInOut::INOUT_BOTH};
+    DebuggerDumpFileFormat fileFormat{DebuggerDumpFileFormat::FILE_FORMAT_NPY};
+    std::vector backwardInput;
+    bool onlineRunUt{false};
+    std::string nfsPath;
+    std::string tlsPath;
+    std::string host;
+    int32_t port{-1};
+private:
+    void Parse(const nlohmann::json &content);
+};
+/* Settings of the overflow check task. */
+class OverflowCheckCfg {
+public:
+    friend class DebuggerConfig;
+    OverflowCheckCfg() = default;
+    ~OverflowCheckCfg() = default;
+
+    uint32_t overflowNums{1};
+    DebuggerOpCheckLevel checkMode{DebuggerOpCheckLevel::CHECK_LEVEL_ALL};
+
+private:
+    void Parse(const nlohmann::json &content);
+};
+
+/* Singleton that owns the parsed configuration. It is filled by LoadConfig() and queried
+ * through the const getters below; copying and moving are deleted. */
+class DebuggerConfig {
+
+public:
+    static DebuggerConfig& GetInstance() {
+        static DebuggerConfig instance_;
+        return instance_;
+    }
+
+    int32_t LoadConfig(const std::string& framework, const std::string& cfgFilePath);
+    void Reset();
+
+    bool IsCfgLoaded() const {return loaded;}
+    DebuggerFramework GetFramework() const {return framework_;}
+    const std::vector& GetTaskList() const {return commonCfg.tasks;}
+    const std::string& GetOutputPath() const {return commonCfg.outputPath;}
+    const std::vector& GetRankRange() const {return commonCfg.rank;};
+    const std::vector& GetStepRange() const {return commonCfg.step;};
+    DebuggerLevel GetDebugLevel() const {return commonCfg.level;}
+    int32_t GetRandSeed() const {return commonCfg.seed;}
+    bool IsDeterministic() const {return commonCfg.isDeterministic;}
+    bool IsDataloaderEnable() const {return commonCfg.enableDataloader;}
+    std::string GetAclConfigPath() const {return commonCfg.aclConfig;}
+    /* Sub-task configs; each pointer is null until the matching section has been parsed. */
+    std::shared_ptr GetStatisticsCfg() const
+        {return std::const_pointer_cast(statisticCfg);}
+    std::shared_ptr GetDumpTensorCfg() const
+        {return std::const_pointer_cast(dumpTensorCfg);}
+    std::shared_ptr GetOverflowCheckCfg() const
+        {return std::const_pointer_cast(overflowCheckCfg);}
+    /* An empty rank/step list matches every rank/step. */
+    bool IsRankHits(uint32_t rankId) const
+        {return commonCfg.rank.empty() || ELE_IN_VECTOR(commonCfg.rank, rankId);}
+    bool IsStepHits(uint32_t stepId) const
+        {return commonCfg.step.empty() || ELE_IN_VECTOR(commonCfg.step, stepId);}
+
+private:
+    DebuggerConfig() = default;
+    ~DebuggerConfig() = default;
+    explicit DebuggerConfig(const DebuggerConfig &obj) = delete;
+    DebuggerConfig& operator=(const DebuggerConfig &obj) = delete;
+    explicit DebuggerConfig(DebuggerConfig &&obj) = delete;
+    DebuggerConfig& operator=(DebuggerConfig &&obj) = delete;
+
+    void Parse();
+    bool CheckConfigValidity();
+    /* NOTE(review): framework_ has no in-class initializer, unlike the members below. */
+    DebuggerFramework framework_;
+    std::string cfgFilePath_;
+    bool loaded{false};
+    CommonCfg commonCfg;
+    std::shared_ptr statisticCfg{nullptr};
+    std::shared_ptr dumpTensorCfg{nullptr};
+    std::shared_ptr overflowCheckCfg{nullptr};
+};
+
+}
\ No newline at end of file
diff --git a/debug/accuracy_tools/msprobe/ccsrc/base/DebuggerConfigFieldMap.hpp b/debug/accuracy_tools/msprobe/ccsrc/base/DebuggerConfigFieldMap.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..d98e3eb7748de02476871c8508145fdf3f05dd41
--- /dev/null
+++ b/debug/accuracy_tools/msprobe/ccsrc/base/DebuggerConfigFieldMap.hpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include
+#include
+
+#include "DebuggerConfig.hpp"
+
+namespace MindStudioDebugger {
+/* Names of the supported frameworks. */
+constexpr const char* kFramework = "framework";
+constexpr const char* kFrameworkPyTorch = "PyTorch";
+constexpr const char* kFrameworkMindSpore = "MindSpore";
+/* Task names; also used as the keys of the per-task sections. */
+constexpr const char* kTaskStatistics = "statistics";
+constexpr const char* kTaskDumpTensor = "tensor";
+constexpr const char* kTaskOverflowCheck = "overflow_check";
+/* Debug level names. */
+constexpr const char* kLevel0 = "L0";
+constexpr const char* kLevel1 = "L1";
+constexpr const char* kLevel2 = "L2";
+constexpr const char* kLevelMix = "mix";
+/* Values accepted for the data direction and input/output selection. */
+constexpr const char* kDirectionForward = "forward";
+constexpr const char* kDirectionBackward = "backward";
+constexpr const char* kDirectionBoth = "both";
+constexpr const char* kInOutInput = "input";
+constexpr const char* kInOutOutput = "output";
+constexpr const char* kInOutBoth = "both";
+constexpr const char* kDataModeAll = "all";
+
+constexpr const char* kFreeBenchmarkHandlerCheck = "check";
+constexpr const char* kFreeBenchmarkHandlerFix = "fix";
+/* Dump file format names. */
+constexpr const char* kDumpFileFormatBin = "bin";
+constexpr const char* kDumpFileFormatNpy = "npy";
+/* Operator check level names. */
+constexpr const char* kOpCheckLevelAiCore = "aicore";
+constexpr const char* kOpCheckLevelAtomic = "atomic";
+constexpr const char* kOpCheckLevelAll = "all";
+/* Field names of the common configuration. */
+constexpr const char* kTask = "task";
+constexpr const char* kTasks = "tasks";
+constexpr const char* kOutputPath = "dump_path";
+constexpr const char* kRank = "rank";
+constexpr const char* kStep = "step";
+constexpr const char* kLevel = "level";
+constexpr const char* kSeed = "seed";
+constexpr const char* kIsDeterministic = "is_deterministic";
+constexpr const char* kEnableDataloader = "enable_dataloader";
+constexpr const char* kAclConfig = "acl_config";
+/* Field names used inside the per-task sections. */
+constexpr const char* kScope = "scope";
+constexpr const char* kList = "list";
+
+constexpr const char* kDataMode = "data_mode";
+constexpr const char* kSummaryMode = "summary_mode";
+constexpr const char* kFileFormat = "file_format";
+constexpr const char* kOverflowNums = "overflow_nums";
+constexpr const char* kCheckMode = "check_mode";
+constexpr const char* kBackwardInput = "backward_input";
+/* Summary mode names. */
+constexpr const char* kStatistics = "statistics";
+constexpr const char* kMd5 = "md5";
+constexpr const char* kMax = "max";
+constexpr const char* kMin = "min";
+constexpr const char* kMean = "mean";
+constexpr const char* kL2Norm = "l2norm";
+constexpr const char* kNanCount = "nan count";
+constexpr const char* kNegativeInfCount = "negative inf count";
+constexpr const char* kPositiveInfCount = "positive inf count";
+/* The maps below associate each enum value (stored as an integer id) with its name. */
+const std::map FrameworkEnum2Name = {
+    {static_cast(DebuggerFramework::FRAMEWORK_PYTORCH), kFrameworkPyTorch},
+    {static_cast(DebuggerFramework::FRAMEWORK_MINDSPORE), kFrameworkMindSpore},
+};
+/* Only the three tasks listed here have a name; the other DebuggerTaskType values do not. */
+const std::map TaskTypeEnum2Name = {
+    {static_cast(DebuggerTaskType::TASK_DUMP_TENSOR), kTaskDumpTensor},
+    {static_cast(DebuggerTaskType::TASK_DUMP_STATISTICS), kTaskStatistics},
+    {static_cast(DebuggerTaskType::TASK_OVERFLOW_CHECK), kTaskOverflowCheck},
+};
+
+const std::map DebuggerLevelEnum2Name = {
+    {static_cast(DebuggerLevel::L0), kLevel0},
+    {static_cast(DebuggerLevel::L1), kLevel1},
+    {static_cast(DebuggerLevel::L2), kLevel2},
+    {static_cast(DebuggerLevel::MIX), kLevelMix},
+};
+
+const std::map DataDirectionEnum2Name = {
+    {static_cast(DebuggerDataDirection::DIRECTION_FORWARD), kDirectionForward},
+    {static_cast(DebuggerDataDirection::DIRECTION_BACKWARD), kDirectionBackward},
+    {static_cast(DebuggerDataDirection::DIRECTION_BOTH), kDirectionBoth},
+};
+
+const std::map DataInOutEnum2Name = {
+    {static_cast(DebuggerDataInOut::INOUT_INPUT), kInOutInput},
+    {static_cast(DebuggerDataInOut::INOUT_OUTPUT), kInOutOutput},
+    {static_cast(DebuggerDataInOut::INOUT_BOTH), kInOutBoth},
+};
+
+const std::map DumpFileFormatEnum2Name = {
+    {static_cast(DebuggerDumpFileFormat::FILE_FORMAT_BIN), kDumpFileFormatBin},
+    {static_cast(DebuggerDumpFileFormat::FILE_FORMAT_NPY), kDumpFileFormatNpy},
+};
+
+const std::map OpCheckLevelEnum2Name = {
+    {static_cast(DebuggerOpCheckLevel::CHECK_LEVEL_AICORE), kOpCheckLevelAiCore},
+    {static_cast(DebuggerOpCheckLevel::CHECK_LEVEL_ATOMIC), kOpCheckLevelAtomic},
+    {static_cast(DebuggerOpCheckLevel::CHECK_LEVEL_ALL), kOpCheckLevelAll},
+};
+
+const std::map SummaryOptionEnum2Name = {
+    {static_cast(DebuggerSummaryOption::MAX), kMax},
+    {static_cast(DebuggerSummaryOption::MIN), kMin},
+    {static_cast(DebuggerSummaryOption::MEAN), kMean},
+    {static_cast(DebuggerSummaryOption::NAN_CNT), kNanCount},
+    {static_cast(DebuggerSummaryOption::NEG_INF_CNT), kNegativeInfCount},
+    {static_cast(DebuggerSummaryOption::POS_INF_CNT), kPositiveInfCount},
+    {static_cast(DebuggerSummaryOption::L2NORM), kL2Norm},
+
+    {static_cast(DebuggerSummaryOption::MD5), kMd5},
+};
+/* Reverse lookup: scan enum2name for an entry whose name equals `name` and return its id,
+ * or debuggerInvalidEnum when no entry matches. */
+inline int32_t GetEnumIdFromName(const std::map& enum2name, const std::string& name)
+{
+    for (auto iter = enum2name.begin(); iter != enum2name.end(); iter++) {
+        if (iter->second == name) {
+            return iter->first;
+        }
+    }
+    return debuggerInvalidEnum;
+}
+/* Forward lookup: return the name stored for `id`, or "UNKNOWN" when the id is not in the map. */
+inline std::string GetNameFromEnumId(const std::map& enum2name, int32_t id)
+{
+    auto iter = enum2name.find(id);
+    if (iter == enum2name.end()) {
+        return "UNKNOWN";
+    }
+    return iter->second;
+}
+
+}
\ No newline at end of file
diff --git a/debug/accuracy_tools/msprobe/ccsrc/base/Environment.cpp b/debug/accuracy_tools/msprobe/ccsrc/base/Environment.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3a31e03cf898901767e3c658b993edc14b76e35a
--- /dev/null
+++ b/debug/accuracy_tools/msprobe/ccsrc/base/Environment.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/CPythonUtils.hpp"
+#include "DebuggerConfig.hpp"
+#include "Environment.hpp"
+
+namespace MindStudioDebugger {
+namespace Environment {
+/* PyTorch: ask torch.distributed for the rank through the CPythonUtils wrapper.
+ * Returns -1 when torch cannot be imported, torch.distributed is missing, the process
+ * group is not initialized, or the result cannot be converted to an integer. */
+static int32_t GetRankID_PT()
+{
+    /* if torch.distributed.is_initialized():
+     *     return torch.distributed.get_rank()
+     */
+    CPythonUtils::PythonObject torch = CPythonUtils::PythonObject::Import("torch");
+    if (!torch.IsModule()) {
+        return -1;
+    }
+
+    CPythonUtils::PythonObject distributed = torch.Get("distributed");
+    if (distributed.IsNone()) {
+        return -1;
+    }
+
+    if (!distributed.Get("is_initialized").Call()) {
+        return -1;
+    }
+
+    CPythonUtils::PythonObject rank = distributed.Get("get_rank").Call();
+    int32_t id;
+    if (rank.To(id) != 0) {
+        return -1;
+    }
+    return id;
+}
+/* MindSpore: read the rank from the RANK_ID environment variable.
+ * Returns -1 when the variable is unset, does not start with an integer, or is negative. */
+static int32_t GetRankID_MS()
+{
+    constexpr const char* kRankId = "RANK_ID";
+    const char* rankIdEnv = getenv(kRankId);
+    if (rankIdEnv == nullptr) {
+        return -1;
+    }
+
+    std::string rankId(rankIdEnv);
+    std::istringstream iss(rankId);
+    int32_t id = -1;
+    if (!(iss >> id) || id < 0) {
+        return -1;
+    }
+
+    return id;
+}
+/* Return the rank id of this process, or -1 when the configuration is not loaded yet or
+ * the rank cannot be determined. A non-negative result is cached in a function-local
+ * static and reused by later calls; a failed lookup is retried on the next call. */
+int32_t GetRankID()
+{
+    if (!DebuggerConfig::GetInstance().IsCfgLoaded()) {
+        return -1;
+    }
+
+    static int32_t id = -1;
+    if (id >= 0) {
+        return id;
+    }
+
+    if (DebuggerConfig::GetInstance().GetFramework() == DebuggerFramework::FRAMEWORK_PYTORCH) {
+        id = GetRankID_PT();
+    } else if (DebuggerConfig::GetInstance().GetFramework() == DebuggerFramework::FRAMEWORK_MINDSPORE) {
+        id = GetRankID_MS();
+    }
+
+    return id;
+}
+
+}
+}
\ No newline at end of file
diff --git a/debug/accuracy_tools/msprobe/ccsrc/base/Environment.hpp b/debug/accuracy_tools/msprobe/ccsrc/base/Environment.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..187c6f23d32bf90602fad93765f7e916a412fb1b
--- /dev/null
+++ b/debug/accuracy_tools/msprobe/ccsrc/base/Environment.hpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include
+
+namespace MindStudioDebugger {
+namespace Environment {
+
+/* -1 means the rank id could not be obtained or has not been initialized yet. */
+int32_t GetRankID();
+
+}
+}
\ No newline at end of file
diff --git a/debug/accuracy_tools/msprobe/ccsrc/base/ErrorInfos.cpp b/debug/accuracy_tools/msprobe/ccsrc/base/ErrorInfos.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b07554a9fe10609ab4fa03357877b2f7630bd55e
--- /dev/null
+++ b/debug/accuracy_tools/msprobe/ccsrc/base/ErrorInfos.cpp
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "utils/FileUtils.hpp"
+#include "ErrorInfos.hpp"
+
+namespace MindStudioDebugger {
+/* errInfoMtx guards logOfs and topLevel in the functions below. */
+static std::mutex errInfoMtx;
+static std::ofstream logOfs;
+DebuggerErrLevel ErrorInfosManager::topLevel = DebuggerErrLevel::LEVEL_NONE;
+DebuggerErrLevel ErrorInfosManager::threshold = DebuggerErrLevel::LEVEL_INFO;
+/* Text printed for each level in the "[LEVEL]" prefix of a log line. */
+static std::map ErrorLevelString = {
+    {DebuggerErrLevel::LEVEL_CRITICAL, "CRITICAL"},
+    {DebuggerErrLevel::LEVEL_ERROR, "ERROR"},
+    {DebuggerErrLevel::LEVEL_WARNING, "WARNING"},
+    {DebuggerErrLevel::LEVEL_INFO, "INFO"},
+    {DebuggerErrLevel::LEVEL_DEBUG, "DEBUG"},
+    {DebuggerErrLevel::LEVEL_NONE, "NONE"},
+};
+/* Text printed for each error code in the "[ERRNO]" prefix of a log line. */
+static std::map ErrnoString = {
+    {DebuggerErrno::OK, "OK"},
+    {DebuggerErrno::ERROR, "ERROR"},
+
+    {DebuggerErrno::ERROR_FILE_NOT_EXISTS, "FILE_NOT_EXISTS"},
+    {DebuggerErrno::ERROR_FILE_ALREADY_EXISTS, "FILE_ALREADY_EXISTS"},
+    {DebuggerErrno::ERROR_FAILED_TO_OPEN_FILE, "FAILED_TO_OPEN_FILE"},
+    {DebuggerErrno::ERROR_FAILED_TO_WRITE_FILE, "FAILED_TO_WRITE_FILE"},
+    {DebuggerErrno::ERROR_DIR_NOT_EXISTS, "DIR_NOT_EXISTS"},
+    {DebuggerErrno::ERROR_PERMISSION_DENINED, "PERMISSION_DENINED"},
+    {DebuggerErrno::ERROR_NOT_ALLOW_SOFTLINK, "NOT_ALLOW_SOFTLINK"},
+    {DebuggerErrno::ERROR_ILLEGAL_FILE_TYPE, "ILLEGAL_FILE_TYPE"},
+    {DebuggerErrno::ERROR_PATH_TOO_LOOG, "PATH_TOO_LOOG"},
+    {DebuggerErrno::ERROR_PATH_TOO_DEEP, "PATH_TOO_DEEP"},
+    {DebuggerErrno::ERROR_PATH_CONTAINS_INVALID_CHAR, "PATH_CONTAINS_INVALID_CHAR"},
+    {DebuggerErrno::ERROR_FILE_TOO_LARGE, "FILE_TOO_LARGE"},
+    {DebuggerErrno::ERROR_UNKNOWN_FILE_SUFFIX, "UNKNOWN_FILE_SUFFIX"},
+    {DebuggerErrno::ERROR_CANNOT_PARSE_PATH, "CANNOT_PARSE_PATH"},
+
+    {DebuggerErrno::ERROR_INVALID_OPERATION, "INVALID_OPERATION"},
+    {DebuggerErrno::ERROR_INVALID_FORMAT, "INVALID_FORMAT"},
+    {DebuggerErrno::ERROR_INVALID_VALUE, "INVALID_VALUE"},
+    {DebuggerErrno::ERROR_UNKNOWN_FIELD, "UNKNOWN_FIELD"},
+    {DebuggerErrno::ERROR_UNKNOWN_VALUE, "UNKNOWN_VALUE"},
+    {DebuggerErrno::ERROR_UNKNOWN_TRANS, "UNKNOWN_TRANS"},
+    {DebuggerErrno::ERROR_FIELD_NOT_EXISTS, "FIELD_NOT_EXISTS"},
+    {DebuggerErrno::ERROR_VALUE_OVERFLOW, "VALUE_OVERFLOW"},
+
+    {DebuggerErrno::ERROR_NO_MEMORY, "NO_MEMORY"},
+    {DebuggerErrno::ERROR_BUFFER_OVERFLOW, "BUFFER_OVERFLOW"},
+    {DebuggerErrno::ERROR_SYSCALL_FAILED, "SYSCALL_FAILED"},
+    {DebuggerErrno::ERROR_OPERATION_FAILED, "OPERATION_FAILED"},
+
+    {DebuggerErrno::ERROR_DEPENDENCY_NOT_FIND, "DEPENDENCY_NOT_FIND"},
+    {DebuggerErrno::ERROR_EXTERNAL_API_ERROR, "EXTERNAL_API_ERROR"},
+};
+/* Write one log line "[LEVEL][ERRNO]info" (the errno part is omitted for DebuggerErrno::NONE)
+ * to the log file when one is open, otherwise to std::cout, and remember the highest level
+ * seen so far in topLevel. Messages below the threshold are dropped and not recorded. */
+void ErrorInfosManager::LogErrorInfo(DebuggerErrLevel level, DebuggerErrno errId, const std::string& info)
+{
+    if (level < threshold) { /* NOTE(review): threshold is read before errInfoMtx is taken. */
+        return;
+    }
+
+    std::lock_guard lk(errInfoMtx);
+    std::ostream& output = logOfs.is_open() ? logOfs : std::cout;
+    output << "[" << ErrorLevelString[level] << "]";
+    if (errId != DebuggerErrno::NONE) {
+        output << "[" << ErrnoString[errId] << "]";
+    }
+    output << info << std::endl;
+
+    if (level > topLevel) {
+        topLevel = level;
+    }
+
+    return;
+}
+/* Return the highest level logged since the previous call and reset it to LEVEL_NONE. */
+DebuggerErrLevel ErrorInfosManager::GetTopErrLevelInDuration()
+{
+    std::lock_guard lk(errInfoMtx);
+    DebuggerErrLevel ret = topLevel;
+    topLevel = DebuggerErrLevel::LEVEL_NONE;
+    return ret;
+}
+/* Redirect log output to the file at `path`. Any log file that is currently open is closed
+ * first; with an empty path no file is opened, so output goes to std::cout.
+ * NOTE(review): the result of FileUtils::OpenFile is not checked here. */
+void ErrorInfosManager::SetLogPath(const std::string& path)
+{
+    std::lock_guard lk(errInfoMtx);
+    if (logOfs.is_open()) {
+        logOfs.close();
+    }
+
+    if (path.empty()) {
+        return;
+    }
+
+    FileUtils::OpenFile(path, logOfs);
+}
+/* Set the log threshold from the MSPROBE_LOG_LEVEL environment variable. The value is
+ * applied only when it parses as an integer within [LEVEL_DEBUG, LEVEL_CRITICAL];
+ * otherwise the default threshold is kept. Declared with the constructor attribute. */
+__attribute__((constructor)) void InitDebuggerThreshold()
+{
+    const char* msprobeLogLevelEnv = getenv("MSPROBE_LOG_LEVEL");
+    if (msprobeLogLevelEnv == nullptr) {
+        return;
+    }
+
+    int msprobeLogLevel = 1;
+    try {
+        msprobeLogLevel = std::stoi(msprobeLogLevelEnv);
+    } catch (const std::exception& e) {
+        return;
+    }
+
+    if (msprobeLogLevel >= static_cast(DebuggerErrLevel::LEVEL_DEBUG) &&
+        msprobeLogLevel <= static_cast(DebuggerErrLevel::LEVEL_CRITICAL)) {
+        ErrorInfosManager::SetLogThreshold(static_cast(msprobeLogLevel));
+    }
+}
+
+}
\ No newline at end of file
diff --git a/debug/accuracy_tools/msprobe/ccsrc/base/ErrorInfos.hpp b/debug/accuracy_tools/msprobe/ccsrc/base/ErrorInfos.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..6c740a6a36cfd7692b793dfa7625789771731289
--- /dev/null
+++ b/debug/accuracy_tools/msprobe/ccsrc/base/ErrorInfos.hpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include
+#include
+#include "include/ErrorCode.hpp"
+
+namespace MindStudioDebugger {
+/* Severity of a log entry, ordered from least to most severe. */
+enum class DebuggerErrLevel {
+    LEVEL_NONE = -1,       /* None */
+    LEVEL_DEBUG = 0,       /* Debug information only; does not affect functionality */
+    LEVEL_INFO,            /* Information the user should be aware of; generally does not affect functionality */
+    LEVEL_WARNING,         /* Warning: some features may be affected, but basic functionality keeps running */
+    LEVEL_ERROR,           /* A functional error occurred; this module cannot continue to run normally */
+    LEVEL_CRITICAL,        /* System-level critical error: execution must be stopped immediately; cannot be suppressed */
+};
+/* Static logging facade: writes log lines, tracks the highest level logged since it was
+ * last queried, and holds the log destination and threshold. */
+class ErrorInfosManager {
+public:
+    static void LogErrorInfo(DebuggerErrLevel level, DebuggerErrno errId, const std::string& info);
+    static DebuggerErrLevel GetTopErrLevelInDuration();
+    static void SetLogPath(const std::string& path);
+    static void SetLogThreshold(DebuggerErrLevel t) { threshold = t; }
+private:
+    static DebuggerErrLevel topLevel;
+    static DebuggerErrLevel threshold;
+};
+/* Discard the cached top error level by reading (and thereby resetting) it. */
+inline void CleanErrorInfoCache() {
+    ErrorInfosManager::GetTopErrLevelInDuration();
+}
+/* Logging macros. With __DEBUG__ defined, every message is prefixed with
+ * "[file:line @ function]:" and LOG_DEBUG / DEBUG_FUNC_TRACE are active; otherwise the
+ * prefix is omitted and LOG_DEBUG / DEBUG_FUNC_TRACE expand to nothing. */
+#ifdef __DEBUG__
+
+#define SOURCE_CODE_INFO \
+    ("[" + std::string(__FILE__) + ":" + std::to_string(__LINE__) + " @ " + std::string(__FUNCTION__) + "]:")
+#define LOG_CRITICAL(errid, msg) \
+    ErrorInfosManager::LogErrorInfo(DebuggerErrLevel::LEVEL_CRITICAL, errid, SOURCE_CODE_INFO + (msg))
+#define LOG_ERROR(errid, msg) \
+    ErrorInfosManager::LogErrorInfo(DebuggerErrLevel::LEVEL_ERROR, errid, SOURCE_CODE_INFO + (msg))
+#define LOG_WARNING(errid, msg) \
+    ErrorInfosManager::LogErrorInfo(DebuggerErrLevel::LEVEL_WARNING, errid, SOURCE_CODE_INFO + (msg))
+#define LOG_INFO(msg) \
+    ErrorInfosManager::LogErrorInfo(DebuggerErrLevel::LEVEL_INFO, DebuggerErrno::NONE, SOURCE_CODE_INFO + (msg))
+#define LOG_DEBUG(msg) \
+    ErrorInfosManager::LogErrorInfo(DebuggerErrLevel::LEVEL_DEBUG, DebuggerErrno::NONE, SOURCE_CODE_INFO + (msg))
+#define DEBUG_FUNC_TRACE() \
+    ErrorInfosManager::LogErrorInfo(DebuggerErrLevel::LEVEL_DEBUG, DebuggerErrno::NONE, \
+                                    "TRACE: enter " + std::string(__FUNCTION__))
+
+#else
+
+#define LOG_CRITICAL(errid, msg) ErrorInfosManager::LogErrorInfo(DebuggerErrLevel::LEVEL_CRITICAL, errid, msg)
+#define LOG_ERROR(errid, msg) ErrorInfosManager::LogErrorInfo(DebuggerErrLevel::LEVEL_ERROR, errid, msg)
+#define LOG_WARNING(errid, msg) ErrorInfosManager::LogErrorInfo(DebuggerErrLevel::LEVEL_WARNING, errid, msg)
+#define LOG_INFO(msg) ErrorInfosManager::LogErrorInfo(DebuggerErrLevel::LEVEL_INFO, DebuggerErrno::NONE, msg)
+#define LOG_DEBUG(msg)
+#define DEBUG_FUNC_TRACE()
+
+#endif
+
+}
\ No newline at end of file
diff --git a/debug/accuracy_tools/msprobe/ccsrc/core/AclDumpDataProcessor.cpp b/debug/accuracy_tools/msprobe/ccsrc/core/AclDumpDataProcessor.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d6620e62e951df9e9d558cb04ae9e5ee1140bbb7
--- /dev/null
+++ b/debug/accuracy_tools/msprobe/ccsrc/core/AclDumpDataProcessor.cpp
@@ -0,0 +1,767 @@
+/*
+ * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "include/Macro.hpp"
+#include "utils/FileUtils.hpp"
+#include "utils/FileOperation.hpp"
+#include "utils/DataUtils.hpp"
+#include "utils/MathUtils.hpp"
+#include "core/AclTensor.hpp"
+#include "base/ErrorInfos.hpp"
+#include "proto/AclDumpMsg.pb.h"
+#include "AclDumpDataProcessor.hpp"
+
+namespace MindStudioDebugger {
+
+namespace AclDumpMsg = toolkit::dumpdata;
+
+constexpr size_t kDhaAtomicAddInfoSize = 128;
+constexpr size_t kL2AtomicAddInfoSize = 128;
+constexpr size_t kAiCoreInfoSize = 256;
+constexpr size_t kDhaAtomicAddStatusSize = 256;
+constexpr size_t kL2AtomicAddStatusSize = 256;
+constexpr size_t kUint64Size = sizeof(uint64_t);
+constexpr const char* debugFileSign = "Opdebug.Node_OpDebug.";
+/* Column titles used for the statistics CSV output. */
+constexpr const char* kStatsHeaderInout = "Input/Output";
+constexpr const char* kStatsHeaderId = "Index";
+constexpr const char* kStatsHeaderDataSize = "Data Size";
+constexpr const char* kStatsHeaderDataType = "Data Type";
+constexpr const char* kStatsHeaderFormat = "Format";
+constexpr const char* kStatsHeaderShape = "Shape";
+constexpr const char* kStatsHeaderMax = "Max Value";
+constexpr const char* kStatsHeaderMin = "Min Value";
+constexpr const char* kStatsHeaderAvg = "Avg Value";
+constexpr const char* kStatsHeaderL2Norm = "L2 Norm Value";
+constexpr const char* kStatsHeaderMD5 = "MD5 Value";
+constexpr const char* kStatsHeaderNan = "Nan Count";
+constexpr const char* kStatsHeaderNegInf = "Negative Inf Count";
+constexpr const char* kStatsHeaderPosInf = "Positive Inf Count";
+constexpr const char* kRankId = "RANK_ID";
+constexpr const char* kDigitalNumbers = "0123456789";
+/* Column title for each summary option. */
+static const std::map summaryOptionHeaderStrMap = {
+    {DebuggerSummaryOption::MAX, kStatsHeaderMax},
+    {DebuggerSummaryOption::MIN, kStatsHeaderMin},
+    {DebuggerSummaryOption::MEAN, kStatsHeaderAvg},
+    {DebuggerSummaryOption::L2NORM, kStatsHeaderL2Norm},
+    {DebuggerSummaryOption::NAN_CNT, kStatsHeaderNan},
+    {DebuggerSummaryOption::NEG_INF_CNT, kStatsHeaderNegInf},
+    {DebuggerSummaryOption::POS_INF_CNT, kStatsHeaderPosInf},
+    {DebuggerSummaryOption::MD5, kStatsHeaderMD5},
+};
+/* Statistics of one tensor, keyed by summary option, with helpers that render the header
+ * and value rows of a CSV line in the iteration order of the underlying map. */
+class AclTensorStats {
+public:
+    AclTensorStats() = default;
+    explicit AclTensorStats(const std::map& input) : stats(input) {}
+    ~AclTensorStats() = default;
+
+    std::string& operator[](DebuggerSummaryOption opt) { return stats[opt]; }
+    std::string GetCsvHeader() const;
+    std::string GetCsvValue() const;
+
+private:
+    std::map stats;
+};
+/* Comma-separated column titles of the stored options; empty string when nothing is stored. */
+std::string AclTensorStats::GetCsvHeader() const
+{
+    std::string ret("");
+    for (auto it = stats.begin(); it != stats.end(); it++) {
+        ret.append(summaryOptionHeaderStrMap.at(it->first));
+        ret.append(",");
+    }
+
+    if (!ret.empty()) {
+        ret.pop_back();
+    }
+    return ret;
+}
+/* Comma-separated stored values, in the same order as GetCsvHeader(). */
+std::string AclTensorStats::GetCsvValue() const
+{
+    std::string ret("");
+    for (auto it = stats.begin(); it != stats.end(); it++) {
+        ret.append(it->second);
+        ret.append(",");
+    }
+
+    if (!ret.empty()) {
+        ret.pop_back();
+    }
+    return ret;
+}
+/* Free every chunk still held in the buffer queue. */
+AclDumpDataProcessor::~AclDumpDataProcessor()
+{
+    while (!buffer.empty()) {
+        delete buffer.front();
+        buffer.pop();
+    }
+}
+/* Short description of this processor (path, completed flag, cached length) for log messages. */
+std::string AclDumpDataProcessor::ToString() const
+{
+    return "AclDumpDataProcessor(path=" + dumpPath + ",completed=" + std::to_string(completed) + ",len=" +
+           std::to_string(totalLen) + ")";
+}
+/* Append one received chunk to the buffer queue. When the chunk is marked as the last one,
+ * the processor is flagged completed and the cached chunks are concatenated.
+ * Returns OK on success, ERROR_INVALID_OPERATION when data arrives after completion, and
+ * an error code (with errorOccurred set) for a rejected length or a failed concatenation. */
+DebuggerErrno AclDumpDataProcessor::PushData(const acldumpChunk *chunk)
+{
+    DEBUG_FUNC_TRACE();
+    if (completed) {
+        LOG_WARNING(DebuggerErrno::ERROR_INVALID_OPERATION,
+                    ToString() + " receive data when completed. Some errors may occur.");
+        return DebuggerErrno::ERROR_INVALID_OPERATION;
+    }
+
+    /* Set the completed flag first, so that the processor is not left behind if handling the last chunk fails. */
+    if (chunk->isLastChunk) {
+        completed = true;
+    }
+
+    size_t len = chunk->bufLen;
+    /* Guard against wrap-around of the accumulated length; a zero-length chunk is rejected by the same check. */
+    if (SIZE_MAX - len < totalLen || totalLen + len > kMaxDataLen || len == 0) {
+        LOG_ERROR(DebuggerErrno::ERROR_BUFFER_OVERFLOW, ToString() + ": buffer overflow(cached size " +
+                  std::to_string(totalLen) + ", receiving size " + std::to_string(len) + ").");
+        errorOccurred = true;
+        return DebuggerErrno::ERROR_BUFFER_OVERFLOW;
+    }
+
+    std::vector *p = new std::vector(len);
+    if (p == nullptr) { /* NOTE(review): a plain new-expression normally throws instead of returning nullptr - confirm this branch is reachable. */
+        LOG_ERROR(DebuggerErrno::ERROR_NO_MEMORY, "Acl dump data processor(" + dumpPath + "): Alloc failed(" +
+                  std::to_string(len) + " bytes).");
+        errorOccurred = true;
+        return DebuggerErrno::ERROR_NO_MEMORY;
+    }
+
+    if (memcpy(p->data(), chunk->dataBuf, len) == nullptr) { /* NOTE(review): memcpy returns its destination pointer, so this looks like it cannot detect a failed copy - confirm. */
+        LOG_ERROR(DebuggerErrno::ERROR_SYSCALL_FAILED, ToString() + ": Failed to copy data;");
+        delete p;
+        errorOccurred = true;
+        return DebuggerErrno::ERROR_SYSCALL_FAILED;
+    }
+
+    buffer.push(p);
+    totalLen += len;
+    if (!chunk->isLastChunk) {
+        return DebuggerErrno::OK;
+    }
+
+    completed = true;
+    DebuggerErrno ret = ConcatenateData();
+    if (ret != DebuggerErrno::OK) {
+        LOG_ERROR(ret, "Acl dump data processor(" + dumpPath + "): Failed to concatenate data.");
+        errorOccurred = true;
+        return ret;
+    }
+    LOG_DEBUG(ToString() + " is completed.");
+
+    return DebuggerErrno::OK;
+}
+
+DebuggerErrno AclDumpDataProcessor::ConcatenateData()
+{
+    DEBUG_FUNC_TRACE();
+    if (!completed) {
+        LOG_ERROR(DebuggerErrno::ERROR_INVALID_OPERATION, "Acl dump data processor(" + dumpPath +
+                  "): Data is incomplete.");
+        return DebuggerErrno::ERROR_INVALID_OPERATION;
+    }
+
+    if (buffer.empty()) {
+        LOG_ERROR(DebuggerErrno::ERROR_INVALID_VALUE, "Acl dump data processor(" + dumpPath + "): No data.");
+        return DebuggerErrno::ERROR_INVALID_VALUE;
+    }
+
+    /* To reduce repeated data copies, the data is merged only once here; the data header is not stripped and the data segment is accessed by offset instead. */
+    if (buffer.size() > 1) 
{ + std::vector *p = new std::vector(totalLen); + if (p == nullptr) { + LOG_ERROR(DebuggerErrno::ERROR_NO_MEMORY, "Acl dump data processor(" + dumpPath + "): Alloc failed(" + + std::to_string(totalLen) + " bytes)."); + return DebuggerErrno::ERROR_NO_MEMORY; + } + + size_t offset = 0; + uint8_t* msg = p->data(); + while (!buffer.empty()) { + if (memcpy(msg + offset, buffer.front()->data(), buffer.front()->size()) == nullptr) { + delete p; + LOG_ERROR(DebuggerErrno::ERROR_SYSCALL_FAILED, "Acl dump data processor(" + dumpPath + + "): Failed to copy data."); + return DebuggerErrno::ERROR_SYSCALL_FAILED; + } + offset += buffer.front()->size(); + delete buffer.front(); + buffer.pop(); + } + buffer.push(p); + } + + if (FileUtils::GetFileSuffix(dumpPath) == CSV_SUFFIX) { + dataSegOffset = 0; + dataSegLen = totalLen; + return DebuggerErrno::OK; + } + + headerSegOffset = sizeof(uint64_t); + if (totalLen < headerSegOffset) { + LOG_ERROR(DebuggerErrno::ERROR_INVALID_FORMAT, "Acl dump data processor(" + dumpPath + + "): Invalid data length " + std::to_string(totalLen) + "."); + return DebuggerErrno::ERROR_INVALID_FORMAT; + } + + headerSegLen = *(reinterpret_cast(buffer.front()->data())); + if (totalLen < headerSegOffset + headerSegLen) { + LOG_ERROR(DebuggerErrno::ERROR_INVALID_FORMAT, "Acl dump data processor(" + dumpPath + + "): Invalid header len " + std::to_string(headerSegLen) + "/" + std::to_string(totalLen) + "."); + return DebuggerErrno::ERROR_INVALID_FORMAT; + } + + dataSegOffset = headerSegOffset + headerSegLen; + dataSegLen = totalLen - dataSegOffset; + return DebuggerErrno::OK; +} + +static nlohmann::json ParseOverflowInfo(const uint8_t* data) +{ + DEBUG_FUNC_TRACE(); + uint32_t index = 0; + nlohmann::json overflowInfo; + uint64_t modelId = DataUtils::UnpackUint64Value_Le(data); + index += kUint64Size; + uint64_t streamId = DataUtils::UnpackUint64Value_Le(data + index); + index += kUint64Size; + uint64_t taskId = DataUtils::UnpackUint64Value_Le(data + index); + 
index += kUint64Size; + uint64_t taskType = DataUtils::UnpackUint64Value_Le(data + index); + index += kUint64Size; + uint64_t pcStart = DataUtils::UnpackUint64Value_Le(data + index); + index += kUint64Size; + uint64_t paraBase = DataUtils::UnpackUint64Value_Le(data + index); + + overflowInfo["model_id"] = modelId; + overflowInfo["stream_id"] = streamId; + overflowInfo["task_id"] = taskId; + overflowInfo["task_type"] = taskType; + overflowInfo["pc_start"] = DataUtils::U64ToHexString(pcStart); + overflowInfo["para_base"] = DataUtils::U64ToHexString(paraBase); + return overflowInfo; +} + +static DebuggerErrno DumpOpDebugDataToDisk(const std::string& dumpPath, AclDumpMsg::DumpData& dumpData, + const uint8_t* data, size_t dataLen) +{ + DEBUG_FUNC_TRACE(); + std::string outPath = dumpPath + ".output."; + uint32_t num = dumpData.output().size(); + for (uint32_t slot = 0; slot < num; slot++) { + uint32_t offset = 0; + // parse DHA Atomic Add info + nlohmann::json dhaAtomicAddInfo = ParseOverflowInfo(data + offset); + offset += kDhaAtomicAddInfoSize; + // parse L2 Atomic Add info + nlohmann::json l2AtomicAddInfo = ParseOverflowInfo(data + offset); + offset += kL2AtomicAddInfoSize; + // parse AICore info + nlohmann::json aiCoreInfo = ParseOverflowInfo(data + offset); + offset += kAiCoreInfoSize; + // parse DHA Atomic Add status + dhaAtomicAddInfo["status"] = DataUtils::UnpackUint64Value_Le(data + offset); + offset += kDhaAtomicAddStatusSize; + // parse L2 Atomic Add status + l2AtomicAddInfo["status"] = DataUtils::UnpackUint64Value_Le(data + offset); + offset += kL2AtomicAddStatusSize; + // parse AICore status + uint64_t kernelCode = DataUtils::UnpackUint64Value_Le(data + offset); + offset += kUint64Size; + uint64_t blockIdx = DataUtils::UnpackUint64Value_Le(data + offset); + offset += kUint64Size; + uint64_t status = DataUtils::UnpackUint64Value_Le(data + offset); + aiCoreInfo["kernel_code"] = DataUtils::U64ToHexString(kernelCode); + aiCoreInfo["block_idx"] = blockIdx; + 
aiCoreInfo["status"] = status; + + nlohmann::json opdebugData; + opdebugData["DHA Atomic Add"] = dhaAtomicAddInfo; + opdebugData["L2 Atomic Add"] = l2AtomicAddInfo; + opdebugData["AI Core"] = aiCoreInfo; + + // save json to file + std::string filePath = outPath + std::to_string(slot) + "." + JSON_SUFFIX; + DebuggerErrno ret = FileOperation::DumpJson(filePath, opdebugData); + if (ret != DebuggerErrno::OK) { + LOG_ERROR(ret, "Failed to dump data to " + filePath + "."); + return ret; + } + } + return DebuggerErrno::OK; +} + +static DebuggerErrno ConvertFormatDeviceToHost(AclTensorInfo& tensor) +{ + DEBUG_FUNC_TRACE(); + if (tensor.deviceFmt == tensor.hostFmt || AclTensor::SizeOfTensor(tensor) == 0) { + LOG_DEBUG(tensor + ": No need to convert format."); + return DebuggerErrno::OK; + } + + DebuggerErrno ret = AclTensor::TransFormatD2H(tensor); + if (ret == DebuggerErrno::ERROR_UNKNOWN_TRANS) { + tensor.hostFmt = tensor.deviceFmt; + LOG_WARNING(DebuggerErrno::ERROR_INVALID_FORMAT, "Do not support convert format from " + + std::to_string(tensor.deviceFmt) + " to " + std::to_string(tensor.hostFmt) + "."); + return DebuggerErrno::OK; + } + + if (ret != DebuggerErrno::OK) { + LOG_ERROR(ret, tensor + ": Failed to convert format."); + return ret; + } + + LOG_DEBUG(tensor + ": Convert format successfully."); + return DebuggerErrno::OK; +} + +static std::string MappingFilePath(const std::string& originPath) +{ + /* adump一次最多传10个tensor数据,输入输出数超过10的算子会分包,但是时序上是连续的,此处缓存上一次的映射 */ + static std::string lastOriName; + static std::string lastMappingPath; + + if (lastOriName == originPath && !lastMappingPath.empty()) { + return lastMappingPath; + } + + std::string dir = FileUtils::GetParentDir(originPath); + std::string suffix = FileUtils::GetFileSuffix(originPath); + std::string mappingName; + uint32_t retry = 10; + constexpr uint32_t randFileNameLen = 32; + do { + mappingName = MathUtils::RandomString(randFileNameLen, '0', '9'); + if (!suffix.empty()) { + 
mappingName.append(".").append(suffix); + } + if (!FileUtils::IsPathExist(dir + "/" + mappingName)) { + break; + } + } while (--retry); + + if (retry == 0) { + LOG_ERROR(DebuggerErrno::ERROR, "Failed to map path " + originPath + "."); + return std::string(); + } + + DebuggerErrno ret; + FileUtils::CreateDir(dir); + std::ofstream ofs; + constexpr const char* mapFileName = "mapping.csv"; + + ret = FileUtils::OpenFile(dir + "/" + mapFileName, ofs, std::ofstream::app); + if (ret != DebuggerErrno::OK) { + LOG_ERROR(DebuggerErrno::ERROR, "Failed to open mapping file " + dir + "/" + mapFileName + "."); + return std::string(); + } + + ofs << mappingName << "," << FileUtils::GetFileName(originPath) << "\n"; + if (ofs.fail()) { + LOG_ERROR(DebuggerErrno::ERROR_FAILED_TO_WRITE_FILE, "Failed to write file " + dir + "/" + mapFileName + "."); + ofs.close(); + return std::string(); + } + ofs.close(); + lastOriName = originPath; + lastMappingPath = dir + "/" + mappingName; + return lastMappingPath; +} + +static DebuggerErrno StandardizedDumpPath(std::string& originPath) +{ + std::string filename = FileUtils::GetFileName(originPath); + if (filename.length() <= FileUtils::FILE_NAME_MAX) { + return DebuggerErrno::OK; + } + + std::string mappingPath = MappingFilePath(originPath); + if (mappingPath.empty()) { + LOG_ERROR(DebuggerErrno::ERROR, "Failed to open mapping file " + originPath + "."); + return DebuggerErrno::ERROR; + } + + originPath = std::move(mappingPath); + return DebuggerErrno::OK; +} + +static std::string GenDataPath(const std::string& path) { + LOG_DEBUG("Original acl data path is " + path); + std::string outputPath = DebuggerConfig::GetInstance().GetOutputPath(); + std::string dataPath; + if (path.compare(0, outputPath.length(), outputPath) != 0) { + return path; + } + dataPath = path.substr(outputPath.length()); + const std::vector items = FileUtils::SplitPath(dataPath); + constexpr const size_t expectSegLen = 9; + constexpr const size_t rankIdPos = 0; + constexpr 
const size_t timeStampPos = 1; + constexpr const size_t stepIdPos = 2; + constexpr const size_t dataNamePos = 8; + + if (items.size() >= expectSegLen) { + dataPath = outputPath; + if (dataPath.at(dataPath.length() - 1) != '/') { + dataPath.append("/"); + } + /* + * ACL 接口返回数据的路径格式如下 + * {dump_path}/rank_{rank_id}/{time stamp}/step_{step_id}/{time}/{device_id}/{model_name}/{model_id}/{iteration_id}/{data name} + * items[0] 表示 rank_{rank_id} + * items[1] 表示 {time stamp} + * items[2] 表示 step_{step_id} + * items[8] 表示 {data name} + */ + dataPath.append(items[rankIdPos] + "/"); + dataPath.append(items[timeStampPos] + "/"); + dataPath.append(items[stepIdPos] + "/"); + dataPath.append(items[dataNamePos]); + return dataPath; + } + return path; +} + +inline std::string GetTensorInfoSuffix(AclTensorInfo& tensor) +{ + return "." + tensor.inout + "." + std::to_string(tensor.slot) + + "." + DataUtils::GetFormatString(tensor.hostFmt) + "." + DataUtils::GetDTypeString(tensor.dtype); +} + +static DebuggerErrno DumpOneAclTensorFmtBin(AclTensorInfo& tensor) +{ + DebuggerErrno ret; + std::string dumpPathSlot = tensor.dumpPath + GetTensorInfoSuffix(tensor); + if (StandardizedDumpPath(dumpPathSlot) != DebuggerErrno::OK) { + LOG_ERROR(DebuggerErrno::ERROR, "Failed to standardize path " + dumpPathSlot + "."); + return DebuggerErrno::ERROR; + } + + std::ofstream ofs; + ret = FileUtils::OpenFile(dumpPathSlot, ofs, std::ios::out | std::ios::binary); + if (ret != DebuggerErrno::OK) { + LOG_ERROR(ret, "Failed to open file " + dumpPathSlot + "."); + return ret; + } + + ofs.write(reinterpret_cast(tensor.aclData), tensor.dataSize); + if (ofs.fail()) { + LOG_ERROR(DebuggerErrno::ERROR_FAILED_TO_WRITE_FILE, "Failed to write file " + dumpPathSlot + "."); + ret = DebuggerErrno::ERROR_FAILED_TO_WRITE_FILE; + } + ofs.close(); + return ret; +} + + +static DebuggerErrno DumpOneAclTensorFmtNpy(AclTensorInfo& tensor) +{ + DEBUG_FUNC_TRACE(); + DebuggerErrno ret; + if (tensor.dataSize == 0) { + 
LOG_INFO(tensor + ": Data size is 0. No need to dump."); + return DebuggerErrno::OK; + } + + if (tensor.dtype == AclDtype::DT_BF16) { + ret = AclTensor::TransDtype(tensor, AclDtype::DT_FLOAT); + if (ret != DebuggerErrno::OK) { + LOG_ERROR(ret, tensor + ": Failed to transform dtype from bf16 to fp32."); + return ret; + } + } + + // dump_path: dump_dir/op_type.op_name.task_id.stream_id.timestamp + std::string dumpPathSlot = tensor.dumpPath + GetTensorInfoSuffix(tensor) + "." + NPY_SUFFIX; + + if (StandardizedDumpPath(dumpPathSlot) != DebuggerErrno::OK) { + LOG_ERROR(DebuggerErrno::ERROR, "Failed to standardize path " + dumpPathSlot + "."); + return DebuggerErrno::ERROR; + } + + if (tensor.transBuf.empty()) { + ret = FileOperation::DumpNpy(dumpPathSlot, tensor.aclData, tensor.dataSize, tensor.dtype, tensor.hostShape); + } else { + ret = FileOperation::DumpNpy(dumpPathSlot, tensor.transBuf.data(), tensor.transBuf.size(), tensor.dtype, + tensor.hostShape); + } + + if (ret != DebuggerErrno::OK) { + LOG_ERROR(ret, tensor + ": Failed to dump as npy."); + return ret; + } + + LOG_DEBUG(tensor + ": dump successfully."); + + return ret; +} + +static DebuggerErrno WriteOneTensorStatToDisk(const AclTensorInfo& tensor, const AclTensorStats& stat) +{ + DEBUG_FUNC_TRACE(); + static constexpr auto csvHeaderComm = "Input/Output,Index,Data Size,Data Type,Format,Shape"; + std::string dumpPath = tensor.dumpPath; + std::string csvHeader; + std::ofstream ofs; + DebuggerErrno ret; + + if (FileUtils::GetFileSuffix(dumpPath) != CSV_SUFFIX) { + dumpPath.append(".").append(CSV_SUFFIX); + } + + if (StandardizedDumpPath(dumpPath) != DebuggerErrno::OK) { + LOG_ERROR(DebuggerErrno::ERROR, "Failed to standardize path " + dumpPath + "."); + return DebuggerErrno::ERROR; + } + + if (FileUtils::IsPathExist(dumpPath)) { + if (!FileUtils::IsRegularFile(dumpPath)) { + LOG_ERROR(DebuggerErrno::ERROR_ILLEGAL_FILE_TYPE, dumpPath + " exists and is not a regular file."); + return 
DebuggerErrno::ERROR_ILLEGAL_FILE_TYPE; + } + ret = FileUtils::OpenFile(dumpPath, ofs, std::ofstream::app); + } else { + csvHeader = csvHeaderComm; + csvHeader.append(","); + csvHeader.append(stat.GetCsvHeader()); + ret = FileUtils::OpenFile(dumpPath, ofs); + } + + if (ret != DebuggerErrno::OK) { + LOG_ERROR(ret, tensor + ": Failed to open file " + dumpPath + "."); + return ret; + } + + /* map会根据键值自动排序,此处可以保障头和值的顺序,直接追加写即可 */ + if (!csvHeader.empty()) { + ofs << csvHeader << '\n'; + } + + ofs << tensor.inout << ','; + ofs << tensor.slot << ','; + ofs << tensor.dataSize << ','; + ofs << DataUtils::GetDTypeString(tensor.dtype) << ','; + ofs << DataUtils::GetFormatString(tensor.hostFmt) << ','; + ofs << DataUtils::GetShapeString(tensor.hostShape) << ','; + ofs << stat.GetCsvValue() << '\n'; + + if (ofs.fail()) { + LOG_ERROR(DebuggerErrno::ERROR_FAILED_TO_WRITE_FILE, tensor + ": Failed to write file " + dumpPath + "."); + ret = DebuggerErrno::ERROR_FAILED_TO_WRITE_FILE; + } + ofs.close(); + return ret; +} + +static AclTensorStats CalTensorSummary(AclTensorInfo& tensor, std::vector& opt) +{ + DEBUG_FUNC_TRACE(); + AclTensorStats stat; + if (ELE_IN_VECTOR(opt, DebuggerSummaryOption::MD5)) { + const uint8_t* data = tensor.transBuf.empty() ? 
tensor.aclData : tensor.transBuf.data(); + stat[DebuggerSummaryOption::MD5] = MathUtils::CalculateMD5(data, tensor.dataSize); + } + return stat; +} + +static DebuggerErrno DumpOneAclTensor(AclTensorInfo& tensor, std::vector& opt) +{ + DEBUG_FUNC_TRACE(); + if (tensor.dumpOriginData || !FileOperation::IsDtypeSupportByNpy(tensor.dtype)) { + return DumpOneAclTensorFmtBin(tensor); + } + + DebuggerErrno ret = ConvertFormatDeviceToHost(tensor); + if (ret != DebuggerErrno::OK) { + LOG_ERROR(ret, tensor + ": Failed to convert format to host."); + return ret; + } + + if (!opt.empty()) { + AclTensorStats stat = CalTensorSummary(tensor, opt); + return WriteOneTensorStatToDisk(tensor, stat); + } + + return DumpOneAclTensorFmtNpy(tensor); +} + +static void DumpAclTensor(std::vector::iterator begin, std::vector::iterator end, + std::vector opt) +{ + DEBUG_FUNC_TRACE(); + DebuggerErrno ret = DebuggerErrno::OK; + for (auto it = begin; it != end; it++) { + ret = DumpOneAclTensor(*it, opt); + if (ret != DebuggerErrno::OK) { + LOG_WARNING(ret, *it + ": Failed to dump to disk."); + break; + } + } + return; +} + +static DebuggerErrno DumpTensorDataToDisk(const std::string& dumpPath, AclDumpMsg::DumpData& dumpData, + const uint8_t* data, size_t dataLen, std::vector& opt) +{ + DEBUG_FUNC_TRACE(); + std::vector aclTensorInfos; + uint64_t offset = 0; + uint32_t slot = 0; + for (auto& tensor : dumpData.input()) { + aclTensorInfos.push_back(AclTensor::ParseAttrsFromDumpData(dumpPath, data + offset, tensor, "input", slot)); + offset += tensor.size(); + slot++; + } + + slot = 0; + for (auto& tensor : dumpData.output()) { + aclTensorInfos.push_back(AclTensor::ParseAttrsFromDumpData(dumpPath, data + offset, tensor, "output", slot)); + offset += tensor.size(); + slot++; + } + + if (aclTensorInfos.empty()) { + return DebuggerErrno::OK; + } + + if (offset > dataLen) { + LOG_ERROR(DebuggerErrno::ERROR_VALUE_OVERFLOW, dumpPath + ": offset overflow " + std::to_string(offset) + "/" + + 
std::to_string(dataLen) + "."); + return DebuggerErrno::ERROR_VALUE_OVERFLOW; + } + + /* 根据tensor的数据量,1MB以下串行,1MB以上多线程并发,最大并发量为 最大线程数/4 */ + constexpr int kMaxTensorSize = 1024 * 1024; + if (offset < kMaxTensorSize) { + DumpAclTensor(aclTensorInfos.begin(), aclTensorInfos.end(), opt); + } else { + size_t concurrent = std::max(1, std::thread::hardware_concurrency() / 4); + concurrent = std::min(concurrent, aclTensorInfos.size()); + size_t total = aclTensorInfos.size(); + size_t batch = MathUtils::DivCeil(total, concurrent); + size_t cur = 0; + std::vector threads; + std::vector::iterator begin = aclTensorInfos.begin(); + + threads.reserve(concurrent); + while (cur < total) { + threads.emplace_back(std::thread(&DumpAclTensor, begin + cur, begin + std::min(total, cur + batch), opt)); + cur += batch; + } + + for (auto& t : threads) { + if (t.joinable()) { + t.join(); + } + } + } + + DebuggerErrLevel err = ErrorInfosManager::GetTopErrLevelInDuration(); + return err >= DebuggerErrLevel::LEVEL_ERROR ? 
DebuggerErrno::ERROR : DebuggerErrno::OK; +} + +static DebuggerErrno DumpStatsDataToDisk(const std::string& dumpPath, const uint8_t* data, size_t dataLen) +{ + DEBUG_FUNC_TRACE(); + std::ofstream ofs; + DebuggerErrno ret; + + std::string path = dumpPath; + if (StandardizedDumpPath(path) != DebuggerErrno::OK) { + LOG_ERROR(DebuggerErrno::ERROR, "Failed to standardize path " + path + "."); + return DebuggerErrno::ERROR; + } + + if (FileUtils::IsPathExist(path)) { + if (!FileUtils::IsRegularFile(path)) { + LOG_ERROR(DebuggerErrno::ERROR_ILLEGAL_FILE_TYPE, path + " exists and is not a regular file."); + return DebuggerErrno::ERROR_ILLEGAL_FILE_TYPE; + } + ret = FileUtils::OpenFile(path, ofs, std::ofstream::app); + } else { + ret = FileUtils::OpenFile(path, ofs); + } + if (ret != DebuggerErrno::OK) { + LOG_ERROR(ret, "Failed to open file " + path + "."); + return ret; + } + + /* 统计量模式adump返回的数据就是csv格式的字符流,直接落盘即可 */ + ofs.write(reinterpret_cast(data), dataLen); + if (ofs.fail()) { + LOG_ERROR(DebuggerErrno::ERROR_FAILED_TO_WRITE_FILE, "Failed to write file " + path + "."); + ret = DebuggerErrno::ERROR_FAILED_TO_WRITE_FILE; + } + ofs.close(); + return ret; +} + +DebuggerErrno AclDumpDataProcessor::DumpToDisk() +{ + DEBUG_FUNC_TRACE(); + if (!completed) { + LOG_ERROR(DebuggerErrno::ERROR_INVALID_OPERATION, ToString() + ": Data is incomplete."); + return DebuggerErrno::ERROR_INVALID_OPERATION; + } + + uint8_t* msg = buffer.front()->data(); + AclDumpMsg::DumpData dumpData; + if (headerSegLen > 0) { + if (!dumpData.ParseFromArray(msg + headerSegOffset, headerSegLen)) { + LOG_ERROR(DebuggerErrno::ERROR_INVALID_FORMAT, ToString() + ": Failed to parse header."); + return DebuggerErrno::ERROR_INVALID_FORMAT; + } + } + + const std::string dataPath = GenDataPath(dumpPath); + DebuggerErrno ret; + if (FileUtils::GetFileName(dumpPath).find(debugFileSign) == 0 && + DebuggerConfig::GetInstance().GetOverflowCheckCfg() != nullptr) { + ret = DumpOpDebugDataToDisk(dataPath, dumpData, msg + 
dataSegOffset, dataSegLen); + } else if (DebuggerConfig::GetInstance().GetStatisticsCfg() != nullptr && + hostAnalysisOpts.empty()) { + ret = DumpStatsDataToDisk(dataPath, msg + dataSegOffset, dataSegLen); + } else { + ret = DumpTensorDataToDisk(dataPath, dumpData, msg + dataSegOffset, dataSegLen, hostAnalysisOpts); + } + + if (ret != DebuggerErrno::OK) { + LOG_ERROR(DebuggerErrno::ERROR_OPERATION_FAILED, ToString() + ": Failed to dump to disk."); + } + + return ret; +} + +} diff --git a/debug/accuracy_tools/msprobe/ccsrc/core/AclDumpDataProcessor.hpp b/debug/accuracy_tools/msprobe/ccsrc/core/AclDumpDataProcessor.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4ce2ab6e8c8709437791aba9699ec76184cb6761 --- /dev/null +++ b/debug/accuracy_tools/msprobe/ccsrc/core/AclDumpDataProcessor.hpp @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +#include "include/ErrorCode.hpp" +#include "base/DebuggerConfig.hpp" +#include "third_party/ACL/AclApi.hpp" + +namespace MindStudioDebugger { + +constexpr size_t kMaxDataLen = 4ULL * 1024 * 1024 * 1024; + +class AclDumpDataProcessor { +public: + AclDumpDataProcessor(const std::string& path, const std::vector& opts) : + dumpPath{path}, hostAnalysisOpts{opts} {}; + ~AclDumpDataProcessor(); + + bool IsCompleted() const {return completed;} + bool ErrorOccurred() const {return errorOccurred;} + DebuggerErrno PushData(const acldumpChunk *chunk); + DebuggerErrno DumpToDisk(); + std::string ToString() const; + +private: + DebuggerErrno ConcatenateData(); + + std::string dumpPath; + bool completed{false}; + bool errorOccurred{false}; + size_t totalLen{0}; + size_t headerSegOffset{0}; + size_t headerSegLen{0}; + size_t dataSegOffset{0}; + size_t dataSegLen{0}; + std::queue*> buffer; + std::vector hostAnalysisOpts; +}; + +} + diff --git a/debug/accuracy_tools/msprobe/ccsrc/core/AclDumper.cpp b/debug/accuracy_tools/msprobe/ccsrc/core/AclDumper.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5729153371a11613f962926b924f8ff2f1a074b3 --- /dev/null +++ b/debug/accuracy_tools/msprobe/ccsrc/core/AclDumper.cpp @@ -0,0 +1,472 @@ +/* + * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include "include/Macro.hpp" +#include "utils/FileUtils.hpp" +#include "utils/FileOperation.hpp" +#include "third_party/ACL/AclApi.hpp" +#include "base/Environment.hpp" +#include "base/ErrorInfos.hpp" +#include "AclDumper.hpp" + +namespace MindStudioDebugger { + +constexpr const char* kAclDumpScene = "dump_scene"; +constexpr const char* kSceneNormal = "normal"; +constexpr const char* kSceneException ="lite_exception"; + +constexpr const char* kAclDumpPath = "dump_path"; +constexpr const char* kAclDumpStep = "dump_step"; + +constexpr const char* kAclDumpList = "dump_list"; +constexpr const char* kAclDumpLayer = "layer"; +constexpr const char* kAclDumpModel = "model_name"; + +constexpr const char* kAclDumpMode = "dump_mode"; +constexpr const char* kAclModeInput = "input"; +constexpr const char* kAclModeOutput = "output"; +constexpr const char* kAclModeAll = "all"; + +constexpr const char* kAclDumpOpSwitch = "dump_op_switch"; +constexpr const char* kAclDumpDebug = "dump_debug"; +constexpr const char* kAclSwitchOn = "on"; +constexpr const char* kAclSwitchOff = "off"; + +constexpr const char* kAclDumpData = "dump_data"; +constexpr const char* kAclDumpTensor = "tensor"; +constexpr const char* kAclDumpStats = "stats"; + +constexpr const char* kAclDumpStatsOpt = "dump_stats"; +constexpr const char* kAclDumpStatsMax = "Max"; +constexpr const char* kAclDumpStatsMin = "Min"; +constexpr const char* kAclDumpStatsAvg = "Avg"; +constexpr const char* kAclDumpStatsNorn = "L2norm"; +constexpr const char* kAclDumpStatsNan = "Nan"; +constexpr const char* kAclDumpStatsNegInf = "Negative Inf"; +constexpr const char* kAclDumpStatsPosInf = "Positive Inf"; + +constexpr const size_t kProcessorNumMax = 100; + +inline std::string GenAclJsonPath(const std::string& dumpPath, uint32_t rank) +{ + return std::move(dumpPath + "/acl_dump_" + std::to_string(rank) + "." 
+ JSON_SUFFIX); +} + +/* 这里几个转换函数,映射和DebuggerConfigFieldMap类似,但是此处是对接ACL规则的,本质上不是一回事,因此单写一套 */ +static std::string GenDumpInoutString(DebuggerDataInOut mode) +{ + static std::map dumpModeMap = { + {DebuggerDataInOut::INOUT_INPUT, kAclModeInput}, + {DebuggerDataInOut::INOUT_OUTPUT, kAclModeOutput}, + {DebuggerDataInOut::INOUT_BOTH, kAclModeAll}, + }; + + auto it = dumpModeMap.find(mode); + if (it == dumpModeMap.end()) { + return kAclModeAll; + } else { + return it->second; + } +} + +static std::vector GenStatsOptions(const std::vector& options) +{ + static std::map summaryOptMap = { + {DebuggerSummaryOption::MAX, kAclDumpStatsMax}, + {DebuggerSummaryOption::MIN, kAclDumpStatsMin}, + {DebuggerSummaryOption::MEAN, kAclDumpStatsAvg}, + {DebuggerSummaryOption::L2NORM, kAclDumpStatsNorn}, + {DebuggerSummaryOption::NAN_CNT, kAclDumpStatsNan}, + {DebuggerSummaryOption::NEG_INF_CNT, kAclDumpStatsNegInf}, + {DebuggerSummaryOption::POS_INF_CNT, kAclDumpStatsPosInf}, + }; + + std::vector output; + for (auto& ele : options) { + auto it = summaryOptMap.find(ele); + if (it != summaryOptMap.end()) { + output.emplace_back(it->second); + } + } + return output; +} + +static std::string GenDumpPath(const std::string& path) +{ + std::string timestamp; + std::string dumpPath; + + time_t pTime; + time (&pTime); + char cTime[15]; + strftime(cTime, sizeof(cTime), "%Y%m%d%H%M%S", localtime(&pTime)); + timestamp = cTime; + + int32_t rankId = Environment::GetRankID(); + if (rankId < 0) { + rankId = 0; + } + + dumpPath = path + "/rank_" + std::to_string(rankId) + "/" + timestamp; + return dumpPath; +} + +bool AclDumper::IsIterNeedDump(uint32_t iterId) +{ + const DebuggerConfig& cfg = DebuggerConfig::GetInstance(); + if (!cfg.IsCfgLoaded()) { + return false; + } + + return cfg.IsStepHits(iterId); +} + +bool AclDumper::IsCfgEnableAclDumper() +{ + DebuggerConfig& cfg = DebuggerConfig::GetInstance(); + if (!cfg.IsCfgLoaded() || cfg.GetDebugLevel() != DebuggerLevel::L2) { + return false; + } + 
const std::vector& tasks = cfg.GetTaskList(); + return (ELE_IN_VECTOR(tasks, DebuggerTaskType::TASK_DUMP_TENSOR) || + ELE_IN_VECTOR(tasks, DebuggerTaskType::TASK_DUMP_STATISTICS) || + ELE_IN_VECTOR(tasks, DebuggerTaskType::TASK_OVERFLOW_CHECK)); +} + +std::string AclDumper::GetDumpPath(uint32_t curStep) const +{ + if (!initialized || foreDumpPath.empty()) { + return ""; + } + return foreDumpPath + "/step_" + std::to_string(curStep); +} + +DebuggerErrno AclDumper::AclDumpGenTensorJson(std::shared_ptr dumpTensorCfg, uint32_t rank, + uint32_t curStep, const char** kernels) +{ + DEBUG_FUNC_TRACE(); + nlohmann::json aclDumpJson; + bool needDump = AclDumper::IsIterNeedDump(curStep); + const std::string& dumpPath = DebuggerConfig::GetInstance().GetOutputPath(); + std::string fullDumpPath; + if (needDump) { + fullDumpPath = GetDumpPath(curStep); + FileUtils::CreateDir(fullDumpPath, true); + } else { + fullDumpPath = dumpPath; + } + + aclDumpJson[kAclDumpPath] = fullDumpPath; + aclDumpJson[kAclDumpMode] = GenDumpInoutString(dumpTensorCfg->inout); + aclDumpJson[kAclDumpData] = kAclDumpTensor; + aclDumpJson[kAclDumpList] = nlohmann::json::array(); + aclDumpJson[kAclDumpOpSwitch] = kAclSwitchOn; + + if (!needDump) { + /* 这里沿用mindspore框架的方案,用一个大数0x7FFFFFFF表示不需要dump;这个方案非常奇怪,后续可以看下能否优化 */ + aclDumpJson[kAclDumpStep] = std::to_string(INT_MAX); + } else { + std::vector kernelsList = dumpTensorCfg->matcher.GenRealKernelList(kernels); + if (!kernelsList.empty()) { + aclDumpJson[kAclDumpList].push_back({{kAclDumpLayer, kernelsList}}); + } + } + + nlohmann::json content = {{"dump", aclDumpJson}}; + LOG_DEBUG("AclDumpGenTensorJson dump json to " + GenAclJsonPath(dumpPath, rank)); + return FileOperation::DumpJson(GenAclJsonPath(dumpPath, rank), content); +} + +DebuggerErrno AclDumper::AclDumpGenStatJson(std::shared_ptr statisticsCfg, uint32_t rank, + uint32_t curStep, const char** kernels) +{ + DEBUG_FUNC_TRACE(); + nlohmann::json aclDumpJson; + bool needDump = 
AclDumper::IsIterNeedDump(curStep); + const std::string& dumpPath = DebuggerConfig::GetInstance().GetOutputPath(); + std::string fullDumpPath; + if (needDump) { + fullDumpPath = GetDumpPath(curStep); + FileUtils::CreateDir(fullDumpPath, true); + } else { + fullDumpPath = dumpPath; + } + + aclDumpJson[kAclDumpPath] = fullDumpPath; + aclDumpJson[kAclDumpMode] = GenDumpInoutString(statisticsCfg->inout); + aclDumpJson[kAclDumpList] = nlohmann::json::array(); + aclDumpJson[kAclDumpOpSwitch] = kAclSwitchOn; + + /* 如果需要host侧分析,下给acl的任务还是dump tensor,然后在host侧转成统计量 */ + if (!hostAnalysisOpt.empty()) { + aclDumpJson[kAclDumpData] = kAclDumpTensor; + } else { + aclDumpJson[kAclDumpData] = kAclDumpStats; + aclDumpJson[kAclDumpStatsOpt] = GenStatsOptions(statisticsCfg->summaryOption); + } + + if (!needDump) { + aclDumpJson[kAclDumpStep] = std::to_string(INT_MAX); + } else { + std::vector kernelsList = statisticsCfg->matcher.GenRealKernelList(kernels); + if (!kernelsList.empty()){ + aclDumpJson[kAclDumpList].push_back({{kAclDumpLayer, kernelsList}}); + } + } + + nlohmann::json content = {{"dump", aclDumpJson}}; + LOG_DEBUG("AclDumpGenStatJson dump json to " + GenAclJsonPath(dumpPath, rank)); + return FileOperation::DumpJson(GenAclJsonPath(dumpPath, rank), content); +} + +DebuggerErrno AclDumper::AclDumpGenOverflowJson(std::shared_ptr overflowCfg, uint32_t rank, + uint32_t curStep) +{ + DEBUG_FUNC_TRACE(); + nlohmann::json aclDumpJson; + bool needDump = AclDumper::IsIterNeedDump(curStep); + const std::string& dumpPath = DebuggerConfig::GetInstance().GetOutputPath(); + std::string fullDumpPath; + if (needDump) { + fullDumpPath = GetDumpPath(curStep); + FileUtils::CreateDir(fullDumpPath, true); + } else { + fullDumpPath = dumpPath; + } + + DebuggerErrno ret = FileUtils::CreateDir(fullDumpPath, true); + if (ret != DebuggerErrno::OK) { + return ret; + } + + aclDumpJson[kAclDumpPath] = fullDumpPath; + aclDumpJson[kAclDumpDebug] = kAclSwitchOn; + if (!needDump) { + 
aclDumpJson[kAclDumpStep] = std::to_string(INT_MAX); + } + nlohmann::json content = {{"dump", aclDumpJson}}; + LOG_DEBUG("AclDumpGenOverflowJson dump json to " + GenAclJsonPath(dumpPath, rank)); + return FileOperation::DumpJson(GenAclJsonPath(dumpPath, rank), content); +} + +static DebuggerErrno InitAcl() +{ + DEBUG_FUNC_TRACE(); + nlohmann::json aclInitJson; + std::string aclInitJsonPath = FileUtils::GetAbsPath("./aclinit.json"); + if (aclInitJsonPath.empty()) { + LOG_ERROR(DebuggerErrno::ERROR_CANNOT_PARSE_PATH, "Failed to get full path of aclinit.json."); + return DebuggerErrno::ERROR_CANNOT_PARSE_PATH; + } + + constexpr const char* AclErrMsgOn = "1"; + aclInitJson["err_msg_mode"] = AclErrMsgOn; + LOG_DEBUG("InitAcl dump json to " + aclInitJsonPath); + FileOperation::DumpJson(aclInitJsonPath, aclInitJson); + aclError ret; + try { + ret = CALL_ACL_API(aclInit, aclInitJsonPath.c_str()); + } catch (const std::runtime_error& e) { + LOG_ERROR(DebuggerErrno::ERROR_DEPENDENCY_NOT_FIND, "Cannot find function aclInit."); + return DebuggerErrno::ERROR_DEPENDENCY_NOT_FIND; + } + + /* 此处框架可能会初始化,如果报重复初始化错误,忽略即可 */ + if (ret != ACL_SUCCESS && ret != ACL_ERROR_REPEAT_INITIALIZE) { + LOG_ERROR(DebuggerErrno::ERROR_EXTERNAL_API_ERROR, "Failed to init acl(" + std::to_string(ret) + ")."); + return DebuggerErrno::ERROR_EXTERNAL_API_ERROR; + } + + LOG_DEBUG("InitAcl succeed"); + return DebuggerErrno::OK; +} + +int32_t AclDumpCallBack(const acldumpChunk* chunk, int32_t len) +{ + AclDumper& dumper = AclDumper::GetInstance(); + dumper.OnAclDumpCallBack(chunk, len); + return 0; +} + +DebuggerErrno AclDumper::Initialize() +{ + DEBUG_FUNC_TRACE(); + DebuggerErrno ret; + aclError aclRet; + const DebuggerConfig& cfg = DebuggerConfig::GetInstance(); + std::shared_ptr statsCfg = cfg.GetStatisticsCfg(); + std::shared_ptr tensorCfg = cfg.GetDumpTensorCfg(); + std::shared_ptr overflowCheckCfg = cfg.GetOverflowCheckCfg(); + + ret = InitAcl(); + if (ret != DebuggerErrno::OK) { + LOG_ERROR(ret, 
"Failed to call InitAcl."); + return ret; + } + + foreDumpPath = GenDumpPath(cfg.GetOutputPath()); + + bool needCallback = false; + if (statsCfg != nullptr) { + if (ELE_IN_VECTOR(statsCfg->summaryOption, DebuggerSummaryOption::MD5)) { + hostAnalysisOpt = {DebuggerSummaryOption::MD5}; + } + needCallback = true; + } + + if (tensorCfg != nullptr && tensorCfg->fileFormat == DebuggerDumpFileFormat::FILE_FORMAT_NPY) { + needCallback = true; + } + + if (overflowCheckCfg != nullptr) { + needCallback = true; + } + + if (needCallback) { + LOG_DEBUG("Register acl dump callback."); + /* 上面aclInit成功,此处认为acldumpRegCallback符号也存在,不会抛出异常 */ + aclRet = CALL_ACL_API(acldumpRegCallback, AclDumpCallBack, 0); + if (aclRet != ACL_SUCCESS) { + LOG_ERROR(DebuggerErrno::ERROR_EXTERNAL_API_ERROR, + "Failed to register acldump callback(" + std::to_string(aclRet) + ")."); + return DebuggerErrno::ERROR_EXTERNAL_API_ERROR; + } + } + LOG_DEBUG("AclDumper::Initialize succeed"); + return DebuggerErrno::OK; +} + +void AclDumper::OnAclDumpCallBack(const acldumpChunk* chunk, int32_t len) +{ + DEBUG_FUNC_TRACE(); + std::string dumpPath = FileUtils::GetAbsPath(chunk->fileName); + auto it = dataProcessors.find(dumpPath); + if (it == dataProcessors.end()) { + if (dataProcessors.size() > kProcessorNumMax) { + LOG_ERROR(DebuggerErrno::ERROR_BUFFER_OVERFLOW, "The number of processors has reached the upper limit."); + return; + } + dataProcessors[dumpPath] = std::make_shared(dumpPath, hostAnalysisOpt); + } + + std::shared_ptr processor = dataProcessors[dumpPath]; + DebuggerErrno ret = processor->PushData(chunk); + if (ret != DebuggerErrno::OK) { + LOG_ERROR(ret, "Failed to push data " + dumpPath + "."); + } + + LOG_DEBUG("Acl dump data processor " + dumpPath + " receive data, len=" + + std::to_string(chunk->bufLen)); + + if (!processor->IsCompleted()) { + return; + } + + if (!processor->ErrorOccurred()) { + ret = processor->DumpToDisk(); + } else { + ret = DebuggerErrno::ERROR; + } + + 
dataProcessors.erase(dumpPath); + if (ret != DebuggerErrno::OK) { + LOG_ERROR(ret, "Failed to write data " + dumpPath + " to disk."); + } + return; +} + +void AclDumper::SetDump(uint32_t rank, uint32_t curStep, ExtArgs& args) +{ + DEBUG_FUNC_TRACE(); + DebuggerErrno ret; + DebuggerConfig& cfg = DebuggerConfig::GetInstance(); + if (aclDumpHasSet || !cfg.IsRankHits(rank) || !IsCfgEnableAclDumper()) { + return; + } + + if (!initialized) { + ret = Initialize(); + if(ret != DebuggerErrno::OK) { + LOG_ERROR(ret, "AclDumper initialization failed."); + return; + } + initialized = true; + } + + /* 和acl dump相关的三个任务 */ + std::shared_ptr dumpTensorCfg = cfg.GetDumpTensorCfg(); + std::shared_ptr statisticsCfg = cfg.GetStatisticsCfg(); + std::shared_ptr overflowCheckCfg = cfg.GetOverflowCheckCfg(); + + /* 当前只能三选一 */ + const char** kernels = GetExtArgs(args, MindStudioExtensionArgs::ALL_KERNEL_NAMES); + if (dumpTensorCfg != nullptr) { + ret = AclDumpGenTensorJson(dumpTensorCfg, rank, curStep, kernels); + } else if (statisticsCfg != nullptr) { + ret = AclDumpGenStatJson(statisticsCfg, rank, curStep, kernels); + } else if (overflowCheckCfg != nullptr) { + ret = AclDumpGenOverflowJson(overflowCheckCfg, rank, curStep); + } + + if (ret != DebuggerErrno::OK) { + LOG_ERROR(ret, "AclDumper failed to generate cfg file."); + return; + } + + aclError aclRet; + aclRet = CALL_ACL_API(aclmdlInitDump); + if (aclRet != ACL_SUCCESS) { + LOG_ERROR(DebuggerErrno::ERROR_EXTERNAL_API_ERROR, + "Failed to init acldump(" + std::to_string(aclRet) + ")."); + return; + } + + const std::string& dumpPath = DebuggerConfig::GetInstance().GetOutputPath(); + aclRet = CALL_ACL_API(aclmdlSetDump, GenAclJsonPath(dumpPath, rank).c_str()); + if (aclRet != ACL_SUCCESS) { + LOG_ERROR(DebuggerErrno::ERROR_EXTERNAL_API_ERROR, + "Failed to enable acldump(" + std::to_string(aclRet) + ")."); + return; + } + + aclDumpHasSet = true; + return; +} + +void AclDumper::FinalizeDump(ExtArgs& args) +{ + DEBUG_FUNC_TRACE(); + if 
(!aclDumpHasSet) { + return; + } + + aclError aclRet = CALL_ACL_API(aclmdlFinalizeDump); + if (aclRet != ACL_SUCCESS) { + LOG_ERROR(DebuggerErrno::ERROR_EXTERNAL_API_ERROR, + "Failed to finalize acldump(" + std::to_string(aclRet) + ")."); + + } + + aclDumpHasSet = false; +} + + +} \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/ccsrc/core/AclDumper.hpp b/debug/accuracy_tools/msprobe/ccsrc/core/AclDumper.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ff1a40ae752bfc45ddcf14817d64e3df8d8f83e8 --- /dev/null +++ b/debug/accuracy_tools/msprobe/ccsrc/core/AclDumper.hpp @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include + +#include "include/ExtArgs.hpp" +#include "base/DebuggerConfig.hpp" +#include "AclDumpDataProcessor.hpp" + +namespace MindStudioDebugger { + +class AclDumper { +public: + static AclDumper& GetInstance() { + static AclDumper instance_; + return instance_; + } + + static bool IsIterNeedDump(uint32_t iterId); + static bool IsCfgEnableAclDumper(); + + void SetDump(uint32_t rank, uint32_t curStep, ExtArgs& args); + void FinalizeDump(ExtArgs& args); + void OnAclDumpCallBack(const acldumpChunk* chunk, int32_t len); + + std::string GetDumpPath(uint32_t curStep) const; + +private: + AclDumper() = default; + ~AclDumper() = default; + explicit AclDumper(const AclDumper &obj) = delete; + AclDumper& operator=(const AclDumper &obj) = delete; + explicit AclDumper(AclDumper &&obj) = delete; + AclDumper& operator=(AclDumper &&obj) = delete; + + DebuggerErrno Initialize(); + DebuggerErrno AclDumpGenTensorJson(std::shared_ptr dumpTensorCfg, uint32_t rank, + uint32_t curStep, const char** kernels); + DebuggerErrno AclDumpGenStatJson(std::shared_ptr statisticsCfg, uint32_t rank, + uint32_t curStep, const char** kernels); + DebuggerErrno AclDumpGenOverflowJson(std::shared_ptr overflowCfg, uint32_t rank, + uint32_t curStep); + bool initialized{false}; + bool aclDumpHasSet{false}; + std::string foreDumpPath; + std::vector hostAnalysisOpt; + std::map> dataProcessors; +}; + +} \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/ccsrc/core/AclTensor.cpp b/debug/accuracy_tools/msprobe/ccsrc/core/AclTensor.cpp new file mode 100644 index 0000000000000000000000000000000000000000..45adff4962156f87f52c17166bc3b381f07f2978 --- /dev/null +++ b/debug/accuracy_tools/msprobe/ccsrc/core/AclTensor.cpp @@ -0,0 +1,798 @@ +/* + * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "utils/DataUtils.hpp" +#include "utils/MathUtils.hpp" +#include "base/ErrorInfos.hpp" +#include "AclTensor.hpp" + +namespace MindStudioDebugger { +namespace AclDumpMsg = toolkit::dumpdata; +namespace AclTensor { + +using namespace MathUtils; + +constexpr int64_t kCubeSize = 16; +constexpr int64_t kCube16 = kCubeSize; +constexpr int64_t kCube32 = 32; +constexpr int64_t kCube64 = 64; +constexpr int64_t kCubeSize_C04 = 4; + +constexpr size_t hwH = 1; +constexpr size_t hwW = 2; +constexpr size_t fnzW1 = 4; +constexpr size_t fnzH1 = 3; +constexpr size_t fnzH0 = 2; +constexpr size_t fnzW0 = 1; +constexpr size_t fzN0 = 1; +constexpr size_t fzNi = 2; +constexpr size_t fzC0 = 3; + +using TensorTransFunc = DebuggerErrno (*)(AclTensorInfo &); + +static DebuggerErrno FRAC_Z_TO_NCHW(AclTensorInfo& tensor); +static DebuggerErrno FRAC_NZ_TO_NCHW(AclTensorInfo& tensor); +static DebuggerErrno NC1HWC0_TO_NCHW(AclTensorInfo& tensor); +static DebuggerErrno NDC1HWC0_TO_NCDHW(AclTensorInfo& tensor); +static DebuggerErrno C1HWNCoC0_TO_NCHW(AclTensorInfo& tensor); +static DebuggerErrno NC1HWC0_C04_TO_NCHW(AclTensorInfo& tensor); +static DebuggerErrno FRAC_Z3D_TO_NCDHW(AclTensorInfo& tensor); + +const static std::unordered_set kSupportedDtypes = { + AclDtype::DT_UNDEFINED, + AclDtype::DT_FLOAT, + AclDtype::DT_FLOAT16, + AclDtype::DT_INT8, + 
AclDtype::DT_UINT8, + AclDtype::DT_INT16, + AclDtype::DT_UINT16, + AclDtype::DT_INT32, + AclDtype::DT_INT64, + AclDtype::DT_UINT32, + AclDtype::DT_UINT64, + AclDtype::DT_BOOL, + AclDtype::DT_DOUBLE, + AclDtype::DT_BF16, + AclDtype::DT_COMPLEX64, + AclDtype::DT_COMPLEX128, +}; + +const static std::unordered_set kSupportedFormat = { + AclFormat::FORMAT_NCHW, + AclFormat::FORMAT_NHWC, + AclFormat::FORMAT_ND, + AclFormat::FORMAT_NC1HWC0, + AclFormat::FORMAT_FRACTAL_Z, + AclFormat::FORMAT_NC1HWC0_C04, + AclFormat::FORMAT_FRACTAL_Z_C04, + AclFormat::FORMAT_NC1KHKWHWC0, + AclFormat::FORMAT_HWCN, + AclFormat::FORMAT_NDHWC, + AclFormat::FORMAT_NCDHW, + AclFormat::FORMAT_DHWCN, + AclFormat::FORMAT_DHWNC, + AclFormat::FORMAT_NDC1HWC0, + AclFormat::FORMAT_FRACTAL_Z_3D, + AclFormat::FORMAT_C1HWNCoC0, + AclFormat::FORMAT_FRACTAL_NZ, + AclFormat::FORMAT_FRACTAL_ZN_LSTM, + AclFormat::FORMAT_NCL, +}; + +const static std::map, TensorTransFunc> formatTransFuncMap = { + /* {{from, to}, function} */ + {{AclFormat::FORMAT_HWCN, AclFormat::FORMAT_NCHW}, nullptr}, + {{AclFormat::FORMAT_NHWC, AclFormat::FORMAT_NCHW}, nullptr}, + {{AclFormat::FORMAT_FRACTAL_Z, AclFormat::FORMAT_NCHW}, FRAC_Z_TO_NCHW}, + {{AclFormat::FORMAT_FRACTAL_NZ, AclFormat::FORMAT_NCHW}, FRAC_NZ_TO_NCHW}, + {{AclFormat::FORMAT_NC1HWC0, AclFormat::FORMAT_NCHW}, NC1HWC0_TO_NCHW}, + {{AclFormat::FORMAT_NDC1HWC0, AclFormat::FORMAT_NCHW}, NDC1HWC0_TO_NCDHW}, + {{AclFormat::FORMAT_C1HWNCoC0, AclFormat::FORMAT_NCHW}, C1HWNCoC0_TO_NCHW}, + {{AclFormat::FORMAT_NC1HWC0_C04, AclFormat::FORMAT_NCHW}, NC1HWC0_C04_TO_NCHW}, + {{AclFormat::FORMAT_FRACTAL_Z_3D, AclFormat::FORMAT_NCHW}, FRAC_Z3D_TO_NCDHW}, +}; + +const static std::unordered_map dtypeTransMap = { + {AclDumpMsg::OutputDataType::DT_UNDEFINED, AclDtype::DT_UNDEFINED}, + {AclDumpMsg::OutputDataType::DT_FLOAT, AclDtype::DT_FLOAT}, + {AclDumpMsg::OutputDataType::DT_FLOAT16, AclDtype::DT_FLOAT16}, + {AclDumpMsg::OutputDataType::DT_INT8, AclDtype::DT_INT8}, + 
{AclDumpMsg::OutputDataType::DT_UINT8, AclDtype::DT_UINT8}, + {AclDumpMsg::OutputDataType::DT_INT16, AclDtype::DT_INT16}, + {AclDumpMsg::OutputDataType::DT_UINT16, AclDtype::DT_UINT16}, + {AclDumpMsg::OutputDataType::DT_INT32, AclDtype::DT_INT32}, + {AclDumpMsg::OutputDataType::DT_INT64, AclDtype::DT_INT64}, + {AclDumpMsg::OutputDataType::DT_UINT32, AclDtype::DT_UINT32}, + {AclDumpMsg::OutputDataType::DT_UINT64, AclDtype::DT_UINT64}, + {AclDumpMsg::OutputDataType::DT_BOOL, AclDtype::DT_BOOL}, + {AclDumpMsg::OutputDataType::DT_DOUBLE, AclDtype::DT_DOUBLE}, + {AclDumpMsg::OutputDataType::DT_STRING, AclDtype::DT_STRING}, + {AclDumpMsg::OutputDataType::DT_DUAL_SUB_INT8, AclDtype::DT_DUAL_SUB_INT8}, + {AclDumpMsg::OutputDataType::DT_DUAL_SUB_UINT8, AclDtype::DT_DUAL_SUB_UINT8}, + {AclDumpMsg::OutputDataType::DT_COMPLEX64, AclDtype::DT_COMPLEX64}, + {AclDumpMsg::OutputDataType::DT_COMPLEX128, AclDtype::DT_COMPLEX128}, + {AclDumpMsg::OutputDataType::DT_QINT8, AclDtype::DT_QINT8}, + {AclDumpMsg::OutputDataType::DT_QINT16, AclDtype::DT_QINT16}, + {AclDumpMsg::OutputDataType::DT_QINT32, AclDtype::DT_QINT32}, + {AclDumpMsg::OutputDataType::DT_QUINT8, AclDtype::DT_QUINT8}, + {AclDumpMsg::OutputDataType::DT_QUINT16, AclDtype::DT_QUINT16}, + {AclDumpMsg::OutputDataType::DT_RESOURCE, AclDtype::DT_RESOURCE}, + {AclDumpMsg::OutputDataType::DT_STRING_REF, AclDtype::DT_STRING_REF}, + {AclDumpMsg::OutputDataType::DT_DUAL, AclDtype::DT_DUAL}, + {AclDumpMsg::OutputDataType::DT_VARIANT, AclDtype::DT_VARIANT}, + {AclDumpMsg::OutputDataType::DT_BF16, AclDtype::DT_BF16}, + {AclDumpMsg::OutputDataType::DT_INT4, AclDtype::DT_INT4}, + {AclDumpMsg::OutputDataType::DT_UINT1, AclDtype::DT_UINT1}, + {AclDumpMsg::OutputDataType::DT_INT2, AclDtype::DT_INT2}, + {AclDumpMsg::OutputDataType::DT_UINT2, AclDtype::DT_UINT2}, +}; + +const static std::unordered_map formatTransMap = { + {AclDumpMsg::OutputFormat::FORMAT_NCHW, AclFormat::FORMAT_NCHW}, + {AclDumpMsg::OutputFormat::FORMAT_NHWC, 
AclFormat::FORMAT_NHWC}, + {AclDumpMsg::OutputFormat::FORMAT_ND, AclFormat::FORMAT_ND}, + {AclDumpMsg::OutputFormat::FORMAT_NC1HWC0, AclFormat::FORMAT_NC1HWC0}, + {AclDumpMsg::OutputFormat::FORMAT_FRACTAL_Z, AclFormat::FORMAT_FRACTAL_Z}, + {AclDumpMsg::OutputFormat::FORMAT_NC1C0HWPAD, AclFormat::FORMAT_NC1C0HWPAD}, + {AclDumpMsg::OutputFormat::FORMAT_NHWC1C0, AclFormat::FORMAT_NHWC1C0}, + {AclDumpMsg::OutputFormat::FORMAT_FSR_NCHW, AclFormat::FORMAT_FSR_NCHW}, + {AclDumpMsg::OutputFormat::FORMAT_FRACTAL_DECONV, AclFormat::FORMAT_FRACTAL_DECONV}, + {AclDumpMsg::OutputFormat::FORMAT_C1HWNC0, AclFormat::FORMAT_C1HWNC0}, + {AclDumpMsg::OutputFormat::FORMAT_FRACTAL_DECONV_TRANSPOSE, AclFormat::FORMAT_FRACTAL_DECONV_TRANSPOSE}, + {AclDumpMsg::OutputFormat::FORMAT_FRACTAL_DECONV_SP_STRIDE_TRANS, AclFormat::FORMAT_FRACTAL_DECONV_SP_STRIDE_TRANS}, + {AclDumpMsg::OutputFormat::FORMAT_NC1HWC0_C04, AclFormat::FORMAT_NC1HWC0_C04}, + {AclDumpMsg::OutputFormat::FORMAT_FRACTAL_Z_C04, AclFormat::FORMAT_FRACTAL_Z_C04}, + {AclDumpMsg::OutputFormat::FORMAT_CHWN, AclFormat::FORMAT_CHWN}, + {AclDumpMsg::OutputFormat::FORMAT_FRACTAL_DECONV_SP_STRIDE8_TRANS, AclFormat::FORMAT_FRACTAL_DECONV_SP_STRIDE8_TRANS}, + {AclDumpMsg::OutputFormat::FORMAT_HWCN, AclFormat::FORMAT_HWCN}, + {AclDumpMsg::OutputFormat::FORMAT_NC1KHKWHWC0, AclFormat::FORMAT_NC1KHKWHWC0}, + {AclDumpMsg::OutputFormat::FORMAT_BN_WEIGHT, AclFormat::FORMAT_BN_WEIGHT}, + {AclDumpMsg::OutputFormat::FORMAT_FILTER_HWCK, AclFormat::FORMAT_FILTER_HWCK}, + {AclDumpMsg::OutputFormat::FORMAT_HASHTABLE_LOOKUP_LOOKUPS, AclFormat::FORMAT_HASHTABLE_LOOKUP_LOOKUPS}, + {AclDumpMsg::OutputFormat::FORMAT_HASHTABLE_LOOKUP_KEYS, AclFormat::FORMAT_HASHTABLE_LOOKUP_KEYS}, + {AclDumpMsg::OutputFormat::FORMAT_HASHTABLE_LOOKUP_VALUE, AclFormat::FORMAT_HASHTABLE_LOOKUP_VALUE}, + {AclDumpMsg::OutputFormat::FORMAT_HASHTABLE_LOOKUP_OUTPUT, AclFormat::FORMAT_HASHTABLE_LOOKUP_OUTPUT}, + {AclDumpMsg::OutputFormat::FORMAT_HASHTABLE_LOOKUP_HITS, 
AclFormat::FORMAT_HASHTABLE_LOOKUP_HITS}, + {AclDumpMsg::OutputFormat::FORMAT_C1HWNCoC0, AclFormat::FORMAT_C1HWNCoC0}, + {AclDumpMsg::OutputFormat::FORMAT_MD, AclFormat::FORMAT_MD}, + {AclDumpMsg::OutputFormat::FORMAT_NDHWC, AclFormat::FORMAT_NDHWC}, + {AclDumpMsg::OutputFormat::FORMAT_FRACTAL_ZZ, AclFormat::FORMAT_FRACTAL_ZZ}, + {AclDumpMsg::OutputFormat::FORMAT_FRACTAL_NZ, AclFormat::FORMAT_FRACTAL_NZ}, + {AclDumpMsg::OutputFormat::FORMAT_NCDHW, AclFormat::FORMAT_NCDHW}, + {AclDumpMsg::OutputFormat::FORMAT_DHWCN, AclFormat::FORMAT_DHWCN}, + {AclDumpMsg::OutputFormat::FORMAT_NDC1HWC0, AclFormat::FORMAT_NDC1HWC0}, + {AclDumpMsg::OutputFormat::FORMAT_FRACTAL_Z_3D, AclFormat::FORMAT_FRACTAL_Z_3D}, + {AclDumpMsg::OutputFormat::FORMAT_CN, AclFormat::FORMAT_CN}, + {AclDumpMsg::OutputFormat::FORMAT_NC, AclFormat::FORMAT_NC}, + {AclDumpMsg::OutputFormat::FORMAT_DHWNC, AclFormat::FORMAT_DHWNC}, + {AclDumpMsg::OutputFormat::FORMAT_FRACTAL_Z_3D_TRANSPOSE, AclFormat::FORMAT_FRACTAL_Z_3D_TRANSPOSE}, + {AclDumpMsg::OutputFormat::FORMAT_FRACTAL_ZN_LSTM, AclFormat::FORMAT_FRACTAL_ZN_LSTM}, + {AclDumpMsg::OutputFormat::FORMAT_FRACTAL_Z_G, AclFormat::FORMAT_FRACTAL_Z_G}, + {AclDumpMsg::OutputFormat::FORMAT_RESERVED, AclFormat::FORMAT_RESERVED}, + {AclDumpMsg::OutputFormat::FORMAT_ALL, AclFormat::FORMAT_ALL}, + {AclDumpMsg::OutputFormat::FORMAT_NULL, AclFormat::FORMAT_NULL}, + {AclDumpMsg::OutputFormat::FORMAT_ND_RNN_BIAS, AclFormat::FORMAT_ND_RNN_BIAS}, + {AclDumpMsg::OutputFormat::FORMAT_FRACTAL_ZN_RNN, AclFormat::FORMAT_FRACTAL_ZN_RNN}, + {AclDumpMsg::OutputFormat::FORMAT_YUV, AclFormat::FORMAT_YUV}, + {AclDumpMsg::OutputFormat::FORMAT_YUV_A, AclFormat::FORMAT_YUV_A}, + {AclDumpMsg::OutputFormat::FORMAT_NCL, AclFormat::FORMAT_NCL}, + {AclDumpMsg::OutputFormat::FORMAT_FRACTAL_Z_WINO, AclFormat::FORMAT_FRACTAL_Z_WINO}, + {AclDumpMsg::OutputFormat::FORMAT_C1HWC0, AclFormat::FORMAT_C1HWC0}, +}; + +enum kAxis4D : int { kN = 0, kC, kH, kW, kNchwDims }; +enum Axis5D : int { + N_ncdhw = 
0, + C_ncdhw, + D_ncdhw, + H_ncdhw, + W_ncdhw, + kNcdhw, + N_ndc1hwc0 = 0, + D_ndc1hwc0, + C1_ndc1hwc0, + H_ndc1hwc0, + W_ndc1hwc0, + C0_ndc1hwc0 +}; + +static inline AclDtype transAclDtype2MS(AclDumpMsg::OutputDataType dt) +{ + auto it = dtypeTransMap.find(dt); + if (it != dtypeTransMap.end()) { + return it->second; + } + return AclDtype::DT_MAX; +} + +static inline AclFormat transAclFormat2MS(AclDumpMsg::OutputFormat fmt) +{ + auto it = formatTransMap.find(fmt); + if (it != formatTransMap.end()) { + return it->second; + } + return AclFormat::FORMAT_MAX; +} + +static size_t EleNumOfTensor(const AclTensorInfo& tensor, bool host = true) { + size_t num = 1; + const AclShape& shape = host ? tensor.hostShape : tensor.deviceShape; + for (auto dim : shape) { + if (dim <= 0) { + /* For dynamic shape which has negative dimensions, data size should be zero. */ + return 0; + } + + if (SIZE_MAX / dim < num) { + throw std::out_of_range(tensor + ": Count of element over size_t."); + } + num *= static_cast(dim); + } + return num; +} + +static inline size_t SizeOfAclDType(const AclTensorInfo& tensor) { + return DataUtils::SizeOfDType(tensor.dtype); +} + +static inline size_t SizeOfAclDType(const AclDtype& dtype) { + return DataUtils::SizeOfDType(dtype); +} + +size_t SizeOfTensor(const AclTensorInfo& tensor, bool host) { + size_t num = EleNumOfTensor(tensor, host); + size_t eleSize = SizeOfAclDType(tensor); + if (num != 0 && SIZE_MAX / num < eleSize) { + throw std::runtime_error(tensor + ": Size over size_t."); + } + return num * eleSize; +} + +static inline int64_t GetCubeSizeByType(const AclDtype& dtype) { + if (dtype == AclDtype::DT_UINT8 || dtype == AclDtype::DT_INT8) { + return kCube32; + } + + if (dtype == AclDtype::DT_INT4) { + return kCube64; + } + + return kCube16; +} + +static inline void AssertDim(const AclShape& shape, size_t dim) +{ + if (shape.size() != dim) { + throw std::runtime_error("Dimension of tensor is expected to be " + std::to_string(dim) + + ", but 
actually " + std::to_string(shape.size()) +"."); + } +} + +static inline void AssertConsis(const AclTensorInfo& tensor) +{ + if (EleNumOfTensor(tensor, false) * SizeOfAclDType(tensor) != tensor.dataSize) { + throw std::runtime_error(tensor + ": The internal data of Tensor is inconsistent."); + } +} + +template +AclTensorInfo ParseAttrsFromDumpData(const std::string& dumpPath, const uint8_t* data, const T& tensor, + const std::string& io, uint32_t slot) +{ + AclDumpMsg::OutputDataType oriDtype = tensor.data_type(); + AclDtype dtype = transAclDtype2MS(oriDtype); + bool dumpOriginData = false; + size_t dataSize = static_cast(tensor.size()); + if (dtype == AclDtype::DT_MAX || kSupportedDtypes.find(dtype) == kSupportedDtypes.end()) { + dumpOriginData = true; + } + + AclDumpMsg::OutputFormat oriDeviceFmt = tensor.format(); + AclFormat dFmt = transAclFormat2MS(oriDeviceFmt); + if (dFmt == AclFormat::FORMAT_MAX || kSupportedFormat.find(dFmt) == kSupportedFormat.end()) { + dumpOriginData = true; + } + + AclShape dShape; + std::transform(tensor.shape().dim().begin(), tensor.shape().dim().end(), std::back_inserter(dShape), + DataUtils::SizeToS64); + AclShape hShape; + for (auto d : tensor.original_shape().dim()) { + if (d > INT64_MAX) { + LOG_WARNING(DebuggerErrno::ERROR_VALUE_OVERFLOW, + "The value(" + std::to_string(d) + ") exceeds the max value of int64_t, " + + "this maybe caused by the unfixed shape operaters."); + hShape.clear(); + break; + } + hShape.push_back(DataUtils::SizeToS64(d)); + } + + // convert format to host format. It can be either NCHW or ND (non 4-dimemsions). 
+ AclFormat hFmt; + if (hShape.size() == kDim4) { + hFmt = AclFormat::FORMAT_NCHW; + } else if (hShape.empty()) { + hFmt = dFmt; + hShape = dShape; + LOG_WARNING(DebuggerErrno::NONE, + "Tensor(" + dumpPath + "): The host shape is empty, use device shape as host shape."); + } else { + hFmt = AclFormat::FORMAT_ND; + } + + int32_t subFormat = tensor.sub_format(); + return AclTensorInfo{dumpPath, data, dtype, dFmt, hFmt, dShape, hShape, dataSize, subFormat, io, slot, dumpOriginData}; +} + +template AclTensorInfo ParseAttrsFromDumpData( + const std::string& dumpPath, const uint8_t* data, const AclDumpMsg::OpOutput& tensor, const std::string& io, + uint32_t slot); +template AclTensorInfo ParseAttrsFromDumpData( + const std::string& dumpPath, const uint8_t* data, const AclDumpMsg::OpInput& tensor, const std::string& io, + uint32_t slot); + +static inline void AllocTensorTransBuf(AclTensorInfo& tensor) +{ + tensor.transBuf.resize(SizeOfTensor(tensor)); +} + +static DebuggerErrno FRAC_Z_TO_NCHW_WITH_GROUPS(AclTensorInfo& tensor) +{ + AssertDim(tensor.hostShape, kDim4); + AssertConsis(tensor); + AllocTensorTransBuf(tensor); + + auto nDim = tensor.hostShape[kN]; + auto cDim = tensor.hostShape[kC]; + auto hDim = tensor.hostShape[kH]; + auto wDim = tensor.hostShape[kW]; + auto groups = tensor.subFormat; + auto cinOri = cDim; + auto coutOri = nDim / groups; + + if (cinOri == 0 || coutOri == 0) { + LOG_WARNING(DebuggerErrno::ERROR_INVALID_VALUE, tensor + ": cin/cout ori must not equal to 0."); + return DebuggerErrno::ERROR_INVALID_VALUE; + } + + auto cubeK = GetCubeSizeByType(tensor.dtype); + auto eMult = std::min(Lcm(Lcm(cinOri, cubeK) / cinOri, Lcm(coutOri, kCubeSize) / cinOri), + static_cast(groups)); + if (eMult == 0) { + LOG_WARNING(DebuggerErrno::ERROR_INVALID_VALUE, + tensor + ": The value of e_mult should be greater than 0."); + return DebuggerErrno::ERROR_INVALID_VALUE; + } + + auto cinOpt = AlignCeil(eMult * cinOri, cubeK); + auto coutOpt = AlignCeil(eMult * coutOri, 
kCubeSize); + auto c1Dim = cinOpt / cubeK; + const uint8_t* src = tensor.aclData; + uint8_t* dst = tensor.transBuf.data(); + auto dtypeSize = SizeOfAclDType(tensor); + + for (int64_t g = 0; g < groups; ++g) { + for (int64_t c = 0; c < cDim; ++c) { + for (int64_t h = 0; h < hDim; ++h) { + for (int64_t w = 0; w < wDim; ++w) { + for (int64_t n = 0; n < coutOri; ++n) { + int64_t eVal = g % eMult; + int64_t dstCi = eVal * cinOri + c; + int64_t dstCo = eVal * coutOri + n; + int64_t srcCo = g * coutOri + n; + int64_t temporary = dstCi % cubeK; + int64_t devIdx = (g / eMult) * c1Dim * hDim * wDim * coutOpt * cubeK + + (dstCi / cubeK) * hDim * wDim * coutOpt * cubeK + h * wDim * coutOpt * cubeK + + w * coutOpt * cubeK + dstCo * cubeK + temporary; + int64_t hstIdx = srcCo * cDim * hDim * wDim + c * hDim * wDim + h * wDim + w; + /* 此处由偏移计算逻辑保障不会越界读写 */ + std::memcpy(dst + hstIdx * dtypeSize, src + devIdx * dtypeSize, dtypeSize); + } + } + } + } + } + return DebuggerErrno::OK; +} + +static DebuggerErrno FRAC_Z_TO_NCHW(AclTensorInfo& tensor) +{ + if (tensor.subFormat > 1) { + return FRAC_Z_TO_NCHW_WITH_GROUPS(tensor); + } + + AssertDim(tensor.hostShape, kDim4); + AssertConsis(tensor); + AllocTensorTransBuf(tensor); + + auto n0 = tensor.deviceShape.at(fzN0); + auto ni = tensor.deviceShape.at(fzNi); + auto c0 = tensor.deviceShape.at(fzC0); + auto n = tensor.hostShape[kN]; + auto c = tensor.hostShape[kC]; + auto h = tensor.hostShape[kH]; + auto w = tensor.hostShape[kW]; + auto nc = ni * n0; + auto ncc0 = nc * c0; + auto wncc0 = w * ncc0; + auto hwncc0 = h * wncc0; + auto hw = h * w; + auto chw = c * hw; + + if (c0 == 0) { + return DebuggerErrno::ERROR_INVALID_VALUE; + } + + const uint8_t* src = tensor.aclData; + uint8_t* dst = tensor.transBuf.data(); + auto dtypeSize = SizeOfAclDType(tensor); + for (int64_t nIdx = 0; nIdx < n; nIdx++) { + int64_t nHeadAddr = nIdx * chw; + for (int64_t cIdx = 0; cIdx < c; cIdx++) { + int64_t cHeadAddr = nHeadAddr + cIdx * hw; + for (int64_t hIdx = 
0; hIdx < h; hIdx++) { + int64_t hHeadAddr = cHeadAddr + hIdx * w; + for (int64_t wIdx = 0; wIdx < w; wIdx++) { + auto dstIdx = hHeadAddr + wIdx; + auto c1Idx = cIdx / c0; + auto c0Idx = cIdx % c0; + auto ncIdx = nIdx; + auto srcIdx = c1Idx * hwncc0 + hIdx * wncc0 + wIdx * ncc0 + ncIdx * c0 + c0Idx; + /* 此处由偏移计算逻辑保障不会越界读写 */ + std::memcpy(dst + dstIdx * dtypeSize, src + srcIdx * dtypeSize, dtypeSize); + } + } + } + } + return DebuggerErrno::OK; +} + +static void TransShapeToHwNz(const AclShape &hostShape, AclShape& hwShape) +{ + if (hostShape.size() == kDim1) { + hwShape.push_back(1); + hwShape.push_back(1); + hwShape.push_back(hostShape[0]); + return; + } + auto size = hostShape.size(); + int64_t times = 1; + for (size_t i = 0; i != size - kDim2; i++) { + times *= hostShape[i]; + } + hwShape.push_back(times); + hwShape.push_back(hostShape[size - kDim2]); + hwShape.push_back(hostShape[size - kDim1]); +} + +static DebuggerErrno FRAC_NZ_TO_NCHW(AclTensorInfo& tensor) +{ + AssertConsis(tensor); + AllocTensorTransBuf(tensor); + + AclShape hwShape; + TransShapeToHwNz(tensor.hostShape, hwShape); + auto times = hwShape.at(0); + auto h = hwShape.at(hwH); + auto w = hwShape.at(hwW); + auto hw = h * w; + + auto shapeSize = tensor.deviceShape.size(); + if (shapeSize < kDim4) { + LOG_WARNING(DebuggerErrno::ERROR_INVALID_VALUE, tensor + ": Invalid shape size."); + return DebuggerErrno::ERROR_INVALID_VALUE; + } + + auto w1 = tensor.deviceShape[shapeSize - fnzW1]; + auto h1 = tensor.deviceShape[shapeSize - fnzH1]; + auto h0 = tensor.deviceShape[shapeSize - fnzH0]; + auto w0 = tensor.deviceShape[shapeSize - fnzW0]; + auto h1h0w0 = h1 * h0 * w0; + auto w1h1h0w0 = w1 * h1h0w0; + auto numW1 = w / w0; + + const uint8_t* src = tensor.aclData; + uint8_t* dst = tensor.transBuf.data(); + auto dtypeSize = SizeOfAclDType(tensor); + + for (int64_t timesIdx = 0; timesIdx < times; timesIdx++) { + auto timesHead = timesIdx * w1h1h0w0; + auto srcTimesHead = timesIdx * hw; + for (int64_t h1h0Idx 
= 0; h1h0Idx < h; h1h0Idx++) { + auto h1h0Head = timesHead + h1h0Idx * w0; + auto srcHHead = srcTimesHead + h1h0Idx * w; + for (int64_t w1Idx = 0; w1Idx < numW1; w1Idx++) { + for (int64_t i = 0; i < w0; ++i) { + int64_t srcIdx = h1h0Head + w1Idx * h1h0w0 + i; + int64_t dstIdx = srcHHead + w1Idx * w0 + i; + /* 此处由偏移计算逻辑保障不会越界读写 */ + std::memcpy(dst + dstIdx * dtypeSize, src + srcIdx * dtypeSize, dtypeSize); + } + } + auto w1Head = numW1 * w0; + for (int64_t w0Idx = 0; w1Head + w0Idx < w; w0Idx++) { + auto srcWIdx = w1Head + w0Idx; + int64_t srcIdx = h1h0Head + numW1 * h1h0w0 + w0Idx; + int64_t dstIdx = srcHHead + srcWIdx; + /* 此处由偏移计算逻辑保障不会越界读写 */ + std::memcpy(dst + dstIdx * dtypeSize, src + srcIdx * dtypeSize, dtypeSize); + } + } + } + return DebuggerErrno::OK; +} + +static DebuggerErrno NC1HWC0_TO_NCHW(AclTensorInfo& tensor) +{ + AssertDim(tensor.hostShape, kDim4); + AssertConsis(tensor); + AllocTensorTransBuf(tensor); + + auto n = tensor.hostShape[kN]; + auto c = tensor.hostShape[kC]; + auto h = tensor.hostShape[kH]; + auto w = tensor.hostShape[kW]; + auto c1 = tensor.deviceShape[kDim1]; + auto c0 = tensor.deviceShape[kDim4]; + + auto hw = h * w; + auto chw = c * hw; + auto wc0 = w * c0; + auto hwc0 = h * wc0; + auto c1hwc0 = c1 * hwc0; + + const uint8_t* src = tensor.aclData; + uint8_t* dst = tensor.transBuf.data(); + auto dtypeSize = SizeOfAclDType(tensor); + for (int64_t nIndex = 0; nIndex < n; nIndex++) { + int64_t nHeadAddr = nIndex * chw; + for (int64_t cIndex = 0; cIndex < c; cIndex++) { + int64_t cHeadAddr = nHeadAddr + cIndex * hw; + for (int64_t hIndex = 0; hIndex < h; hIndex++) { + int64_t hHeadAddr = cHeadAddr + hIndex * w; + for (int64_t wIndex = 0; wIndex < w; wIndex++) { + int64_t dstIdx = hHeadAddr + wIndex; + int64_t c1Index = cIndex / c0; + int64_t c0Index = cIndex % c0; + int64_t srcIdx = nIndex * c1hwc0 + c1Index * hwc0 + hIndex * wc0 + wIndex * c0 + c0Index; + /* 此处由偏移计算逻辑保障不会越界读写 */ + std::memcpy(dst + dstIdx * dtypeSize, src + srcIdx * 
dtypeSize, dtypeSize); + } + } + } + } + return DebuggerErrno::OK; +} + +static DebuggerErrno NDC1HWC0_TO_NCDHW(AclTensorInfo& tensor) +{ + AssertDim(tensor.hostShape, kDim5); + AssertConsis(tensor); + AllocTensorTransBuf(tensor); + + auto n = tensor.hostShape[N_ncdhw]; + auto c = tensor.hostShape[C_ncdhw]; + auto d = tensor.hostShape[D_ncdhw]; + auto h = tensor.hostShape[H_ncdhw]; + auto w = tensor.hostShape[W_ncdhw]; + auto c1 = tensor.deviceShape[C1_ndc1hwc0]; + auto c0 = tensor.deviceShape[C0_ndc1hwc0]; + + const int64_t cdhw = c * d * h * w; + const int64_t dhw = d * h * w; + const int64_t hw = h * w; + const int64_t dc1hwc0 = d * c1 * h * w * c0; + const int64_t c1hwc0 = c1 * h * w * c0; + const int64_t hwc0 = h * w * c0; + const int64_t wc0 = w * c0; + + const uint8_t* src = tensor.aclData; + uint8_t* dst = tensor.transBuf.data(); + auto dtypeSize = SizeOfAclDType(tensor); + for (int64_t nIndex = 0; nIndex < n; nIndex++) { + int64_t nHead = nIndex * cdhw; + for (int64_t cIndex = 0; cIndex < c; cIndex++) { + int64_t cHead = nHead + cIndex * dhw; + for (int64_t dIndex = 0; dIndex < d; dIndex++) { + int64_t dHead = cHead + dIndex * hw; + for (int64_t hIndex = 0; hIndex < h; hIndex++) { + int64_t hHead = dHead + hIndex * w; + for (int64_t wIndex = 0; wIndex < w; wIndex++) { + int64_t dstIdx = hHead + wIndex; + int64_t c1Index = cIndex / c0; + int64_t c0Index = cIndex % c0; + auto srcIdx = nIndex * dc1hwc0 + dIndex * c1hwc0 + c1Index * hwc0 + hIndex * wc0 + + wIndex * c0 + c0Index; + /* 此处由偏移计算逻辑保障不会越界读写 */ + std::memcpy(dst + dstIdx * dtypeSize, src + srcIdx * dtypeSize, dtypeSize); + } + } + } + } + } + return DebuggerErrno::OK; +} + +static DebuggerErrno C1HWNCoC0_TO_NCHW(AclTensorInfo& tensor) +{ + AssertDim(tensor.hostShape, kDim4); + AssertConsis(tensor); + AllocTensorTransBuf(tensor); + + auto n = tensor.hostShape[kN]; + auto c = tensor.hostShape[kC]; + auto h = tensor.hostShape[kH]; + auto w = tensor.hostShape[kW]; + const int coIdx = 4; + const int c0Idx 
= 5; + auto co = tensor.deviceShape[coIdx]; + auto c0 = tensor.deviceShape[c0Idx]; + auto cubeK = GetCubeSizeByType(tensor.dtype); + + const uint8_t* src = tensor.aclData; + uint8_t* dst = tensor.transBuf.data(); + auto dtypeSize = SizeOfAclDType(tensor); + for (int64_t nIndex = 0; nIndex < n; nIndex++) { + for (int64_t cIndex = 0; cIndex < c; cIndex++) { + for (int64_t hIndex = 0; hIndex < h; hIndex++) { + for (int64_t wIndex = 0; wIndex < w; wIndex++) { + int64_t dstIdx = nIndex * c * h * w + cIndex * h * w + hIndex * w + wIndex; + int64_t c1Index = cIndex / cubeK; + int64_t c0Index = cIndex % cubeK; + int64_t coIndex = c0Index; + int64_t srcIdx = c1Index * h * w * n * co * c0 + hIndex * w * n * co * c0 + wIndex * n * co * c0 + + nIndex * co * c0 + coIndex * c0 + c0Index; + /* 此处由偏移计算逻辑保障不会越界读写 */ + std::memcpy(dst + dstIdx * dtypeSize, src + srcIdx * dtypeSize, dtypeSize); + } + } + } + } + return DebuggerErrno::OK; +} + +static DebuggerErrno NC1HWC0_C04_TO_NCHW(AclTensorInfo& tensor) +{ + return NC1HWC0_TO_NCHW(tensor); +} + +static DebuggerErrno FRAC_Z3D_TO_NCDHW(AclTensorInfo& tensor) +{ + AssertDim(tensor.hostShape, kDim5); + AssertConsis(tensor); + AllocTensorTransBuf(tensor); + + auto n = tensor.hostShape[N_ncdhw]; + auto c = tensor.hostShape[C_ncdhw]; + auto d = tensor.hostShape[D_ncdhw]; + auto h = tensor.hostShape[H_ncdhw]; + auto w = tensor.hostShape[W_ncdhw]; + constexpr int kFZ3D_C0 = 3; + auto c0 = tensor.deviceShape[kFZ3D_C0]; + auto cube_k = GetCubeSizeByType(tensor.dtype); + auto c1 = DivCeil(c, cube_k); + constexpr int64_t kNiSize = 16; + auto n1n0 = AlignCeil(n, kNiSize); + auto n1n0c0 = n1n0 * c0; + auto wn1n0c0 = w * n1n0c0; + auto hwn1n0c0 = h * wn1n0c0; + auto c1hwn1n0c0 = c1 * hwn1n0c0; + auto hw = h * w; + auto dhw = d * hw; + auto cdhw = c * dhw; + + const uint8_t* src = tensor.aclData; + uint8_t* dst = tensor.transBuf.data(); + auto dtypeSize = SizeOfAclDType(tensor); + for (int64_t nIdx = 0; nIdx < n; nIdx++) { + int64_t nHead = nIdx * 
cdhw; + for (int64_t cIdx = 0; cIdx < c; cIdx++) { + int64_t cHead = nHead + cIdx * dhw; + for (int64_t dIdx = 0; dIdx < d; dIdx++) { + int64_t dHead = cHead + dIdx * hw; + for (int64_t hIdx = 0; hIdx < h; hIdx++) { + int64_t hHead = dHead + hIdx * w; + for (int64_t wI = 0; wI < w; wI++) { + int64_t dstIdx = hHead + wI; + int64_t c1I = cIdx / c0; + int64_t c0I = cIdx % c0; + int64_t ncIdx = nIdx; + int64_t srcIdx = dIdx * c1hwn1n0c0 + c1I * c1hwn1n0c0 + hIdx * wn1n0c0 + wI * n1n0c0 + + ncIdx * c0 + c0I; + /* 此处由偏移计算逻辑保障不会越界读写 */ + std::memcpy(dst + dstIdx * dtypeSize, src + srcIdx * dtypeSize, dtypeSize); + } + } + } + } + } + return DebuggerErrno::OK; +} + +DebuggerErrno TransFormatD2H(AclTensorInfo& tensor) +{ + AclFormat from = tensor.deviceFmt; + AclFormat to = tensor.hostFmt; + auto it = formatTransFuncMap.find(std::make_pair(from, to)); + if (it == formatTransFuncMap.end()) { + return DebuggerErrno::ERROR_UNKNOWN_TRANS; + } + + try { + return it->second(tensor); + } catch (const std::exception& e) { + LOG_ERROR(DebuggerErrno::ERROR_OPERATION_FAILED, tensor + ": Failed to conver dtype from " + + std::to_string(from) + " to " + std::to_string(to) + "(" + e.what() + ")."); + return DebuggerErrno::ERROR_OPERATION_FAILED; + } +} + +static void TransBf16ToFp32(const uint8_t* input, size_t num, uint8_t* output, size_t bufferSize) +{ + if (bufferSize < num * sizeof(float)) { + LOG_ERROR(DebuggerErrno::ERROR_BUFFER_OVERFLOW, "Insufficient space for converting data from bf16 to fp32."); + return; + } + const DataUtils::BFloat16* in = reinterpret_cast(input); + float* out = reinterpret_cast(output); + + for (size_t i = 0; i < num; i++) { + out[i] = static_cast(in[i]); + } +} + +DebuggerErrno TransDtype(AclTensorInfo& tensor, AclDtype to) +{ + + const static std::set> kSupportedDtypeTrans = { + {AclDtype::DT_BF16, AclDtype::DT_FLOAT}, + }; + + if (tensor.dtype == to) { + return DebuggerErrno::OK; + } + + if (kSupportedDtypeTrans.find({tensor.dtype, to}) == 
kSupportedDtypeTrans.end()) { + return DebuggerErrno::ERROR_UNKNOWN_TRANS; + } + + std::vector buffer; + AssertConsis(tensor); + size_t bufferSize = EleNumOfTensor(tensor) * SizeOfAclDType(to); + buffer.reserve(bufferSize); + const uint8_t* input = tensor.transBuf.empty() ? tensor.aclData : tensor.transBuf.data(); + uint8_t* output = buffer.data(); + + /* 目前仅支持bf16->fp32,若有通用转换需求再用更泛化的方式重写 */ + if (tensor.dtype == AclDtype::DT_BF16 && to == AclDtype::DT_FLOAT) { + TransBf16ToFp32(input, EleNumOfTensor(tensor), output, bufferSize); + } + + tensor.transBuf = std::move(buffer); + return DebuggerErrno::OK; +} + +} +} \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/ccsrc/core/AclTensor.hpp b/debug/accuracy_tools/msprobe/ccsrc/core/AclTensor.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8b5ba5b06d935d5aaa2dff35e921b9072db6aa1a --- /dev/null +++ b/debug/accuracy_tools/msprobe/ccsrc/core/AclTensor.hpp @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +#include "include/ErrorCode.hpp" +#include "proto/AclDumpMsg.pb.h" +#include "utils/DataUtils.hpp" + +namespace MindStudioDebugger { + +using AclShape = DataUtils::TensorShape; +using AclDtype = DataUtils::DataType; +using AclFormat = DataUtils::TensorFormat; + +constexpr uint8_t kDim1 = 1; +constexpr uint8_t kDim2 = 2; +constexpr uint8_t kDim3 = 3; +constexpr uint8_t kDim4 = 4; +constexpr uint8_t kDim5 = 5; +constexpr uint8_t kDim6 = 6; + +struct AclTensorInfo { + std::string dumpPath; + const uint8_t* aclData; + AclDtype dtype; + AclFormat deviceFmt; + AclFormat hostFmt; + AclShape deviceShape; + AclShape hostShape; + size_t dataSize; + int32_t subFormat; + std::string inout; + uint32_t slot; + bool dumpOriginData; + std::vector transBuf; + + std::string ToString() const { + return "AclTensor(path=" + dumpPath + ",dtype=" + std::to_string(dtype) + ",inout=" + inout + ")"; + } +}; + +inline std::string operator+(const std::string& s, const AclTensorInfo& tensor) { + return s + tensor.ToString(); +} + +inline std::string operator+(const AclTensorInfo& tensor, const std::string& s) { + return tensor.ToString() + s; +} + +namespace AclTensor { +size_t SizeOfTensor(const AclTensorInfo& tensor, bool host=true); +template +AclTensorInfo ParseAttrsFromDumpData(const std::string &dumpPath, const uint8_t* data, const T& tensor, + const std::string& io, uint32_t slot); +DebuggerErrno TransFormatD2H(AclTensorInfo& tensor); +DebuggerErrno TransDtype(AclTensorInfo& tensor, AclDtype to); + +} +} diff --git a/debug/accuracy_tools/msprobe/ccsrc/core/PrecisionDebugger.cpp b/debug/accuracy_tools/msprobe/ccsrc/core/PrecisionDebugger.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d4d74f1962222558c88c576b8ffbd8c474e152f2 --- /dev/null +++ b/debug/accuracy_tools/msprobe/ccsrc/core/PrecisionDebugger.cpp @@ -0,0 +1,157 @@ +/* + * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "base/ErrorInfos.hpp" +#include "base/DebuggerConfig.hpp" +#include "third_party/ACL/AclApi.hpp" +#include "PrecisionDebugger.hpp" + +namespace MindStudioDebugger { + +void PrecisionDbgTaskBase::Register() +{ + PrecisionDebugger::GetInstance().RegisterDebuggerTask(this); +} + +void PrecisionDebugger::RegisterDebuggerTask(PrecisionDbgTaskBase* task) +{ + DEBUG_FUNC_TRACE(); + std::vector::iterator iter; + const DebuggerConfig& cfg = DebuggerConfig::GetInstance(); + + if (cfg.IsCfgLoaded() && !task->Condition(cfg)) { + return; + } + + for (iter = subDebuggers.begin(); iter != subDebuggers.end(); ++iter) { + if (*iter == task) { + return; + } + } + + for (iter = subDebuggers.begin(); iter != subDebuggers.end(); ++iter) { + if ((*iter)->Priority() > task->Priority()) { + break; + } + } + + subDebuggers.insert(iter, task); + + if (cfg.IsCfgLoaded()) { + /* 如果配置还没加载,先加入到缓存中,等加载时再根据条件过滤一遍 */ + task->Initialize(cfg); + LOG_DEBUG("PrecisionDebugger: " + task->Name() + " registered."); + } + return; +} + +void PrecisionDebugger::UnRegisterDebuggerTask(PrecisionDbgTaskBase* task) +{ + DEBUG_FUNC_TRACE(); + for (auto iter = subDebuggers.begin(); iter != subDebuggers.end(); iter++) { + if (*iter == task) { + LOG_DEBUG("PrecisionDebugger: " + task->Name() + " unregistered."); + subDebuggers.erase(iter); + return; + } + } + + return; +} + +int32_t PrecisionDebugger::Initialize(const std::string& 
framework, const std::string& cfgFile) +{ + DEBUG_FUNC_TRACE(); + + int32_t ret = DebuggerConfig::GetInstance().LoadConfig(framework, cfgFile); + if (ret != 0) { + return ret; + } + + if(AscendCLApi::LoadAclApi() != DebuggerErrno::OK) { + return -1; + } + + const DebuggerConfig& cfg = DebuggerConfig::GetInstance(); + for (auto iter = subDebuggers.begin(); iter != subDebuggers.end(); ) { + if (!(*iter)->Condition(cfg)) { + iter = subDebuggers.erase(iter); + } else { + (*iter)->Initialize(cfg); + LOG_DEBUG("PrecisionDebugger: " + (*iter)->Name() + " registered."); + iter++; + } + } + + initialized = true; + return 0; +} + +void PrecisionDebugger::Start() +{ + DEBUG_FUNC_TRACE(); + if (!initialized) { + return; + } + + enable = true; + + for (auto task : subDebuggers) { + task->OnStart(); + } +} + +void PrecisionDebugger::Stop() +{ + DEBUG_FUNC_TRACE(); + if (!initialized) { + return; + } + + enable = false; + CALL_ACL_API(aclrtSynchronizeDevice); + + for (auto task : subDebuggers) { + task->OnStop(); + } +} + +void PrecisionDebugger::Step() +{ + return Step(1); +} + +void PrecisionDebugger::Step(uint32_t step) +{ + DEBUG_FUNC_TRACE(); + if (!initialized) { + return; + } + + if (step > UINT32_MAX - curStep) { + throw std::runtime_error("Step over upper limit(4294967295)."); + } + curStep += step; + CALL_ACL_API(aclrtSynchronizeDevice); + + for (auto task : subDebuggers) { + task->OnStep(curStep); + } +} + +} \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/ccsrc/core/PrecisionDebugger.hpp b/debug/accuracy_tools/msprobe/ccsrc/core/PrecisionDebugger.hpp new file mode 100644 index 0000000000000000000000000000000000000000..fbc22c016c40285a90a3de5989684098639256c9 --- /dev/null +++ b/debug/accuracy_tools/msprobe/ccsrc/core/PrecisionDebugger.hpp @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include "base/DebuggerConfig.hpp" + +namespace MindStudioDebugger { + +class PrecisionDbgTaskBase { +public: + virtual bool Condition(const DebuggerConfig& cfg) const = 0; + virtual std::string Name() const = 0; + virtual uint32_t Priority() const {return 100;} + + virtual void Initialize(const DebuggerConfig& cfg) {}; + virtual void OnStart() {}; + virtual void OnStop() {}; + virtual void OnStep(uint32_t curStep) {}; + + void Register(); + +protected: + PrecisionDbgTaskBase() = default; + ~PrecisionDbgTaskBase() = default; +}; + +class PrecisionDebugger { +public: + static PrecisionDebugger& GetInstance() { + static PrecisionDebugger instance_; + return instance_; + } + + int32_t Initialize(const std::string& framework, const std::string& cfgFile); + bool HasInitialized() const {return initialized;} + + void Start(); + void Stop(); + void Step(); + void Step(uint32_t step); + + bool IsEnable() const {return enable;} + uint32_t GetCurStep() const {return curStep;} + + void RegisterDebuggerTask(PrecisionDbgTaskBase* task); + void UnRegisterDebuggerTask(PrecisionDbgTaskBase* task); + +private: + PrecisionDebugger() = default; + ~PrecisionDebugger() = default; + explicit PrecisionDebugger(const PrecisionDebugger &obj) = delete; + PrecisionDebugger& operator=(const PrecisionDebugger &obj) = delete; + explicit PrecisionDebugger(PrecisionDebugger &&obj) = delete; + 
PrecisionDebugger& operator=(PrecisionDebugger &&obj) = delete; + + bool initialized{false}; + bool enable{false}; + uint32_t curStep{0}; + std::vector subDebuggers; +}; + +} \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/ccsrc/core/mindspore/MSAclDumper.cpp b/debug/accuracy_tools/msprobe/ccsrc/core/mindspore/MSAclDumper.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2d80ed3ce1ab11ee5ddf9bad18583a6813f32529 --- /dev/null +++ b/debug/accuracy_tools/msprobe/ccsrc/core/mindspore/MSAclDumper.cpp @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include "base/ErrorInfos.hpp" +#include "base/DebuggerConfig.hpp" +#include "base/Environment.hpp" +#include "core/AclDumper.hpp" +#include "MSAclDumper.hpp" + +namespace MindStudioDebugger { + +void MSAclDumper::OnStepBegin(uint32_t device, uint32_t curStep, ExtArgs& args) +{ + DEBUG_FUNC_TRACE(); + if (!PrecisionDebugger::GetInstance().IsEnable()) { + return; + } + const bool* isKbk = GetExtArgs(args, MindStudioExtensionArgs::IS_KBK); + if (isKbk != nullptr && *isKbk) { + /* acldump只用于非kbk场景 */ + return; + } + + int32_t rank = Environment::GetRankID(); + if (rank < 0) { + rank = static_cast(device); + } + + AclDumper::GetInstance().SetDump(rank, curStep, args); + return; +} + +void MSAclDumper::OnStepEnd(ExtArgs& args) +{ + DEBUG_FUNC_TRACE(); + AclDumper::GetInstance().FinalizeDump(args); +} + +__attribute__((constructor)) void RegisterMSAclDumper() +{ + MSAclDumper::GetInstance().Register(); +} + +} \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/ccsrc/core/mindspore/MSAclDumper.hpp b/debug/accuracy_tools/msprobe/ccsrc/core/mindspore/MSAclDumper.hpp new file mode 100644 index 0000000000000000000000000000000000000000..cd09bf51af0dac67065d51b8ce60c20f011cd585 --- /dev/null +++ b/debug/accuracy_tools/msprobe/ccsrc/core/mindspore/MSAclDumper.hpp @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +#include "include/ExtArgs.hpp" +#include "core/PrecisionDebugger.hpp" + +namespace MindStudioDebugger { + +class MSAclDumper : public PrecisionDbgTaskBase { +public: + static MSAclDumper& GetInstance() { + static MSAclDumper instance_; + return instance_; + } + + std::string Name() const override {return "MindSpore AclDumper";} + bool Condition(const DebuggerConfig& cfg) const override { + return cfg.GetFramework() == DebuggerFramework::FRAMEWORK_MINDSPORE && + cfg.GetDebugLevel() == DebuggerLevel::L2; + } + + void OnStepBegin(uint32_t device, uint32_t curStep, ExtArgs& args); + void OnStepEnd(ExtArgs& args); + +private: + MSAclDumper() = default; + ~MSAclDumper() = default; + explicit MSAclDumper(const MSAclDumper &obj) = delete; + MSAclDumper& operator=(const MSAclDumper &obj) = delete; + explicit MSAclDumper(MSAclDumper &&obj) = delete; + MSAclDumper& operator=(MSAclDumper &&obj) = delete; +}; + +} \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/ccsrc/core/mindspore/MindSporeTrigger.cpp b/debug/accuracy_tools/msprobe/ccsrc/core/mindspore/MindSporeTrigger.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5ed8ad1e646e463ab37c120a83a6fa8edbea58ce --- /dev/null +++ b/debug/accuracy_tools/msprobe/ccsrc/core/mindspore/MindSporeTrigger.cpp @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "include/Macro.hpp" +#include "base/ErrorInfos.hpp" +#include "third_party/ACL/AclApi.hpp" +#include "MindSporeTrigger.hpp" +#include "MSAclDumper.hpp" + +namespace MindStudioDebugger { + +bool MindSporeTrigger::stepBeginFlag = false; + +void MindSporeTrigger::TriggerOnStepBegin(uint32_t device, uint32_t curStep, ExtArgs& args) +{ + DEBUG_FUNC_TRACE(); + CleanErrorInfoCache(); + + MSAclDumper::GetInstance().OnStepBegin(device, curStep, args); + stepBeginFlag = true; + + CleanErrorInfoCache(); + return; +} + +void MindSporeTrigger::TriggerOnStepEnd(ExtArgs& args) +{ + DEBUG_FUNC_TRACE(); + CleanErrorInfoCache(); + + if (!stepBeginFlag) { + return; + } + CALL_ACL_API(aclrtSynchronizeDevice); + MSAclDumper::GetInstance().OnStepEnd(args); + stepBeginFlag = false; + + CleanErrorInfoCache(); + return; +} + +} \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/ccsrc/core/mindspore/MindSporeTrigger.hpp b/debug/accuracy_tools/msprobe/ccsrc/core/mindspore/MindSporeTrigger.hpp new file mode 100644 index 0000000000000000000000000000000000000000..022e5d7d4c14a9771681840b967b2ec3aebb811b --- /dev/null +++ b/debug/accuracy_tools/msprobe/ccsrc/core/mindspore/MindSporeTrigger.hpp @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +#include "include/ExtArgs.hpp" + +namespace MindStudioDebugger { + +class MindSporeTrigger { +public: + static void TriggerOnStepBegin(uint32_t device, uint32_t curStep, ExtArgs& args); + static void TriggerOnStepEnd(ExtArgs& args); + static void LaunchPreDbg() {} + static void LaunchPostDbg() {} + +private: + MindSporeTrigger() = default; + ~MindSporeTrigger() = default; + + static bool stepBeginFlag; +}; + +} \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/ccsrc/if/mindspore/MindSporeDbgHook.cpp b/debug/accuracy_tools/msprobe/ccsrc/if/mindspore/MindSporeDbgHook.cpp new file mode 100644 index 0000000000000000000000000000000000000000..42f3a2e5b61d5da021b2ef7da4a7b88c6dc2abbb --- /dev/null +++ b/debug/accuracy_tools/msprobe/ccsrc/if/mindspore/MindSporeDbgHook.cpp @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#define _GLIBCXX_USE_CXX11_ABI 0 + +#include +#include + +#include "include/Macro.hpp" +#include "include/ExtArgs.hpp" +#include "core/mindspore/MindSporeTrigger.hpp" + +EXPORT_SYMBOL void MS_DbgOnStepBegin(uint32_t device, int32_t curStep, + std::map exts) +{ + MindStudioDebugger::ExtArgs args; + const char** strBuf = nullptr; + for (auto& ext : exts) { + if (ext.first >= static_cast(MindStudioDebugger::MindStudioExtensionArgs::ARG_MAX)) { + continue; + } + /* mindspore使用了_GLIBCXX_USE_CXX11_ABI=0,为了解决CXX版本兼容问题,此处将string转char*使用 */ + if (ext.first == static_cast(MindStudioDebugger::MindStudioExtensionArgs::ALL_KERNEL_NAMES)) { + std::vector* ss = reinterpret_cast*>(ext.second); + strBuf = new const char*[(*ss).size() + 1]; + strBuf[(*ss).size()] = nullptr; + size_t i = 0; + for (std::string& s : *ss) { + strBuf[i] = s.c_str(); + i++; + } + args[static_cast(ext.first)] = reinterpret_cast(strBuf); + continue; + } + args[static_cast(ext.first)] = ext.second; + } + + MindStudioDebugger::MindSporeTrigger::TriggerOnStepBegin(device, static_cast(curStep), args); + if (strBuf != nullptr) { + delete[] strBuf; + } + + return; +} + +EXPORT_SYMBOL void MS_DbgOnStepEnd(std::map& exts) +{ + MindStudioDebugger::ExtArgs args; + for (auto& ext : exts) { + if (ext.first >= static_cast(MindStudioDebugger::MindStudioExtensionArgs::ARG_MAX)) { + continue; + } + args[static_cast(ext.first)] = ext.second; + } + return MindStudioDebugger::MindSporeTrigger::TriggerOnStepEnd(args); +} + + diff --git a/debug/accuracy_tools/msprobe/ccsrc/if/python/CPythonAgent.cpp b/debug/accuracy_tools/msprobe/ccsrc/if/python/CPythonAgent.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4b8fc03491e2c0792c3c707c272e7b587d60c7ad --- /dev/null +++ b/debug/accuracy_tools/msprobe/ccsrc/if/python/CPythonAgent.cpp @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include "utils/CPythonUtils.hpp" + +namespace MindStudioDebugger { + +PyDoc_STRVAR(CPythonAgentModuleDoc, +"A module for Python code to interact with C++ code.\n\ + \n\ +..."); + +static PyObject* CPythonAgentRegister(PyObject *module, PyObject *args) +{ + /* 预期2个参数,name和obj */ + if (args == nullptr || PyTuple_GET_SIZE(args) != 2) { + PyErr_SetString(PyExc_TypeError, "\'register_context\' expects 2 arguments."); + Py_RETURN_NONE; + } + + PyObject* obj = nullptr; + const char* name = nullptr; + if (!PyArg_ParseTuple(args, "sO", &name, &obj)) { + PyErr_SetString(PyExc_TypeError, "\"name\" should be a string and \"obj\" should be a python object."); + Py_RETURN_NONE; + } + + if (CPythonUtils::RegisterPythonObject(name, obj) != 0) { + if (CPythonUtils::IsPyObjRegistered(name)) { + PyErr_Format(PyExc_RuntimeError, "\"%s\" has been registered already.", name); + } else { + PyErr_Format(PyExc_RuntimeError, "Failed to register \"%s\".", name); + } + } + + Py_RETURN_NONE; +} + +static PyObject* CPythonAgentUnRegister(PyObject *module, PyObject *obj) +{ + CPythonUtils::PythonStringObject name(obj); + if(name.IsNone()) { + PyErr_SetString(PyExc_TypeError, "\"name\" should be a string."); + Py_RETURN_NONE; + } + + CPythonUtils::UnRegisterPythonObject(name); + Py_RETURN_NONE; +} + +static PyObject* CPythonAgentGetContext(PyObject *module, PyObject *obj) +{ + 
CPythonUtils::PythonStringObject name(obj); + if(name.IsNone()) { + PyErr_SetString(PyExc_TypeError, "\"name\" should be a string."); + Py_RETURN_NONE; + } + + return CPythonUtils::GetRegisteredPyObj(name).NewRef(); +} + +PyDoc_STRVAR(RegisterDoc, +"register_context(name, obj)\n--\n\nRegister a python object, which will be available on the backend."); +PyDoc_STRVAR(UnregisterDoc, +"unregister_context(name)\n--\n\nUnregister a python object."); +PyDoc_STRVAR(GetDoc, +"get_context(name)\n--\n\nGet a python object, which may be register by the backend."); + +static PyMethodDef CPythonAgentMethods[] = { + {"register_context", reinterpret_cast(CPythonAgentRegister), METH_VARARGS, RegisterDoc}, + {"unregister_context", reinterpret_cast(CPythonAgentUnRegister), METH_O, UnregisterDoc}, + {"get_context", reinterpret_cast(CPythonAgentGetContext), METH_O, GetDoc}, + {nullptr, nullptr, 0, nullptr} +}; + +static struct PyModuleDef g_CPythonAgentModule = { + PyModuleDef_HEAD_INIT, + "_msprobe_c.CPythonAgent", /* m_name */ + CPythonAgentModuleDoc, /* m_doc */ + -1, /* m_size */ + CPythonAgentMethods, /* m_methods */ +}; + +PyObject* GetCPythonAgentModule() +{ + return PyModule_Create(&g_CPythonAgentModule); +} + +} \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/ccsrc/if/python/CPythonAgent.hpp b/debug/accuracy_tools/msprobe/ccsrc/if/python/CPythonAgent.hpp new file mode 100644 index 0000000000000000000000000000000000000000..103fa4430eb0f490654f30c1684b2427e062590c --- /dev/null +++ b/debug/accuracy_tools/msprobe/ccsrc/if/python/CPythonAgent.hpp @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <Python.h>
+
+namespace MindStudioDebugger {
+PyObject* GetCPythonAgentModule();
+}
\ No newline at end of file
diff --git a/debug/accuracy_tools/msprobe/ccsrc/if/python/MsProbeIfPython.cpp b/debug/accuracy_tools/msprobe/ccsrc/if/python/MsProbeIfPython.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a3a928e4d2a611c9b85fe2604379eecb70775381
--- /dev/null
+++ b/debug/accuracy_tools/msprobe/ccsrc/if/python/MsProbeIfPython.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <Python.h>
+
+#include "PrecisionDebuggerIfPython.hpp"
+#include "CPythonAgent.hpp"
+
+namespace MindStudioDebugger {
+
+PyDoc_STRVAR(MsProbeCModuleDoc,
+"The part of the module msprobe that is implemented in CXX.\n\
+class _PrecisionDebugger: PrecisionDebugger in CXX \n\
+class _DebuggerConfig: Configuration data of PrecisionDebugger \n\
+class CPythonAgent: Used for front-end and back-end code interactions \n\
+ \n\
+...");
+
+/* Definition of the top-level extension module; it declares no module-level methods. */
+static struct PyModuleDef g_MsProbeCModule = {
+    PyModuleDef_HEAD_INIT,
+    "_msprobe_c",                 /* m_name */
+    MsProbeCModuleDoc,            /* m_doc */
+    -1,                           /* m_size */
+    nullptr,                      /* m_methods */
+};
+
+}
+
+/*
+ * Initialization function of the extension module _msprobe_c.
+ * Creates the module and attaches the _PrecisionDebugger type and the
+ * CPythonAgent submodule to it.
+ * Returns the new module, or nullptr with ImportError set if a step fails.
+ */
+PyMODINIT_FUNC PyInit__msprobe_c(void)
+{
+    PyObject* m = PyModule_Create(&MindStudioDebugger::g_MsProbeCModule);
+    if (m == nullptr) {
+        return nullptr;
+    }
+
+    PyTypeObject* precisionDebugger = MindStudioDebugger::GetPyPrecisionDebuggerType();
+    if (precisionDebugger == nullptr) {
+        PyErr_SetString(PyExc_ImportError, "Failed to create class _PrecisionDebugger.");
+        Py_DECREF(m);
+        return nullptr;
+    }
+    /* PyModule_AddObject steals a reference only on success, so take the extra
+     * reference up front and give it back if the call fails. */
+    Py_INCREF(precisionDebugger);
+    if (PyModule_AddObject(m, "_PrecisionDebugger", reinterpret_cast<PyObject*>(precisionDebugger)) < 0) {
+        Py_DECREF(precisionDebugger);
+        PyErr_SetString(PyExc_ImportError, "Failed to bind class _PrecisionDebugger.");
+        Py_DECREF(m);
+        return nullptr;
+    }
+
+    PyObject* cpyAgent = MindStudioDebugger::GetCPythonAgentModule();
+    if (cpyAgent == nullptr) {
+        PyErr_SetString(PyExc_ImportError, "Failed to create submodule CPythonAgent.");
+        Py_DECREF(m);
+        return nullptr;
+    }
+    /* cpyAgent is a new reference: the module owns it on success, we must release it on failure. */
+    if (PyModule_AddObject(m, "CPythonAgent", cpyAgent) < 0) {
+        Py_DECREF(cpyAgent);
+        PyErr_SetString(PyExc_ImportError, "Failed to bind submodule CPythonAgent.");
+        Py_DECREF(m);
+        return nullptr;
+    }
+
+    return m;
+}
\ No newline at end of file
diff --git a/debug/accuracy_tools/msprobe/ccsrc/if/python/PrecisionDebuggerIfPython.cpp b/debug/accuracy_tools/msprobe/ccsrc/if/python/PrecisionDebuggerIfPython.cpp
new file mode 100644
index 
0000000000000000000000000000000000000000..da1cf3cf1c5d4c8894d0b12b5518657b5928a8d6
--- /dev/null
+++ b/debug/accuracy_tools/msprobe/ccsrc/if/python/PrecisionDebuggerIfPython.cpp
@@ -0,0 +1,227 @@
+/*
+ * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <Python.h>
+#include <cstring>
+#include <stdexcept>
+#include <string>
+
+#include "utils/CPythonUtils.hpp"
+#include "core/PrecisionDebugger.hpp"
+
+namespace MindStudioDebugger {
+
+/*
+ * tp_new slot. Every construction yields the same shared object: it is
+ * allocated on the first call and each call returns a new reference to it.
+ * Returns nullptr with a Python exception set if the type cannot allocate.
+ */
+static PyObject* NewPrecisionDebugger(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+    if (type == nullptr || type->tp_alloc == nullptr) {
+        /* Use the Python error indicator; a C++ exception must not unwind into the interpreter. */
+        PyErr_SetString(PyExc_RuntimeError, "PrecisionDebugger: type or alloc is nullptr.");
+        return nullptr;
+    }
+
+    /* Singleton: avoid constructing the object repeatedly. */
+    static PyObject *self = nullptr;
+    if (self == nullptr) {
+        self = type->tp_alloc(type, 0);
+    }
+
+    Py_XINCREF(self);
+    return self;
+}
+
+/*
+ * tp_init slot. Initializes the PrecisionDebugger singleton from the keyword
+ * arguments "framework" and "config_path"; does nothing once it has been
+ * initialized. Returns 0 on success, -1 with a Python exception set on failure.
+ */
+static int InitPrecisionDebugger(PyObject *self, PyObject *args, PyObject *kws)
+{
+    if (PrecisionDebugger::GetInstance().HasInitialized()) {
+        return 0;
+    }
+
+    if (kws == nullptr) {
+        PyErr_SetString(PyExc_TypeError, "Need keywords arg'framework\'and \'config_path\'.");
+        return -1;
+    }
+
+    CPythonUtils::PythonDictObject kwArgs(kws);
+    std::string framework = kwArgs.GetItem("framework");
+    std::string cfgFile = kwArgs.GetItem("config_path");
+
+    if (PrecisionDebugger::GetInstance().Initialize(framework, cfgFile) != 0) {
+        PyErr_SetString(PyExc_RuntimeError, "Failed to load config, read log for more details.");
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * tp_getattro slot. Serves "enable" and "current_step" from the
+ * PrecisionDebugger singleton and defers every other name to the generic
+ * attribute lookup. Returns a new reference, or nullptr with an exception set.
+ */
+static PyObject* PrecisionDebuggerGetAttr(PyObject *self, PyObject *name)
+{
+    CPythonUtils::PythonStringObject attr(name);
+
+    if (attr.IsNone()) {
+        PyErr_SetString(PyExc_TypeError, "Attribution should be a string.");
+        return nullptr;
+    }
+
+    /* Keep an owning copy; a c_str() pointer taken from a temporary string would dangle. */
+    const std::string attrName = attr.ToString();
+    if (attrName == "enable") {
+        return CPythonUtils::PythonObject::From(PrecisionDebugger::GetInstance().IsEnable()).NewRef();
+    }
+    if (attrName == "current_step") {
+        return CPythonUtils::PythonObject::From(PrecisionDebugger::GetInstance().GetCurStep()).NewRef();
+    }
+
+    PyObject* ret = PyObject_GenericGetAttr(self, name);
+    if (ret == nullptr) {
+        /* "%s" needs a C string, not the wrapper object. */
+        PyErr_Format(PyExc_AttributeError, "\'PrecisionDebugger\' object has no attribute \'%s\'", attrName.c_str());
+        return nullptr;
+    }
+
+    return ret;
+}
+
+/* start(): forwards to PrecisionDebugger::Start(); returns None. */
+static PyObject* PrecisionDebuggerStart(PyObject *self)
+{
+    PrecisionDebugger::GetInstance().Start();
+    Py_RETURN_NONE;
+}
+
+/* stop(): forwards to PrecisionDebugger::Stop(); returns None. */
+static PyObject* PrecisionDebuggerStop(PyObject *self)
+{
+    PrecisionDebugger::GetInstance().Stop();
+    Py_RETURN_NONE;
+}
+
+/*
+ * step([increment]): without arguments calls Step(); otherwise the first
+ * argument must be a non-negative int and is passed to Step(increment).
+ * Returns None, or nullptr with a Python exception set.
+ */
+static PyObject* PrecisionDebuggerStep(PyObject *self, PyObject *args)
+{
+    if (args == nullptr || PyTuple_GET_SIZE(args) == 0) {
+        PrecisionDebugger::GetInstance().Step();
+        Py_RETURN_NONE;
+    }
+
+    PyObject* increment = PyTuple_GetItem(args, 0);
+    if (!PyLong_Check(increment)) {
+        PyErr_SetString(PyExc_TypeError, "\'step\' should be a int.");
+        return nullptr;
+    }
+
+    const unsigned long value = PyLong_AsUnsignedLong(increment);
+    if (value == static_cast<unsigned long>(-1) && PyErr_Occurred()) {
+        /* Negative or too large: the conversion has already set the exception. */
+        return nullptr;
+    }
+
+    PrecisionDebugger::GetInstance().Step(value);
+    Py_RETURN_NONE;
+}
+
+PyDoc_STRVAR(StartDoc,
+"start($self, /)\n--\n\nEnable debug.");
+PyDoc_STRVAR(StopDoc,
+"stop($self, /)\n--\n\nDisable debug.");
+PyDoc_STRVAR(StepDoc,
+"step($self, [increment])\n--\n\nUpdata step.");
+
+static PyMethodDef PrecisionDebuggerMethods[] = {
+    {"start", reinterpret_cast<PyCFunction>(PrecisionDebuggerStart), METH_NOARGS, StartDoc},
+    {"stop", reinterpret_cast<PyCFunction>(PrecisionDebuggerStop), METH_NOARGS, StopDoc},
+    {"step", reinterpret_cast<PyCFunction>(PrecisionDebuggerStep), METH_VARARGS, StepDoc},
+    {nullptr, nullptr, 0, nullptr}
+};
+
+/* Type object of _msprobe_c._PrecisionDebugger; finalized by GetPyPrecisionDebuggerType(). */
+PyTypeObject PyPrecisionDebuggerType = {
+    PyVarObject_HEAD_INIT(&PyType_Type, 0)
+    "_msprobe_c._PrecisionDebugger",            /* tp_name */
+    0,                                          /* tp_basicsize */
+    0,                                          /* tp_itemsize */
+    /* methods */
+    0,                                          /* tp_dealloc */
+    0,                                          /* tp_vectorcall_offset */
+    0,                                          /* tp_getattr */
+    0,                                          /* tp_setattr */
+    0,                                          /* tp_as_async */
+    0,                                          /* tp_repr */
+    0,                                          /* tp_as_number */
+    0,                                          /* tp_as_sequence */
+    0,                                          /* tp_as_mapping */
+    0,                                          /* tp_hash */
+    0,                                          /* tp_call */
+    0,                                          /* tp_str */
+    PrecisionDebuggerGetAttr,                   /* tp_getattro */
+    0,                                          /* tp_setattro */
+    0,                                          /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT,                         /* tp_flags */
+    0,                                          /* tp_doc */
+    0,                                          /* tp_traverse */
+    0,                                          /* tp_clear */
+    0,                                          /* tp_richcompare */
+    0,                                          /* tp_weaklistoffset */
+    0,                                          /* tp_iter */
+    0,                                          /* tp_iternext */
+    PrecisionDebuggerMethods,                   /* tp_methods */
+    0,                                          /* tp_members */
+    0,                                          /* tp_getset */
+    &PyBaseObject_Type,                         /* tp_base */
+    0,                                          /* tp_dict */
+    0,                                          /* tp_descr_get */
+    0,                                          /* tp_descr_set */
+    0,                                          /* tp_dictoffset */
+    InitPrecisionDebugger,                      /* tp_init */
+    0,                                          /* tp_alloc */
+    NewPrecisionDebugger,                       /* tp_new */
+    PyObject_Del,                               /* tp_free */
+};
+
+/*
+ * Returns the _PrecisionDebugger type object, running PyType_Ready() on it
+ * the first time. Returns nullptr if PyType_Ready() fails.
+ */
+PyTypeObject* GetPyPrecisionDebuggerType()
+{
+    static bool init = false;
+    if (!init) {
+        if (PyType_Ready(&PyPrecisionDebuggerType) < 0) {
+            return nullptr;
+        }
+        init = true;
+    }
+    return &PyPrecisionDebuggerType;
+}
+
+}
\ No newline at end of file
diff --git a/debug/accuracy_tools/msprobe/ccsrc/if/python/PrecisionDebuggerIfPython.hpp b/debug/accuracy_tools/msprobe/ccsrc/if/python/PrecisionDebuggerIfPython.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..55e861c1ecf62a5326b3660a9846cf9458127e7a
--- /dev/null
+++ b/debug/accuracy_tools/msprobe/ccsrc/if/python/PrecisionDebuggerIfPython.hpp
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <Python.h>
+
+namespace MindStudioDebugger {
+PyTypeObject* GetPyPrecisionDebuggerType();
+}
\ No newline at end of file
diff --git a/debug/accuracy_tools/msprobe/ccsrc/include/ErrorCode.hpp b/debug/accuracy_tools/msprobe/ccsrc/include/ErrorCode.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..19ce6ce1b83a970406c6ca13c96175eaea97b04f
--- /dev/null
+++ b/debug/accuracy_tools/msprobe/ccsrc/include/ErrorCode.hpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+namespace MindStudioDebugger {
+
+/* Error codes of the debugger; each category below starts at its own multiple of 100. */
+enum class DebuggerErrno {
+    OK = 0,
+    ERROR,
+    NONE,
+
+    /* File operations */
+    ERROR_FILE_NOT_EXISTS = 100,
+    ERROR_FILE_ALREADY_EXISTS,
+    ERROR_FAILED_TO_OPEN_FILE,
+    ERROR_FAILED_TO_WRITE_FILE,
+    ERROR_DIR_NOT_EXISTS,
+    ERROR_PERMISSION_DENINED,
+    ERROR_NOT_ALLOW_SOFTLINK,
+    ERROR_ILLEGAL_FILE_TYPE,
+    ERROR_PATH_TOO_LOOG,
+    ERROR_PATH_TOO_DEEP,
+    ERROR_PATH_CONTAINS_INVALID_CHAR,
+    ERROR_FILE_TOO_LARGE,
+    ERROR_UNKNOWN_FILE_SUFFIX,
+    ERROR_CANNOT_PARSE_PATH,
+
+    /* Data parsing */
+    ERROR_INVALID_OPERATION = 200,
+    ERROR_INVALID_FORMAT,
+    ERROR_INVALID_VALUE,
+    ERROR_UNKNOWN_FIELD,
+    ERROR_UNKNOWN_VALUE,
+    ERROR_UNKNOWN_TRANS,
+    ERROR_FIELD_NOT_EXISTS,
+    ERROR_VALUE_OVERFLOW,
+
+    /* System calls */
+    ERROR_NO_MEMORY = 300,
+    ERROR_BUFFER_OVERFLOW,
+    ERROR_SYSCALL_FAILED,
+    ERROR_OPERATION_FAILED,
+
+    /* Environment dependencies */
+    ERROR_DEPENDENCY_NOT_FIND = 400,
+    ERROR_CONFIGURATION_CONFLICTS,
+    ERROR_EXTERNAL_API_ERROR,
+};
+
+}
\ No newline at end of file
diff --git a/debug/accuracy_tools/msprobe/ccsrc/include/ExtArgs.hpp b/debug/accuracy_tools/msprobe/ccsrc/include/ExtArgs.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..40624194e5690a974bf0b3881dfdc717ff01d064
--- /dev/null
+++ b/debug/accuracy_tools/msprobe/ccsrc/include/ExtArgs.hpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <map>
+
+namespace MindStudioDebugger {
+
+/* Keys of an ExtArgs map; the trailing comments note the value each key is expected to carry. */
+enum class MindStudioExtensionArgs {
+    ALL_KERNEL_NAMES = 0,    /* const std::vector --> char** */
+    IS_KBK = 1,              /* bool */
+
+    /* Add before this line */
+    ARG_MAX,
+};
+
+/* Maps an argument key to an untyped pointer to its value. */
+using ExtArgs = std::map<MindStudioExtensionArgs, void*>;
+
+/*
+ * Looks up id in args and returns the stored pointer cast to T.
+ * T must be a pointer type; nullptr is returned when the key is absent.
+ */
+template <typename T>
+T GetExtArgs(ExtArgs& args, MindStudioExtensionArgs id)
+{
+    auto it = args.find(id);
+    if (it == args.end()) {
+        return nullptr;
+    }
+
+    return reinterpret_cast<T>(it->second);
+}
+
+}
\ No newline at end of file
diff --git a/debug/accuracy_tools/msprobe/ccsrc/include/Macro.hpp b/debug/accuracy_tools/msprobe/ccsrc/include/Macro.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..f366ab426f51c5150792605ee6bf03f899c76fd2
--- /dev/null
+++ b/debug/accuracy_tools/msprobe/ccsrc/include/Macro.hpp
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#define EXPORT_SYMBOL extern "C" __attribute__((visibility("default")))
+
+#define ELE_IN_VECTOR(vec, ele) (std::find((vec).begin(), (vec).end(), (ele)) != (vec).end())
diff --git a/debug/accuracy_tools/msprobe/ccsrc/third_party/ACL/AclApi.cpp b/debug/accuracy_tools/msprobe/ccsrc/third_party/ACL/AclApi.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1636c6998d9096b62e9a7f281c7e5ac1b4de4818
--- /dev/null
+++ b/debug/accuracy_tools/msprobe/ccsrc/third_party/ACL/AclApi.cpp
@@ -0,0 +1,177 @@
+/*
+ * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <dlfcn.h>
+#include <map>
+#include <stdexcept>
+#include <string>
+
+#include "base/ErrorInfos.hpp"
+#include "AclApi.hpp"
+
+namespace MindStudioDebugger {
+namespace AscendCLApi {
+
+using namespace MindStudioDebugger;
+
+constexpr const char* kLibAscendclName = "libascendcl.so";
+constexpr const char* kLibMSAscendName = "libmindspore_ascend.so.2";
+
+using aclInitFuncType = aclError (*)(const char *);
+using aclmdlInitDumpFuncType = aclError (*)();
+using aclmdlSetDumpFuncType = aclError (*)(const char *);
+using aclmdlFinalizeDumpFuncType = aclError (*)();
+using acldumpRegCallbackFuncType = aclError (*)(AclDumpCallbackFuncType, int32_t);
+using aclrtSynchronizeDeviceFuncType = aclError (*)();
+
+/* Entry points resolved by LoadAclApi(); nullptr until then. */
+static aclInitFuncType aclInitFunc = nullptr;
+static aclmdlInitDumpFuncType aclmdlInitDumpFunc = nullptr;
+static aclmdlSetDumpFuncType aclmdlSetDumpFunc = nullptr;
+static aclmdlFinalizeDumpFuncType aclmdlFinalizeDumpFunc = nullptr;
+static acldumpRegCallbackFuncType acldumpRegCallbackFunc = nullptr;
+static aclrtSynchronizeDeviceFuncType aclrtSynchronizeDeviceFunc = nullptr;
+
+/* Returns the pending dlerror() message, or an empty string when dlerror() returns NULL. */
+static std::string LastDlError()
+{
+    const char* msg = dlerror();
+    return (msg == nullptr) ? std::string() : std::string(msg);
+}
+
+/*
+ * Resolves the ACL entry points wrapped by the ACLAPI_* functions below.
+ * All but acldumpRegCallback come from libascendcl.so; a missing one makes
+ * the call fail with ERROR_DEPENDENCY_NOT_FIND. acldumpRegCallback is looked
+ * up in libmindspore_ascend.so.2 when that library can be opened, otherwise
+ * in libascendcl.so; if it is missing an error is logged but OK is still
+ * returned, and ACLAPI_acldumpRegCallback() throws when it is called.
+ * Calls made after libascendcl.so has been opened return OK immediately.
+ */
+DebuggerErrno LoadAclApi()
+{
+    static void* hLibAscendcl = nullptr;
+
+    if (hLibAscendcl != nullptr) {
+        LOG_INFO("No need to load acl api again.");
+        return DebuggerErrno::OK;
+    }
+
+    hLibAscendcl = dlopen(kLibAscendclName, RTLD_LAZY);
+    if (hLibAscendcl == nullptr) {
+        LOG_ERROR(DebuggerErrno::ERROR_DEPENDENCY_NOT_FIND,
+                  "Failed to search libascendcl.so." + LastDlError());
+        return DebuggerErrno::ERROR_DEPENDENCY_NOT_FIND;
+    }
+
+    static const std::map<const char*, void**> functionMap = {
+        {"aclInit", reinterpret_cast<void**>(&aclInitFunc)},
+        {"aclmdlInitDump", reinterpret_cast<void**>(&aclmdlInitDumpFunc)},
+        {"aclmdlSetDump", reinterpret_cast<void**>(&aclmdlSetDumpFunc)},
+        {"aclmdlFinalizeDump", reinterpret_cast<void**>(&aclmdlFinalizeDumpFunc)},
+        {"aclrtSynchronizeDevice", reinterpret_cast<void**>(&aclrtSynchronizeDeviceFunc)},
+    };
+
+    for (auto& iter : functionMap) {
+        if (*(iter.second) != nullptr) {
+            continue;
+        }
+        *(iter.second) = dlsym(hLibAscendcl, iter.first);
+        if (*(iter.second) == nullptr) {
+            LOG_ERROR(DebuggerErrno::ERROR_DEPENDENCY_NOT_FIND, "Failed to load function " +
+                      std::string(iter.first) + " from libascendcl.so." + LastDlError());
+            dlclose(hLibAscendcl);
+            hLibAscendcl = nullptr;
+            return DebuggerErrno::ERROR_DEPENDENCY_NOT_FIND;
+        }
+        LOG_DEBUG("Load function " + std::string(iter.first) + " from libascendcl.so.");
+    }
+
+    /* Work around an adump bug: in MindSpore scenarios prefer the symbol from libmindspore_ascend.so. */
+    void* handler = dlopen(kLibMSAscendName, RTLD_LAZY);
+    std::string libName = kLibMSAscendName;
+    if (handler == nullptr) {
+        handler = hLibAscendcl;
+        libName = kLibAscendclName;
+    }
+
+    acldumpRegCallbackFunc = reinterpret_cast<acldumpRegCallbackFuncType>(dlsym(handler, "acldumpRegCallback"));
+    if (acldumpRegCallbackFunc == nullptr) {
+        LOG_ERROR(DebuggerErrno::ERROR_DEPENDENCY_NOT_FIND, "Failed to load function acldumpRegCallback from " +
+                  libName + ".");
+    } else {
+        /* Only report success when the symbol was actually found. */
+        LOG_DEBUG("Load function acldumpRegCallback from " + libName);
+    }
+
+    if (handler != hLibAscendcl) {
+        dlclose(handler);
+    }
+
+    return DebuggerErrno::OK;
+}
+
+/* Each ACLAPI_* wrapper forwards to the entry point resolved by LoadAclApi() and throws std::runtime_error if it is unresolved. */
+aclError ACLAPI_aclInit(const char* cfg)
+{
+    if (aclInitFunc == nullptr) {
+        throw std::runtime_error("API aclInit does not have a definition.");
+    }
+    return aclInitFunc(cfg);
+}
+
+aclError ACLAPI_aclmdlInitDump()
+{
+    if (aclmdlInitDumpFunc == nullptr) {
+        throw std::runtime_error("API aclmdlInitDump does not have a definition.");
+    }
+    return aclmdlInitDumpFunc();
+}
+
+aclError ACLAPI_aclmdlSetDump(const char* cfg)
+{
+    if (aclmdlSetDumpFunc == nullptr) {
+        throw std::runtime_error("API aclmdlSetDump does not have a definition.");
+    }
+    return aclmdlSetDumpFunc(cfg);
+}
+
+aclError ACLAPI_aclmdlFinalizeDump()
+{
+    if (aclmdlFinalizeDumpFunc == nullptr) {
+        throw std::runtime_error("API aclmdlFinalizeDump does not have a definition.");
+    }
+    return aclmdlFinalizeDumpFunc();
+}
+
+aclError ACLAPI_acldumpRegCallback(AclDumpCallbackFuncType messageCallback, int32_t flag)
+{
+    if (acldumpRegCallbackFunc == nullptr) {
+        throw std::runtime_error("API acldumpRegCallback does not have a definition.");
+    }
+    return acldumpRegCallbackFunc(messageCallback, flag);
+}
+
+aclError ACLAPI_aclrtSynchronizeDevice()
+{
+    if (aclrtSynchronizeDeviceFunc == nullptr) {
+        throw std::runtime_error("API aclrtSynchronizeDevice does not have a definition.");
+    }
+    return aclrtSynchronizeDeviceFunc();
+}
+
+}
+}
diff --git a/debug/accuracy_tools/msprobe/ccsrc/third_party/ACL/AclApi.hpp b/debug/accuracy_tools/msprobe/ccsrc/third_party/ACL/AclApi.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..731ae2e2caacaa345605ec572c8dcd6dba091488
--- /dev/null
+++ b/debug/accuracy_tools/msprobe/ccsrc/third_party/ACL/AclApi.hpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+
+#include "include/ErrorCode.hpp"
+
+extern "C" {
+
+typedef int aclError;
+constexpr int ACL_SUCCESS = 0;
+constexpr int ACL_ERROR_NONE = 0;
+constexpr int ACL_ERROR_REPEAT_INITIALIZE = 100002;
+
+#define ACL_DUMP_MAX_FILE_PATH_LENGTH 4096
+typedef struct acldumpChunk {
+    char fileName[ACL_DUMP_MAX_FILE_PATH_LENGTH];   // Name of the dump data file to be written to disk; ACL_DUMP_MAX_FILE_PATH_LENGTH is the maximum file name length, currently 4096
+    uint32_t bufLen;                                // Length of the data in dataBuf, in bytes
+    uint32_t isLastChunk;                           // Whether this is the last chunk of the dump data: 0 = not the last chunk, 1 = last chunk
+    int64_t offset;                                 // Offset into the dump data file content; -1 means append to the file
+    int32_t flag;                                   // Reserved dump data flag; the data currently carries no flag
+    uint8_t dataBuf[0];                             // Memory address of the dump data
+} acldumpChunk;
+
+}
+
+namespace MindStudioDebugger {
+namespace AscendCLApi {
+
+DebuggerErrno LoadAclApi();
+
+using AclDumpCallbackFuncType = int32_t (*)(const acldumpChunk*, int32_t);
+aclError ACLAPI_aclInit(const char* cfg);
+aclError ACLAPI_aclmdlInitDump();
+aclError ACLAPI_aclmdlSetDump(const char* cfg);
+aclError ACLAPI_aclmdlFinalizeDump();
+aclError ACLAPI_acldumpRegCallback(AclDumpCallbackFuncType messageCallback, int32_t flag);
+
+aclError ACLAPI_aclrtSynchronizeDevice();
+
+#define CALL_ACL_API(func, ...) MindStudioDebugger::AscendCLApi::ACLAPI_##func(__VA_ARGS__)
+
+}
+}
diff --git a/debug/accuracy_tools/msprobe/ccsrc/third_party/ACL/AclDumpMsg.proto b/debug/accuracy_tools/msprobe/ccsrc/third_party/ACL/AclDumpMsg.proto
new file mode 100644
index 0000000000000000000000000000000000000000..6ce5407bea3b6d10f4118a98170752b901d03ab8
--- /dev/null
+++ b/debug/accuracy_tools/msprobe/ccsrc/third_party/ACL/AclDumpMsg.proto
@@ -0,0 +1,143 @@
+syntax = "proto3";
+package toolkit.dumpdata;
+
+enum OutputDataType {
+  DT_UNDEFINED = 0;
+  DT_FLOAT = 1;
+  DT_FLOAT16 = 2;
+  DT_INT8 = 3;
+  DT_UINT8 = 4;
+  DT_INT16 = 5;
+  DT_UINT16 = 6;
+  DT_INT32 = 7;
+  DT_INT64 = 8;
+  DT_UINT32 = 9;
+  DT_UINT64 = 10;
+  DT_BOOL = 11;
+  DT_DOUBLE = 12;
+  DT_STRING = 13;
+  DT_DUAL_SUB_INT8 = 14;
+  DT_DUAL_SUB_UINT8 = 15;
+  DT_COMPLEX64 = 16;
+  DT_COMPLEX128 = 17;
+  DT_QINT8 = 18;
+  DT_QINT16 = 19;
+  DT_QINT32 = 20;
+  DT_QUINT8 = 21;
+  DT_QUINT16 = 22;
+  DT_RESOURCE = 23;
+  DT_STRING_REF = 24;
+  DT_DUAL = 25;
+  DT_VARIANT = 26;
+  DT_BF16 = 27;
+  DT_INT4 = 28;
+  DT_UINT1 = 29;
+  DT_INT2 = 30;
+  DT_UINT2 = 31;
+}
+
+enum OutputFormat {
+  FORMAT_NCHW = 0;
+  FORMAT_NHWC = 1;
+  FORMAT_ND = 2;
+  FORMAT_NC1HWC0 = 3;
+  FORMAT_FRACTAL_Z = 4;
+  FORMAT_NC1C0HWPAD = 5;
+  FORMAT_NHWC1C0 = 6;
+  FORMAT_FSR_NCHW = 7;
+  FORMAT_FRACTAL_DECONV = 8;
+  FORMAT_C1HWNC0 = 9;
+  FORMAT_FRACTAL_DECONV_TRANSPOSE = 10;
+  FORMAT_FRACTAL_DECONV_SP_STRIDE_TRANS = 11;
+  FORMAT_NC1HWC0_C04 = 12;
+  FORMAT_FRACTAL_Z_C04 = 13;
+  FORMAT_CHWN = 14;
+  FORMAT_FRACTAL_DECONV_SP_STRIDE8_TRANS = 15;
+  FORMAT_HWCN = 16;
+  FORMAT_NC1KHKWHWC0 = 17;
+  FORMAT_BN_WEIGHT = 18;
+  FORMAT_FILTER_HWCK = 19;
+  FORMAT_HASHTABLE_LOOKUP_LOOKUPS = 20;
+  FORMAT_HASHTABLE_LOOKUP_KEYS = 21;
+  FORMAT_HASHTABLE_LOOKUP_VALUE = 22;
+  FORMAT_HASHTABLE_LOOKUP_OUTPUT = 23;
+  FORMAT_HASHTABLE_LOOKUP_HITS = 24;
+  FORMAT_C1HWNCoC0 = 25;
+  FORMAT_MD = 26;
+  FORMAT_NDHWC = 27;
+  FORMAT_FRACTAL_ZZ = 28;
+  FORMAT_FRACTAL_NZ = 29;
+  FORMAT_NCDHW = 30;
+  FORMAT_DHWCN = 31; // 3D filter input tensor format
+  FORMAT_NDC1HWC0 = 32;
+  FORMAT_FRACTAL_Z_3D=33;
+  FORMAT_CN = 34;
+  FORMAT_NC = 35;
+  FORMAT_DHWNC = 36;
+  FORMAT_FRACTAL_Z_3D_TRANSPOSE = 37; // 3D filter(transpose) input tensor format
+  FORMAT_FRACTAL_ZN_LSTM = 38;
+  FORMAT_FRACTAL_Z_G = 39;
+  FORMAT_RESERVED = 40;
+  FORMAT_ALL = 41;
+  FORMAT_NULL = 42;
+  FORMAT_ND_RNN_BIAS = 43;
+  FORMAT_FRACTAL_ZN_RNN = 44;
+  FORMAT_YUV = 45;
+  FORMAT_YUV_A = 46;
+  FORMAT_NCL = 47;
+  FORMAT_FRACTAL_Z_WINO = 48;
+  FORMAT_C1HWC0 = 49;
+  // Add new formats definition here
+  FORMAT_MAX = 0xff;
+}
+
+message OriginalOp {
+  string name = 1;
+  uint32 output_index = 2;
+  OutputDataType data_type = 3;
+  OutputFormat format = 4;
+}
+
+message Shape {
+  repeated uint64 dim = 1;
+}
+
+message OpOutput {
+  OutputDataType data_type = 1;
+  OutputFormat format = 2;
+  Shape shape = 3;
+  OriginalOp original_op = 4; // the original op corresponding to the output
+  bytes data = 5;
+  uint64 size = 6;
+  Shape original_shape = 7;
+  int32 sub_format = 8;
+}
+
+message OpInput {
+  OutputDataType data_type = 1;
+  OutputFormat format = 2;
+  Shape shape = 3;
+  bytes data = 4;
+  uint64 size = 5;
+  Shape original_shape = 6;
+  int32 sub_format = 7;
+}
+
+enum BufferType {
+  L1 = 0;
+}
+
+message OpBuffer {
+  BufferType buffer_type = 1;
+  bytes data = 2;
+  uint64 size = 3;
+}
+
+message DumpData {
+  string version = 1;
+  uint64 dump_time = 2;
+  repeated OpOutput output = 3;
+  repeated OpInput input = 4;
+  repeated OpBuffer buffer = 5;
+  string op_name = 6;
+}
diff --git a/debug/accuracy_tools/msprobe/ccsrc/utils/CPythonUtils.cpp b/debug/accuracy_tools/msprobe/ccsrc/utils/CPythonUtils.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..fd944f62db4ff728d1aa2c5d1d5ff818bd5dcf62
--- /dev/null
+++ b/debug/accuracy_tools/msprobe/ccsrc/utils/CPythonUtils.cpp
@@ -0,0 +1,596 @@
+/*
+ * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstdint>
+#include <map>
+#include <string>
+
+#include "CPythonUtils.hpp"
+
+namespace MindStudioDebugger {
+namespace CPythonUtils {
+
+/* Registry of Python objects, keyed by registration name. */
+static std::map<std::string, PythonObject> PyObjMap = {};
+
+/* Registers obj under name. Returns 0, or -1 if the name is already in use. */
+int32_t RegisterPythonObject(const std::string& name, PythonObject obj)
+{
+    if (PyObjMap.find(name) != PyObjMap.end()) {
+        return -1;
+    }
+
+    PyObjMap[name] = obj;
+    return 0;
+}
+
+/* Removes the entry registered under name; unknown names are ignored. */
+void UnRegisterPythonObject(const std::string& name)
+{
+    auto it = PyObjMap.find(name);
+    if (it == PyObjMap.end()) {
+        return;
+    }
+
+    PyObjMap.erase(it);
+}
+
+/* Reports whether an object is registered under name. */
+bool IsPyObjRegistered(const std::string& name)
+{
+    return PyObjMap.find(name) != PyObjMap.end();
+}
+
+/* Returns the object registered under name, or a default-constructed PythonObject if there is none. */
+PythonObject GetRegisteredPyObj(const std::string& name)
+{
+    auto it = PyObjMap.find(name);
+    if (it == PyObjMap.end()) {
+        return PythonObject();
+    }
+    return it->second;
+}
+
+/* PythonObject::From overloads: build the wrapper that matches the C++ argument type. */
+PythonObject PythonObject::From(const PythonObject& input)
+{
+    return PythonObject(input);
+}
+
+PythonObject PythonObject::From(const int32_t& input)
+{
+    return PythonNumberObject::From(input);
+}
+
+PythonObject PythonObject::From(const uint32_t& input)
+{
+    return PythonNumberObject::From(input);
+}
+
+PythonObject PythonObject::From(const double& input)
+{
+    return PythonNumberObject::From(input);
+}
+
+PythonObject PythonObject::From(const std::string& input)
+{
+    return PythonStringObject::From(input);
+}
+
+PythonObject PythonObject::From(const char* input)
+{
+    return PythonStringObject::From(input);
+}
+
+PythonObject PythonObject::From(const bool& input)
+{
+    return PythonBoolObject::From(input);
+}
+
+/*
+ * PythonObject::To overloads: convert the wrapped object into a C++ value.
+ * Each returns 0 on success and -1 on failure. When a CPython conversion call
+ * fails, the exception it raised is left set.
+ */
+int32_t PythonObject::To(int32_t& output) const
+{
+    if (!PyLong_Check(ptr)) {
+        return -1;
+    }
+    const long value = PyLong_AsLong(ptr);
+    if (value == -1 && PyErr_Occurred()) {
+        /* The int does not fit into a C long. */
+        return -1;
+    }
+    output = static_cast<int32_t>(value);
+    return 0;
+}
+
+int32_t PythonObject::To(uint32_t& output) const
+{
+    if (!PyLong_Check(ptr)) {
+        return -1;
+    }
+    const unsigned long value = PyLong_AsUnsignedLong(ptr);
+    if (value == static_cast<unsigned long>(-1) && PyErr_Occurred()) {
+        /* Negative, or too large for a C unsigned long. */
+        return -1;
+    }
+    output = static_cast<uint32_t>(value);
+    return 0;
+}
+
+int32_t PythonObject::To(double& output) const
+{
+    if (!PyFloat_Check(ptr)) {
+        return -1;
+    }
+
+    output = PyFloat_AsDouble(ptr);
+    return 0;
+}
+
+/* Converts through str(obj), so it is not restricted to Python str objects. */
+int32_t PythonObject::To(std::string& output) const
+{
+    PyObject* strObj = PyObject_Str(ptr);
+    if (strObj == nullptr) {
+        return -1;
+    }
+    const char* s = PyUnicode_AsUTF8(strObj);
+    if (s == nullptr) {
+        Py_DECREF(strObj);
+        return -1;
+    }
+    output = std::string(s);
+    Py_DECREF(strObj);
+    return 0;
+}
+
+int32_t PythonObject::To(bool& output) const
+{
+    const int truth = PyObject_IsTrue(ptr);
+    if (truth < 0) {
+        /* Truth-testing raised; do not report that as true. */
+        return -1;
+    }
+    output = (truth != 0);
+    return 0;
+}
+
+/* Looks up attribute name. If the lookup fails, the error is cleared when ignore is true and left set otherwise. */
+PythonObject PythonObject::Get(const std::string& name, bool ignore) const
+{
+    PyObject* o = PyObject_GetAttrString(ptr, name.c_str());
+    if (o == nullptr && ignore) {
+        PyErr_Clear();
+    }
+    PythonObject ret(o);
+    Py_XDECREF(o);
+    return ret;
+}
+
+/*
+ * Call overloads: invoke the wrapped object with no arguments, with positional
+ * arguments, or with positional and keyword arguments. When ignore is true no
+ * Python exception is left set by a failed call.
+ */
+PythonObject PythonObject::Call(bool ignore)
+{
+    if (!PyCallable_Check(ptr)) {
+        if (!ignore) {
+            PyErr_SetString(PyExc_TypeError, "Object is not callable.");
+        }
+        return PythonObject();
+    }
+
+    PyObject* o = PyObject_CallObject(ptr, nullptr);
+    if (o == nullptr && ignore) {
+        PyErr_Clear();
+    }
+    PythonObject ret(o);
+    Py_XDECREF(o);
+    return ret;
+}
+
+PythonObject PythonObject::Call(PythonTupleObject& args, bool ignore)
+{
+    if (!PyCallable_Check(ptr)) {
+        if (!ignore) {
+            PyErr_SetString(PyExc_TypeError, "Object is not callable.");
+        }
+        return PythonObject();
+    }
+
+    PyObject* o = PyObject_CallObject(ptr, args.IsNone() ? nullptr : args);
+    if (o == nullptr && ignore) {
+        PyErr_Clear();
+    }
+    PythonObject ret(o);
+    Py_XDECREF(o);
+    return ret;
+}
+
+PythonObject PythonObject::Call(PythonTupleObject& args, PythonDictObject& kwargs, bool ignore)
+{
+    if (!PyCallable_Check(ptr)) {
+        if (!ignore) {
+            PyErr_SetString(PyExc_TypeError, "Object is not callable.");
+        }
+        return PythonObject();
+    }
+
+    if (args.IsNone() || kwargs.IsNone()) {
+        if (!ignore) {
+            PyErr_SetString(PyExc_TypeError, "Call python object with invalid parameters.");
+        }
+        return PythonObject();
+    }
+
+    PyObject* o = PyObject_Call(ptr, args, kwargs);
+    if (o == nullptr && ignore) {
+        PyErr_Clear();
+    }
+    PythonObject ret(o);
+    Py_XDECREF(o);
+    return ret;
+}
+
+/* Looks up name in the dictionary returned by PyEval_GetGlobals(). */
+PythonObject PythonObject::GetGlobal(const std::string& name, bool ignore)
+{
+    PyObject *globals = PyEval_GetGlobals();
+    if (globals == nullptr) {
+        if (ignore) {
+            PyErr_Clear();
+        }
+        return PythonObject();
+    }
+
+    return PythonObject(PyDict_GetItemString(globals, name.c_str()));
+}
+
+/* Imports module name; if the import fails, the error is cleared when ignore is true. */
+PythonObject PythonObject::Import(const std::string& name, bool ignore)
+{
+    PyObject* m = PyImport_ImportModule(name.c_str());
+    if (m == nullptr) {
+        if (ignore) {
+            PyErr_Clear();
+        }
+        return PythonObject();
+    }
+    PythonObject ret(m);
+    Py_XDECREF(m);
+    return ret;
+}
+
+/* Default value: the Python int 0. */
+PythonNumberObject::PythonNumberObject() : PythonObject()
+{
+    PyObject* o = PyLong_FromLong(0);
+    SetPtr(o);
+    Py_XDECREF(o);
+}
+
+/* Adopts o only if it is a Python int or float; otherwise keeps the PythonObject default. */
+PythonNumberObject::PythonNumberObject(PyObject* o) : PythonObject()
+{
+    if (!PyLong_Check(o) && !PyFloat_Check(o)) {
+        return;
+    }
+
+    SetPtr(o);
+}
+
+PythonNumberObject PythonNumberObject::From(const int32_t& input)
+{
+    PythonNumberObject ret;
+    PyObject* o = PyLong_FromLong(input);
+    if (o == nullptr) {
+        return ret;
+    }
+    ret.SetPtr(o);
+    Py_DECREF(o);
+    return ret;
+}
+
+PythonNumberObject PythonNumberObject::From(const uint32_t& input)
+{
+    PythonNumberObject ret;
+    PyObject* o = PyLong_FromUnsignedLong(input);
+    if (o == nullptr) {
+        return ret;
+    }
+    ret.SetPtr(o);
+    Py_DECREF(o);
+    return ret;
+}
+
+PythonNumberObject PythonNumberObject::From(const double& input)
+{
+    PythonNumberObject ret;
+    PyObject* o = PyFloat_FromDouble(input);
+    if (o == nullptr) {
+        return ret;
+    }
+    ret.SetPtr(o);
+    Py_DECREF(o);
+    return ret;
+}
+
+/* Default value: the empty Python str. */
+PythonStringObject::PythonStringObject() : PythonObject()
+{
+    PyObject* o = PyUnicode_FromString("");
+    SetPtr(o);
+    Py_XDECREF(o);
+}
+
+/* Adopts o only if it is a Python str; otherwise keeps the PythonObject default. */
+PythonStringObject::PythonStringObject(PyObject* o) : PythonObject()
+{
+    if (!PyUnicode_Check(o)) {
+        return;
+    }
+
+    SetPtr(o);
+}
+
+PythonStringObject PythonStringObject::From(const std::string& input)
+{
+    PythonStringObject ret;
+    PyObject* o = PyUnicode_FromString(input.c_str());
+    if (o == nullptr) {
+        return ret;
+    }
+    ret.SetPtr(o);
+    Py_DECREF(o);
+    return ret;
+}
+
+PythonStringObject PythonStringObject::From(const char* input)
+{
+    PythonStringObject ret;
+    PyObject* o = PyUnicode_FromString(input);
+    if (o == nullptr) {
+        return ret;
+    }
+    ret.SetPtr(o);
+    Py_DECREF(o);
+    return ret;
+}
+
+/* Default value: Py_False. */
+PythonBoolObject::PythonBoolObject() : PythonObject()
+{
+    SetPtr(Py_False);
+}
+
+/* Adopts o only if it is a Python bool; otherwise keeps the PythonObject default. */
+PythonBoolObject::PythonBoolObject(PyObject* o) : PythonObject()
+{
+    if (!PyBool_Check(o)) {
+        return;
+    }
+
+    SetPtr(o);
+}
+
+PythonBoolObject PythonBoolObject::From(const bool& input)
+{
+    PythonBoolObject ret;
+    PyObject* o = PyBool_FromLong(input);
+    if (o == nullptr) {
+        return ret;
+    }
+    ret.SetPtr(o);
+    Py_DECREF(o);
+    return ret;
+}
+
+/* Default value: a new empty list. */
+PythonListObject::PythonListObject() : PythonObject()
+{
+    PyObject* o = PyList_New(0);
+    SetPtr(o);
+    Py_XDECREF(o);
+}
+
+/* Creates a list with PyList_New(size); the items are not assigned here. */
+PythonListObject::PythonListObject(size_t size) : PythonObject()
+{
+    PyObject* o = PyList_New(size);
+    SetPtr(o);
+    Py_XDECREF(o);
+}
+
+/* Adopts o only if it is a Python list; otherwise keeps the PythonObject default. */
+PythonListObject::PythonListObject(PyObject* o) : PythonObject()
+{
+    if (!PyList_Check(o)) {
+        return;
+    }
+
+    SetPtr(o);
+}
+
+/* Number of items, or 0 if the wrapped object is not a list. */
+size_t PythonListObject::Size() const
+{
+    if (!PyList_Check(ptr)) {
+        return 0;
+    }
+
+    return PyList_GET_SIZE(ptr);
+}
+
+/* Returns the item at pos. A non-list or out-of-range pos sets TypeError / IndexError unless ignore is true. */
+PythonObject PythonListObject::GetItem(size_t pos, bool ignore)
+{
+    if (!PyList_Check(ptr)) {
+        if (!ignore) {
+            PyErr_SetString(PyExc_TypeError, "Expect a list.");
+        }
+        return PythonObject();
+    }
+    if (static_cast<size_t>(PyList_GET_SIZE(ptr)) <= pos) {
+        if (!ignore) {
+            PyErr_SetString(PyExc_IndexError, "list index outof range");
+        }
+        return PythonObject();
+    }
+
+    PyObject* o = PyList_GetItem(ptr, pos);
+    if (o == nullptr && ignore) {
+        PyErr_Clear();
+    }
+
+    return PythonObject(o);
+}
+
+/* Stores item at pos and returns *this. A non-list or out-of-range pos sets TypeError / IndexError unless ignore is true. */
+PythonListObject& PythonListObject::SetItem(size_t pos, PythonObject& item, bool ignore)
+{
+    if (!PyList_Check(ptr)) {
+        if (!ignore) {
+            PyErr_SetString(PyExc_TypeError, "Expect a list.");
+        }
+        return *this;
+    }
+
+    if (static_cast<size_t>(PyList_GET_SIZE(ptr)) <= pos) {
+        if (!ignore) {
+            PyErr_SetString(PyExc_IndexError, "list index outof range");
+        }
+        return *this;
+    }
+
+    if (PyList_SetItem(ptr, pos, item.NewRef()) != 0) {
+        if (ignore) {
+            PyErr_Clear();
+        }
+    }
+    return *this;
+}
+
+/* Inserts item at pos through PyList_Insert and returns *this. */
+PythonListObject& PythonListObject::Insert(int64_t pos, PythonObject& item, bool ignore)
+{
+    if (!PyList_Check(ptr)) {
+        if (!ignore) {
+            PyErr_SetString(PyExc_TypeError, "Expect a list.");
+        }
+        return *this;
+    }
+
+    if (PyList_Insert(ptr, pos, item) != 0) {
+        if (ignore) {
+            PyErr_Clear();
+        }
+    }
+
+    return *this;
+}
+
+/* Returns the list contents as a tuple, or a default PythonTupleObject if this is not a list. */
+PythonTupleObject PythonListObject::ToTuple(bool ignore)
+{
+    if (!PyList_Check(ptr)) {
+        return PythonTupleObject();
+    }
+
+    PyObject* o = PyList_AsTuple(ptr);
+    if (o == nullptr && ignore) {
+        PyErr_Clear();
+    }
+    PythonTupleObject ret(o);
+    Py_XDECREF(o);
+    return ret;
+}
+
+/* Default value: a new empty tuple. */
+PythonTupleObject::PythonTupleObject() : PythonObject()
+{
+    PyObject* o = PyTuple_New(0);
+    SetPtr(o);
+    Py_XDECREF(o);
+}
+
+/* Adopts o only if it is a Python tuple; otherwise keeps the PythonObject default. */
+PythonTupleObject::PythonTupleObject(PyObject* o) : PythonObject()
+{
+    if (!PyTuple_Check(o)) {
+        return;
+    }
+
+    SetPtr(o);
+}
+
+/* Number of items, or 0 if the wrapped object is not a tuple. */
+size_t PythonTupleObject::Size() const
+{
+    if (!PyTuple_Check(ptr)) {
+        return 0;
+    }
+
+    return PyTuple_GET_SIZE(ptr);
+}
+
+/* Returns the item at pos. A non-tuple or out-of-range pos sets TypeError / IndexError unless ignore is true. */
+PythonObject PythonTupleObject::GetItem(size_t pos, bool ignore)
+{
+    if (!PyTuple_Check(ptr)) {
+        if (!ignore) {
+            PyErr_SetString(PyExc_TypeError, "Expect a tuple.");
+        }
+        return PythonObject();
+    }
+    if (static_cast<size_t>(PyTuple_GET_SIZE(ptr)) <= pos) {
+        if (!ignore) {
+            PyErr_SetString(PyExc_IndexError, "tuple index outof range");
+        }
+        return PythonObject();
+    }
+
+    PyObject* o = PyTuple_GetItem(ptr, pos);
+    if (o == nullptr && ignore) {
+        PyErr_Clear();
+    }
+
+    return PythonObject(o);
+}
+
+/* Default value: a new empty dict. */
+PythonDictObject::PythonDictObject() : PythonObject()
+{
+    PyObject* o = PyDict_New();
+    SetPtr(o);
+    Py_XDECREF(o);
+}
+
+/* Adopts o only if it is a Python dict; otherwise keeps the PythonObject default. */
+PythonDictObject::PythonDictObject(PyObject* o) : PythonObject()
+{
+    if (!PyDict_Check(o)) {
+        return;
+    }
+
+    SetPtr(o);
+}
+
+}
+}
\ No newline at end of file
diff --git a/debug/accuracy_tools/msprobe/ccsrc/utils/CPythonUtils.hpp b/debug/accuracy_tools/msprobe/ccsrc/utils/CPythonUtils.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..40ebcb1dafd505fd7dfa3bda1c2c1609cb60297a
--- /dev/null
+++ b/debug/accuracy_tools/msprobe/ccsrc/utils/CPythonUtils.hpp
@@ -0,0 +1,436 @@
+/*
+ * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace MindStudioDebugger { +namespace CPythonUtils { + +/* + * 对常用python类型做了cpp对接封装,对应关系为: + * ------------------------------------------- + * | python | cpp wrapper | + * |-----------------------------------------| + * | object | PythonObject | + * | str | PythonStringObject | + * | int/float | PythonNumberObject | + * | bool | PythonBoolObject | + * | list | PythonListObject | + * | tuple | PythonTupleObject | + * | dict | PythonDictObject | + * ------------------------------------------- + * + * 创建对象的方式: + * 1、通过原生PyObject*类型创建,PythonObject生命周期内会持有原生对象的一个引用 + * 2、通过From方法从c++对象创建 + * 3、通过GetGlobal、Import等方法从解释器上下文获取 + * 4、通过GetRegisteredPyObj获取到上下文的python对象 + * 5、通过已有PythonObject对象的Get、GetItem等方法获取子对象 + * + * 对象转换: + * 1、对于转换成PyObject*、bool、string的场景,支持隐式转换 + * 2、对于非通用类型转换,调用To方法,返回0表示成功 + * 3、对于list、tuple、dict类型,若元素类型都一样,支持直接转为vector/map类型,否则无法直接转换 + * 4、对于To方法: + * python维度支持bool()的都可以转bool(即并非只有bool类型支持转换,下同) + * 支持str()的都可以转string + * 可迭代对象(且元素支持转换)都可以转vector + * + * 对象传递: + * 1、子类可以安全传递或拷贝给PythonObject对象 + * 2、PythonObject传给子类时,若类型匹配,可以安全转递,否则会转为None + * 3、PythonObject或子类传递给需要PyObject*类型的cpthon原生接口时: + * 若原生接口是接管参数型,需要传递NewRef() + * 若原生接口是临时引用型,需要确保对象生命周期覆盖被调用的函数(不要原地构造临时对象) + */ + +class PythonObject; +class PythonNumberObject; +class PythonStringObject; +class PythonBoolObject; +class PythonListObject; +class PythonTupleObject; +class PythonDictObject; + +/* python侧使用_msprobe_c.CPythonAgent,cpp侧使用以下函数,进行python<--->cpp代码交互 */ +int32_t RegisterPythonObject(const std::string& name, PythonObject obj); +void UnRegisterPythonObject(const std::string& name); +bool IsPyObjRegistered(const std::string& name); +PythonObject GetRegisteredPyObj(const std::string& name); + +class PythonObject { +public: + PythonObject() { + Py_INCREF(Py_None); + ptr = Py_None; + } + PythonObject(PyObject* o) : ptr(o) { + if (ptr == nullptr) { + ptr = Py_None; + } + Py_XINCREF(ptr); + } 
+ ~PythonObject() { + Py_XDECREF(ptr); + } + explicit PythonObject(const PythonObject &obj) : PythonObject(static_cast(obj)) {} + PythonObject& operator=(const PythonObject &obj) { + SetPtr(static_cast(obj)); + return *this; + } + + /* 获取全局对象 */ + static PythonObject GetGlobal(const std::string& name, bool ignore=true); + /* 获取模块对象;若其还未加载至缓存,则加载一遍 */ + static PythonObject Import(const std::string& name, bool ignore=true); + + /* From/To转换,统一放一份在基类,用于遍历迭代器等场景 */ + static PythonObject From(const PythonObject& input); + static PythonObject From(const int32_t& input); + static PythonObject From(const uint32_t& input); + static PythonObject From(const double& input); + static PythonObject From(const std::string& input); + static PythonObject From(const char* input); + static PythonObject From(const bool& input); + template + static PythonObject From(const std::vector& input); + template + static PythonObject From(const std::map& input); + int32_t To(int32_t& output) const; + int32_t To(uint32_t& output) const; + int32_t To(double& output) const; + int32_t To(std::string& output) const; + int32_t To(bool& output) const; + template + int32_t To(std::vector& output)const; + + bool IsNone() const {return ptr == Py_None;} + bool IsNumber() const {return PyLong_Check(ptr) || PyFloat_Check(ptr);} + bool IsString() const {return PyUnicode_Check(ptr);} + bool IsBool() const {return PyBool_Check(ptr);} + bool IsList() const {return PyList_Check(ptr);} + bool IsTuple() const {return PyTuple_Check(ptr);} + bool IsDict() const {return PyDict_Check(ptr);} + bool IsModule() const {return PyModule_Check(ptr);} + bool IsCallable() const {return PyCallable_Check(ptr);} + + /* 用于调用可调用对象,相当于python代码中的obj(),为了简单只实现了args+kwargs参数形式 */ + PythonObject Call(bool ignore=true); + PythonObject Call(PythonTupleObject& args, bool ignore=true); + PythonObject Call(PythonTupleObject& args, PythonDictObject& kwargs, bool ignore=true); + + /* 用于获取对象属性,相当于python代码中的obj.xx */ + PythonObject Get(const 
std::string& name, bool ignore=true) const; + PythonObject& NewRef() { + Py_XINCREF(ptr); + return *this; + } + std::string ToString() const { + std::string ret; + if (To(ret) == 0) { + return ret; + } + return std::string(); + } + + operator PyObject*() const {return ptr;} + operator bool() const {return static_cast(PyObject_IsTrue(ptr));} + operator std::string() const { + return ToString(); + } + PythonObject operator()(bool ignore=true) {return Call(ignore);} + PythonObject operator()(PythonTupleObject& args, bool ignore=true) {return Call(args, ignore);} + PythonObject operator()(PythonTupleObject& args, PythonDictObject& kwargs, bool ignore=true) { + return Call(args, kwargs, ignore); + } + +protected: + void SetPtr(PyObject* o) { + Py_XDECREF(ptr); + if (o == nullptr) { + o = Py_None; + } + Py_INCREF(o); + ptr = o; + } + + PyObject* ptr{nullptr}; + +private: + explicit PythonObject(PythonObject &&obj) = delete; + PythonObject& operator=(PythonObject &&obj) = delete; +}; + +class PythonNumberObject : public PythonObject { +public: + PythonNumberObject(); + PythonNumberObject(PyObject* o); + + static PythonNumberObject From(const int32_t& input); + static PythonNumberObject From(const uint32_t& input); + static PythonNumberObject From(const double& input); +}; + +class PythonStringObject : public PythonObject { +public: + PythonStringObject(); + PythonStringObject(PyObject* o); + + static PythonStringObject From(const std::string& input); + static PythonStringObject From(const char* input); +}; + +class PythonBoolObject : public PythonObject { +public: + PythonBoolObject(); + PythonBoolObject(PyObject* o); + + static PythonBoolObject From(const bool& input); +}; + +class PythonListObject : public PythonObject { +public: + PythonListObject(); + explicit PythonListObject(size_t size); + PythonListObject(PyObject* o); + + template + static PythonListObject From(const std::vector& input); + + size_t Size() const; + template + PythonListObject& Append(T value, bool 
ignore=true); + PythonObject GetItem(size_t pos, bool ignore=true); + PythonListObject& SetItem(size_t pos, PythonObject& item, bool ignore=true); + PythonListObject& Insert(int64_t pos, PythonObject& item, bool ignore=true); + PythonTupleObject ToTuple(bool ignore=true); +}; + +class PythonTupleObject : public PythonObject { +public: + PythonTupleObject(); + PythonTupleObject(PyObject* o); + + template + static PythonTupleObject From(const std::vector& input); + + size_t Size() const; + PythonObject GetItem(size_t pos, bool ignore=true); +}; + +class PythonDictObject : public PythonObject { +public: + PythonDictObject(); + PythonDictObject(PyObject* o); + + template + static PythonDictObject From(const std::map& input); + + template + PythonDictObject& Add(T1 key, T2 value, bool ignore=true); + template + PythonDictObject& Delete(T key, bool ignore=true); + template + PythonObject GetItem(T key, bool ignore=true); +}; + +/**************************************************************************************************/ +/**************************** 以下为模板函数的实现,调用者无需关注 ***********************************/ +/**************************************************************************************************/ +template +PythonObject PythonObject::From(const std::vector& input) +{ + return PythonListObject::From(input); +} + +template +PythonObject PythonObject::From(const std::map& input) +{ + return PythonDictObject::From(input); +} + +template +int32_t PythonObject::To(std::vector& output) const +{ + PyObject* item = nullptr; + PyObject* iter = PyObject_GetIter(ptr); + if (iter == nullptr) { + return -1; + } + + while ((item = PyIter_Next(iter)) != nullptr) { + T tmp; + if (PythonObject(item).To(tmp) != 0) { + goto error; + } + output.emplace_back(tmp); + Py_DECREF(item); + } + + Py_DECREF(iter); + return 0; +error: + Py_DECREF(item); + Py_DECREF(iter); + return -1; +} + +template +PythonListObject PythonListObject::From(const std::vector& input) +{ + PyObject* o = 
PyList_New(input.size()); + if (o == nullptr) { + return PythonListObject(); + } + + Py_ssize_t i = 0; + for (const T& ele : input) { + if (PyList_SetItem(o, i, PythonObject::From(ele).NewRef()) != 0) { + Py_DECREF(o); + return PythonListObject(); + } + i++; + } + + PythonListObject ret(o); + Py_DECREF(o); + return ret; +} + +template +PythonListObject& PythonListObject::Append(T value, bool ignore) +{ + if (!PyList_Check(ptr)) { + if (!ignore) { + PyErr_SetString(PyExc_TypeError, "Expect a list."); + } + return *this; + } + + PythonObject o = PythonObject::From(value); + PyList_Append(ptr, o); + return *this; +} + +template +PythonTupleObject PythonTupleObject::From(const std::vector& input) +{ + PyObject* o = PyTuple_New(input.size()); + if (o == nullptr) { + return PythonTupleObject(); + } + + Py_ssize_t i = 0; + + for (const T& ele : input) { + if (PyTuple_SetItem(o, i, PythonObject::From(ele).NewRef()) != 0) { + Py_DECREF(o); + return PythonTupleObject(); + } + i++; + } + + PythonTupleObject ret(o); + Py_DECREF(o); + return ret; +} + +template +PythonDictObject PythonDictObject::From(const std::map& input) +{ + PyObject* o = PyDict_New(); + if (o == nullptr) { + return PythonDictObject(); + } + for (const std::pair& pair : input) { + PythonObject key = PythonObject::From(pair.first); + PythonObject value = PythonObject::From(pair.second); + if (PyDict_SetItem(o, key.NewRef(), value.NewRef()) != 0) { + Py_DECREF(o); + return PythonDictObject(); + } + } + + PythonDictObject ret(o); + Py_DECREF(o); + return ret; +} + +template +PythonDictObject& PythonDictObject::Add(T1 key, T2 value, bool ignore) +{ + if (!PyDict_Check(ptr)) { + if (!ignore) { + PyErr_SetString(PyExc_TypeError, "Expect a dict."); + } + return *this; + } + + if (PyDict_SetItem(ptr, PythonObject::From(key).NewRef(), PythonObject::From(value).NewRef()) != 0) { + if (ignore) { + PyErr_Clear(); + } + } + return *this; +} + +template +PythonDictObject& PythonDictObject::Delete(T key, bool ignore) +{ + 
if (!PyDict_Check(ptr)) { + if (!ignore) { + PyErr_SetString(PyExc_TypeError, "Expect a dict."); + } + return *this; + } + + PythonObject o = PythonObject::From(key); + if (PyDict_DelItem(ptr, o) != 0) { + if (ignore) { + PyErr_Clear(); + } + } + return *this; +} + +template +PythonObject PythonDictObject::GetItem(T key, bool ignore) +{ + if (!PyDict_Check(ptr)) { + if (!ignore) { + PyErr_SetString(PyExc_TypeError, "Expect a dict."); + } + return *this; + } + + PythonObject o = PythonObject::From(key); + PyObject* item = PyDict_GetItem(ptr, o); + if (item == nullptr && ignore) { + PyErr_Clear(); + } + return PythonObject(item); +} + +} +} diff --git a/debug/accuracy_tools/msprobe/ccsrc/utils/DataUtils.cpp b/debug/accuracy_tools/msprobe/ccsrc/utils/DataUtils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c2d7df85294f7c96f0fe1a1b9458dfd2ad2e502c --- /dev/null +++ b/debug/accuracy_tools/msprobe/ccsrc/utils/DataUtils.cpp @@ -0,0 +1,213 @@ +/* + * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "DataUtils.hpp" + +namespace MindStudioDebugger { +namespace DataUtils { + +int64_t SizeToS64(size_t v) { + if (v > static_cast(INT64_MAX)) { + throw std::runtime_error("Value " + std::to_string(v) + "exceeds the maximum value of int64."); + } + return static_cast(v); +} + +std::string U64ToHexString(uint64_t v) { + std::stringstream ss; + ss << "0x" << std::hex << std::uppercase << v; + return std::move(ss.str()); +} + +BFloat16::BFloat16(float f32) +{ + if (std::isnan(f32)) { + value_ = BFloat16::nan_value; + } else { + union { + uint32_t U32; + float F32; + }; + F32 = f32; + uint32_t rounding_bias = ((U32 >> 16) & 1) + UINT32_C(0x7FFF); + value_ = static_cast((U32 + rounding_bias) >> 16); + } +} + +BFloat16::operator float() const +{ + float f32 = 0; + uint32_t tmp = value_; + tmp <<= 16; + std::memcpy(&f32, &tmp, sizeof(f32)); + return f32; +} + +const static std::unordered_map kTypeSizeMap = { + {DataType::DT_BOOL, 1}, + {DataType::DT_INT8, 1}, + {DataType::DT_UINT8, 1}, + {DataType::DT_INT16, 2}, + {DataType::DT_UINT16, 2}, + {DataType::DT_FLOAT16, 2}, + {DataType::DT_BF16, 2}, + {DataType::DT_INT32, 4}, + {DataType::DT_UINT32, 4}, + {DataType::DT_FLOAT, 4}, + {DataType::DT_INT64, 8}, + {DataType::DT_UINT64, 8}, + {DataType::DT_DOUBLE, 8}, + {DataType::DT_COMPLEX64, 8}, + {DataType::DT_COMPLEX128, 16}, +}; + +size_t SizeOfDType(DataType type) +{ + auto it = kTypeSizeMap.find(type); + if (it == kTypeSizeMap.end()) { + return 0; + } + return it->second; +} + +constexpr auto kOpDType_UNKNOWN = "UNKNOWN"; +const static std::unordered_map kDDTypeToStringMap = { + {DataType::DT_UNDEFINED, "UNDEFINED"}, + {DataType::DT_FLOAT, "FLOAT"}, + {DataType::DT_FLOAT16, "FLOAT16"}, + {DataType::DT_INT8, "INT8"}, + {DataType::DT_UINT8, "UINT8"}, + {DataType::DT_INT16, "INT16"}, + {DataType::DT_UINT16, "UINT16"}, + {DataType::DT_INT32, "INT32"}, + {DataType::DT_INT64, "INT64"}, + 
{DataType::DT_UINT32, "UINT32"}, + {DataType::DT_UINT64, "UINT64"}, + {DataType::DT_BOOL, "BOOL"}, + {DataType::DT_DOUBLE, "DOUBLE"}, + {DataType::DT_STRING, "STRING"}, + {DataType::DT_DUAL_SUB_INT8, "DUAL_SUB_INT8"}, + {DataType::DT_DUAL_SUB_UINT8, "DUAL_SUB_UINT8"}, + {DataType::DT_COMPLEX64, "COMPLEX64"}, + {DataType::DT_COMPLEX128, "COMPLEX128"}, + {DataType::DT_QINT8, "QINT8"}, + {DataType::DT_QINT16, "QINT16"}, + {DataType::DT_QINT32, "QINT32"}, + {DataType::DT_QUINT8, "QUINT8"}, + {DataType::DT_QUINT16, "QUINT16"}, + {DataType::DT_RESOURCE, "RESOURCE"}, + {DataType::DT_STRING_REF, "STRING_REF"}, + {DataType::DT_DUAL, "DUAL"}, + {DataType::DT_VARIANT, "VARIANT"}, + {DataType::DT_BF16, "BF16"}, + {DataType::DT_INT4, "INT4"}, + {DataType::DT_UINT1, "UINT1"}, + {DataType::DT_INT2, "INT2"}, + {DataType::DT_UINT2, "UINT2"}, +}; + +std::string GetDTypeString(DataType dtype) +{ + auto it = kDDTypeToStringMap.find(dtype); + if (it != kDDTypeToStringMap.end()) { + return it->second; + } + return kOpDType_UNKNOWN; +} + +constexpr auto kOpFormat_UNKNOWN = "UNKNOWN"; +const static std::unordered_map kFormatToStringMap = { + {TensorFormat::FORMAT_NCHW, "NCHW"}, + {TensorFormat::FORMAT_NHWC, "NHWC"}, + {TensorFormat::FORMAT_ND, "ND"}, + {TensorFormat::FORMAT_NC1HWC0, "NC1HWC0"}, + {TensorFormat::FORMAT_FRACTAL_Z, "FRACTAL_Z"}, + {TensorFormat::FORMAT_NC1C0HWPAD, "NC1C0HWPAD"}, + {TensorFormat::FORMAT_NHWC1C0, "NHWC1C0"}, + {TensorFormat::FORMAT_FSR_NCHW, "FSR_NCHW"}, + {TensorFormat::FORMAT_FRACTAL_DECONV, "FRACTAL_DECONV"}, + {TensorFormat::FORMAT_C1HWNC0, "C1HWNC0"}, + {TensorFormat::FORMAT_FRACTAL_DECONV_TRANSPOSE, "FRACTAL_DECONV_TRANSPOSE"}, + {TensorFormat::FORMAT_FRACTAL_DECONV_SP_STRIDE_TRANS, "FRACTAL_DECONV_SP_STRIDE_TRANS"}, + {TensorFormat::FORMAT_NC1HWC0_C04, "NC1HWC0_C04"}, + {TensorFormat::FORMAT_FRACTAL_Z_C04, "FRACTAL_Z_C04"}, + {TensorFormat::FORMAT_CHWN, "CHWN"}, + {TensorFormat::FORMAT_FRACTAL_DECONV_SP_STRIDE8_TRANS, "FRACTAL_DECONV_SP_STRIDE8_TRANS"}, 
+ {TensorFormat::FORMAT_HWCN, "HWCN"}, + {TensorFormat::FORMAT_NC1KHKWHWC0, "NC1KHKWHWC0"}, + {TensorFormat::FORMAT_BN_WEIGHT, "BN_WEIGHT"}, + {TensorFormat::FORMAT_FILTER_HWCK, "FILTER_HWCK"}, + {TensorFormat::FORMAT_HASHTABLE_LOOKUP_LOOKUPS, "HASHTABLE_LOOKUP_LOOKUPS"}, + {TensorFormat::FORMAT_HASHTABLE_LOOKUP_KEYS, "HASHTABLE_LOOKUP_KEYS"}, + {TensorFormat::FORMAT_HASHTABLE_LOOKUP_VALUE, "HASHTABLE_LOOKUP_VALUE"}, + {TensorFormat::FORMAT_HASHTABLE_LOOKUP_OUTPUT, "HASHTABLE_LOOKUP_OUTPUT"}, + {TensorFormat::FORMAT_HASHTABLE_LOOKUP_HITS, "HASHTABLE_LOOKUP_HITS"}, + {TensorFormat::FORMAT_C1HWNCoC0, "C1HWNCoC0"}, + {TensorFormat::FORMAT_MD, "MD"}, + {TensorFormat::FORMAT_NDHWC, "NDHWC"}, + {TensorFormat::FORMAT_FRACTAL_ZZ, "FRACTAL_ZZ"}, + {TensorFormat::FORMAT_FRACTAL_NZ, "FRACTAL_NZ"}, + {TensorFormat::FORMAT_NCDHW, "NCDHW"}, + {TensorFormat::FORMAT_DHWCN, "DHWCN"}, + {TensorFormat::FORMAT_NDC1HWC0, "NDC1HWC0"}, + {TensorFormat::FORMAT_FRACTAL_Z_3D, "FRACTAL_Z_3D"}, + {TensorFormat::FORMAT_CN, "CN"}, + {TensorFormat::FORMAT_NC, "NC"}, + {TensorFormat::FORMAT_DHWNC, "DHWNC"}, + {TensorFormat::FORMAT_FRACTAL_Z_3D_TRANSPOSE, "FRACTAL_Z_3D_TRANSPOSE"}, + {TensorFormat::FORMAT_FRACTAL_ZN_LSTM, "FRACTAL_ZN_LSTM"}, + {TensorFormat::FORMAT_FRACTAL_Z_G, "FRACTAL_Z_G"}, + {TensorFormat::FORMAT_RESERVED, "RESERVED"}, + {TensorFormat::FORMAT_ALL, "ALL"}, + {TensorFormat::FORMAT_NULL, "NULL"}, + {TensorFormat::FORMAT_ND_RNN_BIAS, "ND_RNN_BIAS"}, + {TensorFormat::FORMAT_FRACTAL_ZN_RNN, "FRACTAL_ZN_RNN"}, + {TensorFormat::FORMAT_YUV, "YUV"}, + {TensorFormat::FORMAT_YUV_A, "YUV_A"}, + {TensorFormat::FORMAT_NCL, "NCL"}, + {TensorFormat::FORMAT_FRACTAL_Z_WINO, "FRACTAL_Z_WINO"}, + {TensorFormat::FORMAT_C1HWC0, "C1HWC0"}, +}; + +std::string GetFormatString(TensorFormat fmt) +{ + auto it = kFormatToStringMap.find(fmt); + if (it != kFormatToStringMap.end()) { + return it->second; + } + return kOpFormat_UNKNOWN; +} + +std::string GetShapeString(const TensorShape& shape) +{ + 
std::ostringstream buffer; + buffer << "("; + for (size_t i = 0; i < shape.size(); i++) { + buffer << (i > 0 ? "," : "") << shape[i]; + } + buffer << ")"; + return buffer.str(); +} + +} +} \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/ccsrc/utils/DataUtils.hpp b/debug/accuracy_tools/msprobe/ccsrc/utils/DataUtils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f58e15a8c77719f62ddeef8ebbcd25a5b5ebf624 --- /dev/null +++ b/debug/accuracy_tools/msprobe/ccsrc/utils/DataUtils.hpp @@ -0,0 +1,169 @@ +/* + * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include + +namespace MindStudioDebugger { +namespace DataUtils { + +inline uint64_t UnpackUint64Value_Le(const void* data) +{ + return le64toh(*reinterpret_cast(data)); +} +inline uint64_t UnpackUint64Value_Be(const void* data) +{ + return be64toh(*reinterpret_cast(data)); +} + +int64_t SizeToS64(size_t v); +std::string U64ToHexString(uint64_t v); + +class BFloat16 { +public: + static constexpr uint16_t value_mask = 0x7fff; + static constexpr uint16_t inf_value = 0x7f80; + static constexpr uint16_t nan_value = 0x7fc0; + static constexpr uint16_t true_value = 0x3c00; + static constexpr uint32_t f32_inf_value = 0x7f800000; + + BFloat16() = default; + ~BFloat16() = default; + BFloat16(const BFloat16 &other) noexcept = default; + BFloat16(BFloat16 &&other) noexcept = default; + BFloat16 &operator=(const BFloat16 &other) noexcept = default; + BFloat16 &operator=(BFloat16 &&other) noexcept = default; + + explicit BFloat16(float f); + explicit operator float() const; + BFloat16 operator+(const BFloat16& other) const + { return BFloat16(static_cast(*this) + static_cast(other)); } + float operator+(const float other) const { return static_cast(*this) + other; } +private: + uint16_t value_; +}; + +inline float operator+(const float fp32, const BFloat16& bf16) +{ + return fp32 + static_cast(bf16); +} + +using ShapeBaseType = int64_t; +using TensorShape = std::vector; + +enum DataType : int { + DT_UNDEFINED = 0, + DT_FLOAT = 1, + DT_FLOAT16 = 2, + DT_INT8 = 3, + DT_UINT8 = 4, + DT_INT16 = 5, + DT_UINT16 = 6, + DT_INT32 = 7, + DT_INT64 = 8, + DT_UINT32 = 9, + DT_UINT64 = 10, + DT_BOOL = 11, + DT_DOUBLE = 12, + DT_STRING = 13, + DT_DUAL_SUB_INT8 = 14, + DT_DUAL_SUB_UINT8 = 15, + DT_COMPLEX64 = 16, + DT_COMPLEX128 = 17, + DT_QINT8 = 18, + DT_QINT16 = 19, + DT_QINT32 = 20, + DT_QUINT8 = 21, + DT_QUINT16 = 22, + DT_RESOURCE = 23, + DT_STRING_REF = 24, + DT_DUAL = 25, + DT_VARIANT = 26, + DT_BF16 = 27, + DT_INT4 = 28, + DT_UINT1 
= 29, + DT_INT2 = 30, + DT_UINT2 = 31, + /* Add before this line */ + DT_MAX +}; + +enum TensorFormat : int { + FORMAT_NCHW = 0, + FORMAT_NHWC = 1, + FORMAT_ND = 2, + FORMAT_NC1HWC0 = 3, + FORMAT_FRACTAL_Z = 4, + FORMAT_NC1C0HWPAD = 5, + FORMAT_NHWC1C0 = 6, + FORMAT_FSR_NCHW = 7, + FORMAT_FRACTAL_DECONV = 8, + FORMAT_C1HWNC0 = 9, + FORMAT_FRACTAL_DECONV_TRANSPOSE = 10, + FORMAT_FRACTAL_DECONV_SP_STRIDE_TRANS = 11, + FORMAT_NC1HWC0_C04 = 12, + FORMAT_FRACTAL_Z_C04 = 13, + FORMAT_CHWN = 14, + FORMAT_FRACTAL_DECONV_SP_STRIDE8_TRANS = 15, + FORMAT_HWCN = 16, + FORMAT_NC1KHKWHWC0 = 17, + FORMAT_BN_WEIGHT = 18, + FORMAT_FILTER_HWCK = 19, + FORMAT_HASHTABLE_LOOKUP_LOOKUPS = 20, + FORMAT_HASHTABLE_LOOKUP_KEYS = 21, + FORMAT_HASHTABLE_LOOKUP_VALUE = 22, + FORMAT_HASHTABLE_LOOKUP_OUTPUT = 23, + FORMAT_HASHTABLE_LOOKUP_HITS = 24, + FORMAT_C1HWNCoC0 = 25, + FORMAT_MD = 26, + FORMAT_NDHWC = 27, + FORMAT_FRACTAL_ZZ = 28, + FORMAT_FRACTAL_NZ = 29, + FORMAT_NCDHW = 30, + FORMAT_DHWCN = 31, + FORMAT_NDC1HWC0 = 32, + FORMAT_FRACTAL_Z_3D = 33, + FORMAT_CN = 34, + FORMAT_NC = 35, + FORMAT_DHWNC = 36, + FORMAT_FRACTAL_Z_3D_TRANSPOSE = 37, + FORMAT_FRACTAL_ZN_LSTM = 38, + FORMAT_FRACTAL_Z_G = 39, + FORMAT_RESERVED = 40, + FORMAT_ALL = 41, + FORMAT_NULL = 42, + FORMAT_ND_RNN_BIAS = 43, + FORMAT_FRACTAL_ZN_RNN = 44, + FORMAT_YUV = 45, + FORMAT_YUV_A = 46, + FORMAT_NCL = 47, + FORMAT_FRACTAL_Z_WINO = 48, + FORMAT_C1HWC0 = 49, + /* Add before this line */ + FORMAT_MAX +}; + +size_t SizeOfDType(DataType type); +std::string GetDTypeString(DataType dtype); +std::string GetFormatString(TensorFormat fmt); +std::string GetShapeString(const TensorShape& shape); + +} +} \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/ccsrc/utils/FileOperation.cpp b/debug/accuracy_tools/msprobe/ccsrc/utils/FileOperation.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b21f772d22492970c2a8b3c3cfcdc3ad000e4447 --- /dev/null +++ 
b/debug/accuracy_tools/msprobe/ccsrc/utils/FileOperation.cpp @@ -0,0 +1,180 @@ +/* + * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include "FileUtils.hpp" +#include "DataUtils.hpp" +#include "FileOperation.hpp" + +namespace MindStudioDebugger { +namespace FileOperation { + +using namespace MindStudioDebugger; +using DataType = DataUtils::DataType; +using NpyVersion = std::pair; + +struct NpyDtypeDescr { + char byteorder; + char type; + size_t length; + + std::string str() const { + std::ostringstream buffer; + buffer << "\'" << byteorder << type << length << "\'"; + return buffer.str(); + } +}; + +// npy file header start information +constexpr char kNpyMagicPrefix[] = "\x93NUMPY"; +constexpr size_t kNpyMagicLen = sizeof(kNpyMagicPrefix) - 1; +constexpr size_t kNpyArrayAlign = 64; +static const std::unordered_map npyTypeDescMap = { + {DataType::DT_BOOL, NpyDtypeDescr{'|', 'b', 1}}, {DataType::DT_INT8, NpyDtypeDescr{'|', 'i', 1}}, + {DataType::DT_INT16, NpyDtypeDescr{'<', 'i', 2}}, {DataType::DT_INT32, NpyDtypeDescr{'<', 'i', 4}}, + {DataType::DT_INT64, NpyDtypeDescr{'<', 'i', 8}}, {DataType::DT_UINT8, NpyDtypeDescr{'|', 'u', 1}}, + {DataType::DT_UINT16, NpyDtypeDescr{'<', 'u', 2}}, {DataType::DT_UINT32, NpyDtypeDescr{'<', 'u', 4}}, + {DataType::DT_UINT64, NpyDtypeDescr{'<', 'u', 8}}, {DataType::DT_FLOAT16, NpyDtypeDescr{'<', 'f', 2}}, + 
{DataType::DT_FLOAT, NpyDtypeDescr{'<', 'f', 4}}, {DataType::DT_DOUBLE, NpyDtypeDescr{'<', 'f', 8}}, + {DataType::DT_BF16, NpyDtypeDescr{'<', 'T', 2}}, {DataType::DT_COMPLEX128, NpyDtypeDescr{'<', 'c', 16}}, + {DataType::DT_COMPLEX64, NpyDtypeDescr{'<', 'c', 8}}, +}; + +DebuggerErrno DumpJson(const std::string &path, const nlohmann::json& content) +{ + DebuggerErrno ret; + std::ofstream ofs; + + ret = FileUtils::OpenFile(path, ofs); + if (ret != DebuggerErrno::OK) { + return ret; + } + + try { + ofs << content.dump(); + } catch (std::exception &e) { + ret = DebuggerErrno::ERROR_FAILED_TO_WRITE_FILE; + } + + if (ofs.fail()) { + ret = DebuggerErrno::ERROR_FAILED_TO_WRITE_FILE; + } + + ofs.close(); + return ret; +} + +inline static std::string NpyTransShapeToStr(const DataUtils::TensorShape &shape) +{ + std::ostringstream buffer; + buffer << "("; + for (const auto i : shape) { + buffer << std::to_string(i) << ","; + } + buffer << ")"; + return buffer.str(); +} + +inline static std::vector NpyLen2Bytes(size_t length, size_t lengthLen) { + std::vector buff; + lengthLen = std::min(lengthLen, static_cast(sizeof(length))); + for (size_t i = 0; i < lengthLen; i++) { + buff.emplace_back(length & 0xff); + length >>= CHAR_BIT; + } + return buff; +} + +static std::string GenerateNpyHeader(const DataUtils::TensorShape &shape, DataUtils::DataType dt, bool fortranOrder=false) +{ + auto typeDesc = npyTypeDescMap.find(dt); + if (typeDesc == npyTypeDescMap.end()) { + return std::string(); + } + + std::ostringstream buffer; + std::string fortranOrderStr = fortranOrder ? 
"True" : "False" ; + + buffer << "{"; + buffer << "'descr': " << typeDesc->second.str() << ", "; + buffer << "'fortran_order': " << fortranOrderStr << ", "; + buffer << "'shape': " << NpyTransShapeToStr(shape) << ", "; + buffer << "}"; + + std::string headerStr = buffer.str(); + NpyVersion version{1, 0}; + const size_t headerLen = headerStr.length(); + constexpr const size_t versionLen = 2; + constexpr const size_t maxLen = 65535; + constexpr const size_t lengthLenV1 = 2; + constexpr const size_t lengthLenV2 = 4; + size_t lengthLen = lengthLenV1; + + size_t totalLen = kNpyMagicLen + versionLen + lengthLen + headerLen + 1; + if (totalLen > maxLen) { + version = {2, 0}; + lengthLen = lengthLenV2; + totalLen = kNpyMagicLen + versionLen + lengthLen + headerLen + 1; + } + + const size_t padLen = kNpyArrayAlign - totalLen % kNpyArrayAlign; + const size_t paddingHeaderLen = headerLen + padLen + 1; + const std::string padding(padLen, ' '); + std::vector lengthBytes = NpyLen2Bytes(paddingHeaderLen, lengthLen); + std::ostringstream out; + out.write(kNpyMagicPrefix, DataUtils::SizeToS64(kNpyMagicLen)); + out.put(version.first); + out.put(version.second); + out.write(lengthBytes.data(), DataUtils::SizeToS64(lengthBytes.size())); + out << headerStr << padding << "\n"; + return out.str(); +} + +bool IsDtypeSupportByNpy(DataUtils::DataType dt) +{ + return npyTypeDescMap.find(dt) != npyTypeDescMap.end(); +} + +DebuggerErrno DumpNpy(const std::string &path, const uint8_t* data, size_t len, DataUtils::DataType dt, + const DataUtils::TensorShape& shape) +{ + DebuggerErrno ret; + std::string header = GenerateNpyHeader(shape, dt); + if (header.empty()) { + return DebuggerErrno::ERROR_INVALID_FORMAT; + } + + std::ofstream fd; + ret = FileUtils::OpenFile(path, fd, std::ios::out | std::ios::binary); + if (ret != DebuggerErrno::OK) { + return ret; + } + + fd << header; + fd.write(reinterpret_cast(data), len); + if (fd.fail()) { + ret = DebuggerErrno::ERROR_OPERATION_FAILED; + } + 
fd.close(); + + return ret; +} + +} +} \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/ccsrc/utils/FileOperation.hpp b/debug/accuracy_tools/msprobe/ccsrc/utils/FileOperation.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3f89263ae3621d33f5bbc8a67e86887d8063067e --- /dev/null +++ b/debug/accuracy_tools/msprobe/ccsrc/utils/FileOperation.hpp @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +#include "include/ErrorCode.hpp" +#include "DataUtils.hpp" + +namespace MindStudioDebugger { + +constexpr const char* JSON_SUFFIX = "json"; +constexpr const char* NPY_SUFFIX = "npy"; +constexpr const char* CSV_SUFFIX = "csv"; + +namespace FileOperation { + +DebuggerErrno DumpJson(const std::string &path, const nlohmann::json& content); +bool IsDtypeSupportByNpy(DataUtils::DataType dt); +DebuggerErrno DumpNpy(const std::string &path, const uint8_t* data, size_t len, DataUtils::DataType dt, + const DataUtils::TensorShape& shape); + +} +} \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/ccsrc/utils/FileUtils.cpp b/debug/accuracy_tools/msprobe/ccsrc/utils/FileUtils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5faba020a786d323cfecd01c26938729e47e0981 --- /dev/null +++ b/debug/accuracy_tools/msprobe/ccsrc/utils/FileUtils.cpp @@ -0,0 +1,657 @@ +/* + * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "include/ErrorCode.hpp" +#include "FileUtils.hpp" + +/* 部分环境上c++版本比较老,这里不用filesystem库实现 */ + +namespace MindStudioDebugger { +namespace FileUtils { + +using namespace MindStudioDebugger; + +/********************* 基础检查函数库,不做过多校验,路径有效性由调用者保证 ******************/ +bool IsPathExist(const std::string& path) { + struct stat buffer; + return (stat(path.c_str(), &buffer) == 0); +} + +static std::string GetFullPath(const std::string &originPath) +{ + if (originPath.empty()) { + return ""; + } + if (originPath[0] == '/') { + return originPath; + } + + std::string cwd; + char cwdBuf[PATH_MAX]; + + if (getcwd(cwdBuf, PATH_MAX) == nullptr) { + return ""; + } + + cwd = cwdBuf; + std::string fullPath = std::move(cwd + pathSeparator + originPath); + + return fullPath; +} + +std::vector SplitPath(const std::string &path) +{ + std::vector tokens; + size_t len = path.length(); + size_t start = 0; + + while (start < len) { + size_t end = path.find(pathSeparator, start); + if (end == std::string::npos) { + end = len; + } + if (start != end) { + tokens.push_back(path.substr(start, end - start)); + } + start = end + 1; + } + return tokens; +} + +std::string GetAbsPath(const std::string &originPath) { + std::string fullPath = GetFullPath(originPath); + if (fullPath.empty()) { + return ""; + } + + std::vector tokens = SplitPath(fullPath); + std::vector tokensRefined; + + for (std::string& token : tokens) { + if (token.empty() || token == ".") { + continue; + } else if (token == "..") { + if (tokensRefined.empty()) { + return ""; + } + tokensRefined.pop_back(); + } else { + tokensRefined.emplace_back(token); + } + } + + if (tokensRefined.empty()) { + return "/"; + } + + std::string resolvedPath(""); + for (std::string& token : tokensRefined) { + resolvedPath.append("/").append(token); + } + + return resolvedPath; +} + +bool IsDir(const std::string& path) { + struct 
stat buffer;
+    if (stat(path.c_str(), &buffer) == 0) {
+        /* S_ISDIR compares the whole file-type field. Testing the S_IFDIR bit alone also
+         * matches block devices and sockets on Linux, whose type codes contain that bit. */
+        return S_ISDIR(buffer.st_mode);
+    }
+    return false;
+}
+
+/* Returns true when `path` (symbolic links followed by stat()) is a regular file. */
+bool IsRegularFile(const std::string& path) {
+    struct stat path_stat;
+    if (stat(path.c_str(), &path_stat) == 0) {
+        return S_ISREG(path_stat.st_mode);
+    }
+    return false;
+}
+
+/* Returns true when `path` itself is a symbolic link; lstat() does not follow the link. */
+bool IsFileSymbolLink(const std::string& path) {
+    struct stat buffer;
+    return lstat(path.c_str(), &buffer) == 0 && S_ISLNK(buffer.st_mode);
+}
+
+/* Returns true when the whole of `path` matches FILE_VALID_PATTERN. */
+bool IsPathCharactersValid(const std::string& path)
+{
+    /* Compile the pattern once instead of on every call. */
+    static const std::regex validPattern(FILE_VALID_PATTERN);
+    return std::regex_match(path, validPattern);
+}
+
+/* Returns true when access() grants the calling process read permission on `path`. */
+bool IsFileReadable(const std::string& path)
+{
+    return access(path.c_str(), R_OK) == 0;
+}
+
+/* Returns true when access() grants the calling process write permission on `path`. */
+bool IsFileWritable(const std::string& path)
+{
+    return access(path.c_str(), W_OK) == 0;
+}
+
+/* Returns true when access() grants both read and execute permission on `path`. */
+bool IsFileExecutable(const std::string& path)
+{
+    return (access(path.c_str(), R_OK) == 0) && (access(path.c_str(), X_OK) == 0);
+}
+
+/* Returns true when access() grants both read and execute (search) permission on `path`. */
+bool IsDirReadable(const std::string& path)
+{
+    return (access(path.c_str(), R_OK) == 0) && (access(path.c_str(), X_OK) == 0);
+}
+
+/* Returns the part of `path` before its last '/'.
+ * Returns "." when `path` contains no '/', and "/" when the last '/' is the leading one
+ * (e.g. "/foo"), so that entries directly under the root get an existing parent. */
+std::string GetParentDir(const std::string& path)
+{
+    size_t found = path.find_last_of('/');
+    if (found == std::string::npos) {
+        return ".";
+    }
+    if (found == 0) {
+        /* substr(0, 0) would yield "", which is not a usable directory name. */
+        return "/";
+    }
+    return path.substr(0, found);
+}
+
+/* Returns the part of `path` after its last '/', or `path` itself when it contains no '/'. */
+std::string GetFileName(const std::string& path)
+{
+    size_t found = path.find_last_of('/');
+    if (found != std::string::npos) {
+        return path.substr(found + 1);
+    }
+    return path;
+}
+
+/* Returns the file name of `path` with everything from its last '.' removed. */
+std::string GetFileBaseName(const std::string& path)
+{
+    std::string fileName = GetFileName(path);
+    size_t dotPos = fileName.find_last_of('.');
+    if (dotPos != std::string::npos) {
+        return fileName.substr(0, dotPos);
+    }
+    return fileName;
+}
+
+/* Returns the text after the last '.' of the file name, or "" when there is no '.'
+ * or nothing follows it. */
+std::string GetFileSuffix(const std::string& path)
+{
+    std::string fileName = GetFileName(path);
+    size_t dotPos = fileName.find_last_of('.');
+    if (dotPos != std::string::npos && dotPos + 1 < fileName.size()) {
+        return fileName.substr(dotPos + 1);
+    }
+    return "";
+}
+
+bool 
CheckFileRWX(const std::string& path, const std::string& permissions) +{ + if (permissions.find('r') != std::string::npos && !IsFileReadable(path)) { + return false; + } + if (permissions.find('w') != std::string::npos && !IsFileWritable(path)) { + return false; + } + if (permissions.find('x') != std::string::npos && !IsFileExecutable(path)) { + return false; + } + return true; +} + +bool IsPathLengthLegal(const std::string& path) +{ + if (path.length() > FULL_PATH_LENGTH_MAX || path.length() == 0) { + return false; + } + + std::vector tokens = SplitPath(path); + for (auto token : tokens) { + if (token.length() > FILE_NAME_LENGTH_MAX) { + return false; + } + } + + return true; +} + +bool IsPathDepthValid(const std::string& path) +{ + return std::count(path.begin(), path.end(), pathSeparator) <= PATH_DEPTH_MAX; +} + +bool IsFileOwner(const std::string& path) +{ + struct stat file_stat; + if (stat(path.c_str(), &file_stat) == 0) { + if (file_stat.st_uid == getuid()) { + return true; + } + } + return false; +} + +/****************** 文件操作函数库,会对入参做基本检查 ************************/ +DebuggerErrno DeleteFile(const std::string &path) { + if (!IsPathExist(path)) { + return DebuggerErrno::OK; + } + if (IsFileSymbolLink(path)) { + return DebuggerErrno::ERROR_NOT_ALLOW_SOFTLINK; + } + + if (remove(path.c_str()) == 0) { + return DebuggerErrno::OK; + } else { + return DebuggerErrno::ERROR_SYSCALL_FAILED; + } +} + +static DebuggerErrno DeleteDirRec(const std::string &path, uint32_t depth) +{ + if (depth > PATH_DEPTH_MAX) { + return DebuggerErrno::ERROR_PATH_TOO_DEEP; + } + + DebuggerErrno ret; + DIR* dir = opendir(path.c_str()); + if (dir == nullptr) { + return DebuggerErrno::ERROR_SYSCALL_FAILED; + } + + struct dirent* entry; + while ((entry = readdir(dir)) != nullptr) { + if (strcmp(entry->d_name, ".") == 0 || (strcmp(entry->d_name, "..") == 0)) { + continue; + } + std::string entryPath = path + "/" + entry->d_name; + if (entry->d_type == DT_DIR) { + ret = DeleteDirRec(entryPath, 
depth + 1); + if (ret != DebuggerErrno::OK) { + closedir(dir); + return ret; + } + } else if (entry->d_type == DT_REG || entry->d_type == DT_LNK) { + if (remove(entryPath.c_str()) != 0) { + closedir(dir); + return DebuggerErrno::ERROR_SYSCALL_FAILED; + } + } else { + closedir(dir); + return DebuggerErrno::ERROR_ILLEGAL_FILE_TYPE; + } + + } + + closedir(dir); + if (rmdir(path.c_str()) != 0) { + if (errno == EACCES || errno == EROFS) { + return DebuggerErrno::ERROR_PERMISSION_DENINED; + } else { + return DebuggerErrno::ERROR_SYSCALL_FAILED; + } + } + + return DebuggerErrno::OK; +} + +DebuggerErrno DeleteDir(const std::string &path, bool recursion) { + if (!IsPathExist(path)) { + return DebuggerErrno::OK; + } + if (IsFileSymbolLink(path)) { + return DebuggerErrno::ERROR_NOT_ALLOW_SOFTLINK; + } + + if (recursion) { + return DeleteDirRec(path, 0); + } + + if (rmdir(path.c_str()) != 0) { + return DebuggerErrno::ERROR_SYSCALL_FAILED; + } + + return DebuggerErrno::OK; +} + +static DebuggerErrno CreateDirAux(const std::string& path, bool recursion, mode_t mode) { + std::string parent = GetParentDir(path); + DebuggerErrno ret; + + if (!IsPathExist(parent)) { + if (!recursion) { + return DebuggerErrno::ERROR_DIR_NOT_EXISTS; + } + /* 递归创建父目录,由于前面已经判断过目录深度,此处递归是安全的 */ + ret = CreateDirAux(parent, recursion, mode); + if (ret != DebuggerErrno::OK) { + return ret; + } + } + + if (mkdir(path.c_str(), mode) != 0) { + if (errno == EACCES || errno == EROFS) { + return DebuggerErrno::ERROR_PERMISSION_DENINED; + } else { + return DebuggerErrno::ERROR_SYSCALL_FAILED; + } + } + return DebuggerErrno::OK; +} + +DebuggerErrno CreateDir(const std::string &path, bool recursion, mode_t mode) +{ + if (IsPathExist(path)) { + return DebuggerErrno::OK; + } + + std::string realPath = GetAbsPath(path); + if (realPath.empty()) { + return DebuggerErrno::ERROR_CANNOT_PARSE_PATH; + } + if (!IsPathLengthLegal(realPath)) { + return DebuggerErrno::ERROR_PATH_TOO_LOOG; + } + if 
(!IsPathCharactersValid(realPath)) { + return DebuggerErrno::ERROR_PATH_CONTAINS_INVALID_CHAR; + } + if (!IsPathDepthValid(realPath)) { + return DebuggerErrno::ERROR_PATH_TOO_DEEP; + } + + return CreateDirAux(realPath, recursion, mode); +} + +DebuggerErrno Chmod(const std::string& path, const mode_t& mode) +{ + if (!IsPathExist(path)) { + return DebuggerErrno::ERROR_FILE_NOT_EXISTS; + } + if (IsFileSymbolLink(path)) { + return DebuggerErrno::ERROR_NOT_ALLOW_SOFTLINK; + } + + std::string absPath = GetAbsPath(path); + if (absPath.empty()) { + return DebuggerErrno::ERROR_CANNOT_PARSE_PATH; + } + return chmod(absPath.c_str(), mode) == 0 ? DebuggerErrno::OK : DebuggerErrno::ERROR_SYSCALL_FAILED; +} + +DebuggerErrno GetFileSize(const std::string &path, size_t& size) { + struct stat path_stat; + if (stat(path.c_str(), &path_stat) != 0) { + return DebuggerErrno::ERROR_FILE_NOT_EXISTS; + } + if (!S_ISREG(path_stat.st_mode)) { + return DebuggerErrno::ERROR_ILLEGAL_FILE_TYPE; + } + + size = static_cast(path_stat.st_size); + return DebuggerErrno::OK; +} + +DebuggerErrno OpenFile(const std::string& path, std::ifstream& ifs, std::ios::openmode mode) +{ + std::string realPath = GetAbsPath(path); + DebuggerErrno ret = CheckFileBeforeRead(realPath); + if (ret != DebuggerErrno::OK) { + return ret; + } + + std::ifstream tmpifs(realPath, mode); + if (!tmpifs.is_open()) { + return DebuggerErrno::ERROR_FAILED_TO_OPEN_FILE; + } + + ifs = std::move(tmpifs); + return DebuggerErrno::OK; +} + +DebuggerErrno OpenFile(const std::string& path, std::ofstream& ofs, std::ios::openmode mode, mode_t permission) +{ + DebuggerErrno ret; + std::string realPath = GetAbsPath(path); + if (realPath.empty()) { + return DebuggerErrno::ERROR_CANNOT_PARSE_PATH; + } + + std::string parent = GetParentDir(realPath); + ret = CheckFileBeforeCreateOrWrite(realPath, true); + if (ret != DebuggerErrno::OK) { + return ret; + } + + if (!IsPathExist(parent)) { + ret = CreateDir(parent, true); + if (ret != 
DebuggerErrno::OK) {
+            return ret;
+        }
+    }
+
+    /* Create the file up front so that it receives `permission`. Use the normalized path that
+     * was checked above and that the stream below opens, so every step acts on the same file. */
+    if (!IsPathExist(realPath)) {
+        int fd = open(realPath.c_str(), O_CREAT | O_WRONLY, permission);
+        if (fd < 0) {
+            return DebuggerErrno::ERROR_FAILED_TO_OPEN_FILE;
+        }
+        close(fd);
+    }
+
+    std::ofstream tmpofs(realPath, mode);
+    if (!tmpofs.is_open()) {
+        return DebuggerErrno::ERROR_FAILED_TO_OPEN_FILE;
+    }
+
+    ofs = std::move(tmpofs);
+    return DebuggerErrno::OK;
+}
+
+/******************************* Common check functions **********************************/
+/* Checks that the file at `path` carries the suffix registered for `type` and does not exceed
+ * the size limit registered for that type. FileType::COMMON skips the suffix check and only
+ * applies MAX_FILE_SIZE_DEFAULT. Errors from GetFileSize() are returned unchanged. */
+DebuggerErrno CheckFileSuffixAndSize(const std::string &path, FileType type)
+{
+    /* Expected suffix (without the dot) and maximum size in bytes for each file type. */
+    static const std::map<FileType, std::pair<std::string, size_t>> FileTypeCheckTbl = {
+        {FileType::PKL, {"pkl", MAX_PKL_SIZE}},
+        {FileType::NUMPY, {"npy", MAX_NUMPY_SIZE}},
+        {FileType::JSON, {"json", MAX_JSON_SIZE}},
+        {FileType::PT, {"pt", MAX_PT_SIZE}},
+        {FileType::CSV, {"csv", MAX_CSV_SIZE}},
+        {FileType::YAML, {"yaml", MAX_YAML_SIZE}},
+    };
+
+    size_t size = 0;
+    DebuggerErrno ret = GetFileSize(path, size);
+    if (ret != DebuggerErrno::OK) {
+        return ret;
+    }
+
+    if (type == FileType::COMMON) {
+        if (size > MAX_FILE_SIZE_DEFAULT) {
+            return DebuggerErrno::ERROR_FILE_TOO_LARGE;
+        }
+        return DebuggerErrno::OK;
+    }
+
+    auto iter = FileTypeCheckTbl.find(type);
+    if (iter == FileTypeCheckTbl.end()) {
+        return DebuggerErrno::ERROR_UNKNOWN_FILE_SUFFIX;
+    }
+
+    std::string suffix = GetFileSuffix(path);
+    if (suffix != iter->second.first) {
+        return DebuggerErrno::ERROR_UNKNOWN_FILE_SUFFIX;
+    }
+    if (size > iter->second.second) {
+        return DebuggerErrno::ERROR_FILE_TOO_LARGE;
+    }
+
+    return DebuggerErrno::OK;
+}
+
+/* Runs the common directory checks on `path` and returns the error of the first check that
+ * fails, or DebuggerErrno::OK. */
+DebuggerErrno CheckDirCommon(const std::string &path)
+{
+    std::string realPath = GetAbsPath(path);
+    if (realPath.empty()) {
+        return DebuggerErrno::ERROR_CANNOT_PARSE_PATH;
+    }
+    if (!IsPathExist(realPath)) {
+        return DebuggerErrno::ERROR_FILE_NOT_EXISTS;
+    }
+    if (!IsDir(realPath)) {
+        return DebuggerErrno::ERROR_ILLEGAL_FILE_TYPE;
+    }
+    if (!IsPathLengthLegal(realPath)) {
+        return DebuggerErrno::ERROR_PATH_TOO_LOOG;
+    }
+    if 
(!IsPathCharactersValid(realPath)) { + return DebuggerErrno::ERROR_PATH_CONTAINS_INVALID_CHAR; + } + if (!IsPathDepthValid(realPath)) { + return DebuggerErrno::ERROR_PATH_TOO_DEEP; + } + if (IsFileSymbolLink(path)) { + return DebuggerErrno::ERROR_NOT_ALLOW_SOFTLINK; + } + if (!IsDirReadable(path)) { + return DebuggerErrno::ERROR_PERMISSION_DENINED; + } + + return DebuggerErrno::OK; +} + +DebuggerErrno CheckFileBeforeRead(const std::string &path, const std::string& authority, FileType type) +{ + std::string realPath = GetAbsPath(path); + if (realPath.empty()) { + return DebuggerErrno::ERROR_CANNOT_PARSE_PATH; + } + if (!IsPathExist(realPath)) { + return DebuggerErrno::ERROR_FILE_NOT_EXISTS; + } + if (!IsPathLengthLegal(realPath)) { + return DebuggerErrno::ERROR_PATH_TOO_LOOG; + } + if (!IsPathCharactersValid(realPath)) { + return DebuggerErrno::ERROR_PATH_CONTAINS_INVALID_CHAR; + } + if (!IsPathDepthValid(realPath)) { + return DebuggerErrno::ERROR_PATH_TOO_DEEP; + } + if (IsFileSymbolLink(realPath)) { + return DebuggerErrno::ERROR_NOT_ALLOW_SOFTLINK; + } + if (!CheckFileRWX(realPath, authority)) { + return DebuggerErrno::ERROR_PERMISSION_DENINED; + } + + /* 如果是/dev/random之类的无法计算size的文件,不要用本函数check */ + return CheckFileSuffixAndSize(path, type); +} + +DebuggerErrno CheckFileBeforeCreateOrWrite(const std::string &path, bool overwrite) +{ + std::string realPath = GetAbsPath(path); + if (realPath.empty()) { + return DebuggerErrno::ERROR_CANNOT_PARSE_PATH; + } + if (!IsPathLengthLegal(realPath)) { + return DebuggerErrno::ERROR_PATH_TOO_LOOG; + } + if (!IsPathCharactersValid(realPath)) { + return DebuggerErrno::ERROR_PATH_CONTAINS_INVALID_CHAR; + } + if (!IsPathDepthValid(realPath)) { + return DebuggerErrno::ERROR_PATH_TOO_DEEP; + } + if (IsPathExist(realPath)) { + if (!overwrite) { + return DebuggerErrno::ERROR_FILE_ALREADY_EXISTS; + } + + /* 默认不允许覆盖其他用户创建的文件,若有特殊需求(如多用户通信管道等)由业务自行校验 */ + if (!IsFileWritable(realPath) || !IsFileOwner(realPath)) { + return 
DebuggerErrno::ERROR_PERMISSION_DENINED;
+        }
+    }
+    return DebuggerErrno::OK;
+}
+
+/* Other file operation utilities */
+/* Recursively appends the regular files found under `path` to `output` and stops as soon as
+ * `output` holds `maxNum` entries. `depth` is the nesting level of `path`; exceeding
+ * PATH_DEPTH_MAX aborts with ERROR_PATH_TOO_DEEP. Entries that are neither directories nor
+ * regular files (by d_type) are skipped. */
+static DebuggerErrno ListAllAux(const std::string &path, std::vector<std::string>& output, uint32_t depth,
+                                size_t maxNum)
+{
+    if (depth > PATH_DEPTH_MAX) {
+        return DebuggerErrno::ERROR_PATH_TOO_DEEP;
+    }
+    /* Check the limit before opendir() so that this early return cannot leak a DIR handle. */
+    if (output.size() >= maxNum) {
+        return DebuggerErrno::OK;
+    }
+
+    DIR* dir = opendir(path.c_str());
+    if (dir == nullptr) {
+        return DebuggerErrno::ERROR_FAILED_TO_OPEN_FILE;
+    }
+
+    DebuggerErrno ret = DebuggerErrno::OK;
+    struct dirent* entry = nullptr;
+    /* Re-read output.size() on every round: recursive calls append to the same vector. */
+    while (output.size() < maxNum && (entry = readdir(dir)) != nullptr) {
+        if (strcmp(entry->d_name, ".") == 0 || (strcmp(entry->d_name, "..") == 0)) {
+            continue;
+        }
+        std::string entryPath = path + "/" + entry->d_name;
+        if (entry->d_type == DT_DIR) {
+            ret = ListAllAux(entryPath, output, depth + 1, maxNum);
+            if (ret != DebuggerErrno::OK) {
+                break;
+            }
+        } else if (entry->d_type == DT_REG) {
+            output.emplace_back(entryPath);
+        }
+    }
+    /* Single exit point: the handle is closed on success, on error and when the limit is hit. */
+    closedir(dir);
+    return ret;
+}
+
+/* Returns up to `max` regular files found recursively under the directory `path`.
+ * Returns an empty vector when `path` fails CheckDirCommon(). */
+std::vector<std::string> ListAll(const std::string &path, size_t max)
+{
+    std::vector<std::string> ret;
+    std::string realPath = GetAbsPath(path);
+    if (CheckDirCommon(realPath) != DebuggerErrno::OK) {
+        return ret;
+    }
+    ret.reserve(max);
+
+    uint32_t depth = std::count(realPath.begin(), realPath.end(), pathSeparator);
+    /* The status is ignored on purpose: entries collected before a failure are still returned. */
+    ListAllAux(realPath, ret, depth, max);
+    /* Release the unused part of the reservation. */
+    ret.shrink_to_fit();
+    return ret;
+}
+
+}
+}
\ No newline at end of file
diff --git a/debug/accuracy_tools/msprobe/ccsrc/utils/FileUtils.hpp b/debug/accuracy_tools/msprobe/ccsrc/utils/FileUtils.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..5889c59075c7d4b10a28a003fbedcda62e1195e8
--- /dev/null
+++ b/debug/accuracy_tools/msprobe/ccsrc/utils/FileUtils.hpp
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "include/ErrorCode.hpp" + +namespace MindStudioDebugger { + +constexpr const char pathSeparator = '/'; +constexpr const uint32_t FULL_PATH_LENGTH_MAX = 4096; +constexpr const uint32_t FILE_NAME_LENGTH_MAX = 255; +constexpr const uint32_t PATH_DEPTH_MAX = 32; +constexpr const char* FILE_VALID_PATTERN = "^[a-zA-Z0-9_.:/-]+$"; + +constexpr size_t MAX_PKL_SIZE = 1024ULL * 1024 * 1024; +constexpr size_t MAX_NUMPY_SIZE = 10ULL * 1024 * 1024 * 1024; +constexpr size_t MAX_JSON_SIZE = 1024ULL * 1024 * 1024; +constexpr size_t MAX_PT_SIZE = 10ULL * 1024 * 1024 * 1024; +constexpr size_t MAX_CSV_SIZE = 1024ULL * 1024 * 1024; +constexpr size_t MAX_YAML_SIZE = 10ULL * 1024 * 1024; +constexpr size_t MAX_FILE_SIZE_DEFAULT = 10ULL * 1024 * 1024 * 1024; + +constexpr mode_t NORMAL_FILE_MODE_DEFAULT = 0640; +constexpr mode_t READONLY_FILE_MODE_DEFAULT = 0440; +constexpr mode_t SCRIPT_FILE_MODE_DEFAULT = 0550; +constexpr mode_t NORMAL_DIR_MODE_DEFAULT = 0750; + +enum class FileType { + PKL, + NUMPY, + JSON, + PT, + CSV, + YAML, + + /* Add new type before this line. 
*/ + COMMON +}; + +namespace FileUtils { + +constexpr const uint32_t FILE_NAME_MAX = 255; + +/* 基础检查函数库,不做过多校验,路径有效性由调用者保证 */ +bool IsPathExist(const std::string& path); +std::vector SplitPath(const std::string &path); +std::string GetAbsPath(const std::string &path); +bool IsDir(const std::string& path); +bool IsRegularFile(const std::string& path); +bool IsFileSymbolLink(const std::string& path); +bool IsPathCharactersValid(const std::string& path); +bool IsFileReadable(const std::string& path); +bool IsFileWritable(const std::string& path); +bool IsFileExecutable(const std::string& path); +bool IsDirReadable(const std::string& path); +std::string GetParentDir(const std::string& path); +std::string GetFileName(const std::string& path); +std::string GetFileBaseName(const std::string& path); +std::string GetFileSuffix(const std::string& path); +bool CheckFileRWX(const std::string& path, const std::string& permissions); +bool IsPathLengthLegal(const std::string& path); +bool IsPathDepthValid(const std::string& path); +bool IsFileOwner(const std::string& path); + +/* 文件操作函数库,会对入参做基本检查 */ +DebuggerErrno DeleteFile(const std::string &path); +DebuggerErrno DeleteDir(const std::string &path, bool recursion=false); +DebuggerErrno CreateDir(const std::string &path, bool recursion=false, mode_t mode=NORMAL_DIR_MODE_DEFAULT); +DebuggerErrno Chmod(const std::string& path, const mode_t& mode); +DebuggerErrno GetFileSize(const std::string &path, size_t& size); +DebuggerErrno OpenFile(const std::string& path, std::ifstream& ifs, std::ios::openmode mode=std::ios::in); +DebuggerErrno OpenFile(const std::string& path, std::ofstream& ofs, std::ios::openmode mode=std::ios::out, + mode_t permission=NORMAL_FILE_MODE_DEFAULT); + +/* 通用检查函数 */ +DebuggerErrno CheckFileSuffixAndSize(const std::string &path, FileType type); +DebuggerErrno CheckDirCommon(const std::string &path); +DebuggerErrno CheckFileBeforeRead(const std::string &path, const std::string& authority="r", + FileType 
type=FileType::COMMON); +DebuggerErrno CheckFileBeforeCreateOrWrite(const std::string &path, bool overwrite=false); + +/* 其他文件操作工具 */ +std::vector ListAll(const std::string &path, size_t max = 1024); + +} +} \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/ccsrc/utils/MathUtils.cpp b/debug/accuracy_tools/msprobe/ccsrc/utils/MathUtils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..27111d60c9f86f2ae9b2b2a00b804ab886917755 --- /dev/null +++ b/debug/accuracy_tools/msprobe/ccsrc/utils/MathUtils.cpp @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include "openssl/md5.h" + +namespace MindStudioDebugger { +namespace MathUtils { + +float Random() +{ + std::mt19937 generator(std::random_device{}()); + std::uniform_real_distribution distribution(0.0f, 1.0f); + return distribution(generator); +} + +float Random(float floor, float ceil) +{ + std::mt19937 generator(std::random_device{}()); + std::uniform_real_distribution distribution(floor, ceil); + return distribution(generator); +} + +int32_t RandomInt(int32_t floor, int32_t ceil) +{ + std::mt19937 generator(std::random_device{}()); + std::uniform_int_distribution distribution(floor, ceil - 1); + + return distribution(generator); +} + +std::string RandomString(uint32_t len, char min, char max) +{ + std::mt19937 generator(std::random_device{}()); + std::string output(len, '\0'); + if (min > max) { + return output; + } + + std::uniform_int_distribution distribution(min, max); + for (uint32_t i = 0; i < len; i++) { + output[i] = distribution(generator); + } + + return output; +} + +std::string CalculateMD5(const uint8_t* data, size_t length) +{ + MD5_CTX md5ctx; + MD5_Init(&md5ctx); + MD5_Update(&md5ctx, data, length); + + unsigned char digest[MD5_DIGEST_LENGTH]; + MD5_Final(digest, &md5ctx); + + static const char hexchar[] = "0123456789abcdef"; + constexpr const uint8_t hexbase = 16; + constexpr const size_t byteToStrWidth = 2; + char md5string[MD5_DIGEST_LENGTH * byteToStrWidth + 1]; + for (int i = 0; i < MD5_DIGEST_LENGTH; i++) { + md5string[i * byteToStrWidth] = hexchar[digest[i] / hexbase]; + md5string[i * byteToStrWidth + 1] = hexchar[digest[i] % hexbase]; + } + md5string[sizeof(md5string) - 1] = '\0'; + + return std::string(md5string); +} + +} +} \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/ccsrc/utils/MathUtils.hpp b/debug/accuracy_tools/msprobe/ccsrc/utils/MathUtils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..141471ac8ce284ac1a7ab4b6db59f5d0da9a9fe2 --- /dev/null +++ 
b/debug/accuracy_tools/msprobe/ccsrc/utils/MathUtils.hpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+
+namespace MindStudioDebugger {
+namespace MathUtils {
+
+/* Greatest common divisor (Euclidean algorithm); returns 0 if either argument is 0. */
+template <typename T>
+T Gcd(T a, T b) {
+    if (a == 0 || b == 0) {
+        return 0;
+    }
+    T c = b;
+    while (a % b != 0) {
+        c = a % b;
+        a = b;
+        b = c;
+    }
+    return c;
+}
+
+/* Least common multiple; returns 0 if either argument is 0. */
+template <typename T>
+T Lcm(T a, T b) {
+    if (a == 0 || b == 0) {
+        return 0;
+    }
+    /* Divide before multiplying: the quotient is exact, whereas a * b can overflow T. */
+    T ret = (a / Gcd(a, b)) * b;
+    return ret;
+}
+
+/* Computes (v + divisor - 1) / divisor, i.e. v / divisor rounded up for v >= 0, divisor > 0; 0 if divisor is 0. */
+template <typename T>
+T DivCeil(T v, T divisor) {
+    if (divisor == 0) {
+        return 0;
+    }
+    return (v + divisor - 1) / divisor;
+}
+
+/* Rounds v up to a multiple of block via DivCeil; yields 0 if block is 0. */
+template <typename T>
+T AlignCeil(T v, T block)
+{
+    return DivCeil(v, block) * block;
+}
+
+float Random();
+float Random(float floor, float ceil);
+int32_t RandomInt(int32_t floor, int32_t ceil);
+std::string RandomString(uint32_t len, char min=' ', char max='~');
+
+std::string CalculateMD5(const uint8_t* data, size_t length);
+
+}
+}
\ No newline at end of file
diff --git a/debug/accuracy_tools/msprobe/mindspore/__init__.py b/debug/accuracy_tools/msprobe/mindspore/__init__.py index e3ef097eb046c1d694b5bf7cf000f8c5753fed1e..de85031e7c7eba357ff08b36178c9fad5b34c2fe 100644 --- a/debug/accuracy_tools/msprobe/mindspore/__init__.py +++ b/debug/accuracy_tools/msprobe/mindspore/__init__.py @@ -13,5 +13,15 @@ # See the License for the specific language governing permissions and 
# limitations under the License. +import os + +try: + from msprobe.lib import _msprobe_c + os.environ["MS_HOOK_ENABLE"] = "on" + os.environ["HOOK_TOOL_PATH"] = _msprobe_c.__file__ +except ImportError: + from .common.log import logger + logger.info("Module _msprobe_c has not been installed.") + from msprobe.mindspore.debugger.precision_debugger import PrecisionDebugger from msprobe.mindspore.common.utils import seed_all diff --git a/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py b/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py index d928e0a3a62504f07ffb8807a43d405773921861..5af9328050230bf6e70c336cbffe2d510ae564be 100644 --- a/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py +++ b/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py @@ -30,6 +30,10 @@ from msprobe.mindspore.runtime import Runtime from msprobe.mindspore.service import Service from msprobe.mindspore.task_handler_factory import TaskHandlerFactory +try: + from msprobe.lib import _msprobe_c +except ImportError: + _msprobe_c = None class PrecisionDebugger: _instance = None @@ -57,6 +61,9 @@ class PrecisionDebugger: return self.config = DebuggerConfig(common_config, task_config) + if _msprobe_c: + _msprobe_c._PrecisionDebugger(framework="MindSpore", config_path=config_path) + Runtime.step_count = 0 Runtime.is_running = False @@ -83,6 +90,8 @@ class PrecisionDebugger: instance = cls._instance if not instance: raise Exception(MsgConst.NOT_CREATED_INSTANCE) + if _msprobe_c: + _msprobe_c._PrecisionDebugger().start() if instance.task in PrecisionDebugger.task_not_need_service: return @@ -114,6 +123,8 @@ class PrecisionDebugger: instance = cls._instance if not instance: raise Exception(MsgConst.NOT_CREATED_INSTANCE) + if _msprobe_c: + _msprobe_c._PrecisionDebugger().stop() if instance.task == Const.GRAD_PROBE: instance.gm.stop() if instance.task in PrecisionDebugger.task_not_need_service: @@ -127,6 +138,8 @@ class 
PrecisionDebugger: instance = cls._instance if not instance: raise Exception(MsgConst.NOT_CREATED_INSTANCE) + if _msprobe_c: + _msprobe_c._PrecisionDebugger().step() if instance.task in PrecisionDebugger.task_not_need_service: return if instance.service: diff --git a/debug/accuracy_tools/msprobe/mindspore/dump/kernel_graph_dump.py b/debug/accuracy_tools/msprobe/mindspore/dump/kernel_graph_dump.py index a9a48d5a878f4fdfe7879064b4086407ec988154..2b215f5a1b48b280c4631d83d68130d4e8bd7e0f 100644 --- a/debug/accuracy_tools/msprobe/mindspore/dump/kernel_graph_dump.py +++ b/debug/accuracy_tools/msprobe/mindspore/dump/kernel_graph_dump.py @@ -56,6 +56,13 @@ class KernelGraphDump: self.dump_json["common_dump_settings"]["input_output"] = 2 def handle(self): + try: + from msprobe.lib import _msprobe_c + return + except ImportError: + # ���_msprobe_ce_c����MindSpore������ + logger.info("Module _msprobe_c has not been installed, use interface in mindspore instead.") + if os.getenv("GRAPH_OP_RUN") == "1": raise Exception("Must run in graph mode, not kbk mode") json_path = self.dump_json["common_dump_settings"]["path"] diff --git a/debug/accuracy_tools/msprobe/mindspore/ms_config.py b/debug/accuracy_tools/msprobe/mindspore/ms_config.py index 6d4c4c6730e94cab3a50f7b8c2104119b643e508..2585938899da4e9db06ae3c008df599ac868c3f1 100644 --- a/debug/accuracy_tools/msprobe/mindspore/ms_config.py +++ b/debug/accuracy_tools/msprobe/mindspore/ms_config.py @@ -45,7 +45,11 @@ class StatisticsConfig(BaseConfig): self._check_config() def _check_config(self): - if self.summary_mode and self.summary_mode not in ["statistics", "md5"]: + single_opt = ["statistics", "md5"] + muti_opt = ["md5", "max", "min", "mean", "l2norm"] + if isinstance(self.summary_mode, str) and self.summary_mode not in single_opt: + raise Exception("summary_mode is invalid") + if isinstance(self.summary_mode, list) and not all(opt in muti_opt for opt in self.summary_mode): raise Exception("summary_mode is invalid") diff --git 
a/debug/accuracy_tools/msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py b/debug/accuracy_tools/msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py index ff0b81cce7fe1a8eee64c9f4462591c3f9d811d8..d80dab148f06093dba61c759a5a767db84fbd9d0 100644 --- a/debug/accuracy_tools/msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +++ b/debug/accuracy_tools/msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py @@ -46,6 +46,13 @@ class KernelGraphOverflowCheck: self.dump_json["common_dump_settings"]["op_debug_mode"] = 2 def handle(self): + try: + from msprobe.lib import _msprobe_c + return + except ImportError: + # ���_msprobe_ce_c����MindSpore������ + logger.info("Module _msprobe_c has not been installed, use interface in mindspore instead.") + if os.getenv("GRAPH_OP_RUN") == "1": raise Exception("Must run in graph mode, not kbk mode") json_path = self.dump_json["common_dump_settings"]["path"] diff --git a/debug/accuracy_tools/msprobe/test/CMakeLists.txt b/debug/accuracy_tools/msprobe/test/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..da8ed956f6bd903fff3b88b8b4512c54a607d063 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(cpp) \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/test/cpp/CMakeLists.txt b/debug/accuracy_tools/msprobe/test/cpp/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..8807d800b8f745fbea895339a09586c411ca6da0 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/cpp/CMakeLists.txt @@ -0,0 +1,24 @@ +project(msprobe VERSION 1.0.0 LANGUAGES CXX C) +cmake_minimum_required(VERSION 3.14) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +find_package(gtest MODULE REQUIRED) +find_package(mockcpp MODULE REQUIRED) +find_package(nlohmannjson MODULE REQUIRED) +find_package(cpython MODULE REQUIRED) + +add_executable(msprobe_test) 
+target_link_libraries(msprobe_test PRIVATE ${gtest_LIBRARIES}) +target_link_libraries(msprobe_test PRIVATE ${mockcpp_LIBRARIES}) +target_link_libraries(msprobe_test PRIVATE _msprobe_c) + +target_include_directories(msprobe_test PRIVATE $ENV{PROJECT_ROOT_PATH}/msprobe/ccsrc) +target_include_directories(msprobe_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include) +target_include_directories(msprobe_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/mock) + +target_compile_definitions(msprobe_test PRIVATE __RESOURCES_PATH__="${CMAKE_CURRENT_SOURCE_DIR}/../resources") + +file(GLOB_RECURSE SOURCES CONFIGURE_DEPENDS "*.cpp") +target_sources(msprobe_test PRIVATE ${SOURCES}) diff --git a/debug/accuracy_tools/msprobe/test/cpp/include/test_utils.cpp b/debug/accuracy_tools/msprobe/test/cpp/include/test_utils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e744233b3199c15f5ce77b4690bbaa523b0bad45 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/cpp/include/test_utils.cpp @@ -0,0 +1,31 @@ +#include +#include +#include +#include +#include + +std::string TEST_ExecShellCommand(const std::string& cmd) +{ + std::array buffer; + std::string result; + std::unique_ptr pipe(popen(cmd.c_str(), "r"), pclose); + if (!pipe) { + throw std::runtime_error("popen() failed!"); + } + while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) { + result += buffer.data(); + } + return result; +} + +std::string trim(const std::string& str) +{ + std::string::size_type first = str.find_first_not_of(" \t\n\r\f\v"); + std::string::size_type last = str.find_last_not_of(" \t\n\r\f\v"); + + if (first == std::string::npos || last == std::string::npos) { + return ""; + } + + return str.substr(first, (last - first + 1)); +} diff --git a/debug/accuracy_tools/msprobe/test/cpp/include/test_utils.hpp b/debug/accuracy_tools/msprobe/test/cpp/include/test_utils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ed842b87db77e75e618acd7a25949145a1578c37 --- /dev/null +++
b/debug/accuracy_tools/msprobe/test/cpp/include/test_utils.hpp @@ -0,0 +1,8 @@ +#pragma once + +#include + +#define CONFIG_EXAMPLE __RESOURCES_PATH__"/config.json" + +std::string TEST_ExecShellCommand(const std::string& cmd); +std::string trim(const std::string& str); diff --git a/debug/accuracy_tools/msprobe/test/cpp/test_config.cpp b/debug/accuracy_tools/msprobe/test/cpp/test_config.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e8b9b73fb66c3fcae40819545c84b7fafb5d2c4d --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/cpp/test_config.cpp @@ -0,0 +1,276 @@ +#include +#include "gtest/gtest.h" +#include "nlohmann/json.hpp" +#include "test_utils.hpp" +#include "base/ErrorInfos.hpp" +#include "base/DebuggerConfig.hpp" + +using namespace MindStudioDebugger; + +namespace MsProbeTest { + +static const std::string cfgContent = R"({ + "task": "statistics", + "dump_path": "./dump_path", + "rank": [], + "step": [], + "level": "L1", + "seed": 1234, + "is_deterministic": false, + "enable_dataloader": false, + "acl_config": "", + "tensor": { + "scope": [], + "list":[], + "data_mode": ["all"], + "backward_input": [], + "file_format": "npy" + }, + "statistics": { + "scope": [], + "list":[], + "data_mode": ["all"], + "summary_mode": "statistics" + }, + "overflow_check": { + "overflow_nums": 1, + "check_mode":"all" + }, + "run_ut": { + "white_list": [], + "black_list": [], + "error_data_path": "./" + }, + "grad_probe": { + "grad_level": "L1", + "param_list": [], + "bounds": [-1, 0, 1] + }, + "free_benchmark": { + "scope": [], + "list": [], + "fuzz_device": "npu", + "pert_mode": "improve_precision", + "handler_type": "check", + "fuzz_level": "L1", + "fuzz_stage": "forward", + "if_preheat": false, + "preheat_step": 15, + "max_sample": 20 + } +})"; + +class TestConfigPyTorch : public ::testing::Test +{ +protected: + void SetUp(){} + void TearDown(){} +}; + +class TestConfigMindSpore : public ::testing::Test +{ +protected: + void SetUp(); + void 
TearDown(); + int32_t DumpCfgFile(); + const std::string framework = "MindSpore"; + const std::string cfgPath = "./config.json"; + nlohmann::json cfgJson; + const std::string logpath = "./test.log"; +}; + +int32_t TestConfigMindSpore::DumpCfgFile() +{ + std::ofstream ofs(cfgPath, std::ios::out | std::ios::trunc); + if (!ofs.is_open()) { + return -1; + } + try { + ofs << cfgJson.dump(); + } catch (std::exception &e) { + ofs.close(); + return -1; + } + + if (ofs.fail()) { + return -1; + } + + return 0; +} + +void TestConfigMindSpore::SetUp() +{ + DebuggerConfig::GetInstance().Reset(); + CleanErrorInfoCache(); + ErrorInfosManager::SetLogPath(logpath); + cfgJson = nlohmann::json::parse(cfgContent); +} + +void TestConfigMindSpore::TearDown() +{ + TEST_ExecShellCommand("rm -f " + cfgPath); + TEST_ExecShellCommand("rm -f " + logpath); +} + +TEST_F(TestConfigMindSpore, TestDefaultValue) +{ + DebuggerConfig& cfg = DebuggerConfig::GetInstance(); + EXPECT_FALSE(cfg.IsCfgLoaded()); + EXPECT_EQ(cfg.GetFramework(), DebuggerFramework::FRAMEWORK_PYTORCH); + EXPECT_TRUE(cfg.GetTaskList().empty()); + EXPECT_EQ(cfg.GetOutputPath(), "./output"); + EXPECT_TRUE(cfg.GetRankRange().empty()); + EXPECT_TRUE(cfg.GetStepRange().empty()); + EXPECT_EQ(cfg.GetDebugLevel(), DebuggerLevel::L1); + EXPECT_EQ(cfg.GetRandSeed(), 1234); + EXPECT_FALSE(cfg.IsDeterministic()); + EXPECT_FALSE(cfg.IsDataloaderEnable()); + EXPECT_EQ(cfg.GetStatisticsCfg(), nullptr); + EXPECT_EQ(cfg.GetDumpTensorCfg(), nullptr); + EXPECT_EQ(cfg.GetOverflowCheckCfg(), nullptr); +} + +TEST_F(TestConfigMindSpore, TestLoadConfigBase) +{ + int32_t ret; + DebuggerConfig& cfg = DebuggerConfig::GetInstance(); + ret = cfg.LoadConfig("", cfgPath); + EXPECT_EQ(ret, -1); + CleanErrorInfoCache(); + ret = cfg.LoadConfig(framework, "./xxx"); + EXPECT_EQ(ret, -1); + TEST_ExecShellCommand("echo \"invalid content\" > ./invalid.json"); + CleanErrorInfoCache(); + ret = cfg.LoadConfig(framework, "./invalid.json"); + EXPECT_EQ(ret, -1); + 
TEST_ExecShellCommand("rm ./invalid.json"); + ASSERT_EQ(DumpCfgFile(), 0); + CleanErrorInfoCache(); + ret = cfg.LoadConfig(framework, cfgPath); + EXPECT_EQ(ret, 0); +} + +TEST_F(TestConfigMindSpore, TestCommonCfg) +{ + DebuggerConfig& cfg = DebuggerConfig::GetInstance(); + + /* test static method */ + EXPECT_TRUE(cfg.IsRankHits(0)); + EXPECT_TRUE(cfg.IsRankHits(7)); + EXPECT_TRUE(cfg.IsRankHits(12345)); + EXPECT_TRUE(cfg.IsStepHits(0)); + EXPECT_TRUE(cfg.IsStepHits(7)); + EXPECT_TRUE(cfg.IsStepHits(12345)); + + cfgJson["dump_path"] = "./output1"; + cfgJson["rank"] = nlohmann::json::array({0, 1, 8}); + cfgJson["step"] = nlohmann::json::array({2, 4, "6-8"}); + cfgJson["level"] = "L2"; + cfgJson["seed"] = 2345; + cfgJson["is_deterministic"] = true; + cfgJson["enable_dataloader"] = true; + ASSERT_EQ(DumpCfgFile(), 0); + EXPECT_EQ(cfg.LoadConfig(framework, cfgPath), 0); + EXPECT_EQ(cfg.GetTaskList(), std::vector({DebuggerTaskType::TASK_DUMP_STATISTICS})); + EXPECT_EQ(cfg.GetOutputPath(), trim(TEST_ExecShellCommand("realpath ./output1"))); + EXPECT_EQ(cfg.GetRankRange(), std::vector({0, 1, 8})); + EXPECT_EQ(cfg.GetStepRange(), std::vector({2, 4, 6, 7, 8})); + EXPECT_EQ(cfg.GetDebugLevel(), DebuggerLevel::L2); + EXPECT_EQ(cfg.GetRandSeed(), 2345); + EXPECT_TRUE(cfg.IsDeterministic()); + EXPECT_TRUE(cfg.IsDataloaderEnable()); + EXPECT_NE(cfg.GetStatisticsCfg(), nullptr); + EXPECT_EQ(cfg.GetDumpTensorCfg(), nullptr); + EXPECT_EQ(cfg.GetOverflowCheckCfg(), nullptr); + EXPECT_TRUE(cfg.IsRankHits(0)); + EXPECT_FALSE(cfg.IsRankHits(7)); + EXPECT_FALSE(cfg.IsRankHits(12345)); + EXPECT_TRUE(cfg.IsStepHits(4)); + EXPECT_TRUE(cfg.IsStepHits(6)); + EXPECT_TRUE(cfg.IsStepHits(8)); + EXPECT_FALSE(cfg.IsStepHits(9)); + + /* invalid case */ + cfg.Reset(); + ErrorInfosManager::SetLogPath("./test.log"); + cfgJson["dump_path"] = 111; + cfgJson["rank"] = "abc"; + cfgJson["step"] = nlohmann::json::array({"a", "b"}); + cfgJson["level"] = "L10"; + cfgJson["seed"] = "123"; + 
cfgJson["is_deterministic"] = 1; + cfgJson["enable_dataloader"] = "true"; + ASSERT_EQ(DumpCfgFile(), 0); + EXPECT_NE(cfg.LoadConfig(framework, cfgPath), 0); + std::string logContent = TEST_ExecShellCommand("cat " + logpath); + EXPECT_NE(logContent.find("dump_path"), std::string::npos); + EXPECT_NE(logContent.find("rank"), std::string::npos); + EXPECT_NE(logContent.find("step"), std::string::npos); + EXPECT_NE(logContent.find("level"), std::string::npos); + EXPECT_NE(logContent.find("seed"), std::string::npos); + EXPECT_NE(logContent.find("is_deterministic"), std::string::npos); + EXPECT_NE(logContent.find("enable_dataloader"), std::string::npos); +} + +TEST_F(TestConfigMindSpore, TestTensorCfg) +{ + DebuggerConfig& cfg = DebuggerConfig::GetInstance(); + cfgJson["task"] = "tensor"; + cfgJson["level"] = "L2"; + nlohmann::json& tensorCfgJson = cfgJson["tensor"]; + tensorCfgJson["scope"] = nlohmann::json::array({"a", "b"}); + tensorCfgJson["list"] = nlohmann::json::array({"name-regex(conv)", "add", "ReduceMean-op0.10.5"}); + tensorCfgJson["data_mode"] = nlohmann::json::array({"all"}); + tensorCfgJson["backward_input"] = nlohmann::json::array({"/a.pt", "/b.pt"});; + tensorCfgJson["file_format"] = "npy"; + ASSERT_EQ(DumpCfgFile(), 0); + EXPECT_EQ(cfg.LoadConfig(framework, cfgPath), 0); + std::shared_ptr tensorcfg = cfg.GetDumpTensorCfg(); + ASSERT_NE(tensorcfg, nullptr); + EXPECT_EQ(tensorcfg->scope, std::vector({"a", "b"})); + EXPECT_EQ(tensorcfg->list, std::vector({"name-regex(conv)", "add", "ReduceMean-op0.10.5"})); + EXPECT_EQ(tensorcfg->direction, DebuggerDataDirection::DIRECTION_BOTH); + EXPECT_EQ(tensorcfg->inout, DebuggerDataInOut::INOUT_BOTH); + EXPECT_EQ(tensorcfg->backwardInput, std::vector({"/a.pt", "/b.pt"})); + EXPECT_EQ(tensorcfg->fileFormat, DebuggerDumpFileFormat::FILE_FORMAT_NPY); +} + +TEST_F(TestConfigMindSpore, TestStatisticCfg) +{ + DebuggerConfig& cfg = DebuggerConfig::GetInstance(); + cfgJson["task"] = "statistics"; + cfgJson["level"] = "L2"; + 
nlohmann::json& statisticsCfgJson = cfgJson["statistics"]; + statisticsCfgJson["scope"] = nlohmann::json::array({"c", "d"}); + statisticsCfgJson["list"] = nlohmann::json::array({"name-regex(conv)", "add", "ReduceMean-op0.10.5"}); + statisticsCfgJson["data_mode"] = nlohmann::json::array({"input"}); + statisticsCfgJson["summary_mode"] = "statistics"; + ASSERT_EQ(DumpCfgFile(), 0); + EXPECT_EQ(cfg.LoadConfig(framework, cfgPath), 0); + std::shared_ptr statisticscfg = cfg.GetStatisticsCfg(); + ASSERT_NE(statisticscfg, nullptr); + EXPECT_EQ(statisticscfg->scope, std::vector({"c", "d"})); + EXPECT_EQ(statisticscfg->list, std::vector({"name-regex(conv)", "add", "ReduceMean-op0.10.5"})); + EXPECT_EQ(statisticscfg->direction, DebuggerDataDirection::DIRECTION_BOTH); + EXPECT_EQ(statisticscfg->inout, DebuggerDataInOut::INOUT_INPUT); + EXPECT_EQ(statisticscfg->summaryOption,std::vector( + {DebuggerSummaryOption::MAX, DebuggerSummaryOption::MIN, DebuggerSummaryOption::MEAN, DebuggerSummaryOption::L2NORM})); +} + +TEST_F(TestConfigMindSpore, TestOverflowCfg) +{ + DebuggerConfig& cfg = DebuggerConfig::GetInstance(); + cfgJson["task"] = "overflow_check"; + nlohmann::json& overflowCfgJson = cfgJson["overflow_check"]; + overflowCfgJson["overflow_nums"] = 3; + overflowCfgJson["check_mode"] = "all"; + ASSERT_EQ(DumpCfgFile(), 0); + EXPECT_EQ(cfg.LoadConfig(framework, cfgPath), 0); + std::shared_ptr overflowcfg = cfg.GetOverflowCheckCfg(); + ASSERT_NE(overflowcfg, nullptr); + EXPECT_EQ(overflowcfg->overflowNums, 3); + EXPECT_EQ(overflowcfg->checkMode, DebuggerOpCheckLevel::CHECK_LEVEL_ALL); +} + +} diff --git a/debug/accuracy_tools/msprobe/test/cpp/test_cpython_utils.cpp b/debug/accuracy_tools/msprobe/test/cpp/test_cpython_utils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0d9188878c0864d66d76cc3a823b0a0a5cf644d5 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/cpp/test_cpython_utils.cpp @@ -0,0 +1,312 @@ +#include +#include + +#include "test_utils.hpp" 
+#include "utils/CPythonUtils.hpp" + +using namespace MindStudioDebugger; +using namespace MindStudioDebugger::CPythonUtils; + +namespace MsProbeTest { + +class CPythonUtilsTest : public ::testing::Test { +protected: + void SetUp() override { + Py_Initialize(); + } + + void TearDown() override { + Py_Finalize(); + } +}; + +TEST_F(CPythonUtilsTest, CPythonAgent) { + PythonObject obj = PythonObject::From("test"); + std::string name = "test_object"; + int32_t result = RegisterPythonObject(name, obj); + EXPECT_EQ(result, 0); + bool registerd = IsPyObjRegistered(name); + EXPECT_TRUE(registerd); + + result = RegisterPythonObject(name, obj); + EXPECT_EQ(result, -1); + registerd = IsPyObjRegistered(name); + EXPECT_TRUE(registerd); + + name = "test_object"; + UnRegisterPythonObject(name); + name = "test_object1"; + UnRegisterPythonObject(name); + registerd = IsPyObjRegistered(name); + EXPECT_FALSE(registerd); + + result = RegisterPythonObject(name, obj); + EXPECT_EQ(result, 0); + registerd = IsPyObjRegistered(name); + EXPECT_TRUE(registerd); + + PythonObject registerd_obj = GetRegisteredPyObj(name); + EXPECT_EQ(static_cast(registerd_obj), static_cast(obj)); + EXPECT_TRUE(registerd_obj.IsString()); + EXPECT_EQ(registerd_obj.ToString(), "test"); + + PythonObject invalid_obj = GetRegisteredPyObj("invalid_name"); + EXPECT_TRUE(invalid_obj.IsNone()); +} + +TEST_F(CPythonUtilsTest, PythonObjectFromTo) { + // 测试PythonObject的From和To函数 + int32_t input_int = -42; + PythonObject obj_int = PythonObject::From(input_int); + EXPECT_TRUE(obj_int.IsNumber()); + + int32_t output_int; + EXPECT_EQ(obj_int.To(output_int), 0); + EXPECT_EQ(output_int, input_int); + + uint32_t input_uint = 56; + PythonObject obj_uint = PythonObject::From(input_uint); + EXPECT_TRUE(obj_uint.IsNumber()); + + uint32_t output_uint; + EXPECT_EQ(obj_uint.To(output_uint), 0); + EXPECT_EQ(output_uint, input_uint); + + double input_double = 3.14; + PythonObject obj_double = PythonObject::From(input_double); + 
EXPECT_TRUE(obj_double.IsNumber()); + + double output_double; + EXPECT_EQ(obj_double.To(output_double), 0); + EXPECT_DOUBLE_EQ(output_double, input_double); + + std::string input_str = "hello"; + PythonObject obj_str = PythonObject::From(input_str); + EXPECT_TRUE(obj_str.IsString()); + + std::string output_str; + EXPECT_EQ(obj_str.To(output_str), 0); + EXPECT_EQ(output_str, input_str); + + const char* input_char = "world"; + PythonObject obj_str1 = PythonObject::From(input_char); + EXPECT_TRUE(obj_str1.IsString()); + + EXPECT_EQ(obj_str1.To(output_str), 0); + EXPECT_EQ(output_str, std::string(input_char)); + + bool input_bool = true; + PythonObject obj_bool = PythonObject::From(input_bool); + EXPECT_TRUE(obj_bool.IsBool()); + + bool output_bool; + EXPECT_EQ(obj_bool.To(output_bool), 0); + EXPECT_EQ(output_bool, input_bool); + + std::vector input_vector_int = {1, 2, 3, 100}; + PythonObject list_int_obj = PythonObject::From(input_vector_int); + EXPECT_TRUE(list_int_obj.IsList()); + + std::vector output_vector_int; + EXPECT_EQ(list_int_obj.To(output_vector_int), 0); + + size_t size = input_vector_int.size(); + EXPECT_EQ(size, output_vector_int.size()); + + for (size_t i = 0; i < size; ++i) { + EXPECT_EQ(input_vector_int[i], output_vector_int[i]); + } + + std::vector input_vector_str = {"a", "bb", "ccc", "dddd"}; + PythonObject list_str_obj = PythonObject::From(input_vector_str); + EXPECT_TRUE(list_str_obj.IsList()); + + std::vector output_vector_str; + EXPECT_EQ(list_str_obj.To(output_vector_str), 0); + + size = input_vector_str.size(); + EXPECT_EQ(size, output_vector_str.size()); + + for (size_t i = 0; i < size; ++i) { + EXPECT_EQ(input_vector_str[i], output_vector_str[i]); + } +} + +TEST_F(CPythonUtilsTest, PythonObjectImport) { + PythonObject sys = PythonObject::Import("sys"); + EXPECT_TRUE(sys.IsModule()); + EXPECT_EQ(static_cast(sys), PyImport_ImportModule("sys")); + EXPECT_FALSE(sys.IsNone()); + PythonObject invalid = PyImport_ImportModule("invalid"); + 
EXPECT_TRUE(invalid.IsNone()); +} + +TEST_F(CPythonUtilsTest, PythonObjectGetAttr) { + PythonObject sys = PythonObject::Import("sys"); + PythonObject sys_path = sys.Get("path"); + EXPECT_TRUE(sys_path.IsList()); + PythonObject fexit = sys.Get("exit"); + EXPECT_TRUE(fexit.IsCallable()); + PythonObject invalid = sys.Get("invalid"); + EXPECT_TRUE(invalid.IsNone()); + + std::vector input_vector = {1, 2, 3, 100}; + PythonObject list_obj = PythonObject::From(input_vector); + PythonObject append = list_obj.Get("append"); + EXPECT_TRUE(append.IsCallable()); +} + +TEST_F(CPythonUtilsTest, PythonObjectCall) { + PythonObject int_class = PythonObject::Import("builtins").Get("int"); + EXPECT_TRUE(int_class.IsCallable()); + PythonObject int_obj = int_class.Call(); + EXPECT_TRUE(int_obj.IsNumber()); + int result = -1; + EXPECT_EQ(int_obj.To(result), 0); + EXPECT_EQ(result, 0); + + PythonObject ret = PythonObject::Import("builtins").Call(); + EXPECT_TRUE(ret.IsNone()); +} + +TEST_F(CPythonUtilsTest, PythonObjectType) { + PythonObject none = Py_None; + EXPECT_TRUE(none.IsNone()); + EXPECT_FALSE(none.IsNumber() || none.IsCallable()); + + PythonObject pytrue = Py_True; + EXPECT_TRUE(pytrue.IsBool()); + EXPECT_FALSE(pytrue.IsString() || pytrue.IsCallable()); + + PythonObject builtins = PyImport_ImportModule("builtins"); + EXPECT_TRUE(builtins.IsModule()); + EXPECT_FALSE(builtins.IsList() || builtins.IsCallable()); + + PythonObject int_class = builtins.Get("int"); + EXPECT_TRUE(int_class.IsCallable()); + EXPECT_FALSE(builtins.IsDict()); + + PythonObject dict = builtins.Get("__dict__"); + EXPECT_TRUE(dict.IsDict()); + EXPECT_FALSE(dict.IsNone() || dict.IsCallable()); +} + +TEST_F(CPythonUtilsTest, PythonNumberObject) { + PythonNumberObject o1(PyLong_FromLong(123)); + PythonNumberObject o2(PyFloat_FromDouble(3.14)); + PythonNumberObject o3 = PythonNumberObject::From(321); + PythonNumberObject o4 = PythonNumberObject::From(2.33); + PythonNumberObject o5(PythonObject::From(4.44)); + 
PythonNumberObject o6(PythonObject::From("1111")); + + int int_v; + EXPECT_EQ(o1.To(int_v), 0); + EXPECT_EQ(int_v, 123); + double double_v; + EXPECT_EQ(o2.To(double_v), 0); + EXPECT_TRUE(std::fabs(double_v - 3.14) < 1e-5); + EXPECT_EQ(o3.To(int_v), 0); + EXPECT_EQ(int_v, 321); + EXPECT_EQ(o4.To(double_v), 0); + EXPECT_TRUE(std::fabs(double_v - 2.33) < 1e-5); + EXPECT_EQ(o5.To(double_v), 0); + EXPECT_TRUE(std::fabs(double_v - 4.44) < 1e-5); + EXPECT_TRUE(o6.IsNone()); +} + +TEST_F(CPythonUtilsTest, PythonStringObject) { + PythonStringObject o1(PyUnicode_FromString("hello")); + PythonStringObject o2 = PythonStringObject::From("OK"); + PythonStringObject o3 = PythonStringObject::From(std::string("banana")); + PythonStringObject o4(PythonObject::From(1)); + + EXPECT_EQ(o1.ToString(), "hello"); + EXPECT_EQ(o2.ToString(), "OK"); + EXPECT_EQ(o3.ToString(), "banana"); + EXPECT_TRUE(o4.IsNone()); +} + +TEST_F(CPythonUtilsTest, PythonBoolObject) { + PythonBoolObject o1(Py_True); + PythonBoolObject o2(Py_False); + PythonBoolObject o3(PythonObject::From(true)); + PythonBoolObject o4(PythonObject::From(0)); + + EXPECT_EQ(o1, true); + EXPECT_EQ(o2, false); + EXPECT_EQ(o3, true); + EXPECT_TRUE(o4.IsNone()); +} + +TEST_F(CPythonUtilsTest, PythonListObject) { + PythonListObject empty_list(5); + PythonListObject sys_path(static_cast(PythonObject::Import("sys").Get("path"))); + PythonListObject list1 = PythonListObject::From(std::vector({1, 3, 5, 7})); + PythonListObject list2 = PythonListObject::From(std::vector>({{1, 3, 5, 7}, {2, 4, 6}})); + PythonListObject list3; + + int val; + EXPECT_EQ(empty_list.Size(), 5); + EXPECT_FALSE(sys_path.IsNone()); + EXPECT_TRUE(sys_path.Size() > 0); + EXPECT_TRUE(sys_path.GetItem(0).IsString()); + EXPECT_EQ(list1.Size(), 4); + EXPECT_EQ(list1.GetItem(1).To(val), 0); + EXPECT_EQ(val, 3); + EXPECT_EQ(list1.GetItem(3).ToString(), "7"); + EXPECT_TRUE(list1.GetItem(4).IsNone()); + EXPECT_EQ(list2.Size(), 2); + EXPECT_TRUE(list2.GetItem(0).IsList()); + 
EXPECT_EQ(list2.GetItem(1).ToString(), "[2, 4, 6]"); + EXPECT_EQ(list3.Size(), 0); + list3.Append(PythonObject::From(1)); + EXPECT_EQ(list3.Size(), 1); + list3.Append(PythonObject::From("2")).Append(PythonObject::From(true)); + EXPECT_EQ(list3.Size(), 3); + EXPECT_EQ(list3.GetItem(1).ToString(), "2"); + list3.SetItem(1, empty_list); + EXPECT_EQ(list3.Size(), 3); + EXPECT_EQ(static_cast(list3.GetItem(1)), static_cast(empty_list)); + list3.Insert(0, sys_path); + EXPECT_EQ(list3.Size(), 4); + EXPECT_EQ(static_cast(list3.GetItem(0)), static_cast(sys_path)); + PythonTupleObject tuple = list3.ToTuple(); + EXPECT_FALSE(tuple.IsNone()); +} + +TEST_F(CPythonUtilsTest, PythonTupleObject) { + PythonTupleObject tuple1; + PythonTupleObject tuple2(PyTuple_New(0)); + PythonTupleObject tuple3 = PythonTupleObject::From(std::vector({"ab", "cd"})); + PythonTupleObject tuple4 = PythonListObject::From(std::vector({1, 3, 5})).ToTuple(); + + EXPECT_FALSE(tuple1.IsNone()); + EXPECT_EQ(tuple1.Size(), 0); + EXPECT_TRUE(tuple1.GetItem(0).IsNone()); + EXPECT_FALSE(tuple2.IsNone()); + EXPECT_EQ(tuple2.Size(), 0); + EXPECT_EQ(tuple3.Size(), 2); + EXPECT_EQ(tuple3.GetItem(0).ToString(), "ab"); + EXPECT_EQ(tuple4.Size(), 3); + EXPECT_EQ(tuple4.GetItem(0).ToString(), "1"); +} + +TEST_F(CPythonUtilsTest, PythonDictObject) { + PythonDictObject dict1; + PythonDictObject dict2(PyDict_New()); + PythonDictObject dict3 = PythonDictObject::From(std::map({{1, "a"}, {2, "b"}})); + + EXPECT_FALSE(dict1.IsNone()); + EXPECT_FALSE(dict2.IsNone()); + EXPECT_TRUE(dict2.GetItem("none").IsNone()); + EXPECT_FALSE(dict3.IsNone()); + EXPECT_EQ(dict3.GetItem(1).ToString(), "a"); + EXPECT_EQ(dict3.GetItem(2).ToString(), "b"); + EXPECT_TRUE(dict3.GetItem(3).IsNone()); + dict3.Add(std::string("apple"), std::string("banana")); + EXPECT_EQ(dict3.GetItem(std::string("apple")).ToString(), "banana"); + dict3.Delete(std::string("apple")); + EXPECT_TRUE(dict3.GetItem(std::string("apple")).IsNone()); +} + +} diff --git 
a/debug/accuracy_tools/msprobe/test/cpp/test_data_utils.cpp b/debug/accuracy_tools/msprobe/test/cpp/test_data_utils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..11442f12bfea9179ecd4e2e357bcf70b4212ab84 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/cpp/test_data_utils.cpp @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include "utils/DataUtils.hpp" + +using namespace MindStudioDebugger; +using namespace MindStudioDebugger::DataUtils; + +namespace MsProbeTest { + +TEST(DataUtilsTest, TestUnpackUint64Value) { + uint64_t data_le = 0x0102030405060708; + uint64_t result = UnpackUint64Value_Le(&data_le); +#if __BYTE_ORDER == __LITTLE_ENDIAN + EXPECT_EQ(result, 0x0102030405060708); +#else + EXPECT_EQ(result, 0x0807060504030201); +#endif + uint64_t data_be = 0x0102030405060708; + result = UnpackUint64Value_Be(&data_be); +#if __BYTE_ORDER == __LITTLE_ENDIAN + EXPECT_EQ(result, 0x0807060504030201); +#else + EXPECT_EQ(result, 0x0102030405060708); +#endif +} + +TEST(DataUtilsTest, TestDataTrans) { + size_t value = 123456; + int64_t result = SizeToS64(value); + EXPECT_EQ(result, 123456); + bool exception = false; + try { + (void)SizeToS64(static_cast(INT64_MAX) + 1ULL); + } catch (const std::runtime_error& e) { + exception = true; + } + EXPECT_TRUE(exception); + uint64_t num = 0x123456789ABCDEF0; + std::string s = U64ToHexString(num); + EXPECT_EQ(s, "0x123456789ABCDEF0"); +} + +TEST(DataUtilsTest, TestBFloat16) { + float fp32 = 3.14f; + BFloat16 bf16(fp32); +#define BF16_EQ(a, b) (-0.01f < static_cast((a) - (b)) && static_cast((a) - (b)) < 0.01f) + EXPECT_TRUE(BF16_EQ(fp32, static_cast(bf16))); + EXPECT_TRUE(BF16_EQ(fp32 + fp32, static_cast(bf16 + bf16))); + EXPECT_TRUE(BF16_EQ(fp32 + fp32, bf16 + fp32)); + EXPECT_TRUE(BF16_EQ(fp32 + fp32, bf16 + fp32)); +#undef BF16_EQ +} + +TEST(DataUtilsTest, TestDType) { + EXPECT_EQ(SizeOfDType(DataType::DT_FLOAT), 4); + EXPECT_EQ(SizeOfDType(DataType::DT_DOUBLE), 8); +
EXPECT_EQ(SizeOfDType(DataType::DT_INT64), 8); + EXPECT_EQ(SizeOfDType(DataType::DT_UINT8), 1); + EXPECT_EQ(SizeOfDType(DataType::DT_FLOAT16), 2); + EXPECT_EQ(SizeOfDType(static_cast(99)), 0); + EXPECT_EQ(GetDTypeString(DataType::DT_BOOL), "BOOL"); + EXPECT_EQ(GetDTypeString(DataType::DT_INT8), "INT8"); + EXPECT_EQ(GetDTypeString(DataType::DT_BF16), "BF16"); + EXPECT_EQ(GetDTypeString(DataType::DT_UINT64), "UINT64"); + EXPECT_EQ(GetDTypeString(DataType::DT_COMPLEX64), "COMPLEX64"); + EXPECT_EQ(GetDTypeString(static_cast(99)), "UNKNOWN"); +} + +TEST(DataUtilsTest, TestGetFormatString) { + EXPECT_EQ(GetFormatString(TensorFormat::FORMAT_NCHW), "NCHW"); + EXPECT_EQ(GetFormatString(TensorFormat::FORMAT_NHWC), "NHWC"); + EXPECT_EQ(GetFormatString(TensorFormat::FORMAT_FRACTAL_Z), "FRACTAL_Z"); + EXPECT_EQ(GetFormatString(TensorFormat::FORMAT_C1HWNC0), "C1HWNC0"); + EXPECT_EQ(GetFormatString(TensorFormat::FORMAT_HWCN), "HWCN"); + EXPECT_EQ(GetFormatString(TensorFormat::FORMAT_C1HWNCoC0), "C1HWNCoC0"); + EXPECT_EQ(GetFormatString(TensorFormat::FORMAT_DHWNC), "DHWNC"); + EXPECT_EQ(GetFormatString(TensorFormat::FORMAT_NCL), "NCL"); + EXPECT_EQ(GetFormatString(TensorFormat::FORMAT_MAX), "UNKNOWN"); +} + +TEST(DataUtilsTest, GetShapeString) { + EXPECT_EQ(GetShapeString({2, 3, 5}), "(2,3,5)"); + EXPECT_EQ(GetShapeString({}), "()"); + EXPECT_EQ(GetShapeString({3}), "(3)"); +} + +} diff --git a/debug/accuracy_tools/msprobe/test/cpp/test_environ.cpp b/debug/accuracy_tools/msprobe/test/cpp/test_environ.cpp new file mode 100644 index 0000000000000000000000000000000000000000..94c830227ae58637642a189f36ade78de9a2a75c --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/cpp/test_environ.cpp @@ -0,0 +1,28 @@ +#include +#include + +#include "include/test_utils.hpp" +#include "base/DebuggerConfig.hpp" +#include "base/Environment.hpp" + +using namespace MindStudioDebugger; +using namespace MindStudioDebugger::Environment; + +namespace MsProbeTest { + +TEST(EnvironmentTest, TestRankId) { + 
DebuggerConfig::GetInstance().Reset(); + EXPECT_EQ(GetRankID(), -1); + DebuggerConfig::GetInstance().LoadConfig("MindSpore", CONFIG_EXAMPLE); + EXPECT_EQ(GetRankID(), -1); + setenv("RANK_ID", "xxxx", 1); + EXPECT_EQ(GetRankID(), -1); + setenv("RANK_ID", "-5", 1); + EXPECT_EQ(GetRankID(), -1); + setenv("RANK_ID", "2", 1); + EXPECT_EQ(GetRankID(), 2); + + DebuggerConfig::GetInstance().Reset(); +} + +} diff --git a/debug/accuracy_tools/msprobe/test/cpp/test_file_operation.cpp b/debug/accuracy_tools/msprobe/test/cpp/test_file_operation.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2886126e9f568fba6b8ce3eabd752653d4493108 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/cpp/test_file_operation.cpp @@ -0,0 +1,47 @@ +#include +#include +#include +#include + +#include "test_utils.hpp" +#include "utils/DataUtils.hpp" +#include "utils/FileOperation.hpp" + +using namespace MindStudioDebugger; +using namespace MindStudioDebugger::FileOperation; + +namespace MsProbeTest { + +TEST(FileOperationTest, TestDumpJson) { + std::string testPath = "./test.json"; + nlohmann::json testJson = {{"key", "value"}}; + auto result = DumpJson(testPath, testJson); + EXPECT_EQ(result, DebuggerErrno::OK); + + std::ifstream ifs(testPath); + std::string fileContent((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); + ifs.close(); + EXPECT_EQ(fileContent, testJson.dump()); + remove(testPath.c_str()); +} + +TEST(FileOperationTest, TestDumpNpy) { + std::string testPath = "./test.npy"; + std::vector int8Data = {0, 1, 2, 3, 4, 5}; + auto result = DumpNpy(testPath, int8Data.data(), int8Data.size() * sizeof(uint8_t), DataUtils::DataType::DT_UINT8, + {2, 3}); + EXPECT_EQ(result, DebuggerErrno::OK); + std::string content = TEST_ExecShellCommand("python -c \'import numpy; print(numpy.load(\"./test.npy\"))\'"); + EXPECT_EQ(content, "[[0 1 2]\n [3 4 5]]\n"); + remove(testPath.c_str()); + + std::vector fp32Data = {0.1f, 1.2f, 2.3f, 3.4f, 4.5f, 5.6f, 6.7f, 7.8f}; + result 
= DumpNpy(testPath, reinterpret_cast(fp32Data.data()), fp32Data.size() * sizeof(float), + DataUtils::DataType::DT_FLOAT, {2, 2, 2}); + EXPECT_EQ(result, DebuggerErrno::OK); + content = TEST_ExecShellCommand("python -c \'import numpy; print(numpy.load(\"./test.npy\"))\'"); + EXPECT_EQ(content, "[[[0.1 1.2]\n [2.3 3.4]]\n\n [[4.5 5.6]\n [6.7 7.8]]]\n"); + remove(testPath.c_str()); +} + +} diff --git a/debug/accuracy_tools/msprobe/test/cpp/test_file_utils.cpp b/debug/accuracy_tools/msprobe/test/cpp/test_file_utils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..03449f761be0c8548021218581f4cbff12d4e07d --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/cpp/test_file_utils.cpp @@ -0,0 +1,391 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test_utils.hpp" +#include "utils/FileUtils.hpp" + +using namespace MindStudioDebugger; +using namespace MindStudioDebugger::FileUtils; + +namespace MsProbeTest { + +class FileUtilsTest : public ::testing::Test { +protected: + void SetUp() override { + // 创建目录 + ASSERT_EQ(mkdir(testDir.c_str(), 0750), 0); + ASSERT_EQ(mkdir(testDirSub.c_str(), 0750), 0); + // 创建文件 + std::ofstream file(testRegularFile); + file.close(); + // 创建符号链接 + ASSERT_EQ(symlink(GetAbsPath(testRegularFile).c_str(), testLink.c_str()), 0); + ASSERT_EQ(mkfifo(testFifo.c_str(), 0640), 0); + } + + void TearDown() override { + // 删除测试目录和文件 + TEST_ExecShellCommand("rm -rf " + testDir); + } + + const std::string testDir = "./FileUtilsTest"; + const std::string testDirSub = testDir + "/subdir"; + const std::string testRegularFile = testDir + "/RegularFile.txt"; + const std::string testNotExistsFile = testDir + "/NotExistsFile.txt"; + const std::string testLink = testDir + "/testlink"; + const std::string testFifo = testDir + "/testfifo"; +}; + +TEST_F(FileUtilsTest, TestIsPathExist) +{ + EXPECT_TRUE(IsPathExist("/")); + EXPECT_TRUE(IsPathExist(".")); + EXPECT_TRUE(IsPathExist(testRegularFile)); + 
EXPECT_FALSE(IsPathExist(testNotExistsFile)); +} + +TEST_F(FileUtilsTest, TestGetAbsPath) +{ + std::string pwd = trim(TEST_ExecShellCommand("pwd")); + EXPECT_EQ(pwd, GetAbsPath(".")); + EXPECT_EQ(pwd + "/testpath", GetAbsPath("./testpath")); + EXPECT_EQ(pwd + "/testpath", GetAbsPath("./testpath/")); + EXPECT_EQ(pwd + "/testpath", GetAbsPath("./subdir/../testpath")); + EXPECT_EQ(pwd + "/testpath", GetAbsPath("subdir/subdir/.././../testpath")); + EXPECT_EQ(pwd + "/subdir/testpath", GetAbsPath("./subdir/.././/subdir/testpath")); +} + +TEST_F(FileUtilsTest, TestIsDir) +{ + EXPECT_TRUE(IsDir("/")); + EXPECT_TRUE(IsDir("./")); + EXPECT_TRUE(IsDir(testDirSub)); + EXPECT_FALSE(IsDir(testRegularFile)); + EXPECT_FALSE(IsDir(testFifo)); +} + +TEST_F(FileUtilsTest, TestIsRegularFile) +{ + EXPECT_TRUE(IsRegularFile(testRegularFile)); + EXPECT_FALSE(IsRegularFile(testDirSub)); + EXPECT_TRUE(IsRegularFile(testLink)); + EXPECT_FALSE(IsRegularFile(testFifo)); + EXPECT_FALSE(IsRegularFile(testNotExistsFile)); +} + +TEST_F(FileUtilsTest, TestIsFileSymbolLink) +{ + EXPECT_TRUE(IsFileSymbolLink(testLink)); + EXPECT_FALSE(IsFileSymbolLink(testDirSub)); + EXPECT_FALSE(IsFileSymbolLink(testNotExistsFile)); + EXPECT_FALSE(IsFileSymbolLink(testRegularFile)); + EXPECT_FALSE(IsFileSymbolLink(testFifo)); +} + +TEST_F(FileUtilsTest, TestIsPathCharactersValid) +{ + std::string validPath = "/tmp/FileUtilsTest/testfile.txt"; + std::string invalidPath1 = "/tmp/FileUtilsTest/<>:|?*\""; + std::string invalidPath2 = " /tmp/FileUtilsTest/testfile.txt"; + EXPECT_TRUE(IsPathCharactersValid("123456789")); + EXPECT_TRUE(IsPathCharactersValid(validPath)); + EXPECT_FALSE(IsPathCharactersValid("")); + EXPECT_FALSE(IsPathCharactersValid(invalidPath1)); + EXPECT_FALSE(IsPathCharactersValid(invalidPath2)); +} + +TEST_F(FileUtilsTest, TestIsFileReadable) +{ + TEST_ExecShellCommand("chmod -r " + testRegularFile); + EXPECT_FALSE(IsFileReadable(testRegularFile)); + TEST_ExecShellCommand("chmod +r " + 
testRegularFile); + EXPECT_TRUE(IsFileReadable(testRegularFile)); + TEST_ExecShellCommand("chmod -r " + testDirSub); + EXPECT_FALSE(IsFileReadable(testDirSub)); + TEST_ExecShellCommand("chmod +r " + testDirSub); + EXPECT_TRUE(IsFileReadable(testDirSub)); +} + +TEST_F(FileUtilsTest, TestIsFileWritable) +{ + TEST_ExecShellCommand("chmod -w " + testRegularFile); + EXPECT_FALSE(IsFileWritable(testRegularFile)); + TEST_ExecShellCommand("chmod +w " + testRegularFile); + EXPECT_TRUE(IsFileWritable(testRegularFile)); + TEST_ExecShellCommand("chmod -w " + testDirSub); + EXPECT_FALSE(IsFileWritable(testDirSub)); + TEST_ExecShellCommand("chmod +w " + testDirSub); + EXPECT_TRUE(IsFileWritable(testDirSub)); +} + +TEST_F(FileUtilsTest, TestIsFileExecutable) +{ + TEST_ExecShellCommand("chmod -x " + testRegularFile); + EXPECT_FALSE(IsFileExecutable(testRegularFile)); + TEST_ExecShellCommand("chmod +x " + testRegularFile); + EXPECT_TRUE(IsFileExecutable(testRegularFile)); + TEST_ExecShellCommand("chmod -x " + testDirSub); + EXPECT_FALSE(IsFileExecutable(testDirSub)); + TEST_ExecShellCommand("chmod +x " + testDirSub); + EXPECT_TRUE(IsFileExecutable(testDirSub)); +} + +TEST_F(FileUtilsTest, TestIsDirReadable) +{ + EXPECT_TRUE(IsDirReadable(".")); + EXPECT_TRUE(IsDirReadable(testDirSub)); + TEST_ExecShellCommand("chmod 100 " + testDirSub); + EXPECT_FALSE(IsDirReadable(testDirSub)); + TEST_ExecShellCommand("chmod 400 " + testDirSub); + EXPECT_FALSE(IsDirReadable(testDirSub)); + TEST_ExecShellCommand("chmod 500 " + testDirSub); + EXPECT_TRUE(IsDirReadable(testDirSub)); +} + +TEST_F(FileUtilsTest, TestGetParentDir) +{ + EXPECT_EQ("/tmp/FileUtilsTest", GetParentDir("/tmp/FileUtilsTest/dir")); + EXPECT_EQ("/tmp/FileUtilsTest", GetParentDir("/tmp/FileUtilsTest/")); + EXPECT_EQ("./FileUtilsTest", GetParentDir("./FileUtilsTest/testfile.txt")); + EXPECT_EQ(".", GetParentDir("testfile.txt")); + EXPECT_EQ(".", GetParentDir("")); +} + +TEST_F(FileUtilsTest, TestGetFileName) +{ + EXPECT_EQ("dir",
GetFileName("/tmp/FileUtilsTest/dir")); + EXPECT_EQ("", GetFileName("/tmp/FileUtilsTest/")); + EXPECT_EQ("testfile.txt", GetFileName("./FileUtilsTest/testfile.txt")); + EXPECT_EQ("testfile.txt", GetFileName("testfile.txt")); + EXPECT_EQ("", GetFileName("")); +} + +TEST_F(FileUtilsTest, TestGetFileBaseName) +{ + EXPECT_EQ("dir", GetFileBaseName("/tmp/FileUtilsTest/dir")); + EXPECT_EQ("", GetFileBaseName("/tmp/FileUtilsTest/")); + EXPECT_EQ("testfile", GetFileBaseName("./FileUtilsTest/testfile.txt")); + EXPECT_EQ("testfile", GetFileBaseName("testfile.txt")); + EXPECT_EQ("testfile", GetFileBaseName("testfile")); +} + +TEST_F(FileUtilsTest, TestGetFileSuffix) +{ + EXPECT_EQ("", GetFileSuffix("/tmp/FileUtilsTest/dir")); + EXPECT_EQ("", GetFileSuffix("/tmp/FileUtilsTest/")); + EXPECT_EQ("txt", GetFileSuffix("./FileUtilsTest/testfile.txt")); + EXPECT_EQ("txt", GetFileSuffix("testfile.txt")); + EXPECT_EQ("", GetFileSuffix("testfile")); + EXPECT_EQ("", GetFileSuffix("testfile.")); +} + +TEST_F(FileUtilsTest, TestCheckFileRWX) +{ + TEST_ExecShellCommand("chmod 640 " + testRegularFile); + EXPECT_TRUE(CheckFileRWX(testRegularFile, "rw")); + EXPECT_FALSE(CheckFileRWX(testRegularFile, "rx")); + TEST_ExecShellCommand("chmod 750 " + testDirSub); + EXPECT_TRUE(CheckFileRWX(testDirSub, "rwx")); +} + +TEST_F(FileUtilsTest, TestIsPathLengthLegal) +{ + std::string maxFile = std::string(FILE_NAME_LENGTH_MAX, 'a'); + std::string longFile = std::string(FILE_NAME_LENGTH_MAX + 1, 'a'); + std::string maxPath(FULL_PATH_LENGTH_MAX, '/'); + std::string longPath = maxPath + "/"; + EXPECT_TRUE(IsPathLengthLegal(maxFile)); + EXPECT_TRUE(IsPathLengthLegal(maxPath)); + EXPECT_FALSE(IsPathLengthLegal(longFile)); + EXPECT_FALSE(IsPathLengthLegal(longPath)); + EXPECT_FALSE(IsPathLengthLegal("")); +} + +TEST_F(FileUtilsTest, TestIsPathDepthValid) +{ + EXPECT_TRUE(IsPathDepthValid("")); + EXPECT_TRUE(IsPathDepthValid(std::string(PATH_DEPTH_MAX, pathSeparator))); + 
EXPECT_FALSE(IsPathDepthValid(std::string(PATH_DEPTH_MAX + 1, pathSeparator))); +} + +TEST_F(FileUtilsTest, TestIsFileOwner) +{ + EXPECT_TRUE(IsFileOwner(testRegularFile)); + EXPECT_TRUE(IsFileOwner(testDirSub)); + EXPECT_FALSE(IsFileOwner("/")); +} + +TEST_F(FileUtilsTest, TestDeleteFile) +{ + ASSERT_TRUE(IsPathExist(testRegularFile)); + EXPECT_EQ(DeleteFile(testLink), DebuggerErrno::ERROR_NOT_ALLOW_SOFTLINK); + EXPECT_EQ(DeleteFile(testRegularFile), DebuggerErrno::OK); + EXPECT_FALSE(IsPathExist(testRegularFile)); + EXPECT_EQ(DeleteFile(testRegularFile), DebuggerErrno::OK); + EXPECT_EQ(DeleteFile(testFifo), DebuggerErrno::OK); + EXPECT_EQ(DeleteFile(testDirSub), DebuggerErrno::OK); + EXPECT_EQ(DeleteFile(testDir), DebuggerErrno::ERROR_SYSCALL_FAILED); + EXPECT_EQ(DeleteFile(testLink), DebuggerErrno::OK); +} + +TEST_F(FileUtilsTest, TestDeleteDir) +{ + ASSERT_TRUE(IsPathExist(testDirSub)); + EXPECT_EQ(DeleteDir(testDirSub), DebuggerErrno::OK); + EXPECT_FALSE(IsPathExist(testDirSub)); + EXPECT_EQ(DeleteDir(testDirSub), DebuggerErrno::OK); + std::string subSubDir = testDirSub + "/subdir"; + std::string subSubFile = testDirSub + "/subfile"; + TEST_ExecShellCommand("mkdir " + testDirSub); + TEST_ExecShellCommand("mkdir " + subSubDir); + TEST_ExecShellCommand("touch " + subSubFile); + EXPECT_EQ(DeleteDir(testLink), DebuggerErrno::ERROR_NOT_ALLOW_SOFTLINK); + EXPECT_EQ(DeleteDir(testRegularFile), DebuggerErrno::ERROR_SYSCALL_FAILED); + EXPECT_EQ(DeleteDir(testDirSub), DebuggerErrno::ERROR_SYSCALL_FAILED); + EXPECT_EQ(DeleteDir(testDirSub, true), DebuggerErrno::OK); + EXPECT_FALSE(IsPathExist(testDirSub)); +} + +TEST_F(FileUtilsTest, TestCreateDir) +{ + ASSERT_TRUE(IsPathExist(testDirSub)); + EXPECT_EQ(CreateDir(testDirSub), DebuggerErrno::OK); + TEST_ExecShellCommand("rm -rf " + testDirSub); + ASSERT_FALSE(IsPathExist(testDirSub)); + EXPECT_EQ(CreateDir(testDirSub), DebuggerErrno::OK); + EXPECT_TRUE(IsPathExist(testDirSub)); + TEST_ExecShellCommand("rm -rf " + 
testDirSub); + std::string subSubDir = testDirSub + "/subdir"; + EXPECT_EQ(CreateDir(subSubDir), DebuggerErrno::ERROR_DIR_NOT_EXISTS); + EXPECT_EQ(CreateDir(subSubDir, true), DebuggerErrno::OK); + EXPECT_TRUE(IsPathExist(subSubDir)); + EXPECT_TRUE(CheckFileRWX(subSubDir, "rwx")); + TEST_ExecShellCommand("rm -rf " + testDirSub); + EXPECT_EQ(CreateDir(subSubDir, true, 0750), DebuggerErrno::OK); + EXPECT_TRUE(CheckFileRWX(testDirSub, "rwx")); + EXPECT_TRUE(CheckFileRWX(subSubDir, "rwx")); +} + +TEST_F(FileUtilsTest, TestChmod) +{ + EXPECT_EQ(Chmod(testNotExistsFile, 0640), DebuggerErrno::ERROR_FILE_NOT_EXISTS); + EXPECT_EQ(Chmod(testRegularFile, 0440), DebuggerErrno::OK); + EXPECT_FALSE(IsFileWritable(testRegularFile)); + EXPECT_EQ(Chmod(testDirSub, 0550), DebuggerErrno::OK); + EXPECT_FALSE(IsFileWritable(testDirSub)); + EXPECT_EQ(Chmod(testRegularFile, 0640), DebuggerErrno::OK); + EXPECT_TRUE(IsFileWritable(testRegularFile)); + EXPECT_EQ(Chmod(testLink, 0640), DebuggerErrno::ERROR_NOT_ALLOW_SOFTLINK); + EXPECT_EQ(Chmod("", 0640), DebuggerErrno::ERROR_FILE_NOT_EXISTS); + EXPECT_EQ(Chmod("/", 0750), DebuggerErrno::ERROR_SYSCALL_FAILED); +} + +TEST_F(FileUtilsTest, TestGetFileSize) +{ + size_t size; + EXPECT_EQ(GetFileSize(testRegularFile, size), DebuggerErrno::OK); + EXPECT_EQ(size, 0); + TEST_ExecShellCommand("echo \"123456789\" > " + testRegularFile); + EXPECT_EQ(GetFileSize(testRegularFile, size), DebuggerErrno::OK); + EXPECT_EQ(size, 10); + EXPECT_EQ(GetFileSize(testNotExistsFile, size), DebuggerErrno::ERROR_FILE_NOT_EXISTS); + EXPECT_EQ(GetFileSize(testDirSub, size), DebuggerErrno::ERROR_ILLEGAL_FILE_TYPE); + EXPECT_EQ(GetFileSize(testFifo, size), DebuggerErrno::ERROR_ILLEGAL_FILE_TYPE); +} + +TEST_F(FileUtilsTest, TestOpenFileRead) +{ + std::ifstream ifs; + EXPECT_EQ(OpenFile(testNotExistsFile, ifs), DebuggerErrno::ERROR_FILE_NOT_EXISTS); + TEST_ExecShellCommand("chmod -r " + testRegularFile); + EXPECT_EQ(OpenFile(testRegularFile, ifs), 
DebuggerErrno::ERROR_PERMISSION_DENINED); + TEST_ExecShellCommand("chmod +r " + testRegularFile); + EXPECT_EQ(OpenFile(testLink, ifs), DebuggerErrno::ERROR_NOT_ALLOW_SOFTLINK); + TEST_ExecShellCommand("echo \"123456789\" > " + testRegularFile); + ASSERT_EQ(OpenFile(testRegularFile, ifs), DebuggerErrno::OK); + ASSERT_TRUE(ifs.is_open()); + std::string content((std::istreambuf_iterator<char>(ifs)), std::istreambuf_iterator<char>()); + EXPECT_EQ(content, "123456789\n"); + ifs.close(); +} + +TEST_F(FileUtilsTest, TestOpenFileWrite) +{ + std::ofstream ofs; + ASSERT_EQ(OpenFile(testRegularFile, ofs), DebuggerErrno::OK); + ofs << "123456789"; + ofs.close(); + std::ifstream ifs(testRegularFile, std::ios::in); + std::string content((std::istreambuf_iterator<char>(ifs)), std::istreambuf_iterator<char>()); + ifs.close(); + EXPECT_EQ(content, "123456789"); +} + +TEST_F(FileUtilsTest, TestCheckFileSuffixAndSize) +{ + EXPECT_EQ(CheckFileSuffixAndSize(testRegularFile, FileType::COMMON), DebuggerErrno::OK); + EXPECT_EQ(CheckFileSuffixAndSize(testRegularFile, FileType::JSON), DebuggerErrno::ERROR_UNKNOWN_FILE_SUFFIX); + std::string sparseKpl = testDir + "/test.kpl"; + std::string sparseNpy = testDir + "/test.npy"; + std::string sparseJson = testDir + "/test.json"; + std::string sparsePt = testDir + "/test.pt"; + std::string sparseCsv = testDir + "/test.csv"; + std::string sparseYaml = testDir + "/test.yaml"; + TEST_ExecShellCommand("truncate -s 1G " + sparseCsv); + EXPECT_EQ(CheckFileSuffixAndSize(sparseCsv, FileType::CSV), DebuggerErrno::OK); + TEST_ExecShellCommand("rm " + sparseCsv); + TEST_ExecShellCommand("truncate -s 1025M " + sparseCsv); + EXPECT_EQ(CheckFileSuffixAndSize(sparseCsv, FileType::CSV), DebuggerErrno::ERROR_FILE_TOO_LARGE); + TEST_ExecShellCommand("truncate -s 1025M " + sparseKpl); + EXPECT_EQ(CheckFileSuffixAndSize(sparseKpl, FileType::PKL), DebuggerErrno::ERROR_FILE_TOO_LARGE); + TEST_ExecShellCommand("truncate -s 11G " + sparseNpy); + EXPECT_EQ(CheckFileSuffixAndSize(sparseNpy, 
FileType::NUMPY), DebuggerErrno::ERROR_FILE_TOO_LARGE); + TEST_ExecShellCommand("truncate -s 1025M " + sparseJson); + EXPECT_EQ(CheckFileSuffixAndSize(sparseJson, FileType::JSON), DebuggerErrno::ERROR_FILE_TOO_LARGE); + TEST_ExecShellCommand("truncate -s 11G " + sparsePt); + EXPECT_EQ(CheckFileSuffixAndSize(sparsePt, FileType::PT), DebuggerErrno::ERROR_FILE_TOO_LARGE); + TEST_ExecShellCommand("truncate -s 10241K " + sparseYaml); + EXPECT_EQ(CheckFileSuffixAndSize(sparseYaml, FileType::YAML), DebuggerErrno::ERROR_FILE_TOO_LARGE); +} + +TEST_F(FileUtilsTest, TestCheckDirCommon) +{ + EXPECT_EQ(CheckDirCommon(""), DebuggerErrno::ERROR_CANNOT_PARSE_PATH); + EXPECT_EQ(CheckDirCommon(testNotExistsFile), DebuggerErrno::ERROR_FILE_NOT_EXISTS); + EXPECT_EQ(CheckDirCommon(testRegularFile), DebuggerErrno::ERROR_ILLEGAL_FILE_TYPE); + std::string linkdir = testDir + "/linkdir"; + TEST_ExecShellCommand("ln -s " + GetAbsPath(testDirSub) + " " + linkdir); + EXPECT_EQ(CheckDirCommon(linkdir), DebuggerErrno::ERROR_NOT_ALLOW_SOFTLINK); + EXPECT_EQ(CheckDirCommon(testDirSub), DebuggerErrno::OK); + TEST_ExecShellCommand("chmod -r " + testDirSub); + EXPECT_EQ(CheckDirCommon(testDirSub), DebuggerErrno::ERROR_PERMISSION_DENINED); +} + +TEST_F(FileUtilsTest, TestCheckFileBeforeRead) +{ + EXPECT_EQ(CheckFileBeforeRead(""), DebuggerErrno::ERROR_CANNOT_PARSE_PATH); + EXPECT_EQ(CheckFileBeforeRead(testNotExistsFile), DebuggerErrno::ERROR_FILE_NOT_EXISTS); + EXPECT_EQ(CheckFileBeforeRead(testLink), DebuggerErrno::ERROR_NOT_ALLOW_SOFTLINK); + EXPECT_EQ(CheckFileBeforeRead(testRegularFile), DebuggerErrno::OK); + TEST_ExecShellCommand("chmod -r " + testRegularFile); + EXPECT_EQ(CheckFileBeforeRead(testRegularFile), DebuggerErrno::ERROR_PERMISSION_DENINED); +} + +TEST_F(FileUtilsTest, TestCheckFileBeforeCreateOrWrite) +{ + EXPECT_EQ(CheckFileBeforeCreateOrWrite(""), DebuggerErrno::ERROR_CANNOT_PARSE_PATH); + EXPECT_EQ(CheckFileBeforeCreateOrWrite(testNotExistsFile), DebuggerErrno::OK); + 
EXPECT_EQ(CheckFileBeforeCreateOrWrite(testRegularFile), DebuggerErrno::ERROR_FILE_ALREADY_EXISTS); + EXPECT_EQ(CheckFileBeforeCreateOrWrite(testRegularFile, true), DebuggerErrno::OK); + TEST_ExecShellCommand("chmod -w " + testRegularFile); + EXPECT_EQ(CheckFileBeforeCreateOrWrite(testRegularFile, true), DebuggerErrno::ERROR_PERMISSION_DENINED); + EXPECT_EQ(CheckFileBeforeCreateOrWrite("/", true), DebuggerErrno::ERROR_PERMISSION_DENINED); +} + +} diff --git a/debug/accuracy_tools/msprobe/test/cpp/test_log.cpp b/debug/accuracy_tools/msprobe/test/cpp/test_log.cpp new file mode 100644 index 0000000000000000000000000000000000000000..254b54359a50166e1d893c5b936eb220ee0b2a73 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/cpp/test_log.cpp @@ -0,0 +1,50 @@ +#include + +#include "gtest/gtest.h" +#include "test_utils.hpp" +#include "base/ErrorInfos.hpp" + +using namespace MindStudioDebugger; + +namespace MsProbeTest { + +TEST(ErrorInfoTest, TestLog) +{ + std::string testDir = "./testdir"; + ASSERT_EQ(mkdir(testDir.c_str(), 0750), 0); + ErrorInfosManager::SetLogPath(testDir + "/logfile1.log"); + LOG_CRITICAL(DebuggerErrno::ERROR_DIR_NOT_EXISTS, "Critical log content."); + std::ifstream ifs1(testDir + "/logfile1.log", std::ios::in); + ASSERT_TRUE(ifs1.is_open()); + std::string content1((std::istreambuf_iterator(ifs1)), std::istreambuf_iterator()); + ifs1.close(); + EXPECT_EQ(content1, "[CRITICAL][DIR_NOT_EXISTS]Critical log content.\n"); + LOG_ERROR(DebuggerErrno::ERROR_INVALID_OPERATION, "Error log content."); + ifs1.open(testDir + "/logfile1.log"); + ASSERT_TRUE(ifs1.is_open()); + std::string content2((std::istreambuf_iterator(ifs1)), std::istreambuf_iterator()); + EXPECT_EQ(content2, + "[CRITICAL][DIR_NOT_EXISTS]Critical log content.\n[ERROR][INVALID_OPERATION]Error log content.\n"); + + ErrorInfosManager::SetLogPath(testDir + "/logfile2.log"); + LOG_WARNING(DebuggerErrno::ERROR_SYSCALL_FAILED, "Warning log content."); + std::ifstream ifs2(testDir + "/logfile2.log", 
std::ios::in); + ASSERT_TRUE(ifs2.is_open()); + std::string content3((std::istreambuf_iterator<char>(ifs2)), std::istreambuf_iterator<char>()); + ifs2.close(); + EXPECT_EQ(content3, "[WARNING][SYSCALL_FAILED]Warning log content.\n"); + + ErrorInfosManager::SetLogPath(testDir + "/logfile3.log"); + LOG_INFO("Info log content."); + LOG_DEBUG("Debug log content."); + std::ifstream ifs3(testDir + "/logfile3.log", std::ios::in); + ASSERT_TRUE(ifs3.is_open()); + std::string content4((std::istreambuf_iterator<char>(ifs3)), std::istreambuf_iterator<char>()); + ifs3.close(); + EXPECT_EQ(content4, "[INFO]Info log content.\n"); + TEST_ExecShellCommand("rm -rf " + testDir); + + ErrorInfosManager::SetLogPath(""); +} + +} diff --git a/debug/accuracy_tools/msprobe/test/cpp/test_main.cpp b/debug/accuracy_tools/msprobe/test/cpp/test_main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..08fb83905205f05c0710aec3d0bdaed3c8bdd54f --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/cpp/test_main.cpp @@ -0,0 +1,7 @@ +#include "gtest/gtest.h" + +int main(int argc, char** argv) +{ + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/debug/accuracy_tools/msprobe/test/cpp/test_math_utils.cpp b/debug/accuracy_tools/msprobe/test/cpp/test_math_utils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3b23e9c879c431ef7457990ba774aa0dc1321b45 --- /dev/null +++ b/debug/accuracy_tools/msprobe/test/cpp/test_math_utils.cpp @@ -0,0 +1,100 @@ +#include +#include +#include +#include +#include +#include "utils/MathUtils.hpp" + +using namespace MindStudioDebugger; +using namespace MindStudioDebugger::MathUtils; + +namespace MsProbeTest { + +TEST(MathUtilsTest, TestRandom) +{ + for (uint32_t i = 0; i < 5; i++) { + float result = Random(); + EXPECT_GE(result, 0.0f); + EXPECT_LE(result, 1.0f); + } + for (uint32_t i = 0; i < 5; i++) { + float floor = static_cast<float>(i * 5) - 10.0f; + float ceil = static_cast<float>(i * 10); + float result = Random(floor, ceil); + 
EXPECT_GE(result, floor); + EXPECT_LE(result, ceil); + } +} + +TEST(MathUtilsTest, TestRandomInt) +{ + for (uint32_t i = 0; i < 5; i++) { + int32_t floor = static_cast<int32_t>(i * 5) - 10; + int32_t ceil = static_cast<int32_t>(i * 10); + int32_t result = RandomInt(floor, ceil); + EXPECT_GE(result, floor); + EXPECT_LT(result, ceil); + } +} + +TEST(MathUtilsTest, TestRandomString) +{ + uint32_t len = 16; + std::string result = RandomString(len); + EXPECT_EQ(result.length(), len); + for (char c : result) { + EXPECT_TRUE((c >= ' ' && c <= '~')); + } + + result = RandomString(len, 'a', 'f'); + EXPECT_EQ(result.length(), len); + for (char c : result) { + EXPECT_TRUE(c >= 'a' && c <= 'f'); + } +} + +TEST(MathUtilsTest, TestCalculateMD5) +{ + const uint8_t data[] = "Hello, world!"; + std::string result = CalculateMD5(data, sizeof(data) - 1); + EXPECT_EQ(result, "6cd3556deb0da54bca060b4c39479839"); +} + +TEST(MathUtilsTest, TestGcd) +{ + EXPECT_EQ(Gcd(10, 5), 5); + EXPECT_EQ(Gcd(15, 5), 5); + EXPECT_EQ(Gcd(0, 5), 0); + EXPECT_EQ(Gcd(5, 0), 0); + EXPECT_EQ(Gcd(0, 0), 0); + EXPECT_EQ(Gcd(1, 1), 1); +} + +TEST(MathUtilsTest, TestLcm) +{ + EXPECT_EQ(Lcm(10, 5), 10); + EXPECT_EQ(Lcm(15, 5), 15); + EXPECT_EQ(Lcm(0, 5), 0); + EXPECT_EQ(Lcm(5, 0), 0); + EXPECT_EQ(Lcm(0, 0), 0); + EXPECT_EQ(Lcm(1, 1), 1); +} + +TEST(MathUtilsTest, TestDivCeil) +{ + EXPECT_EQ(DivCeil(10, 5), 2); + EXPECT_EQ(DivCeil(10, 3), 4); + EXPECT_EQ(DivCeil(10, 1), 10); + EXPECT_EQ(DivCeil(0, 5), 0); + EXPECT_EQ(DivCeil(0, 0), 0); +} + +TEST(MathUtilsTest, TestAlignCeil) +{ + EXPECT_EQ(AlignCeil(10, 5), 10); + EXPECT_EQ(AlignCeil(7, 5), 10); + EXPECT_EQ(AlignCeil(0, 5), 0); + EXPECT_EQ(AlignCeil(10, 0), 0); +} + +} diff --git a/debug/accuracy_tools/msprobe/test/cpp/test_precision_debugger.cpp b/debug/accuracy_tools/msprobe/test/cpp/test_precision_debugger.cpp new file mode 100644 index 0000000000000000000000000000000000000000..69df0c18fcc27cd0ac359262649fcc588f2e9b9f --- /dev/null +++ 
b/debug/accuracy_tools/msprobe/test/cpp/test_precision_debugger.cpp @@ -0,0 +1,122 @@ +#include +#include + +#include "include/test_utils.hpp" +#include "third_party/ACL/AclApi.hpp" +#include "base/ErrorInfos.hpp" +#include "core/PrecisionDebugger.hpp" + +using namespace MindStudioDebugger; + +namespace MsProbeTest { + +class PrecisionDbgTaskStub : public PrecisionDbgTaskBase { +public: + PrecisionDbgTaskStub() = default; + ~PrecisionDbgTaskStub() = default; + std::string Name() const override {return "PrecisionDbgTaskStub";} + bool Condition(const DebuggerConfig& cfg) const override {return true;} + + void Initialize(const DebuggerConfig& cfg) {initialize_called = true;} + void OnStart() {start_called = true;} + void OnStop() {stop_called = true;} + void OnStep() {step_called = true;} + + bool initialize_called{false}; + bool start_called{false}; + bool stop_called{false}; + bool step_called{false}; +}; + +class PrecisionDbgTaskUselessStub : public PrecisionDbgTaskStub { +public: + bool Condition(const DebuggerConfig& cfg) const override {return false;} +}; + +TEST(PrecisionDebuggerTest, TestRegisterBeforeInit) { + PrecisionDebugger& debugger = PrecisionDebugger::GetInstance(); + PrecisionDbgTaskStub stub_task; + + DebuggerConfig::GetInstance().Reset(); + debugger.RegisterDebuggerTask(&stub_task); + stub_task.Register(); + + EXPECT_FALSE(debugger.IsEnable()); + EXPECT_EQ(debugger.GetCurStep(), 0); + debugger.Start(); + EXPECT_FALSE(debugger.IsEnable()); + debugger.Stop(); + debugger.Step(); + EXPECT_EQ(debugger.GetCurStep(), 0); + + EXPECT_FALSE(stub_task.initialize_called); + EXPECT_FALSE(stub_task.start_called); + EXPECT_FALSE(stub_task.stop_called); + EXPECT_FALSE(stub_task.step_called); + + debugger.UnRegisterDebuggerTask(&stub_task); + debugger.UnRegisterDebuggerTask(nullptr); +} + +TEST(PrecisionDebuggerTest, TestInit) { + PrecisionDebugger& debugger = PrecisionDebugger::GetInstance(); + MOCKER(MindStudioDebugger::AscendCLApi::LoadAclApi) + .stubs() + 
.then(returnValue(0)) + .expects(atLeast(1)); + + DebuggerConfig::GetInstance().Reset(); + EXPECT_FALSE(debugger.HasInitialized()); + EXPECT_NE(debugger.Initialize("", ""), 0); + EXPECT_FALSE(debugger.HasInitialized()); + CleanErrorInfoCache(); + EXPECT_EQ(debugger.Initialize("MindSpore", CONFIG_EXAMPLE), 0); + EXPECT_TRUE(debugger.HasInitialized()); + EXPECT_EQ(debugger.Initialize("MindSpore", CONFIG_EXAMPLE), 0); + EXPECT_TRUE(debugger.HasInitialized()); + + GlobalMockObject::verify(); + GlobalMockObject::reset(); +} + +TEST(PrecisionDebuggerTest, TestSubTaskDispatch) { + PrecisionDebugger& debugger = PrecisionDebugger::GetInstance(); + PrecisionDbgTaskStub stub_task1; + PrecisionDbgTaskStub stub_task2; + PrecisionDbgTaskUselessStub stub_task3; + MOCKER(MindStudioDebugger::AscendCLApi::LoadAclApi) + .stubs() + .then(returnValue(0)); + MOCKER(MindStudioDebugger::AscendCLApi::ACLAPI_aclrtSynchronizeDevice) + .stubs() + .then(returnValue(0)) + .expects(atLeast(1)); + + stub_task1.Register(); + EXPECT_EQ(debugger.Initialize("MindSpore", CONFIG_EXAMPLE), 0); + stub_task2.Register(); + stub_task3.Register(); + + EXPECT_TRUE(stub_task1.initialize_called); + EXPECT_TRUE(stub_task2.initialize_called); + EXPECT_FALSE(stub_task3.initialize_called); + EXPECT_FALSE(stub_task1.start_called); + EXPECT_FALSE(stub_task2.stop_called); + EXPECT_FALSE(stub_task3.step_called); + + debugger.Start(); + EXPECT_TRUE(stub_task1.start_called); + EXPECT_FALSE(stub_task3.start_called); + + debugger.Stop(); + EXPECT_TRUE(stub_task1.stop_called); + EXPECT_TRUE(stub_task2.stop_called); + + debugger.Step(); + EXPECT_TRUE(stub_task1.step_called); + + GlobalMockObject::verify(); + GlobalMockObject::reset(); +} + +} diff --git a/debug/accuracy_tools/msprobe/test/resources/config.json b/debug/accuracy_tools/msprobe/test/resources/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a61fd5ca83a787913413ba8aac589cb50dfd13e3 --- /dev/null +++ 
b/debug/accuracy_tools/msprobe/test/resources/config.json @@ -0,0 +1,50 @@ +{ + "task": "statistics", + "dump_path": "./dump_path", + "rank": [], + "step": [], + "level": "L1", + "seed": 1234, + "is_deterministic": false, + "enable_dataloader": false, + "acl_config": "", + "tensor": { + "scope": [], + "list":[], + "data_mode": ["all"], + "backward_input": [], + "file_format": "npy" + }, + "statistics": { + "scope": [], + "list":[], + "data_mode": ["all"], + "summary_mode": "statistics" + }, + "overflow_check": { + "overflow_nums": 1, + "check_mode":"all" + }, + "run_ut": { + "white_list": [], + "black_list": [], + "error_data_path": "./" + }, + "grad_probe": { + "grad_level": "L1", + "param_list": [], + "bounds": [-1, 0, 1] + }, + "free_benchmark": { + "scope": [], + "list": [], + "fuzz_device": "npu", + "pert_mode": "improve_precision", + "handler_type": "check", + "fuzz_level": "L1", + "fuzz_stage": "forward", + "if_preheat": false, + "preheat_step": 15, + "max_sample": 20 + } +} \ No newline at end of file diff --git a/debug/accuracy_tools/setup.py b/debug/accuracy_tools/setup.py index 85ab803822469682387aa99b1ac373c9680eadbe..c3fa0333a7f76fac6aa1c695575b518fedb4033b 100644 --- a/debug/accuracy_tools/setup.py +++ b/debug/accuracy_tools/setup.py @@ -16,6 +16,9 @@ __version__ = '1.1.1' +import subprocess +import platform +import sys import setuptools INSTALL_REQUIRED = [ @@ -38,13 +41,52 @@ EXCLUDE_PKGS = [ "api_accuracy_checker*", "grad_tool*", "ptdbg_ascend*", + "msprobe.ccsrc*", "msprobe.test*", + "build.sh", + "build_dependency*", + "cmake*", + "output*", + "third_party*", ] +if "--plat-name" in sys.argv or "--python-tag" in sys.argv: + raise SystemError("Specifing platforms or python version is not supported.") + +if (platform.system() != "Linux"): + raise SystemError("MsProbe is only supported on Linux platforms.") + + +mod_list_range = {"adump",} +mod_list = [] +for i in range(len(sys.argv)): + if sys.argv[i].startswith("--include-mod"): + if 
sys.argv[i].startswith("--include-mod="): + mod_list = sys.argv[i][len("--include-mod="):].split(',') + sys.argv.remove(sys.argv[i]) + elif i + 1 < len(sys.argv) and not sys.argv[i + 1].startswith("--"): + mod_list = sys.argv[i + 1].split(',') + sys.argv.remove(sys.argv[i + 1]) + sys.argv.remove(sys.argv[i]) + mod_list = list(set(mod_list) & mod_list_range) + break + +# 当前只有adump一个mod +if mod_list: + arch = platform.machine() + sys.argv.append("--plat-name") + sys.argv.append(f"linux_{arch}") + sys.argv.append("--python-tag") + sys.argv.append(f"cp{sys.version_info.major}{sys.version_info.minor}") + build_cmd = f"bash ./build.sh -j16 -a {arch} -v {sys.version_info.major}.{sys.version_info.minor}" + p = subprocess.run(build_cmd.split(), shell=False) + if p.returncode != 0: + raise RuntimeError(f"Failed to build source({p.returncode})") + setuptools.setup( name="mindstudio-probe", version=__version__, - description="Pytorch Ascend Probe Utils", + description="Ascend Probe Utils", long_description="MindStudio-Probe is a set of tools for diagnosing and improving model accuracy on Ascend NPU, " "including API acc checker, ptdbg, grad tool etc.", url="https://gitee.com/ascend/mstt/tree/master/debug/accuracy_tools/msprobe", @@ -59,6 +101,7 @@ setuptools.setup( 'Intended Audience :: Education', 'Intended Audience :: Science/Research', 'Programming Language :: Python :: 3', + 'Programming Language :: C++', 'Topic :: Scientific/Engineering', 'Topic :: Scientific/Engineering :: Mathematics', 'Topic :: Scientific/Engineering :: Artificial Intelligence',