From 558ee45a36d276f935a93ed49d972d5dc4bb4fe4 Mon Sep 17 00:00:00 2001 From: caishangqiu Date: Thu, 28 Aug 2025 12:35:07 +0800 Subject: [PATCH] refactoring project files --- .clang-format | 36 +++++++++ CMakeLists.txt | 28 +++++++ README.md | 103 +++++-------------------- __init__.py | 13 ++++ build.sh | 83 ++++++++++++++++++++ cmake/Findcpython.cmake | 14 ++++ cmake/Findgtest.cmake | 45 +++++++++++ cmake/Findmockcpp.cmake | 42 ++++++++++ cmake/config.ini | 7 ++ cmake/download_opensource.sh | 110 +++++++++++++++++++++++++++ cmake/utils.cmake | 46 +++++++++++ docs/0001.capability_matrix.md | 0 docs/0002.installation.md | 55 ++++++++++++++ docs/0003.config_introduction.md | 0 docs/0004.config_examples.md | 0 docs/0101.dump_offline_model.md | 0 docs/0102.dump_mindie_llm_for_atb.md | 0 docs/README.md | 29 +++++++ pyproject.toml | 11 +++ requirements/requirements.txt | 7 ++ requirements/requirements_tf.txt | 10 +++ setup.py | 90 ++++++++++++++++++++++ third_party/.keep | 0 23 files changed, 645 insertions(+), 84 deletions(-) create mode 100644 .clang-format create mode 100644 CMakeLists.txt create mode 100644 __init__.py create mode 100644 build.sh create mode 100644 cmake/Findcpython.cmake create mode 100644 cmake/Findgtest.cmake create mode 100644 cmake/Findmockcpp.cmake create mode 100644 cmake/config.ini create mode 100644 cmake/download_opensource.sh create mode 100644 cmake/utils.cmake create mode 100644 docs/0001.capability_matrix.md create mode 100644 docs/0002.installation.md create mode 100644 docs/0003.config_introduction.md create mode 100644 docs/0004.config_examples.md create mode 100644 docs/0101.dump_offline_model.md create mode 100644 docs/0102.dump_mindie_llm_for_atb.md create mode 100644 docs/README.md create mode 100644 pyproject.toml create mode 100644 requirements/requirements.txt create mode 100644 requirements/requirements_tf.txt create mode 100644 setup.py create mode 100644 third_party/.keep diff --git a/.clang-format b/.clang-format new file 
mode 100644
index 0000000000..72ffb18052
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,36 @@
+BasedOnStyle: LLVM
+
+IndentWidth: 4
+TabWidth: 4
+UseTab: Never
+
+ColumnLimit: 120
+BreakBeforeBraces: Custom
+
+BraceWrapping:
+    AfterNamespace: false
+    AfterFunction: true
+    AfterClass: false
+    AfterControlStatement: false
+    BeforeCatch: false
+    BeforeElse: false
+    IndentBraces: false
+    SplitEmptyRecord: false
+    SplitEmptyFunction: false
+    AfterEnum: true
+
+AccessModifierOffset: -4
+IndentCaseLabels: true
+SpaceBeforeParens: ControlStatements
+
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: None
+AllowShortBlocksOnASingleLine: false
+
+BinPackParameters: false
+BinPackArguments: false
+
+NamespaceIndentation: All
+AllowAllArgumentsOnNextLine: true
+AllowAllParametersOfDeclarationOnNextLine: true
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000000..bc9134a369
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,39 @@
+# Top-level build script for the native (C++) part of msprobe.
+#
+# Cache variables normally supplied by build.sh:
+#   ARCH_TYPE        target architecture; must equal the host architecture
+#   BUILD_TEST_CASE  "True" to additionally build the unit tests in test/UT
+cmake_minimum_required(VERSION 3.14)
+project(msprobe)
+
+# C++14 is mandatory: fail at configure time instead of silently decaying
+# to an older standard on compilers that do not support it.
+set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+# Host architecture as reported by the OS; used to reject cross-compilation.
+execute_process(
+    COMMAND uname -m
+    OUTPUT_VARIABLE machine_arch
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+)
+
+if (DEFINED ARCH_TYPE AND NOT "${ARCH_TYPE}" STREQUAL "${machine_arch}")
+    message(FATAL_ERROR
+        "Cross-compilation is not supported currently. (compile ${ARCH_TYPE} on ${machine_arch})"
+    )
+endif()
+
+# Append rather than overwrite so module paths passed in by the user survive.
+list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
+# The Find*.cmake modules and utils.cmake locate the project root through this
+# configure-time environment variable.
+set(ENV{PROJECT_ROOT_PATH} "${PROJECT_SOURCE_DIR}")
+
+include("${PROJECT_SOURCE_DIR}/cmake/utils.cmake")
+add_subdirectory(msprobe)
+
+if (DEFINED BUILD_TEST_CASE AND "${BUILD_TEST_CASE}" STREQUAL "True")
+    add_subdirectory(test/UT)
+endif()
diff --git a/README.md b/README.md
index 7c28869d37..3b1683324a 100644
--- a/README.md
+++ b/README.md
@@ -1,84 +1,19 @@
-# 🚨 重要通知
-
-**1. Ascend Training Tools 更名为 MindStudio Training Tools (mstt)。**
-
-**2. 
本代码仓 URL 变更为 [https://gitee.com/ascend/mstt](https://gitee.com/ascend/mstt),原 URL 仍然可用(2024.07.04 )。** - -**3. 不再维护:[api_accuracy_checker](./debug/accuracy_tools/api_accuracy_checker/) (2024.09.30下线)和[ ptdbg_ascend](./debug/accuracy_tools/ptdbg_ascend/) -(2024.09.30下线)** - -**相关目录 mstt/debug/accuracy_tools/api_accuracy_checker 和 mstt/debug/accuracy_tools/ptdbg_ascend 将于 2024.09.30 删除。新版本的预检和 ptdbg 已经合到 mstt/debug/accuracy_tools/msprobe 目录下。** - ---- - -# 🧰 MindStudio Training Tools - -![Build Status](https://img.shields.io/badge/build-passing-brightgreen) -![Commit Activity](https://img.shields.io/badge/commit%20activity-high-red) -![License: Apache 2.0](https://img.shields.io/badge/license-Apache%202.0-blue) - -## [分析迁移工具](https://gitee.com/ascend/mstt/wikis/工具介绍/分析迁移工具/分析迁移工具介绍) - -1. [脚本分析工具](https://gitee.com/ascend/mstt/wikis/%E5%B7%A5%E5%85%B7%E4%BB%8B%E7%BB%8D/%E5%88%86%E6%9E%90%E8%BF%81%E7%A7%BB%E5%B7%A5%E5%85%B7/%E5%88%86%E6%9E%90%E5%B7%A5%E5%85%B7%E4%BD%BF%E7%94%A8%E6%8C%87%E5%AF%BC) - - 脚本分析工具可以帮助用户在执行迁移操作前,分析基于 GPU 平台的 PyTorch 训练脚本中算子、三方库套件、API 亲和性以及动态 shape 的支持情况。 - -2. [(推荐)自动迁移工具](https://gitee.com/ascend/mstt/wikis/%E5%B7%A5%E5%85%B7%E4%BB%8B%E7%BB%8D/%E5%88%86%E6%9E%90%E8%BF%81%E7%A7%BB%E5%B7%A5%E5%85%B7/%E8%87%AA%E5%8A%A8%E8%BF%81%E7%A7%BB%E5%B7%A5%E5%85%B7%E4%BD%BF%E7%94%A8%E6%8C%87%E5%AF%BC) - - 自动迁移工具只需在训练脚本中导入库代码即可完成模型脚本的迁移,使用方式简单,且修改内容少。 - -3. [脚本迁移工具](https://gitee.com/ascend/mstt/wikis/%E5%B7%A5%E5%85%B7%E4%BB%8B%E7%BB%8D/%E5%88%86%E6%9E%90%E8%BF%81%E7%A7%BB%E5%B7%A5%E5%85%B7/%E8%84%9A%E6%9C%AC%E8%BF%81%E7%A7%BB%E5%B7%A5%E5%85%B7%E4%BD%BF%E7%94%A8%E6%8C%87%E5%AF%BC) - - 脚本迁移工具通过后端命令行,将 GPU 上训练的 PyTorch 脚本迁移至 NPU 上,得到新的训练脚本用于训练。 - -4. 
[训推一体权重转换工具](https://gitee.com/Ascend/mstt/wikis/%E5%B7%A5%E5%85%B7%E4%BB%8B%E7%BB%8D/%E5%88%86%E6%9E%90%E8%BF%81%E7%A7%BB%E5%B7%A5%E5%85%B7/%E8%AE%AD%E6%8E%A8%E4%B8%80%E4%BD%93%E6%9D%83%E9%87%8D%E8%BD%AC%E6%8D%A2%E5%B7%A5%E5%85%B7%E4%BD%BF%E7%94%A8%E6%8C%87%E5%AF%BC) - - 训推一体权重转换工具,支持在 GPU 和 NPU 上训练好的模型转成加速推理支持的格式。 - -## [精度工具](./debug/accuracy_tools/) - -[MindStudio Probe(msprobe,MindStudio 精度调试工具)](./debug/accuracy_tools/msprobe)。 - -## [性能工具](./profiler) - -1. [compare_tools(性能比对工具)](./profiler/compare_tools) - - 提供 NPU 与 GPU 性能拆解功能以及算子、通信、内存性能的比对功能。 - -2. [cluster_analyse(集群分析工具)](./profiler/cluster_analyse) - - 提供多机多卡的集群分析能力(基于通信域的通信分析和迭代耗时分析), 当前需要配合 MindStudio Insight 的集群分析功能使用。 - -3. [advisor](./profiler/advisor) - - 将 Ascend PyTorch Profiler 或者 msprof 采集的 PyTorch 场景性能数据进行分析,并输出性能调优建议。 - -4. [bind_core](./profiler/affinity_cpu_bind) - - 绑核脚本,支持非侵入修改工程代码,实现一键式绑核功能。 - -## [Tensorboard](./plugins/tensorboard-plugins/tb_plugin) - -Tensorboard 支持 NPU 性能数据可视化插件 PyTorch Profiler TensorBoard NPU Plugin。 - -支持将 Ascend 平台采集、解析的 PyTorch Profiling 数据可视化呈现,也兼容 GPU 数据采集、解析可视化。 - -## 分支维护策略 - -1. MindStudio Training Tools 工具版本分支的维护阶段如下: - - | **状态** | **时间** | **说明** | - | ------------------- | -------- | ------------------------------------------------ | - | 计划 | 1—3 个月 | 计划特性 | - | 开发 | 3个月 | 开发特性 | - | 维护 | 6—12个月 | 合入所有已解决的问题并发布版本 | - | 无维护 | 0—3 个月 | 合入所有已解决的问题,无专职维护人员,无版本发布 | - | 生命周期终止(EOL) | N/A | 分支不再接受任何修改 | - -2. 
MindStudio Training Tools 分支版本号命名规则如下: - - mstt 仓每年发布 4 个版本,每个版本都将对应一个分支;以 v6.0 为例,其将对应 v6.0.RC1、v6.0.RC2、v6.0.RC3 以及 v6.0.0 四个版本,在仓库中将存在与之对应的分支。 - - | **分支** | **状态** | **发布日期** | **后续状态** | **EOL日期** | - | ------------- | -------- | ------------ | ------------------------ | ----------- | - | **v6.0.0** | 维护 | 2023.12.12 | 预计 2024.12.12 起无维护 | | +# 📖 msprobe 使用手册 + +![platform](https://img.shields.io/badge/platform-Linux-yellow) +![License: Apache 2.0](https://img.shields.io/badge/license-Apache%202.0-green) + +## 用前必看 + +使用工具前,请先浏览[工具模块简介、适用场景和当前版本局限](./docs/0001.capability_matrix.md)。 + +## ⚙️ [安装](./docs/0002.installation.md) +## 🛠️ config.json [介绍](./docs/0003.config_introduction.md) 和 [示例](./docs/0004.config_examples.md) + +## 🧰 主要功能 + +### 1 数据采集 + +[离线模型 ONNX、TensorFlow (.pb, saved model)、Ascend OM 场景](./docs/0101.dump_offline_model.md) + +[以 Ascend Transformer Boost (ATB) 为后端的 MindIE-LLM 场景](./docs/0102.dump_mindie_llm_for_atb.md) diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000000..53529bc8d3 --- /dev/null +++ b/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2025-2025 Huawei Technologies Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/build.sh b/build.sh
new file mode 100644
index 0000000000..90b120efaa
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+#
+# Build the C++ part of msprobe and copy the resulting shared library to
+# msprobe/lib/msprobe_c.so. Run it from the project root: the current
+# directory is used as both the CMake source directory and the output root.
+
+set -e
+
+BUILD_PATH=$(pwd)
+
+BUILD_ARGS=$(getopt \
+    -o ha:v:j:ft \
+    --long help,release,debug,arch:,python-version:,jobs:,force-rebuild,local,test-cases \
+    -- "$@")
+eval set -- "${BUILD_ARGS}"
+
+ARCH_TYPE=$(uname -m)
+BUILD_TYPE=release
+CONCURRENT_JOBS=16
+BUILD_TEST_CASE=False
+USE_LOCAL_FIRST=False
+PYTHON_VERSION=""
+
+# Plain text, printed with a quoted echo so that the layout is kept as written.
+HELP_DOC=$(cat << EOF
+Usage: build.sh [OPTION]...
+Build the C++ part of msprobe.
+
+Arguments:
+    -h, --help            Show this help and exit.
+    -a, --arch            Specify the target architecture (defaults to the host architecture).
+    -j, --jobs            Specify the number of compilation jobs(default 16).
+    -f, --force-rebuild   Clean up the cache before building.
+    -t, --test-cases      Build test cases.
+        --local           Prioritize the use of on-premises, third-party resources as dependencies.
+        --release         Build the release version(default).
+        --debug           Build the debug version.
+    -v, --python-version  Specify version of python.
+EOF
+)
+
+while true; do
+    case "$1" in
+        -h | --help)
+            echo "${HELP_DOC}"
+            exit 0 ;;
+        -a | --arch)
+            ARCH_TYPE="$2" ; shift 2 ;;
+        -v | --python-version)
+            PYTHON_VERSION="$2" ; shift 2 ;;
+        --release)
+            BUILD_TYPE=release ; shift ;;
+        --debug)
+            BUILD_TYPE=debug ; shift ;;
+        -j | --jobs)
+            CONCURRENT_JOBS="$2" ; shift 2 ;;
+        --local)
+            USE_LOCAL_FIRST=True ; shift ;;
+        -f | --force-rebuild)
+            rm -rf "${BUILD_PATH}/lib" "${BUILD_PATH}/output" "${BUILD_PATH}/msprobe/lib/msprobe_c.so"
+            shift ;;
+        -t | --test-cases)
+            BUILD_TEST_CASE=True ; shift ;;
+        --)
+            shift ; break ;;
+        *)
+            echo "Unknown argument $1"
+            exit 1 ;;
+    esac
+done
+
+BUILD_OUTPUT_PATH=${BUILD_PATH}/output/${BUILD_TYPE}
+
+cmake -B "${BUILD_OUTPUT_PATH}" -S . -DARCH_TYPE="${ARCH_TYPE}" -DBUILD_TYPE="${BUILD_TYPE}" \
+    -DUSE_LOCAL_FIRST="${USE_LOCAL_FIRST}" -DBUILD_TEST_CASE="${BUILD_TEST_CASE}" \
+    -DPYTHON_VERSION="${PYTHON_VERSION}"
+cd "${BUILD_OUTPUT_PATH}"
+make -j"${CONCURRENT_JOBS}"
+
+if [[ ! -e ${BUILD_OUTPUT_PATH}/msprobe/csrc/libmsprobe_c.so ]]; then
+    echo "Failed to build libmsprobe_c.so."
+    exit 1
+fi
+
+mkdir -p "${BUILD_PATH}/msprobe/lib"
+
+cp "${BUILD_OUTPUT_PATH}/msprobe/csrc/libmsprobe_c.so" "${BUILD_PATH}/msprobe/lib/msprobe_c.so"
diff --git a/cmake/Findcpython.cmake b/cmake/Findcpython.cmake
new file mode 100644
index 0000000000..577d42dcda
--- /dev/null
+++ b/cmake/Findcpython.cmake
@@ -0,0 +1,27 @@
+# Locate the CPython development files (headers and libraries).
+#
+# Input:
+#   PYTHON_VERSION  optional; when non-empty, exactly this version is required
+# Output:
+#   cpython_FOUND, cpython_LIBRARIES; the Python include directories are added
+#   to the directory-scope include path.
+set(PKG_NAME cpython)
+
+if (NOT ${PKG_NAME}_FOUND)
+    # EXACT only makes sense together with a version, so it is requested only
+    # when the caller actually specified one.
+    if (NOT "${PYTHON_VERSION}" STREQUAL "")
+        find_package(Python3 ${PYTHON_VERSION} EXACT COMPONENTS Development)
+    else()
+        find_package(Python3 COMPONENTS Development)
+    endif()
+
+    if (NOT Python3_FOUND)
+        message(FATAL_ERROR "Python3 ${PYTHON_VERSION} (Development component) is not found.")
+    endif()
+
+    set(PACKAGE_VERSION ${Python3_VERSION})
+    include_directories(${Python3_INCLUDE_DIRS})
+    set(${PKG_NAME}_LIBRARIES ${Python3_LIBRARIES})
+    set(${PKG_NAME}_FOUND TRUE)
+endif()
diff --git a/cmake/Findgtest.cmake b/cmake/Findgtest.cmake
new file mode 100644
index 0000000000..aa5606d311
--- /dev/null
+++ b/cmake/Findgtest.cmake
@@ -0,0 +1,53 @@
+# Download googletest 1.12.1 into third_party/ and build it in-source as
+# shared libraries.
+#
+# Output:
+#   gtest_FOUND, gtest_LIBRARIES (gtest, gmock, gtest_main, gmock_main); the
+#   googletest and googlemock include directories are added at directory scope.
+set(PACKAGE_VERSION 1.12.1)
+
+set(PKG_NAME gtest)
+set(SHA256_VALUE "81964fe578e9bd7c94dfdb09c8e4d6e6759e19967e397dbea48d1c10e45d0df2")
+set(DOWNLOAD_PATH "$ENV{PROJECT_ROOT_PATH}/third_party")
+set(DIR_NAME "${DOWNLOAD_PATH}/googletest-release-1.12.1")
+
+if (NOT ${PKG_NAME}_FOUND)
+    download_opensource_pkg(${PKG_NAME}
+        SHA256 ${SHA256_VALUE}
+        DOWNLOAD_PATH ${DOWNLOAD_PATH}
+    )
+    include_directories(${DIR_NAME}/googletest/include)
+    include_directories(${DIR_NAME}/googlemock/include)
+    set(BUILD_DEPENDENCY_PATH "$ENV{PROJECT_ROOT_PATH}/build_dependency")
+
+    # Configure with the CMake that is running this script rather than
+    # whichever "cmake" happens to come first in PATH.
+    execute_process(
+        WORKING_DIRECTORY ${DIR_NAME}
+        COMMAND ${CMAKE_COMMAND} . -DBUILD_SHARED_LIBS=ON
+        RESULT_VARIABLE RESULT
+    )
+    if (NOT RESULT EQUAL 0)
+        message(FATAL_ERROR "Failed to configure gtest. ${RESULT}")
+    endif()
+
+    execute_process(
+        WORKING_DIRECTORY ${DIR_NAME}
+        COMMAND make -j16
+        RESULT_VARIABLE RESULT
+    )
+    if (NOT RESULT EQUAL 0)
+        message(FATAL_ERROR "Failed to build gtest. ${RESULT}")
+    endif()
+
+    file(GLOB GTEST_SO "${DIR_NAME}/lib/libgtest.so")
+    file(GLOB GMOCK_SO "${DIR_NAME}/lib/libgmock.so")
+    file(GLOB GTEST_MAIN_SO "${DIR_NAME}/lib/libgtest_main.so")
+    file(GLOB GMOCK_MAIN_SO "${DIR_NAME}/lib/libgmock_main.so")
+
+    if (NOT GTEST_SO OR NOT GMOCK_SO OR NOT GTEST_MAIN_SO OR NOT GMOCK_MAIN_SO)
+        message(FATAL_ERROR "Failed to build gtest.")
+    endif()
+    set(${PKG_NAME}_LIBRARIES "${GTEST_SO};${GMOCK_SO};${GTEST_MAIN_SO};${GMOCK_MAIN_SO}")
+    set(${PKG_NAME}_FOUND TRUE)
+endif()
diff --git a/cmake/Findmockcpp.cmake b/cmake/Findmockcpp.cmake
new file mode 100644
index 0000000000..da938b9c6e
--- /dev/null
+++ b/cmake/Findmockcpp.cmake
@@ -0,0 +1,49 @@
+# Download mockcpp 2.7 into third_party/ and build it in-source.
+#
+# Output:
+#   mockcpp_FOUND, mockcpp_LIBRARIES (static libmockcpp.a); the mockcpp include
+#   directories are added at directory scope.
+set(PACKAGE_VERSION 2.7)
+
+set(PKG_NAME mockcpp)
+set(SHA256_VALUE "0dc7111c5be9785d0550ed3b68db7e12fd5d7802b7bc6548c52ac7b9e727fcc1")
+set(DOWNLOAD_PATH "$ENV{PROJECT_ROOT_PATH}/third_party")
+set(DIR_NAME "${DOWNLOAD_PATH}/mockcpp-v2.7")
+
+if (NOT ${PKG_NAME}_FOUND)
+    download_opensource_pkg(${PKG_NAME}
+        SHA256 ${SHA256_VALUE}
+        DOWNLOAD_PATH ${DOWNLOAD_PATH}
+    )
+    include_directories(${DIR_NAME}/include)
+    include_directories(${DIR_NAME}/3rdparty)
+
+    # Configure with the CMake that is running this script rather than
+    # whichever "cmake" happens to come first in PATH.
+    execute_process(
+        WORKING_DIRECTORY ${DIR_NAME}
+        COMMAND ${CMAKE_COMMAND} .
+        RESULT_VARIABLE RESULT
+    )
+    if (NOT RESULT EQUAL 0)
+        message(FATAL_ERROR "Failed to configure mockcpp. ${RESULT}")
+    endif()
+
+    execute_process(
+        WORKING_DIRECTORY ${DIR_NAME}
+        COMMAND make -j16
+        RESULT_VARIABLE RESULT
+    )
+    if (NOT RESULT EQUAL 0)
+        message(FATAL_ERROR "Failed to build mockcpp. ${RESULT}")
+    endif()
+
+    file(GLOB MOCKCPP_LIB "${DIR_NAME}/src/libmockcpp.a")
+
+    if (NOT MOCKCPP_LIB)
+        message(FATAL_ERROR "Failed to build mockcpp.")
+    endif()
+    set(${PKG_NAME}_LIBRARIES "${MOCKCPP_LIB}")
+    set(${PKG_NAME}_FOUND TRUE)
+
+endif()
diff --git a/cmake/config.ini b/cmake/config.ini
new file mode 100644
index 0000000000..6940c5403f
--- /dev/null
+++ b/cmake/config.ini
@@ -0,0 +1,7 @@
+[gtest]
+url = https://tools.mindspore.cn/Ascend/mstt/libs/googletest/release-1.12.1.tar.gz
+tag = release-1.12.1
+
+[mockcpp]
+url = https://tools.mindspore.cn/Ascend/mstt/libs/mockcpp/v2.7.zip
+tag = v2.7
diff --git a/cmake/download_opensource.sh b/cmake/download_opensource.sh
new file mode 100644
index 0000000000..ce9f13fdd5
--- /dev/null
+++ b/cmake/download_opensource.sh
@@ -0,0 +1,136 @@
+#!/bin/bash
+#
+# Download (or clone) an open-source dependency listed in config.ini and unpack
+# it into the given directory.
+#
+# Usage: download_opensource.sh <pkg_name> <path> [<sha256_value>] [<tag>]
+#   pkg_name      section name in config.ini
+#   path          existing directory that receives the download
+#   sha256_value  optional; expected SHA256 of the downloaded archive
+#   tag           optional; overrides the "tag" entry of config.ini
+
+if [ "$#" -lt 2 ]; then
+    echo "Usage: $0 <pkg_name> <path> [<sha256_value>] [<tag>]"
+    exit 1
+fi
+
+pkg_name=$1
+path=$2
+# Assign unconditionally so that values inherited from the environment are
+# never picked up by accident.
+sha256_value=${3:-}
+tag=${4:-}
+
+# config.ini sits next to this script; do not rely on the caller's cwd.
+config_file="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/config.ini"
+
+url=$(awk -F " = " '/\['${pkg_name}'\]/{a=1}a==1&&$1~/url/{print $2;exit}' "${config_file}")
+# A tag given on the command line takes precedence over the configured one.
+if [[ -z "${tag}" ]]; then
+    tag=$(awk -F " = " '/\['${pkg_name}'\]/{a=1} a==1 && $1 ~ /tag/ {print $2; exit}' "${config_file}")
+fi
+
+if [[ ! $url = https* ]]; then
+    echo "[ERROR] The URL of $pkg_name is illegal."
+    exit 1
+fi
+
+echo "[INFO] Start to download ${url}..."
+
+if [ ! -d "$path" ]; then
+    echo "[ERROR] The specified path does not exist: $path"
+    exit 1
+fi
+cd "${path}" || exit 1
+
+extension=$(echo "${url}" | awk -F'[./]' '{print $NF}')
+# Absolute, so the archive location stays valid even if path was relative.
+fullname="$(pwd)/$(basename "${url}")"
+if [[ "${extension}" == "gz" || "${extension}" == "zip" ]]; then
+    if [[ -e "${fullname}" ]]; then
+        echo "[INFO] Source ${fullname} already exists, skipping download."
+    else
+        echo "[INFO] Start downloading: ${url}"
+        # NOTE(review): -k disables TLS certificate verification, so integrity
+        # depends entirely on the SHA256 check below; consider dropping it.
+        if ! curl -L -k --fail --retry 3 --connect-timeout 10 -o "${fullname}" "${url}"; then
+            echo "[ERROR] Download failed: ${url}"
+            rm -f "${fullname}"
+            exit 1
+        fi
+
+        filesize=$(stat -c%s "${fullname}")
+        if [[ "${filesize}" -lt 10240 ]]; then
+            echo "[ERROR] Downloaded file too small (<10KB), possible error page: ${url}"
+            rm -f "${fullname}"
+            exit 1
+        fi
+
+        if file "${fullname}" | grep -q "HTML"; then
+            echo "[ERROR] Downloaded file is HTML, not a zip archive."
+            rm -f "${fullname}"
+            exit 1
+        fi
+
+        echo "[INFO] Download success: ${url} (${filesize} bytes)"
+    fi
+
+    if [[ -n "${sha256_value}" ]]; then
+        sha256data=$(sha256sum "${fullname}" | cut -d' ' -f1)
+        if [[ "${sha256data}" != "${sha256_value}" ]]; then
+            echo "[ERROR] SHA256 verification failed: ${url}"
+            echo "[ERROR] Expected: ${sha256_value}"
+            echo "[ERROR] Actual : ${sha256data}"
+            # Drop the bad archive; otherwise the "already exists" shortcut
+            # above would reuse it on every later run.
+            rm -f "${fullname}"
+            exit 1
+        fi
+    fi
+
+    if [[ "${extension}" == "gz" ]]; then
+        # GNU tar's -n means --seek, not "never overwrite", so it is not used.
+        if ! tar -zxf "${fullname}" -C ./; then
+            echo "[ERROR] Failed to extract: ${fullname}"
+            exit 1
+        fi
+    elif [[ "${extension}" == "zip" ]]; then
+        unzip -n "${fullname}" -d ./ > /dev/null
+        # unzip returns 1 for mere warnings; 2 and above are real errors.
+        if [[ $? -ge 2 ]]; then
+            echo "[ERROR] Failed to extract: ${fullname}"
+            exit 1
+        fi
+    fi
+elif [[ "${extension}" == "git" ]]; then
+    repo_dir=$(basename "${url}" .git)
+
+    if [[ -d "${repo_dir}" ]]; then
+        echo "[INFO] Repository already exists: ${repo_dir}, skipping clone."
+        if [[ -n "${tag}" ]]; then
+            cd "${repo_dir}"
+            echo "[INFO] Checking out ${tag}..."
+            git fetch origin
+            git checkout "${tag}" || {
+                echo "[ERROR] Failed to checkout ${tag}"
+                exit 1
+            }
+            cd -
+        fi
+    else
+        if [[ -n "${tag}" ]]; then
+            git clone --progress -b "${tag}" "${url}"
+        else
+            git clone --progress "${url}"
+        fi
+        if [[ $? -eq 0 ]]; then
+            echo "[INFO] Clone success: ${url}"
+        else
+            echo "[ERROR] Clone failed: ${url}"
+            exit 1
+        fi
+    fi
+else
+    echo "[ERROR] Unknown url type: ${url}"
+    exit 1
+fi
diff --git a/cmake/utils.cmake b/cmake/utils.cmake
new file mode 100644
index 0000000000..5d90991d92
--- /dev/null
+++ b/cmake/utils.cmake
@@ -0,0 +1,76 @@
+# Download an open-source package through cmake/download_opensource.sh.
+#
+# download_opensource_pkg(<pkg_name>
+#     [SHA256 <hash>] [GIT_TAG <tag>] [DOWNLOAD_PATH <dir>]
+#     [DIR_NAME <dir>] [BUILD_CMD <command>] [PATCHES <file>...])
+#
+#   pkg_name       section name in cmake/config.ini
+#   SHA256         expected checksum of the downloaded archive
+#   GIT_TAG        tag or branch to use for git sources
+#   DOWNLOAD_PATH  destination directory, created when missing
+#   DIR_NAME       directory below DOWNLOAD_PATH in which BUILD_CMD is run
+#   BUILD_CMD      shell command executed after a successful download
+#   PATCHES        parsed but not used by this function
+function(download_opensource_pkg pkg_name)
+    message("start to download ${pkg_name}...")
+    set(options)
+    set(oneValueArgs SHA256 GIT_TAG DOWNLOAD_PATH DIR_NAME BUILD_CMD)
+    set(multiValueArgs PATCHES)
+    cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+    if (NOT PKG_DOWNLOAD_PATH)
+        # NOTE(review): the Find modules use <root>/third_party, while this default points one level above the source root - confirm.
+        set(PKG_DOWNLOAD_PATH "${CMAKE_SOURCE_DIR}/../third_party")
+    endif()
+    file(MAKE_DIRECTORY ${PKG_DOWNLOAD_PATH})
+
+    # The script takes SHA256 and tag positionally, so both are always passed
+    # (possibly empty); otherwise a lone GIT_TAG would land in the SHA256 slot.
+    execute_process(
+        WORKING_DIRECTORY $ENV{PROJECT_ROOT_PATH}/cmake
+        COMMAND bash download_opensource.sh ${pkg_name} ${PKG_DOWNLOAD_PATH} "${PKG_SHA256}" "${PKG_GIT_TAG}"
+        RESULT_VARIABLE RESULT
+    )
+    if (NOT RESULT EQUAL 0)
+        message(FATAL_ERROR "Failed to download ${pkg_name}(${RESULT}).")
+    endif()
+    if (PKG_BUILD_CMD)
+        execute_process(
+            COMMAND bash -c "cd \"${PKG_DOWNLOAD_PATH}/${PKG_DIR_NAME}\" && ${PKG_BUILD_CMD}"
+            RESULT_VARIABLE RESULT
+        )
+        if (NOT RESULT EQUAL 0)
+            message(FATAL_ERROR "Failed to build ${pkg_name}(${RESULT}).")
+        endif()
+    endif()
+endfunction()
+
+# Generate C++ sources from .proto files at configure time.
+#
+# compile_protobuf_file(<output_path> <proto_file>...)
+#
+#   output_path  directory for the generated files, created when missing
+#   proto_file   .proto files; each file's own directory is its import root
+#
+# PROTOC_EXECUTABLE must be set by the caller.
+function(compile_protobuf_file output_path)
+    if (NOT PROTOC_EXECUTABLE)
+        message(FATAL_ERROR "You shall install protobuf first.")
+    endif()
+
+    file(MAKE_DIRECTORY ${output_path})
+
+    foreach(file ${ARGN})
+        get_filename_component(abs_file_path ${file} ABSOLUTE)
+        get_filename_component(file_dir ${abs_file_path} PATH)
+
+        execute_process(
+            COMMAND ${PROTOC_EXECUTABLE} -I${file_dir} --cpp_out=${output_path} ${abs_file_path}
+            RESULT_VARIABLE RESULT
+        )
+        if (NOT RESULT EQUAL 0)
+            message(FATAL_ERROR "Failed to compile protobuf file ${file}(${RESULT}).")
+        endif()
+
+        message("Compile protobuf file ${file}")
+    endforeach()
+endfunction()
diff --git a/docs/0001.capability_matrix.md b/docs/0001.capability_matrix.md
new file mode 100644
index 0000000000..e69de29bb2
diff --git
a/docs/0002.installation.md b/docs/0002.installation.md new file mode 100644 index 0000000000..7a262b539d --- /dev/null +++ b/docs/0002.installation.md @@ -0,0 +1,55 @@ +# 安装 + +## 1 依赖 + +### 1.1 硬件环境 + +[昇腾产品形态说明](https://www.hiascend.com/document/detail/zh/canncommercial/80RC22/quickstart/quickstart/quickstart_18_0002.html) + +### 1.2 软件环境 + +[固件和驱动](https://www.hiascend.com/hardware/firmware-drivers/community?product=1&model=30&cann=8.2.RC1.alpha001&driver=Ascend+HDK+25.0.RC1) + +| 框架 | 是否必选 | 版本 | +| -------------------------------------------------------------------------------------------- | -------- | ----------------------------------------------------------- | +| [Python](https://www.python.org/) | 是 | 3.7 ~ 3.12 | +| [GCC](https://gcc.gnu.org/) | 是 | 需支持 C++14 标准 | +| [git](https://git-scm.com/) | 否 | 推荐稳定版本 2.34.x - 2.42.x | +| [CANN](https://www.hiascend.cn/developer/download/community/result?module=cann)*1 | 否 | 完全兼容,根据 CPU 架构和 NPU 型号选择 toolkit 和 kernel 包 | +| [PyTorch (CPU, GPU)](https://pytorch.org/) | 否 | 1.11、2.1 ~ 2.7,对应的 Python 版本最低为 3.7 | +| [PyTorch (NPU)](https://gitee.com/ascend/pytorch) | 否 | 1.11、2.1 ~ 2.7,对应的 Python 版本最低为 3.7 | +| [MindIE-LLM](https://gitee.com/ascend/MindIE-LLM)*2 | 否 | 1.0,2.0 | +| [TensorFlow](https://github.com/tensorflow/tensorflow/releases/tag/v2.6.5)*3 | 否 | 仅支持 2.6.5 版本,对应的 Python 版本为 3.7 ~ 3.9 | + +*1: **CANN** 安装参见[社区资料](https://www.hiascend.com/document/detail/zh/canncommercial/81RC1/softwareinst/instg/instg_0002.html)。 + +*2: **MindIE-LLM** 非开源,如需查看请联系该组件的华为工程师。 + +*3: **TensorFlow** 模型在 **Ascend NPU** 的迁移,还需要安装 [TF 插件](https://gitee.com/ascend/tensorflow/releases)。 + +用户可以根据使用场景自行安装适配的 Python 和其他软件包,并在使用 msprobe 前确保所依赖的框架可以正常运行。 + +## 2 安装 msprobe + +### 2.1 从源码安装 + +```sh +git clone https://gitee.com/ascend/mstt.git -b poc +cd mstt/debug/accuracy_tools_infer + +pip install setuptools wheel + +python setup.py bdist_wheel [--compat tf] +cd ./dist +pip install mindstudio_probe*.whl +``` +**注意**:`--compat` 
参数非必选,默认为无,当前支持 tf。 + + + + +## 3 查看 msprobe 工具信息 + +```sh +pip show mindstudio-probe-infer +``` diff --git a/docs/0003.config_introduction.md b/docs/0003.config_introduction.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/0004.config_examples.md b/docs/0004.config_examples.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/0101.dump_offline_model.md b/docs/0101.dump_offline_model.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/0102.dump_mindie_llm_for_atb.md b/docs/0102.dump_mindie_llm_for_atb.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000000..12c2ce3ba7 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,29 @@ +# msprobe 文档编写查阅指南 + +## 1 文档编号 + +0. 公共文档:0001 - 0099 +1. 数据采集:0101 - 0199 +2. 溢出检测:0201 - 0299 +3. 精度预检:0301 - 0399 +4. 精度比对:0401 - 0499 +5. 模型改图:0501 - 0599 +6. 状态监控:0601 - 0699 +7. 数据解析:0701 - 0799 +8. 参数检查:0801 - 0899 + +## 2 文档模板 + +```md +# 简介 + +# 接口介绍 + +# 使用示例 + +# 输出件介绍 + +# 约束 + +# 常见问题 +``` diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000..1688f68d01 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,11 @@ +[tool.black] +line-length = 120 # 设置最大行长 +target-version = ['py37', 'py38', 'py39', 'py310', 'py311', 'py312'] # 兼容的 Python 版本 + +[tool.isort] +profile = "black" # 使 isort 与 Black 兼容 +line_length = 120 # 统一最大行长 +multi_line_output = 3 # 按分组方式输出多行 import 语句 +force_grid_wrap = 0 # 控制换行时的显示方式 +use_parentheses = true # 使用括号包裹长 import 语句 +combine_as_imports = true # 合并多行的 as 导入 diff --git a/requirements/requirements.txt b/requirements/requirements.txt new file mode 100644 index 0000000000..1939d52001 --- /dev/null +++ b/requirements/requirements.txt @@ -0,0 +1,7 @@ +numpy < 2.0 +protobuf >= 3.18, < 5.0 +onnx >= 1.12.0, < 2.0 +onnxruntime >= 1.10, < 2.0 +pandas >= 1.3, < 3.0 +PyYAML +tqdm diff --git a/requirements/requirements_tf.txt b/requirements/requirements_tf.txt new file mode
100644
index 0000000000..745fc5dd1a
--- /dev/null
+++ b/requirements/requirements_tf.txt
@@ -0,0 +1,10 @@
+numpy >= 1.19.2, <= 1.21.6
+protobuf >= 3.9.2, <= 3.20.3
+scipy >= 1.5.2, <= 1.7.3
+pandas >= 1.2.0, <= 1.3.5
+decorator
+sympy
+attrs
+psutil
+PyYAML
+tqdm
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000000..50a8db9b08
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,127 @@
+# Copyright (c) 2025-2025 Huawei Technologies Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__version__ = "8.1.1"
+
+import os
+import sys
+from platform import machine
+from subprocess import run
+
+from setuptools import find_packages, setup
+
+# Maps the value of the custom "--compat" command-line option to the
+# requirements file (under requirements/) used for install_requires.
+_COMPAT_REQUIREMENTS_MAP = {"tf": "requirements_tf.txt", "default": "requirements.txt"}
+
+
+def parse_args():
+    """Remove the custom ``--compat <name>`` option from ``sys.argv``.
+
+    setuptools does not know this option, so both tokens are removed before
+    ``setup()`` parses the command line.
+
+    Returns:
+        The value following ``--compat``, or ``None`` when the option is absent.
+
+    Raises:
+        ValueError: ``--compat`` is the last token and has no value.
+    """
+    if "--compat" not in sys.argv:
+        return None
+    index = sys.argv.index("--compat")
+    if index + 1 >= len(sys.argv):
+        raise ValueError("Missing argument for --compat.")
+    compat_flag = sys.argv[index + 1]
+    # Delete by position: removing by value could drop an earlier, unrelated
+    # token that happens to equal the compat name.
+    del sys.argv[index : index + 2]
+    return compat_flag
+
+
+def get_requirements(compat_name=None):
+    """Read the requirement specifiers for the selected compat flavour.
+
+    Args:
+        compat_name: Key of ``_COMPAT_REQUIREMENTS_MAP``; ``None`` selects "default".
+
+    Returns:
+        The non-empty, non-comment lines of the requirements file.
+
+    Raises:
+        ValueError: ``compat_name`` is not a supported flavour.
+    """
+    if compat_name is not None and compat_name not in _COMPAT_REQUIREMENTS_MAP:
+        supported = ", ".join(sorted(_COMPAT_REQUIREMENTS_MAP))
+        raise ValueError(f"Unsupported --compat value {compat_name!r}; expected one of: {supported}.")
+    requirements_parent_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "requirements")
+    requirements_file = _COMPAT_REQUIREMENTS_MAP.get(compat_name, _COMPAT_REQUIREMENTS_MAP["default"])
+    with open(os.path.join(requirements_parent_path, requirements_file), encoding="utf-8") as f:
+        stripped_lines = (line.strip() for line in f)
+        return [line for line in stripped_lines if line and not line.startswith("#")]
+
+
+compat = parse_args()
+required = get_requirements(compat)
+
+# build.sh puts the native library into msprobe/lib, which package_data
+# ("lib/*.so") then ships; the build runs on every invocation of this script.
+build_cmd = [
+    "bash",
+    "./build.sh",
+    "-j16",
+    "-a",
+    machine(),
+    "-v",
+    f"{sys.version_info.major}.{sys.version_info.minor}",
+]
+p = run(build_cmd, shell=False)
+if p.returncode != 0:
+    raise RuntimeError(f"Failed to build source({p.returncode})")
+
+
+setup(
+    name="mindstudio-probe-infer",
+    version=__version__,
+    description="Ascend Probe Utils",
+    long_description="""
+        MindStudio-Probe is a set of tools for diagnosing and improving model accuracy on Ascend NPU,
+        including API accuracy, args checker, grad tool etc.
+    """,
+    long_description_content_type="text/markdown",
+    url="https://gitee.com/ascend/mstt/tree/master/accuracy_tools/msprobe",
+    author="Ascend Team",
+    author_email="pmail_mindstudio@huawei.com",
+    packages=find_packages(include=["msprobe", "msprobe*"]),
+    package_data={"": ["LICENSE", "lib/*.so"]},
+    license="Apache-2.0",
+    keywords=["msprobe", "pytorch", "mindspore"],
+    python_requires=">=3.7",
+    install_requires=required,
+    zip_safe=False,
+    classifiers=[
+        "Intended Audience :: Developers",
+        "Intended Audience :: Education",
+        "Intended Audience :: Science/Research",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: C++",
+        "Topic :: Scientific/Engineering",
+        "Topic :: Scientific/Engineering :: Mathematics",
+        "Topic :: Scientific/Engineering :: Artificial Intelligence",
+        "Topic :: Software Development",
+        "Topic :: Software Development :: Libraries",
+        "Topic :: Software Development :: Libraries :: Python Modules",
+    ],
+    entry_points={"console_scripts": ["msprobe_infer=msprobe.__main__:main"]},
+)
diff --git a/third_party/.keep b/third_party/.keep
new file mode 100644
index 0000000000..e69de29bb2
--
Gitee