From c666ae9c00ff2f2901f32a1e382fab8a78a7d068 Mon Sep 17 00:00:00 2001 From: jiangchao_j Date: Thu, 10 Jul 2025 16:37:38 +0800 Subject: [PATCH] use google re2 to replace std regex --- debug/accuracy_tools/cmake/Findre2.cmake | 58 +++++++++++++++++++ debug/accuracy_tools/cmake/config.ini | 5 +- .../msprobe/ccsrc/CMakeLists.txt | 2 + .../msprobe/ccsrc/base/DebuggerConfig.cpp | 21 ++++--- .../msprobe/ccsrc/base/DebuggerConfig.h | 5 +- .../msprobe/ccsrc/base/Environment.cpp | 4 +- .../msprobe/core/common_config.py | 16 ++++- .../mindspore/debugger/precision_debugger.py | 3 + .../msprobe/mindspore/ms_config.py | 1 - ...347\275\221URL\350\257\264\346\230\216.md" | 1 + 10 files changed, 102 insertions(+), 14 deletions(-) create mode 100644 debug/accuracy_tools/cmake/Findre2.cmake diff --git a/debug/accuracy_tools/cmake/Findre2.cmake b/debug/accuracy_tools/cmake/Findre2.cmake new file mode 100644 index 0000000000..4570ba5291 --- /dev/null +++ b/debug/accuracy_tools/cmake/Findre2.cmake @@ -0,0 +1,58 @@ +set(PKG_NAME re2) +set(SHA256_VALUE "7268e1b4254d9ffa5ccf010fee954150dbb788fd9705234442e7d9f0ee5a42d3") +set(DOWNLOAD_PATH "$ENV{PROJECT_ROOT_PATH}/third_party") +set(DIR_NAME "${DOWNLOAD_PATH}/re2-2019-12-01") +set(BUILD_DIR "${DIR_NAME}/build") +file(MAKE_DIRECTORY "${BUILD_DIR}") +set(BUILD_DEPENDENCY_PATH "$ENV{PROJECT_ROOT_PATH}/build_dependency/${PKG_NAME}") + +if (NOT ${PKG_NAME}_FOUND) + +file(GLOB RE2_INCLUDE "${BUILD_DEPENDENCY_PATH}/include/${PKG_NAME}/re2.h") +file(GLOB RE2_LIB "${BUILD_DEPENDENCY_PATH}/lib64/libre2.a") +if (RE2_INCLUDE AND RE2_LIB) + include_directories(${BUILD_DEPENDENCY_PATH}/include) + set(${PKG_NAME}_LIBRARIES "${RE2_LIB}") + set(${PKG_NAME}_FOUND TRUE) + return() +endif() + +download_opensource_pkg(${PKG_NAME} + SHA256 ${SHA256_VALUE} + DOWNLOAD_PATH ${DOWNLOAD_PATH} +) + +execute_process( + WORKING_DIRECTORY ${BUILD_DIR} + COMMAND cmake -DCMAKE_INSTALL_PREFIX=${BUILD_DEPENDENCY_PATH} -DCMAKE_C_FLAGS=-fPIC -DCMAKE_CXX_FLAGS=-fPIC .. + RESULT_VARIABLE RESULT +) +if (NOT RESULT EQUAL 0) + message(FATAL_ERROR "Failed to build re2. ${RESULT}") +endif() + +execute_process( + WORKING_DIRECTORY ${BUILD_DIR} + COMMAND make -j16 + RESULT_VARIABLE RESULT +) +if (NOT RESULT EQUAL 0) + message(FATAL_ERROR "Failed to build re2. ${RESULT}") +endif() + +execute_process( + WORKING_DIRECTORY ${BUILD_DIR} + COMMAND make install +) + +file(GLOB RE2_INCLUDE "${BUILD_DEPENDENCY_PATH}/include/${PKG_NAME}/re2.h") +file(GLOB RE2_LIB "${BUILD_DEPENDENCY_PATH}/lib64/libre2.a") +if (NOT RE2_INCLUDE OR NOT RE2_LIB) + message(FATAL_ERROR "Failed to build re2.") +endif() + +include_directories(${BUILD_DEPENDENCY_PATH}/include) +set(${PKG_NAME}_LIBRARIES "${RE2_LIB}") +set(${PKG_NAME}_FOUND TRUE) + +endif() diff --git a/debug/accuracy_tools/cmake/config.ini b/debug/accuracy_tools/cmake/config.ini index 57e544d540..81b9ee5b47 100644 --- a/debug/accuracy_tools/cmake/config.ini +++ b/debug/accuracy_tools/cmake/config.ini @@ -11,4 +11,7 @@ url = https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.10.1.z url = https://gitee.com/mirrors/openssl/repository/archive/OpenSSL_1_1_1k.tar.gz [protobuf] -url = https://gitee.com/mirrors/protobuf_source/repository/archive/v3.15.0.tar.gz \ No newline at end of file +url = https://gitee.com/mirrors/protobuf_source/repository/archive/v3.15.0.tar.gz + +[re2] +url = https://gitee.com/mirrors/re2/repository/archive/2019-12-01.tar.gz \ No newline at end of file diff --git a/debug/accuracy_tools/msprobe/ccsrc/CMakeLists.txt b/debug/accuracy_tools/msprobe/ccsrc/CMakeLists.txt index 8472c1ad71..57313609f9 100644 --- a/debug/accuracy_tools/msprobe/ccsrc/CMakeLists.txt +++ b/debug/accuracy_tools/msprobe/ccsrc/CMakeLists.txt @@ -8,6 +8,7 @@ find_package(cpython MODULE REQUIRED) find_package(openssl MODULE REQUIRED) find_package(nlohmannjson MODULE REQUIRED) find_package(protobuf MODULE REQUIRED) +find_package(re2 MODULE REQUIRED) if (DEFINED CANN_PATH AND NOT "${CANN_PATH}" STREQUAL "") file(GLOB_RECURSE DUMP_DATA_PROTOS "${CANN_PATH}/**/dump_data.proto") @@ -46,6 +47,7 @@ target_link_libraries(_msprobe_c PUBLIC pthread) target_link_libraries(_msprobe_c PUBLIC ${cpython_LIBRARIES}) target_link_libraries(_msprobe_c PUBLIC ${openssl_LIBRARIES}) target_link_libraries(_msprobe_c PUBLIC ${protobuf_LIBRARIES}) +target_link_libraries(_msprobe_c PUBLIC ${re2_LIBRARIES}) if(DEFINED BUILD_TYPE AND "${BUILD_TYPE}" STREQUAL "debug") target_compile_options(_msprobe_c PRIVATE "-O0") diff --git a/debug/accuracy_tools/msprobe/ccsrc/base/DebuggerConfig.cpp b/debug/accuracy_tools/msprobe/ccsrc/base/DebuggerConfig.cpp index 591d18399c..35c1c00f75 100644 --- a/debug/accuracy_tools/msprobe/ccsrc/base/DebuggerConfig.cpp +++ b/debug/accuracy_tools/msprobe/ccsrc/base/DebuggerConfig.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (C) 2024-2025. Huawei Technologies Co., Ltd. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,8 +16,10 @@ #include #include +#include #include #include +#include #include "include/ErrorCode.h" #include "include/Macro.h" @@ -211,6 +213,7 @@ constexpr char REGEX_PREFIX[] = "name-regex("; constexpr char REGEX_SUFFIX[] = ")"; constexpr size_t REGEX_PREFIX_LEN = sizeof(REGEX_PREFIX) - 1; constexpr size_t REGEX_SUFFIX_LEN = sizeof(REGEX_SUFFIX) - 1; +constexpr size_t REGEX_INDEX = REGEX_PREFIX_LEN - 1; void KernelListMatcher::Parse(const std::vector& expressions) { @@ -219,7 +222,7 @@ void KernelListMatcher::Parse(const std::vector& expressions) if (strncmp(expression.c_str(), REGEX_PREFIX, REGEX_PREFIX_LEN) == 0 && strncmp(expression.c_str() + (len - REGEX_SUFFIX_LEN), REGEX_SUFFIX, REGEX_SUFFIX_LEN) == 0) { /* name-regex(xxx)表示正则表达式 */ - regexList.emplace_back(expression.substr(REGEX_PREFIX_LEN, len - REGEX_PREFIX_LEN - REGEX_SUFFIX_LEN)); + regexList.emplace_back(expression.substr(REGEX_INDEX, len - REGEX_INDEX)); } else { /* 否则认为是full scope name */ fullNameList.emplace_back(expression); @@ -236,11 +239,15 @@ std::vector KernelListMatcher::GenRealKernelList(const char** fullK } output = fullNameList; - for (auto& reg : regexList) { - for (const char** ss = fullKernelList; *ss != nullptr; ++ss) { - if (std::regex_search(*ss, reg)) { - output.emplace_back(*ss); - } + for (auto& pattern : regexList) { + re2::RE2 reg(pattern, re2::RE2::Quiet); + if (reg.ok()) { + for (const char** ss = fullKernelList; *ss != nullptr; ++ss) { + std::string ret; + if (re2::RE2::FullMatch(*ss, reg, &ret)) { + output.emplace_back(*ss); + } + } } } diff --git a/debug/accuracy_tools/msprobe/ccsrc/base/DebuggerConfig.h b/debug/accuracy_tools/msprobe/ccsrc/base/DebuggerConfig.h index 69db3998e8..bdae11cb72 100644 --- a/debug/accuracy_tools/msprobe/ccsrc/base/DebuggerConfig.h +++ b/debug/accuracy_tools/msprobe/ccsrc/base/DebuggerConfig.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (C) 2024-2025. Huawei Technologies Co., Ltd. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,7 +23,6 @@ #include #include #include -#include #include #include @@ -125,7 +124,7 @@ public: private: std::vector fullNameList; - std::vector regexList; + std::vector regexList; }; /* 说明:config类作为基础的配置解析查询类,对外应该是只读的,外部仅能通过Parse接口解析配置文件,而不应该直接修改配置字段,此处用以下方式防止外部误操作 diff --git a/debug/accuracy_tools/msprobe/ccsrc/base/Environment.cpp b/debug/accuracy_tools/msprobe/ccsrc/base/Environment.cpp index cfc4c4b164..58d89561a0 100644 --- a/debug/accuracy_tools/msprobe/ccsrc/base/Environment.cpp +++ b/debug/accuracy_tools/msprobe/ccsrc/base/Environment.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (C) 2024-2025. Huawei Technologies Co., Ltd. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +14,8 @@ * limitations under the License. */ +#include + #include "utils/CPythonUtils.h" #include "DebuggerConfig.h" #include "Environment.h" diff --git a/debug/accuracy_tools/msprobe/core/common_config.py b/debug/accuracy_tools/msprobe/core/common_config.py index 836a7b89d3..34c3469cf3 100644 --- a/debug/accuracy_tools/msprobe/core/common_config.py +++ b/debug/accuracy_tools/msprobe/core/common_config.py @@ -13,7 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from msprobe.core.common.const import Const, FileCheckConst +import re + +from msprobe.core.common.const import Const from msprobe.core.common.log import logger from msprobe.core.common.exceptions import MsprobeException from msprobe.core.common.utils import get_real_step_or_rank @@ -67,6 +69,7 @@ class BaseConfig: self.if_preheat = json_config.get("if_preheat") self.preheat_step = json_config.get("preheat_step") self.max_sample = json_config.get("max_sample") + self.is_regex_valid = True @staticmethod def _check_str_list_config(config_item, config_name): @@ -83,6 +86,7 @@ class BaseConfig: self._check_str_list_config(self.scope, "scope") self._check_str_list_config(self.list, "list") self._check_data_mode() + self._check_regex_in_list() def _check_data_mode(self): if self.data_mode is not None: @@ -118,3 +122,13 @@ class BaseConfig: f"summary_mode is invalid, summary_mode is not in {Const.SUMMARY_MODE}.", MsprobeException(MsprobeException.INVALID_PARAM_ERROR) ) + + def _check_regex_in_list(self): + if self.list: + for name in self.list: + if name.startswith('name-regex(') and name.endswith(')'): + try: + re.compile(name[len('name-regex('):-1]) + except re.error: + self.is_regex_valid = False + break diff --git a/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py b/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py index b5b83e14b2..e5c13f216a 100644 --- a/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py +++ b/debug/accuracy_tools/msprobe/mindspore/debugger/precision_debugger.py @@ -79,6 +79,9 @@ class PrecisionDebugger(BasePrecisionDebugger): self.common_config.dump_path = dump_path if dump_path else self.common_config.dump_path self.config = DebuggerConfig(self.common_config, self.task_config) + if self._is_kernel_dump() and not self.task_config.is_regex_valid: + raise ValueError('Illegal regular expressions exist in the list.') + setattr(inner.CellBackwardHook, '__call__', wrap_backward_hook_call_func(getattr(inner.CellBackwardHook, '__call__'))) diff --git a/debug/accuracy_tools/msprobe/mindspore/ms_config.py b/debug/accuracy_tools/msprobe/mindspore/ms_config.py index f67754cfde..2c27b05aa2 100644 --- a/debug/accuracy_tools/msprobe/mindspore/ms_config.py +++ b/debug/accuracy_tools/msprobe/mindspore/ms_config.py @@ -14,7 +14,6 @@ # limitations under the License. from msprobe.core.common.const import Const -from msprobe.core.common.file_utils import load_json from msprobe.core.common.utils import is_int from msprobe.core.common_config import BaseConfig, CommonConfig from msprobe.core.grad_probe.constant import level_adp diff --git "a/\345\205\254\347\275\221URL\350\257\264\346\230\216.md" "b/\345\205\254\347\275\221URL\350\257\264\346\230\216.md" index 4d9c33a66a..265f258b27 100644 --- "a/\345\205\254\347\275\221URL\350\257\264\346\230\216.md" +++ "b/\345\205\254\347\275\221URL\350\257\264\346\230\216.md" @@ -20,3 +20,4 @@ | 开源软件 | MindStudio Training Tools - accuracy_tools | /debug/accuracy_tools/cmake/config.ini | 公网地址 | https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.10.1.zip | 开源软件下载 | | 开源软件 | MindStudio Training Tools - accuracy_tools | /debug/accuracy_tools/cmake/config.ini | 公网地址 | https://gitee.com/mirrors/openssl/repository/archive/OpenSSL_1_1_1k.tar.gz | 开源软件下载 | | 开源软件 | MindStudio Training Tools - accuracy_tools | /debug/accuracy_tools/cmake/config.ini | 公网地址 | https://gitee.com/mirrors/protobuf_source/repository/archive/v3.15.0.tar.gz | 开源软件下载 | +| 开源软件 | MindStudio Training Tools - accuracy_tools | /debug/accuracy_tools/cmake/config.ini | 公网地址 | https://gitee.com/mirrors/re2/repository/archive/2019-12-01.tar.gz | 开源软件下载 | -- Gitee