From b7744da31d19827363a9ed0c8826eea40d196701 Mon Sep 17 00:00:00 2001 From: SCh-zx <1325467101@qq.com> Date: Mon, 8 Sep 2025 17:40:18 +0800 Subject: [PATCH 1/2] log --- torch_npu/csrc/core/npu/NPUException.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/torch_npu/csrc/core/npu/NPUException.cpp b/torch_npu/csrc/core/npu/NPUException.cpp index a670330489..02afb7b813 100644 --- a/torch_npu/csrc/core/npu/NPUException.cpp +++ b/torch_npu/csrc/core/npu/NPUException.cpp @@ -153,6 +153,9 @@ const char *c10_npu_get_error_message() if (c10_npu::option::OptionsManager::IsCompactErrorOutput()) { std::string log(errmsg); std::string errmsg_ = c10_npu::c10_npu_check_error_message(log); + if (errmsg_ == "") { + return errmsg; + } thread_local std::string processedErrMsg = "CANN error: " + errmsg_; c10_npu::setRepoErrMsg(processedErrMsg.c_str()); return processedErrMsg.c_str(); -- Gitee From 0db2f8fb71458efc70d1d6ba706852c185588cea Mon Sep 17 00:00:00 2001 From: SCh-zx <1325467101@qq.com> Date: Wed, 10 Sep 2025 10:51:30 +0800 Subject: [PATCH 2/2] log --- torch_npu/csrc/core/npu/NPUException.h | 6 ++++-- torch_npu/csrc/distributed/HCCLUtils.hpp | 3 ++- torch_npu/csrc/framework/utils/CalcuOpUtil.h | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/torch_npu/csrc/core/npu/NPUException.h b/torch_npu/csrc/core/npu/NPUException.h index 2fa8bff52a..ce65ac1b92 100644 --- a/torch_npu/csrc/core/npu/NPUException.h +++ b/torch_npu/csrc/core/npu/NPUException.h @@ -163,10 +163,11 @@ inline const char* getErrorFunction(const char* /* msg */, const char* args) << "\n"; \ std::string err_msg = oss.str(); \ ASCEND_LOGE("%s", err_msg.c_str()); \ + std::string errmsg(c10_npu::c10_npu_get_error_message()); \ TORCH_CHECK( \ false, \ (device_error_msg.empty() ? "" : device_error_msg), \ - c10_npu::c10_npu_get_error_message()); \ + errmsg.empty() ? err_msg : errmsg); \ } else if (error_code == ACL_ERROR_RT_DEVICE_TASK_ABORT) { \ TORCH_CHECK( \ false, \ @@ -218,9 +219,10 @@ inline const char* getErrorFunction(const char* /* msg */, const char* args) err_map.error_code_map[Error] : ".") + "\n"; \ std::string err_msg = oss.str(); \ ASCEND_LOGE("%s", err_msg.c_str()); \ + std::string errmsg(c10_npu::c10_npu_get_error_message()); \ TORCH_CHECK( \ false, \ - c10_npu::c10_npu_get_error_message()); \ + errmsg.empty() ? err_msg : errmsg); \ } else { \ TORCH_CHECK( \ false, \ diff --git a/torch_npu/csrc/distributed/HCCLUtils.hpp b/torch_npu/csrc/distributed/HCCLUtils.hpp index 53f86212dd..eeca53333a 100644 --- a/torch_npu/csrc/distributed/HCCLUtils.hpp +++ b/torch_npu/csrc/distributed/HCCLUtils.hpp @@ -24,9 +24,10 @@ << DIST_ERROR(ErrCode::HCCL) + ".\n"; \ std::string err_msg = oss.str(); \ ASCEND_LOGE("%s", err_msg.c_str()); \ + std::string errmsg(c10_npu::c10_npu_get_error_message()); \ TORCH_CHECK( \ false, \ - c10_npu::c10_npu_get_error_message()); \ + errmsg.empty() ? err_msg : errmsg); \ } else { \ TORCH_CHECK( \ false, \ diff --git a/torch_npu/csrc/framework/utils/CalcuOpUtil.h b/torch_npu/csrc/framework/utils/CalcuOpUtil.h index 5ee41e7d64..ef11595d3d 100644 --- a/torch_npu/csrc/framework/utils/CalcuOpUtil.h +++ b/torch_npu/csrc/framework/utils/CalcuOpUtil.h @@ -46,7 +46,8 @@ using std::vector; << OPS_ERROR(ErrCode::INTERNAL); \ std::string err_msg=oss.str(); \ ASCEND_LOGE("%s", err_msg.c_str()); \ - TORCH_CHECK((expr) == 0, c10_npu::c10_npu_get_error_message()); \ + std::string errmsg(c10_npu::c10_npu_get_error_message()); \ + TORCH_CHECK((expr) == 0, errmsg.empty() ? err_msg : errmsg); \ } else { \ TORCH_CHECK((expr) == 0, __func__, ":", __FILE__, ":", __LINE__, \ " NPU error,NPU error code is:", expr, "\n", \ -- Gitee