diff --git a/torch_npu/csrc/core/npu/NPUException.cpp b/torch_npu/csrc/core/npu/NPUException.cpp index fc1bc90a42907136bcf9d379f7b714d6d4f448fd..13041e9f68ddcb42563144f89a72505f91518477 100644 --- a/torch_npu/csrc/core/npu/NPUException.cpp +++ b/torch_npu/csrc/core/npu/NPUException.cpp @@ -154,6 +154,9 @@ const char *c10_npu_get_error_message() if (c10_npu::option::OptionsManager::IsCompactErrorOutput()) { std::string log(errmsg); std::string errmsg_ = c10_npu::c10_npu_check_error_message(log); + if (errmsg_ == "") { + return errmsg; + } thread_local std::string processedErrMsg = "CANN error: " + errmsg_; c10_npu::setRepoErrMsg(processedErrMsg.c_str()); return processedErrMsg.c_str(); diff --git a/torch_npu/csrc/core/npu/NPUException.h b/torch_npu/csrc/core/npu/NPUException.h index 2fa8bff52a0239c928f1c0594abe02fc3d8aa3c1..ce65ac1b927103211839cc407b943e97ca9d0225 100644 --- a/torch_npu/csrc/core/npu/NPUException.h +++ b/torch_npu/csrc/core/npu/NPUException.h @@ -163,10 +163,11 @@ inline const char* getErrorFunction(const char* /* msg */, const char* args) << "\n"; \ std::string err_msg = oss.str(); \ ASCEND_LOGE("%s", err_msg.c_str()); \ + std::string errmsg(c10_npu::c10_npu_get_error_message()); \ TORCH_CHECK( \ false, \ (device_error_msg.empty() ? "" : device_error_msg), \ - c10_npu::c10_npu_get_error_message()); \ + errmsg.empty() ? err_msg : errmsg); \ } else if (error_code == ACL_ERROR_RT_DEVICE_TASK_ABORT) { \ TORCH_CHECK( \ false, \ @@ -218,9 +219,10 @@ inline const char* getErrorFunction(const char* /* msg */, const char* args) err_map.error_code_map[Error] : ".") + "\n"; \ std::string err_msg = oss.str(); \ ASCEND_LOGE("%s", err_msg.c_str()); \ + std::string errmsg(c10_npu::c10_npu_get_error_message()); \ TORCH_CHECK( \ false, \ - c10_npu::c10_npu_get_error_message()); \ + errmsg.empty() ? err_msg : errmsg); \ } else { \ TORCH_CHECK( \ false, \ diff --git a/torch_npu/csrc/distributed/HCCLUtils.hpp b/torch_npu/csrc/distributed/HCCLUtils.hpp index b4662c1e49b85cce5b0f80d55f85706b503c5916..42ee22cc58a0ba7e77f7de6a91a732253e2515c7 100644 --- a/torch_npu/csrc/distributed/HCCLUtils.hpp +++ b/torch_npu/csrc/distributed/HCCLUtils.hpp @@ -24,9 +24,10 @@ << DIST_ERROR(ErrCode::HCCL) + ".\n"; \ std::string err_msg = oss.str(); \ ASCEND_LOGE("%s", err_msg.c_str()); \ + std::string errmsg(c10_npu::c10_npu_get_error_message()); \ TORCH_CHECK( \ false, \ - c10_npu::c10_npu_get_error_message()); \ + errmsg.empty() ? err_msg : errmsg); \ } else { \ TORCH_CHECK( \ false, \ diff --git a/torch_npu/csrc/framework/utils/CalcuOpUtil.h b/torch_npu/csrc/framework/utils/CalcuOpUtil.h index c249a332ba24c19cc4879b08e8b7f053ddd23eee..6caaaac36f3018de1a3fa1a912f851ef6b8b22f7 100644 --- a/torch_npu/csrc/framework/utils/CalcuOpUtil.h +++ b/torch_npu/csrc/framework/utils/CalcuOpUtil.h @@ -46,7 +46,8 @@ using std::vector; << OPS_ERROR(ErrCode::INTERNAL); \ std::string err_msg=oss.str(); \ ASCEND_LOGE("%s", err_msg.c_str()); \ - TORCH_CHECK((expr) == 0, c10_npu::c10_npu_get_error_message()); \ + std::string errmsg(c10_npu::c10_npu_get_error_message()); \ + TORCH_CHECK((expr) == 0, errmsg.empty() ? err_msg : errmsg); \ } else { \ TORCH_CHECK((expr) == 0, __func__, ":", __FILE__, ":", __LINE__, \ " NPU error,NPU error code is:", expr, "\n", \