From a880d2fbff0fb6e75383c6ddd32acb1f8fbc5dca Mon Sep 17 00:00:00 2001 From: chen_liqing Date: Mon, 18 Aug 2025 15:38:11 +0800 Subject: [PATCH 1/4] LazyInitAclops fixs --- torch_npu/csrc/core/npu/NpuVariables.cpp | 9 +++++++++ torch_npu/csrc/core/npu/NpuVariables.h | 3 +++ .../csrc/core/npu/register/OptionsManager.cpp | 8 +++++--- torch_npu/csrc/framework/LazyInitAclops.cpp | 19 +++++++++++++++++++ torch_npu/csrc/framework/LazyInitAclops.h | 2 ++ torch_npu/csrc/npu/Module.cpp | 13 +++---------- torch_npu/dynamo/__init__.py | 1 + 7 files changed, 42 insertions(+), 13 deletions(-) diff --git a/torch_npu/csrc/core/npu/NpuVariables.cpp b/torch_npu/csrc/core/npu/NpuVariables.cpp index 24a2a8da62..cbae2a499e 100644 --- a/torch_npu/csrc/core/npu/NpuVariables.cpp +++ b/torch_npu/csrc/core/npu/NpuVariables.cpp @@ -104,5 +104,14 @@ bool IsAclnnOnly() { return false; } + +bool IsSupportAclOpLazyInit() +{ + static bool default_support = ((GetSocVersion() >= SocVersion::Ascend910B1) && + (GetSocVersion() < SocVersion::Ascend310B1)) || + (GetSocVersion() >= SocVersion::Ascend910_9391); + return default_support; +} + } // namespace c10_npu diff --git a/torch_npu/csrc/core/npu/NpuVariables.h b/torch_npu/csrc/core/npu/NpuVariables.h index 6a3a8cdfd7..3e0846fb13 100644 --- a/torch_npu/csrc/core/npu/NpuVariables.h +++ b/torch_npu/csrc/core/npu/NpuVariables.h @@ -42,6 +42,9 @@ bool IsSupportInfNan(); bool IsBF16Supported(); bool IsAclnnOnly(); + +bool IsSupportAclOpLazyInit(); + } // namespace c10_npu #endif diff --git a/torch_npu/csrc/core/npu/register/OptionsManager.cpp b/torch_npu/csrc/core/npu/register/OptionsManager.cpp index b5d5e99a76..d903ef4e6e 100644 --- a/torch_npu/csrc/core/npu/register/OptionsManager.cpp +++ b/torch_npu/csrc/core/npu/register/OptionsManager.cpp @@ -12,6 +12,7 @@ #include "torch_npu/csrc/core/npu/register/OptionRegister.h" #include "torch_npu/csrc/core/npu/register/OptionsManager.h" #include "torch_npu/csrc/core/npu/NPUFunctions.h" +#include "torch_npu/csrc/core/npu/NpuVariables.h" #include "torch_npu/csrc/core/npu/NPUCachingAllocator.h" #include "torch_npu/csrc/npu/memory_snapshot.h" @@ -481,12 +482,13 @@ uint32_t OptionsManager::GetAclOpInitMode() { const static uint32_t acl_op_init_mode = []() -> uint32_t { char* buf_val = std::getenv("ACL_OP_INIT_MODE"); - // Default 0 - int64_t acl_op_init_mode_ = (buf_val != nullptr) ? strtol(buf_val, nullptr, 10) : 0; + int64_t default_value = c10_npu::IsSupportAclOpLazyInit() ? 1 : 0; + int64_t acl_op_init_mode_ = (buf_val != nullptr) ? strtol(buf_val, nullptr, 10) : default_value; std::unordered_map aclOpInitMode = getAclOpInitMode(); if (aclOpInitMode.find(acl_op_init_mode_) == aclOpInitMode.end()) { acl_op_init_mode_ = 0; - TORCH_NPU_WARN_ONCE("Get env ACL_OP_INIT_MODE not in [0, 1, 2], so reset it to the default value 0."); + TORCH_NPU_WARN_ONCE( + "Get env ACL_OP_INIT_MODE not in [0, 1, 2], so reset it to the default value ", default_value, "."); } return static_cast(acl_op_init_mode_); }(); diff --git a/torch_npu/csrc/framework/LazyInitAclops.cpp b/torch_npu/csrc/framework/LazyInitAclops.cpp index 440d237fef..a3960bc9c9 100644 --- a/torch_npu/csrc/framework/LazyInitAclops.cpp +++ b/torch_npu/csrc/framework/LazyInitAclops.cpp @@ -174,6 +174,22 @@ void LazyInitAclopsCore() #endif } +bool IsJitCompileDisable() +{ + static const std::string jit_compile_option_name = "jitCompile"; + auto option_value = c10_npu::option::GetOption(jit_compile_option_name); + if (option_value.has_value() && (option_value.value() == "disable")) { + return true; + } else { + static const std::string jit_compile_init_option_name = "jitCompileInit"; + auto init_option_value = c10_npu::option::GetOption(jit_compile_init_option_name); + if (init_option_value.has_value() && (init_option_value.value() == "disable")) { + return true; + } + } + return false; +} + void LazyInitAclops() { static auto acl_op_init_mode = c10_npu::option::OptionsManager::GetAclOpInitMode(); @@ -186,6 +202,9 @@ void LazyInitAclops() if (!encounteredAclops.exchange(true) && c10_npu::NpuSysCtrl::GetInstance().GetInitFlag()) { RECORD_FUNCTION("LazyInitAclops", std::vector({})); + std::string val = IsJitCompileDisable() ? "disable" : "enable"; + NPU_CHECK_ERROR(at_npu::native::AclSetCompileopt(aclCompileOpt::ACL_OP_JIT_COMPILE, val.c_str())); + ASCEND_LOGI("Set jitCompileInit option to %s", val.c_str()); LazyInitAclopsCore(); ASCEND_LOGI("Lazy init for aclops finished.") } diff --git a/torch_npu/csrc/framework/LazyInitAclops.h b/torch_npu/csrc/framework/LazyInitAclops.h index b842b78652..fccd8858f4 100644 --- a/torch_npu/csrc/framework/LazyInitAclops.h +++ b/torch_npu/csrc/framework/LazyInitAclops.h @@ -8,6 +8,8 @@ void InitAclops(); void LazyInitAclops(); void InitializeJitCompilationMode(); +bool IsJitCompileDisable(); + } // namespace aclops } // namespace at_npu diff --git a/torch_npu/csrc/npu/Module.cpp b/torch_npu/csrc/npu/Module.cpp index 65da2f7d4c..7933f16990 100644 --- a/torch_npu/csrc/npu/Module.cpp +++ b/torch_npu/csrc/npu/Module.cpp @@ -34,6 +34,7 @@ #include "torch_npu/csrc/core/npu/register/OptionRegister.h" #include "torch_npu/csrc/core/OverflowUtils.h" #include "torch_npu/csrc/framework/StorageDescHelper.h" +#include "torch_npu/csrc/framework/LazyInitAclops.h" #include "torch_npu/csrc/npu/DataParallelComm.h" #include "torch_npu/csrc/npu/NPUPluggableAllocator.h" #include "torch_npu/csrc/npu/Stream.h" @@ -946,18 +947,10 @@ PyObject *THNPModule_is_jit_compile_false_wrap(PyObject *self, PyObject *noargs) { HANDLE_TH_ERRORS pybind11::gil_scoped_release no_gil; - static const std::string jit_compile_option_name = "jitCompile"; - auto option_value = c10_npu::option::GetOption(jit_compile_option_name); - if (option_value.has_value() && (option_value.value() == "disable")) { + if (at_npu::aclops::IsJitCompileDisable()) { Py_RETURN_TRUE; } else { - static const std::string jit_compile_init_option_name = "jitCompileInit"; - auto init_option_value = c10_npu::option::GetOption(jit_compile_init_option_name); - if (init_option_value.has_value() && (init_option_value.value() == "disable")) { - Py_RETURN_TRUE; - } else { - Py_RETURN_FALSE; - } + Py_RETURN_FALSE; } END_HANDLE_TH_ERRORS } diff --git a/torch_npu/dynamo/__init__.py b/torch_npu/dynamo/__init__.py index a6c2357087..32fc5bc31f 100644 --- a/torch_npu/dynamo/__init__.py +++ b/torch_npu/dynamo/__init__.py @@ -74,6 +74,7 @@ class _LazyTorchair: try: from . import torchair + os.environ["ACL_OP_INIT_MODE"] = "0" except Exception as e: # In cpython, default import loader will suppress error when # find module's __spec__. So here we need to record error and -- Gitee From 945a1a2561c51a923c6eb9f54951f78e92c82f05 Mon Sep 17 00:00:00 2001 From: chen_liqing Date: Tue, 2 Sep 2025 11:09:01 +0800 Subject: [PATCH 2/4] 1 --- .../csrc/core/npu/register/OptionsManager.cpp | 30 +++++++++++-------- .../csrc/core/npu/register/OptionsManager.h | 1 + torch_npu/csrc/npu/Module.cpp | 10 +++++++ torch_npu/dynamo/__init__.py | 3 +- 4 files changed, 31 insertions(+), 13 deletions(-) diff --git a/torch_npu/csrc/core/npu/register/OptionsManager.cpp b/torch_npu/csrc/core/npu/register/OptionsManager.cpp index d903ef4e6e..0da57b69a6 100644 --- a/torch_npu/csrc/core/npu/register/OptionsManager.cpp +++ b/torch_npu/csrc/core/npu/register/OptionsManager.cpp @@ -478,20 +478,26 @@ uint32_t OptionsManager::GetP2PBufferSize() return buf_size; } +static uint32_t acl_op_init_mode = []() -> uint32_t { + char* buf_val = std::getenv("ACL_OP_INIT_MODE"); + int64_t default_value = c10_npu::IsSupportAclOpLazyInit() ? 1 : 0; + int64_t acl_op_init_mode_ = (buf_val != nullptr) ? strtol(buf_val, nullptr, 10) : default_value; + std::unordered_map aclOpInitMode = getAclOpInitMode(); + if (aclOpInitMode.find(acl_op_init_mode_) == aclOpInitMode.end()) { + acl_op_init_mode_ = 0; + TORCH_NPU_WARN_ONCE( + "Get env ACL_OP_INIT_MODE not in [0, 1, 2], so reset it to the default value ", default_value, "."); + } + return static_cast(acl_op_init_mode_); +}(); + +void OptionsManager::SetAclOpInitMode(uint32_t val) +{ + acl_op_init_mode = val; +} + uint32_t OptionsManager::GetAclOpInitMode() { - const static uint32_t acl_op_init_mode = []() -> uint32_t { - char* buf_val = std::getenv("ACL_OP_INIT_MODE"); - int64_t default_value = c10_npu::IsSupportAclOpLazyInit() ? 1 : 0; - int64_t acl_op_init_mode_ = (buf_val != nullptr) ? strtol(buf_val, nullptr, 10) : default_value; - std::unordered_map aclOpInitMode = getAclOpInitMode(); - if (aclOpInitMode.find(acl_op_init_mode_) == aclOpInitMode.end()) { - acl_op_init_mode_ = 0; - TORCH_NPU_WARN_ONCE( - "Get env ACL_OP_INIT_MODE not in [0, 1, 2], so reset it to the default value ", default_value, "."); - } - return static_cast(acl_op_init_mode_); - }(); return acl_op_init_mode; } diff --git a/torch_npu/csrc/core/npu/register/OptionsManager.h b/torch_npu/csrc/core/npu/register/OptionsManager.h index 73f5dbcb81..9aab2c6a10 100644 --- a/torch_npu/csrc/core/npu/register/OptionsManager.h +++ b/torch_npu/csrc/core/npu/register/OptionsManager.h @@ -126,6 +126,7 @@ public: static uint32_t GetHcclBufferSize(); static uint32_t GetP2PBufferSize(); static uint32_t GetTaskQueueEnable(); + static uint32_t SetAclOpInitMode(); static uint32_t GetAclOpInitMode(); static uint32_t GetStreamsPerDevice(); static char* GetCpuAffinityConf(); diff --git a/torch_npu/csrc/npu/Module.cpp b/torch_npu/csrc/npu/Module.cpp index 7933f16990..c9bd875602 100644 --- a/torch_npu/csrc/npu/Module.cpp +++ b/torch_npu/csrc/npu/Module.cpp @@ -1823,6 +1823,15 @@ static PyObject* THNPModule_reset_device_res_limit(PyObject* self, PyObject *arg END_HANDLE_TH_ERRORS } +static PyObject* THNPModule_start_acl_op_init(PyObject* self, PyObject *noargs) +{ + HANDLE_TH_ERRORS + c10_npu::option::OptionsManager::SetAclOpInitMode(0); + at_npu::aclops::InitializeJitCompilationMode(); + Py_RETURN_NONE; + END_HANDLE_TH_ERRORS +} + static struct PyMethodDef THNPModule_methods[] = { {"_npu_init", (PyCFunction)THNPModule_initExtension, METH_NOARGS, nullptr}, {"_npu_set_run_yet_variable_to_false", (PyCFunction)THNPModule_set_run_yet_variable_to_false_wrap, METH_NOARGS, nullptr}, @@ -1891,6 +1900,7 @@ static struct PyMethodDef THNPModule_methods[] = { {"_npu_get_device_res_limit", (PyCFunction)THNPModule_get_device_res_limit, METH_VARARGS, nullptr}, {"_npu_set_device_res_limit", (PyCFunction)THNPModule_set_device_res_limit, METH_VARARGS, nullptr}, {"_npu_reset_device_res_limit", (PyCFunction)THNPModule_reset_device_res_limit, METH_O, nullptr}, + {"_start_acl_op_init", (PyCFunction)THNPModule_start_acl_op_init, METH_NOARGS, nullptr}, {nullptr}}; TORCH_NPU_API PyMethodDef* THNPModule_get_methods() diff --git a/torch_npu/dynamo/__init__.py b/torch_npu/dynamo/__init__.py index 32fc5bc31f..575336ee02 100644 --- a/torch_npu/dynamo/__init__.py +++ b/torch_npu/dynamo/__init__.py @@ -74,7 +74,8 @@ class _LazyTorchair: try: from . import torchair - os.environ["ACL_OP_INIT_MODE"] = "0" + import torch_npu + torch_npu._C._start_acl_op_init() except Exception as e: # In cpython, default import loader will suppress error when # find module's __spec__. So here we need to record error and -- Gitee From 2396b8fee3492c2580ecff52f4229ef003c2dfe7 Mon Sep 17 00:00:00 2001 From: chen_liqing Date: Tue, 2 Sep 2025 11:15:01 +0800 Subject: [PATCH 3/4] 1 --- torch_npu/csrc/core/npu/register/OptionsManager.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch_npu/csrc/core/npu/register/OptionsManager.h b/torch_npu/csrc/core/npu/register/OptionsManager.h index 9aab2c6a10..d20d0621aa 100644 --- a/torch_npu/csrc/core/npu/register/OptionsManager.h +++ b/torch_npu/csrc/core/npu/register/OptionsManager.h @@ -126,7 +126,7 @@ public: static uint32_t GetHcclBufferSize(); static uint32_t GetP2PBufferSize(); static uint32_t GetTaskQueueEnable(); - static uint32_t SetAclOpInitMode(); + static void SetAclOpInitMode(uint32_t val); static uint32_t GetAclOpInitMode(); static uint32_t GetStreamsPerDevice(); static char* GetCpuAffinityConf(); -- Gitee From 3c371d08c36fb45751e2fe33130b15d5f8131145 Mon Sep 17 00:00:00 2001 From: chen_liqing Date: Wed, 3 Sep 2025 11:24:03 +0800 Subject: [PATCH 4/4] fix --- torch_npu/csrc/npu/Module.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch_npu/csrc/npu/Module.cpp b/torch_npu/csrc/npu/Module.cpp index c9bd875602..688d0f8469 100644 --- a/torch_npu/csrc/npu/Module.cpp +++ b/torch_npu/csrc/npu/Module.cpp @@ -1827,7 +1827,7 @@ static PyObject* THNPModule_start_acl_op_init(PyObject* self, PyObject *noargs) { HANDLE_TH_ERRORS c10_npu::option::OptionsManager::SetAclOpInitMode(0); - at_npu::aclops::InitializeJitCompilationMode(); + at_npu::aclops::LazyInitAclops(); Py_RETURN_NONE; END_HANDLE_TH_ERRORS } -- Gitee