diff --git a/torch_npu/csrc/core/npu/NPUAffinityController.cpp b/torch_npu/csrc/core/npu/NPUAffinityController.cpp index 36e585f50a4c2b67f21e3d0c5ea85724453b18e8..56604d9935aed2dc924bd3a36913e79488171ea6 100644 --- a/torch_npu/csrc/core/npu/NPUAffinityController.cpp +++ b/torch_npu/csrc/core/npu/NPUAffinityController.cpp @@ -18,6 +18,7 @@ static thread_local ThreadType local_thread = ThreadType::MAIN_THREAD; static pthread_t main_thread; static bool start_main_thread_bind = false; static std::mutex core_map_mutex; +static bool lazy_bind = true; using ThreadCoreMap = std::unordered_map; @@ -89,6 +90,15 @@ void parseCPUAffinityConf(uint32_t &mode, std::vector &ranges) } } + std::regex pattern_for_lazy_bind("lazy_bind:(\\d)"); + std::smatch match_for_lazy_bind; + if (std::regex_search(inputStr, match_for_lazy_bind, pattern_for_lazy_bind)) { + int lazy_bind_val = std::stoi(match_for_lazy_bind[1].str()); + if (lazy_bind_val == 0) { + lazy_bind = false; + } + } + // Handle cases where only `mode` is provided, or `mode:` without value if (isAllDigits(inputStr)) { mode = static_cast(std::stoi(inputStr)); @@ -335,6 +345,13 @@ bool NeedMainThreadBind() return start_main_thread_bind && (local_thread == ThreadType::MAIN_THREAD); } +// Returns true when thread affinity must be applied eagerly from NpuSysCtrl::Initialize: +// needToSetThreadAffinity() holds and the config disabled lazy binding (lazy_bind:0). +bool SetThreadAffinityInInitialize() +{ + return needToSetThreadAffinity() && !lazy_bind; +} + void StartMainThreadBind(c10::DeviceIndex device_id) { if (!needToSetThreadAffinity() || local_thread == ThreadType::USER_THREAD) { diff --git a/torch_npu/csrc/core/npu/NPUAffinityController.h b/torch_npu/csrc/core/npu/NPUAffinityController.h index e850a47b67f3484ffafb56f0b4cc67b0eea0c0ee..5a38f885d4d68a6a6797bdc94e0a11c25c01b571 100644 --- a/torch_npu/csrc/core/npu/NPUAffinityController.h +++ b/torch_npu/csrc/core/npu/NPUAffinityController.h @@ -26,6 +26,7 @@ void SetThreadAffinity(int core_start, int core_end); void SetMainThread(); bool NeedMainThreadBind(); +bool SetThreadAffinityInInitialize(); void 
StartMainThreadBind(c10::DeviceIndex device_id); } // namespace c10_npu \ No newline at end of file diff --git a/torch_npu/csrc/core/npu/sys_ctrl/npu_sys_ctrl.cpp b/torch_npu/csrc/core/npu/sys_ctrl/npu_sys_ctrl.cpp index 4b6707b8495b9a89c500e06f160b42950e9ae6fb..5f348e029ca05ecc337e3e68befd3e6c44967168 100644 --- a/torch_npu/csrc/core/npu/sys_ctrl/npu_sys_ctrl.cpp +++ b/torch_npu/csrc/core/npu/sys_ctrl/npu_sys_ctrl.cpp @@ -190,6 +190,9 @@ NpuSysCtrl::SysStatus NpuSysCtrl::Initialize(int device_id) lazy_fn_.clear(); SetMainThread(); + if (SetThreadAffinityInInitialize()) { + SetThreadAffinity(device_id_); + } init_flag_ = true; ASCEND_LOGD("Npu sys ctrl initialize successfully.");