diff --git a/torch_npu/csrc/core/npu/NPUAffinityController.cpp b/torch_npu/csrc/core/npu/NPUAffinityController.cpp index 5567c3e6e22292f53e4a5907f46f56287df666af..95bd7e59fbfe490726adc8f3f26793d72af94c2a 100644 --- a/torch_npu/csrc/core/npu/NPUAffinityController.cpp +++ b/torch_npu/csrc/core/npu/NPUAffinityController.cpp @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include namespace c10_npu { @@ -160,46 +162,6 @@ void printCoreRanges(const uint32_t mode, const std::vector &ranges ASCEND_LOGD("Read CPU affinity config: %s", oss.str().c_str()); } -bool getThreadAffinityInfo() -{ - parseCPUAffinityConf(cpu_affinity_mode, device_ranges); - printCoreRanges(cpu_affinity_mode, device_ranges); - - if (cpu_affinity_mode == 0) { - return false; - } - - cpu_set_t mask; - pthread_getaffinity_np(pthread_self(), sizeof(mask), &mask); - for (auto &range : device_ranges) { - for (unsigned int i = range.start; i < range.end; i++) { - if (!CPU_ISSET(i, &mask)) { - ASCEND_LOGW("Thread affinity is already set."); - return false; - } - } - } - return true; -} - -inline bool needToSetThreadAffinity() -{ - static bool need_to_set_affinity = getThreadAffinityInfo(); - return need_to_set_affinity; -} - -void SetThreadType(ThreadType type) -{ - // Called at the start of the thread's execution to avoid frequent triggering of this function. - local_thread = type; - if (type == ThreadType::OTHER_THREAD || type == ThreadType::MAIN_THREAD) { - return; - } - if (prctl(PR_SET_NAME, threadTypeToNameMap.at(type).c_str()) != 0) { - ASCEND_LOGW("Set thread name to %s failed!", threadTypeToNameMap.at(type).c_str()); - } -} - std::string getAffinityMapAsString(c10::DeviceIndex device_id, const ThreadCoreMap &threadCoreMap) { std::ostringstream oss; @@ -244,6 +206,70 @@ ThreadCoreMap getCpuAffinityMap(c10::DeviceIndex device_id, const std::vector(i); + device_thread_core_maps.emplace( + device_id, getCpuAffinityMap(device_id, device_ranges)); + } + } + return true; +} + +inline bool needToSetThreadAffinity() +{ + static std::once_flag flag; + static bool need_to_set_affinity; + + std::call_once(flag, [] { + need_to_set_affinity = getThreadAffinityInfo(); + }); + return need_to_set_affinity; +} + +void SetThreadType(ThreadType type) +{ + // Called at the start of the thread's execution to avoid frequent triggering of this function. + local_thread = type; + if (type == ThreadType::OTHER_THREAD || type == ThreadType::MAIN_THREAD) { + return; + } + if (prctl(PR_SET_NAME, threadTypeToNameMap.at(type).c_str()) != 0) { + ASCEND_LOGW("Set thread name to %s failed!", threadTypeToNameMap.at(type).c_str()); + } +} + bool setThreadAffinityImpl(pthread_t thread, CoreIdRange core_range) { cpu_set_t mask; @@ -258,20 +284,6 @@ bool setThreadAffinityImpl(pthread_t thread, CoreIdRange core_range) } } -CoreIdRange getCoreRange(c10::DeviceIndex device_id, ThreadType type) -{ - CoreIdRange core_range; - if (cpu_affinity_mode == 0 || cpu_affinity_mode == 1) { - core_range = device_ranges[device_id]; - } else { - if (device_thread_core_maps.find(device_id) == device_thread_core_maps.end()) { - device_thread_core_maps.emplace(device_id, getCpuAffinityMap(device_id, device_ranges)); - } - core_range = device_thread_core_maps.at(device_id).at(type); - } - return core_range; -} - void SetThreadAffinity(c10::DeviceIndex device_id) { if (!needToSetThreadAffinity() || local_thread == ThreadType::USER_THREAD) { @@ -324,6 +336,9 @@ void SetThreadAffinity(int core_start, int core_end) void SetMainThread() { + if (!needToSetThreadAffinity()) { + return; + } main_thread = pthread_self(); }