diff --git a/third_party/acl/inc/acl/acl_rt.h b/third_party/acl/inc/acl/acl_rt.h index 547a4e041698843fb6bee121c7b5556dc2e2f050..d1422a033114b078e0b815752ed1927a8cd8cb61 100755 --- a/third_party/acl/inc/acl/acl_rt.h +++ b/third_party/acl/inc/acl/acl_rt.h @@ -30,6 +30,13 @@ extern "C" { #define ACL_CONTINUE_ON_FAILURE 0x00000000u #define ACL_STOP_ON_FAILURE 0x00000001u +#define ACL_RT_IPC_MEM_FLAG_DEFAULT 0x0UL +#define ACL_RT_IPC_MEM_FLAG_DISABLE_PID_VALIDATION 0x1UL +#define ACL_RT_IPC_MEM_FLAG_ENABLE_PEER_ACCESS 0x2UL + +#define ACL_RT_VMM_FLAG_DEFAULT 0x0UL +#define ACL_RT_VMM_FLAG_DISABLE_PID_VALIDATION 0x1UL + #define MAX_MEM_UCE_INFO_ARRAY_SIZE 128 constexpr int32_t DEVICE_UTILIZATION_NOT_SUPPORT = -1; diff --git a/torch_npu/csrc/core/npu/NPUCachingAllocator.cpp b/torch_npu/csrc/core/npu/NPUCachingAllocator.cpp index f7aefa2474bf91a428d51b12f09099ed26fad939..3e146ee9dc18151323937104422651b7307dea10 100644 --- a/torch_npu/csrc/core/npu/NPUCachingAllocator.cpp +++ b/torch_npu/csrc/core/npu/NPUCachingAllocator.cpp @@ -23,7 +23,6 @@ #include "NPUBlockHandle.h" #include "torch_npu/csrc/core/npu/sys_ctrl/npu_sys_ctrl.h" #include "torch_npu/csrc/core/npu/NPUEvent.h" -#include "torch_npu/csrc/core/npu/NPUIPCPidManager.h" #include "torch_npu/csrc/profiler/npu_profiler.h" #include "torch_npu/csrc/core/npu/GetCANNInfo.h" #ifndef BUILD_LIBTORCH @@ -363,9 +362,6 @@ However, it is possible to temporarily disable (expandable_segments:False) the bevhavior for allocator tensors that need to be used cross-process. */ -std::mutex ipcHandleMutex; -ska::flat_hash_map ipcShareableHandle_to_handle; -ska::flat_hash_set ipcHandles; struct ExpandableSegment { ExpandableSegment( @@ -490,10 +486,7 @@ struct ExpandableSegment { if (!handle.shareableHandle) { uint64_t shareableHandle = 0; NPU_CHECK_ERROR(c10_npu::acl::AclrtMemExportToShareableHandle( - handle.handle, ACL_MEM_HANDLE_TYPE_NONE, 0, &shareableHandle)); - int32_t* pids = nullptr; - int pid_num = torch_npu::ipc::getPids(&pids); - NPU_CHECK_ERROR(c10_npu::acl::AclrtMemSetPidToShareableHandle(shareableHandle, pids, pid_num)); + handle.handle, ACL_MEM_HANDLE_TYPE_NONE, ACL_RT_VMM_FLAG_DISABLE_PID_VALIDATION, &shareableHandle)); handle.shareableHandle = shareableHandle; } uint64_t shandle = *handle.shareableHandle; @@ -506,7 +499,6 @@ struct ExpandableSegment { c10::DeviceIndex device, std::istream& buf) { - std::lock_guard lock(ipcHandleMutex); ShareHeader header{}; buf.read((char*)&header, sizeof(ShareHeader)); auto segment = std::make_unique( @@ -519,20 +511,11 @@ struct ExpandableSegment { uint64_t shareableHandle = 0; buf.read((char*)&shareableHandle, sizeof(uint64_t)); - auto iter = ipcShareableHandle_to_handle.find(shareableHandle); - if (iter != ipcShareableHandle_to_handle.end()) { - aclrtDrvMemHandle handle = iter->second; - segment->handles_.emplace_back(Handle{handle, shareableHandle}); - continue; - } - int32_t deviceId = static_cast(device); aclrtDrvMemHandle handle; NPU_CHECK_ERROR(c10_npu::acl::AclrtMemImportFromShareableHandle( shareableHandle, deviceId, &handle)); segment->handles_.emplace_back(Handle{handle, shareableHandle}); - ipcShareableHandle_to_handle.insert(iter, {shareableHandle, handle}); - ipcHandles.insert(handle); } segment->mapAndSetAccess(0, header.num_handles); return segment; @@ -600,13 +583,6 @@ private: Handle h = handles_.at(i).value(); handles_.at(i) = std::nullopt; NPU_CHECK_ERROR(c10_npu::acl::AclrtUnmapMem((char *)ptr_ + segment_size_ * i, getHcclComm())); - if (C10_UNLIKELY(h.shareableHandle)) { - std::lock_guard lock(ipcHandleMutex); - auto iter = ipcHandles.find(h.handle); - if (iter != ipcHandles.end()) { - continue; - } - } if (!pool) { NPU_CHECK_ERROR(c10_npu::acl::AclrtFreePhysical(h.handle)); } else { @@ -1688,10 +1664,7 @@ public: auto it = ipc_handle_map.find(base_ptr); if (it == ipc_handle_map.end()) { NPU_CHECK_ERROR(c10_npu::acl::AclrtIpcMemGetExportKey( - base_ptr, base_size, handle.data, ACL_IPC_HANDLE_SIZE, 0)); - int32_t* pids = nullptr; - size_t pid_num = torch_npu::ipc::getPids(&pids); - NPU_CHECK_ERROR(c10_npu::acl::AclrtIpcMemSetImportPid(handle.data, pids, pid_num)); + base_ptr, base_size, handle.data, ACL_IPC_HANDLE_SIZE, ACL_RT_IPC_MEM_FLAG_DISABLE_PID_VALIDATION)); ipc_handle_map[base_ptr] = handle; } else { handle = it->second; @@ -3664,7 +3637,8 @@ public: if (type == SHAREABLE_NPU_MALLOC) { handle_str handle_r; ss.read(handle_r.data, ACL_IPC_HANDLE_SIZE); - NPU_CHECK_ERROR(c10_npu::acl::AclrtIpcMemImportByKey(&npu_ipc_ptr_, handle_r.data, 0)); + NPU_CHECK_ERROR(c10_npu::acl::AclrtIpcMemImportByKey( + &npu_ipc_ptr_, handle_r.data, ACL_RT_IPC_MEM_FLAG_ENABLE_PEER_ACCESS)); handle_s.assign(handle_r.data, ACL_IPC_HANDLE_SIZE); } else if (type == SHAREABLE_NPU_EXPANDABLE_SEGMENT) { expandable_segment_ = diff --git a/torch_npu/csrc/core/npu/NPUIPCPidManager.cpp b/torch_npu/csrc/core/npu/NPUIPCPidManager.cpp deleted file mode 100644 index 393b4706c60decfb6171dfb50d8670d92f74b102..0000000000000000000000000000000000000000 --- a/torch_npu/csrc/core/npu/NPUIPCPidManager.cpp +++ /dev/null @@ -1,36 +0,0 @@ -#include "torch_npu/csrc/core/npu/NPUIPCPidManager.h" -namespace torch_npu { -namespace ipc { - -int32_t* pids = nullptr; -size_t pid_num = 0; -size_t capacity = 0; - -void addPid(int pid) -{ - const size_t requiredCapacity = pid_num + 1; - - if (requiredCapacity > capacity) { - size_t newCapacity = capacity + 10; - - int32_t* newArray = new int32_t[newCapacity]; - for (int i = 0; i < pid_num; ++i) { - newArray[i] = pids[i]; - } - - delete[] pids; - pids = newArray; - capacity = newCapacity; - } - - pids[pid_num++] = static_cast(pid); -} - -size_t getPids(int32_t** ret_pids) -{ - *ret_pids = pids; - return pid_num; -} - -} // namespace ipc -} // namespace torch_npu \ No newline at end of file diff --git a/torch_npu/csrc/core/npu/NPUIPCPidManager.h b/torch_npu/csrc/core/npu/NPUIPCPidManager.h deleted file mode 100644 index f27cd240d15723f743fbcefe7204c81588ca60b3..0000000000000000000000000000000000000000 --- a/torch_npu/csrc/core/npu/NPUIPCPidManager.h +++ /dev/null @@ -1,12 +0,0 @@ -#pragma once -#include -#include - -namespace torch_npu { -namespace ipc { - -void addPid(int pid); -size_t getPids(int32_t** pids); - -} // namespace ipc -} // namespace torch_npu \ No newline at end of file diff --git a/torch_npu/csrc/npu/Module.cpp b/torch_npu/csrc/npu/Module.cpp index a4b63f599d3d2b26cbfd7b53528783d8e1f770ed..7255afcd7cb30ad1bb175dda59affbf022f13259 100644 --- a/torch_npu/csrc/npu/Module.cpp +++ b/torch_npu/csrc/npu/Module.cpp @@ -27,7 +27,6 @@ #include "torch_npu/csrc/core/npu/NPUQueue.h" #include "torch_npu/csrc/core/npu/NPUAffinityController.h" #include "torch_npu/csrc/core/npu/NPUPeerToPeerAccess.h" -#include "torch_npu/csrc/core/npu/NPUIPCPidManager.h" #include "torch_npu/csrc/core/npu/NPUGuard.h" #include "torch_npu/csrc/core/npu/NpuVariables.h" #include "torch_npu/csrc/core/npu/sys_ctrl/npu_sys_ctrl.h" @@ -1765,43 +1764,6 @@ static PyObject* THNPModule_is_gte_cann_version(PyObject* self, PyObject *args) END_HANDLE_TH_ERRORS } -static PyObject* THNPModule_add_ipc_pid(PyObject* self, PyObject *args) -{ - HANDLE_TH_ERRORS - int pid; - if (!PyArg_ParseTuple(args, "i", &pid)) { - throw torch::TypeError("Pybind failed to parse parameters." + PTA_ERROR(ErrCode::TYPE)); - } - torch_npu::ipc::addPid(pid); - - Py_RETURN_NONE; - END_HANDLE_TH_ERRORS -} - -static PyObject* THNPModule_get_ipc_pid(PyObject* self, PyObject *noargs) -{ - HANDLE_TH_ERRORS - int32_t pid; - NPU_CHECK_ERROR(c10_npu::acl::AclrtDeviceGetBareTgid(&pid)); - return THPUtils_packInt32(pid); - END_HANDLE_TH_ERRORS -} - -static PyObject* THNPModule_add_p2p_access(PyObject* self, PyObject *args) -{ - HANDLE_TH_ERRORS - int src_dev; - int dst_dev; - if (!PyArg_ParseTuple(args, "ii", &src_dev, &dst_dev)) { - throw torch::TypeError("Pybind failed to parse parameters." + PTA_ERROR(ErrCode::TYPE)); - } - bool warning_flag = false; - at_npu::native::NpuP2pCtrl::get_instance().get_p2p_access(src_dev, dst_dev, warning_flag); - - Py_RETURN_NONE; - END_HANDLE_TH_ERRORS -} - static PyObject* THNPModule_set_device_res_limit(PyObject* self, PyObject *args) { HANDLE_TH_ERRORS @@ -2004,9 +1966,6 @@ static struct PyMethodDef THNPModule_methods[] = { {"_npu_clear_fft_plan_cache", (PyCFunction)THNPModule_npu_clear_fft_plan_cache, METH_NOARGS, nullptr}, {"_get_cann_version", (PyCFunction)THNPModule_get_cann_version, METH_O, nullptr}, {"_is_gte_cann_version", (PyCFunction)THNPModule_is_gte_cann_version, METH_VARARGS, nullptr}, - {"_add_ipc_pid", (PyCFunction)THNPModule_add_ipc_pid, METH_VARARGS, nullptr}, - {"_get_ipc_pid", (PyCFunction)THNPModule_get_ipc_pid, METH_NOARGS, nullptr}, - {"_add_p2p_access", (PyCFunction)THNPModule_add_p2p_access, METH_VARARGS, nullptr}, {"_npu_get_device_res_limit", (PyCFunction)THNPModule_get_device_res_limit, METH_VARARGS, nullptr}, {"_npu_set_device_res_limit", (PyCFunction)THNPModule_set_device_res_limit, METH_VARARGS, nullptr}, {"_npu_reset_device_res_limit", (PyCFunction)THNPModule_reset_device_res_limit, METH_O, nullptr},