diff --git a/third_party/acl/inc/acl/acl_rt.h b/third_party/acl/inc/acl/acl_rt.h
index 6d97252ba45482cc4592ff707264a74fab44f67f..d8e24edf23fcd3b52beeebd701c5e50c1b8a9a8a 100755
--- a/third_party/acl/inc/acl/acl_rt.h
+++ b/third_party/acl/inc/acl/acl_rt.h
@@ -30,6 +30,13 @@ extern "C" {
 #define ACL_CONTINUE_ON_FAILURE 0x00000000u
 #define ACL_STOP_ON_FAILURE     0x00000001u
 
+#define ACL_RT_IPC_MEM_FLAG_DEFAULT                  0x0UL  // IPC memory flags: no option bits set
+#define ACL_RT_IPC_MEM_FLAG_DISABLE_PID_VALIDATION   0x1UL  // bit 0; presumably skips the importer-PID check - confirm in ACL docs
+#define ACL_RT_IPC_MEM_FLAG_ENABLE_PEER_ACCESS       0x2UL  // bit 1; presumably enables peer access on import - confirm in ACL docs
+
+#define ACL_RT_VMM_FLAG_DEFAULT                      0x0UL  // VMM shareable-handle flags: no option bits set
+#define ACL_RT_VMM_FLAG_DISABLE_PID_VALIDATION       0x1UL  // bit 0; presumably skips the PID check on handle import - confirm in ACL docs
+
 #define MAX_MEM_UCE_INFO_ARRAY_SIZE 128
 
 constexpr int32_t DEVICE_UTILIZATION_NOT_SUPPORT = -1;
diff --git a/torch_npu/csrc/core/npu/NPUCachingAllocator.cpp b/torch_npu/csrc/core/npu/NPUCachingAllocator.cpp
index b64552b804c45fa761ab7b1fc25357b611853db0..fab09b77fc6dbfbce2f0e6e2793c75aa409d39b5 100644
--- a/torch_npu/csrc/core/npu/NPUCachingAllocator.cpp
+++ b/torch_npu/csrc/core/npu/NPUCachingAllocator.cpp
@@ -24,7 +24,6 @@
 #include "torch_npu/csrc/core/npu/GetCANNInfo.h"
 #include "torch_npu/csrc/core/npu/sys_ctrl/npu_sys_ctrl.h"
 #include "torch_npu/csrc/core/npu/NPUEvent.h"
-#include "torch_npu/csrc/core/npu/NPUIPCPidManager.h"
 #include "torch_npu/csrc/profiler/npu_profiler.h"
 #ifndef BUILD_LIBTORCH
 #include "torch_npu/csrc/sanitizer/NPUTrace.h"
@@ -477,10 +476,7 @@ struct ExpandableSegment {
             if (!handle.shareableHandle) {
                 uint64_t shareableHandle = 0;
                 NPU_CHECK_ERROR(c10_npu::acl::AclrtMemExportToShareableHandle(
-                    handle.handle, ACL_MEM_HANDLE_TYPE_NONE, 0, &shareableHandle));
-                int32_t* pids = nullptr;
-                int pid_num = torch_npu::ipc::getPids(&pids);
-                NPU_CHECK_ERROR(c10_npu::acl::AclrtMemSetPidToShareableHandle(shareableHandle, pids, pid_num));
+                    handle.handle, ACL_MEM_HANDLE_TYPE_NONE, ACL_RT_VMM_FLAG_DISABLE_PID_VALIDATION, &shareableHandle)); // no per-PID registration follows the export
                 handle.shareableHandle = shareableHandle;
             }
             uint64_t shandle = *handle.shareableHandle;
@@ -572,9 +568,7 @@ private:
         // cannot call c10::npu::stream_synchronize because
         // it might grab the GIL which can lead to a deadlock
         // Locking order must be GIL -> Allocator Lock
-        if (stream_) {
-            NPU_CHECK_ERROR(aclrtSynchronizeStream(*stream_));
-        } else {
+        { // bare scope: device_guard lives only for the npuSynchronizeDevice call; stream_ is not consulted
             c10_npu::NPUGuard device_guard(device_);
             c10_npu::npuSynchronizeDevice(true);
         }
@@ -1673,10 +1667,7 @@ public:
             auto it = ipc_handle_map.find(base_ptr);
             if (it == ipc_handle_map.end()) {
                 NPU_CHECK_ERROR(c10_npu::acl::AclrtIpcMemGetExportKey(
-                    base_ptr, base_size, handle.data, ACL_IPC_HANDLE_SIZE, 0));
-                int32_t* pids = nullptr;
-                size_t pid_num = torch_npu::ipc::getPids(&pids);
-                NPU_CHECK_ERROR(c10_npu::acl::AclrtIpcMemSetImportPid(handle.data, pids, pid_num));
+                    base_ptr, base_size, handle.data, ACL_IPC_HANDLE_SIZE, ACL_RT_IPC_MEM_FLAG_DISABLE_PID_VALIDATION)); // no import-PID registration follows the export
                 ipc_handle_map[base_ptr] = handle;
             } else {
                 handle = it->second;
@@ -3327,15 +3318,6 @@ public:
         ASCEND_LOGD("End empty cache with check_error = %d", check_error);
     }
 
-    void clearIpcHandles() override
-    {
-        std::lock_guard<std::mutex> lock(ipcHandleMutex);
-        for (auto &handle : ipcHandles) {
-            NPU_CHECK_ERROR(c10_npu::acl::AclrtFreePhysical(handle));
-        }
-        ipcHandles.clear();
-    }
-
     void *getBaseAllocation(void *ptr, size_t *outSize) override
     {
         Block *block = get_allocated_block(ptr);
@@ -3628,7 +3610,8 @@ public:
             if (type == SHAREABLE_NPU_MALLOC) {
                 handle_str handle_r;
                 ss.read(handle_r.data, ACL_IPC_HANDLE_SIZE);
-                NPU_CHECK_ERROR(c10_npu::acl::AclrtIpcMemImportByKey(&npu_ipc_ptr_, handle_r.data, 0));
+                NPU_CHECK_ERROR(c10_npu::acl::AclrtIpcMemImportByKey(
+                    &npu_ipc_ptr_, handle_r.data, ACL_RT_IPC_MEM_FLAG_ENABLE_PEER_ACCESS)); // NOTE(review): presumably replaces an explicit P2P setup - confirm
                 handle_s.assign(handle_r.data, ACL_IPC_HANDLE_SIZE);
             } else if (type == SHAREABLE_NPU_EXPANDABLE_SEGMENT) {
                 expandable_segment_ =
@@ -3648,7 +3631,10 @@ public:
         void clear()
         {
             if (npu_ipc_ptr_) {
-                c10_npu::NPUGuard device_guard(device_);
+                { // synchronize device_ before the imported IPC memory is closed
+                    c10_npu::NPUGuard device_guard(device_);
+                    c10_npu::npuSynchronizeDevice(true);
+                } // NOTE(review): device_guard ends here, so AclrtIpcMemClose below runs outside it - confirm this is intended
                 NPU_CHECK_ERROR(c10_npu::acl::AclrtIpcMemClose(handle_s.c_str()));
                 npu_ipc_ptr_ = nullptr;
             }
diff --git a/torch_npu/csrc/core/npu/NPUCachingAllocator.h b/torch_npu/csrc/core/npu/NPUCachingAllocator.h
index 30a42035ec1914d44e408e23520e3b593b2b37b6..a5368b705da1cf59e9ed40532283ad66872fec88 100644
--- a/torch_npu/csrc/core/npu/NPUCachingAllocator.h
+++ b/torch_npu/csrc/core/npu/NPUCachingAllocator.h
@@ -203,7 +203,6 @@ public:
     virtual bool initialized() = 0;
     virtual void setMemoryFraction(double fraction, int device) = 0;
     virtual void emptyCache(bool check_error) = 0;
-    virtual void clearIpcHandles() = 0;
     virtual void cacheInfo(int dev_id, size_t* cachedAndFree, size_t* largestBlock) = 0;
     virtual void* getBaseAllocation(void* ptr, size_t* size) = 0;
     virtual void recordStream(const c10::DataPtr& ptr, c10_npu::NPUStream stream) = 0;
@@ -311,11 +310,6 @@ C10_NPU_API inline void emptyCache(bool check_error = true)
     return get()->emptyCache(check_error);
 }
 
-inline void clearIpcHandles()
-{
-    return get()->clearIpcHandles();
-}
-
 inline void cacheInfo(int dev_id, size_t* cachedAndFree, size_t* largestBlock)
 {
     return get()->cacheInfo(dev_id, cachedAndFree, largestBlock);
diff --git a/torch_npu/csrc/core/npu/NPUIPCPidManager.cpp b/torch_npu/csrc/core/npu/NPUIPCPidManager.cpp
deleted file mode 100644
index 393b4706c60decfb6171dfb50d8670d92f74b102..0000000000000000000000000000000000000000
--- a/torch_npu/csrc/core/npu/NPUIPCPidManager.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-#include "torch_npu/csrc/core/npu/NPUIPCPidManager.h"
-namespace torch_npu {
-namespace ipc {
-
-int32_t* pids = nullptr;
-size_t pid_num = 0;
-size_t capacity = 0;
-
-void addPid(int pid)
-{
-    const size_t requiredCapacity = pid_num + 1;
-
-    if (requiredCapacity > capacity) {
-        size_t newCapacity = capacity + 10;
-
-        int32_t* newArray = new int32_t[newCapacity];
-        for (int i = 0; i < pid_num; ++i) {
-            newArray[i] = pids[i];
-        }
-
-        delete[] pids;
-        pids = newArray;
-        capacity = newCapacity;
-    }
-
-    pids[pid_num++] = static_cast<int32_t>(pid);
-}
-
-size_t getPids(int32_t** ret_pids)
-{
-    *ret_pids = pids;
-    return pid_num;
-}
-
-} // namespace ipc
-} // namespace torch_npu
\ No newline at end of file
diff --git a/torch_npu/csrc/core/npu/NPUIPCPidManager.h b/torch_npu/csrc/core/npu/NPUIPCPidManager.h
deleted file mode 100644
index f27cd240d15723f743fbcefe7204c81588ca60b3..0000000000000000000000000000000000000000
--- a/torch_npu/csrc/core/npu/NPUIPCPidManager.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#pragma once
-#include <cstdint>
-#include <cstddef>
-
-namespace torch_npu {
-namespace ipc {
-
-void addPid(int pid);
-size_t getPids(int32_t** pids);
-
-} // namespace ipc
-} // namespace torch_npu
\ No newline at end of file
diff --git a/torch_npu/csrc/ipc/StorageSharing.cpp b/torch_npu/csrc/ipc/StorageSharing.cpp
index 18fdd4c5e0722bcde2133239e3ccf9c0f9ad6ba0..1de9d10dee47eab1625c89cae11b133442eb3c68 100644
--- a/torch_npu/csrc/ipc/StorageSharing.cpp
+++ b/torch_npu/csrc/ipc/StorageSharing.cpp
@@ -47,6 +47,7 @@ static PyObject* THNPStorage_shareNpu(PyObject* self, PyObject* args)
     }
 
     at::DeviceGuard device_guard(storage.device());
+    c10_npu::LazySetDevice(storage.device().index()); // NOTE(review): presumably forces the device to be set for this thread - confirm
     THPObjectPtr tuple(PyTuple_New(8));
     THPObjectPtr device(THPUtils_packInt32(storage.device().index()));
     THPObjectPtr _handle(Py_None);
@@ -193,6 +194,7 @@ static PyObject* THNPStorage_newSharedNpu(PyObject* _unused, PyObject* args)
     const auto device = c10::checked_convert<c10::DeviceIndex>(
         THPUtils_unpackLong(_device), "c10::DeviceIndex");
     c10_npu::NPUGuard device_guard(device);
+    c10_npu::LazySetDevice(device); // NOTE(review): presumably forces the device to be set for this thread - confirm
 
     if (PyObject_IsTrue(_event_sync_required)) {
         // TO BE DONE
diff --git a/torch_npu/csrc/npu/Module.cpp b/torch_npu/csrc/npu/Module.cpp
index 9217bc30bcd5d2885926edaa15ffcc4619aceac8..d8ce4805e03596d3c67b1dd69af34903d94dd631 100644
--- a/torch_npu/csrc/npu/Module.cpp
+++ b/torch_npu/csrc/npu/Module.cpp
@@ -28,7 +28,6 @@
 #include "torch_npu/csrc/core/npu/NPUQueue.h"
 #include "torch_npu/csrc/core/npu/NPUAffinityController.h"
 #include "torch_npu/csrc/core/npu/NPUPeerToPeerAccess.h"
-#include "torch_npu/csrc/core/npu/NPUIPCPidManager.h"
 #include "torch_npu/csrc/core/npu/NPUGuard.h"
 #include "torch_npu/csrc/core/npu/NpuVariables.h"
 #include "torch_npu/csrc/core/npu/sys_ctrl/npu_sys_ctrl.h"
@@ -1740,43 +1739,6 @@ static PyObject* THNPModule_is_gte_cann_version(PyObject* self, PyObject *args)
     END_HANDLE_TH_ERRORS
 }
 
-static PyObject* THNPModule_add_ipc_pid(PyObject* self, PyObject *args)
-{
-    HANDLE_TH_ERRORS
-    int pid;
-    if (!PyArg_ParseTuple(args, "i", &pid)) {
-        throw torch::TypeError("Pybind failed to parse parameters." + PTA_ERROR(ErrCode::TYPE));
-    }
-    torch_npu::ipc::addPid(pid);
- 
-    Py_RETURN_NONE;
-    END_HANDLE_TH_ERRORS
-}
-
-static PyObject* THNPModule_get_ipc_pid(PyObject* self, PyObject *noargs)
-{
-    HANDLE_TH_ERRORS
-    int32_t pid;
-    NPU_CHECK_ERROR(c10_npu::acl::AclrtDeviceGetBareTgid(&pid));
-    return THPUtils_packInt32(pid);
-    END_HANDLE_TH_ERRORS
-}
-
-static PyObject* THNPModule_add_p2p_access(PyObject* self, PyObject *args)
-{
-    HANDLE_TH_ERRORS
-    int src_dev;
-    int dst_dev;
-    if (!PyArg_ParseTuple(args, "ii", &src_dev, &dst_dev)) {
-        throw torch::TypeError("Pybind failed to parse parameters." + PTA_ERROR(ErrCode::TYPE));
-    }
-    bool warning_flag = false;
-    at_npu::native::NpuP2pCtrl::get_instance().get_p2p_access(src_dev, dst_dev, warning_flag);
- 
-    Py_RETURN_NONE;
-    END_HANDLE_TH_ERRORS
-}
-
 static PyObject* THNPModule_set_device_res_limit(PyObject* self, PyObject *args)
 {
     HANDLE_TH_ERRORS
@@ -1884,9 +1846,6 @@ static struct PyMethodDef THNPModule_methods[] = {
     {"_npu_clear_fft_plan_cache", (PyCFunction)THNPModule_npu_clear_fft_plan_cache, METH_NOARGS, nullptr},
     {"_get_cann_version", (PyCFunction)THNPModule_get_cann_version, METH_O, nullptr},
     {"_is_gte_cann_version", (PyCFunction)THNPModule_is_gte_cann_version, METH_VARARGS, nullptr},
-    {"_add_ipc_pid", (PyCFunction)THNPModule_add_ipc_pid, METH_VARARGS, nullptr},
-    {"_get_ipc_pid", (PyCFunction)THNPModule_get_ipc_pid, METH_NOARGS, nullptr},
-    {"_add_p2p_access", (PyCFunction)THNPModule_add_p2p_access, METH_VARARGS, nullptr},
     {"_npu_get_device_res_limit", (PyCFunction)THNPModule_get_device_res_limit, METH_VARARGS, nullptr},
     {"_npu_set_device_res_limit", (PyCFunction)THNPModule_set_device_res_limit, METH_VARARGS, nullptr},
     {"_npu_reset_device_res_limit", (PyCFunction)THNPModule_reset_device_res_limit, METH_O, nullptr},
diff --git a/torch_npu/csrc/npu/NPUPluggableAllocator.cpp b/torch_npu/csrc/npu/NPUPluggableAllocator.cpp
index 7610374a3ba35297c97eac5d17dbd11cc3bba0b9..14ea0ce7e73dbe0b18c255b8678c3a23ad44c5bc 100644
--- a/torch_npu/csrc/npu/NPUPluggableAllocator.cpp
+++ b/torch_npu/csrc/npu/NPUPluggableAllocator.cpp
@@ -189,12 +189,6 @@ void NPUPluggableAllocator::emptyCache(bool check_error)
     }
 }
 
-void NPUPluggableAllocator::clearIpcHandles()
-{
-    TORCH_NPU_WARN("NPUPluggableAllocator does not yet support clearIpcHandles. "
-                   "If you need it, please file an issue describing your use case.");
-}
-
 void NPUPluggableAllocator::cacheInfo(int dev_id, size_t* cachedAndFree, size_t* largestBlock)
 {
     TORCH_NPU_WARN("NPUPluggableAllocator does not yet support cacheInfo. "
diff --git a/torch_npu/csrc/npu/NPUPluggableAllocator.h b/torch_npu/csrc/npu/NPUPluggableAllocator.h
index 266db02a604c906f0e5a4abf6e07d0f407504613..a3691d48eefbaf3743f5ce29a304a0dab3560151 100644
--- a/torch_npu/csrc/npu/NPUPluggableAllocator.h
+++ b/torch_npu/csrc/npu/NPUPluggableAllocator.h
@@ -60,7 +60,6 @@ struct NPUPluggableAllocator
     bool initialized() override;
     void setMemoryFraction(double fraction, int device) override;
     void emptyCache(bool check_error) override;
-    void clearIpcHandles() override;
     void cacheInfo(int dev_id, size_t* cachedAndFree, size_t* largestBlock) override;
     void* getBaseAllocation(void* ptr, size_t* size) override;
     void recordStream(const c10::DataPtr&, streamType stream) override;