diff --git a/torch_npu/csrc/core/npu/sys_ctrl/npu_sys_ctrl.cpp b/torch_npu/csrc/core/npu/sys_ctrl/npu_sys_ctrl.cpp
index b11003c51c787b869c4cdd4aff09e2e7a4589f27..a47d8be28c51b89ecaa157dabf0cb12081bfe918 100644
--- a/torch_npu/csrc/core/npu/sys_ctrl/npu_sys_ctrl.cpp
+++ b/torch_npu/csrc/core/npu/sys_ctrl/npu_sys_ctrl.cpp
@@ -140,6 +140,10 @@ NpuSysCtrl::SysStatus NpuSysCtrl::Initialize(int device_id)
     ASCEND_LOGD("Npu workspace allocator initialize successfully");
     c10_npu::option::OptionsManager::IsOomSnapshotEnable();
 
+    if (device_id >= 0) {
+        NPU_CHECK_ERROR(c10_npu::SetDevice(device_id));
+    }
+
     if (c10_npu::option::OptionsManager::CheckAclDumpDateEnable()) {
         const char *aclConfigPath = "acl.json";
         NPU_CHECK_ERROR(aclmdlSetDump(aclConfigPath));
@@ -192,12 +196,6 @@ NpuSysCtrl::SysStatus NpuSysCtrl::LazyInitialize(int device_id)
 
     // There's no need to call c10_npu::GetDevice at the start of the process, because device 0 may not be needed
     auto ret = aclrtGetDevice(&device_id_);
-    if (ret != ACL_ERROR_NONE) {
-        device_id_ = (device_id == -1) ? 0 : device_id;
-        NPU_CHECK_ERROR(c10_npu::SetDevice(device_id_));
-    } else {
-        ASCEND_LOGW("Npu device %d has been set before global init.", device_id_);
-    }
 
     if (c10_npu::IsSupportInfNan()) {
         c10_npu::acl::AclrtSetDeviceSatMode(aclrtFloatOverflowMode::ACL_RT_OVERFLOW_MODE_INFNAN);