diff --git a/third_party/acl/inc/acl/acl_rt.h b/third_party/acl/inc/acl/acl_rt.h
index 6d97252ba45482cc4592ff707264a74fab44f67f..4b897a33ec37eadd0cdfb4479454e32a45b59fe9 100755
--- a/third_party/acl/inc/acl/acl_rt.h
+++ b/third_party/acl/inc/acl/acl_rt.h
@@ -941,10 +941,11 @@ ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst,
 
 /**
  * @ingroup AscendCL
- * @brief  Asynchronous memory replication between Host and Device
+ * @brief  Asynchronous memory replication between Host and Device. The copy
+ *         is synchronous if the memory was not allocated through the ACL or RTS API.
  *
  * @par Function
- *  After calling this interface,
+ *  After calling this interface with memory that was allocated through the ACL or RTS API,
  *  be sure to call the aclrtSynchronizeStream interface to ensure that
  *  the task of memory replication has been completed
  *
diff --git a/torch_npu/csrc/core/npu/interface/AclInterface.cpp b/torch_npu/csrc/core/npu/interface/AclInterface.cpp
index e918d04ae73f2c5197533a53aaa8f117c6281da1..c97693eeb5bfddbec5eb57b9637f3bc5070d8360 100644
--- a/torch_npu/csrc/core/npu/interface/AclInterface.cpp
+++ b/torch_npu/csrc/core/npu/interface/AclInterface.cpp
@@ -1127,11 +1127,14 @@ aclError AclrtUnSubscribeReport(uint64_t theadId, aclrtStream stream)
 bool AclrtMemcpyAsyncWithConditionExist()
 {
     const static bool isAclrtMemcpyAsyncWithConditionExist = []() -> bool {
-        auto func  = GET_FUNC(aclrtMemcpyAsyncWithCondition)
+        auto func = GET_FUNC(aclrtMemcpyAsyncWithCondition)
+        // The conditional copy is enabled only when the SoC version compares >= Ascend910B1.
+        const bool socSupported = c10_npu::GetSocVersion() >= c10_npu::SocVersion::Ascend910B1;
         if (func != nullptr) {
             ASCEND_LOGI("Successfully to find function aclrtMemcpyAsyncWithCondition");
         }
-        return func != nullptr;
+        // Available only if the symbol was resolved AND the SoC check passed; cached by the static.
+        return func != nullptr && socSupported;
     }();
     return isAclrtMemcpyAsyncWithConditionExist;
 }
diff --git a/torch_npu/csrc/framework/OpParamMaker.cpp b/torch_npu/csrc/framework/OpParamMaker.cpp
index 3abd06421a90eee27edfd0d95756546d3af2709d..9a949332c85e1f8a1918409dfc75bcb5db6eefe2 100644
--- a/torch_npu/csrc/framework/OpParamMaker.cpp
+++ b/torch_npu/csrc/framework/OpParamMaker.cpp
@@ -451,19 +451,35 @@ int MemcopyAsyncFunc(c10_npu::queue::QueueParas *in, aclrtStream stream)
 {
     auto cur_paras = static_cast<c10_npu::queue::CopyParas *>(in->paramVal);
     logger->debug("MemcopyAsyncFunc Run.");
-    aclError ret =
-        aclrtMemcpyAsync(cur_paras->dst, cur_paras->dstLen, cur_paras->src, cur_paras->srcLen, cur_paras->kind, stream);
+    aclError ret;
+    bool flag;
+    if (c10_npu::acl::AclrtMemcpyAsyncWithConditionExist() && cur_paras->kind == aclrtMemcpyKind::ACL_MEMCPY_DEVICE_TO_HOST) {
+        flag = true;
+        ret = c10_npu::acl::AclrtMemcpyAsyncWithCondition(cur_paras->dst, cur_paras->dstLen, cur_paras->src, cur_paras->srcLen, cur_paras->kind, stream);
+    } else {
+        flag = false;
+        ret = aclrtMemcpyAsync(cur_paras->dst, cur_paras->dstLen, cur_paras->src, cur_paras->srcLen, cur_paras->kind, stream);
+    }
     if (ret != ACL_ERROR_NONE) {
         auto ret_temp = c10_npu::acl::AclrtPeekAtLastError(ACL_RT_THREAD_LEVEL);
         if (ret_temp != ACL_ERROR_NONE) {
             ret = ret_temp;
         }
-        ASCEND_LOGE(
-            "aclrtMemcpyAsync error! ret = %d, dstLen = %zu, srcLen = %zu, kind = %d",
-            ret,
-            cur_paras->dstLen,
-            cur_paras->srcLen,
-            cur_paras->kind);
+        if (flag) {
+            ASCEND_LOGE(
+                "aclrtMemcpyAsyncWithCondition error! ret = %d, dstLen = %zu, srcLen = %zu, kind = %d",
+                ret,
+                cur_paras->dstLen,
+                cur_paras->srcLen,
+                cur_paras->kind);
+        } else {
+            ASCEND_LOGE(
+                "aclrtMemcpyAsync error! ret = %d, dstLen = %zu, srcLen = %zu, kind = %d",
+                ret,
+                cur_paras->dstLen,
+                cur_paras->srcLen,
+                cur_paras->kind);
+        }
     }
     logger->debug("MemcopyAsyncFunc Run, ret = %d.", ret);
     return ret;