diff --git a/inferrt/src/CMakeLists.txt b/inferrt/src/CMakeLists.txt
index 901648c4d6d214cf82f1270512295e81bf90a633..55da6881fe3687196572a72b6a8b83fb6e27a599 100644
--- a/inferrt/src/CMakeLists.txt
+++ b/inferrt/src/CMakeLists.txt
@@ -41,11 +41,12 @@ add_subdirectory(optimize)
 add_subdirectory(pybind)
 add_subdirectory(runtime)
 
-set(OBJECTS lexer_obj parser_obj ir_obj compiler_obj vm_obj tensor_obj runtime_obj ops_obj pass_obj)
+set(OBJECTS lexer_obj parser_obj ir_obj compiler_obj vm_obj tensor_obj runtime_obj ops_obj pass_obj hardware_abstract_obj hardware_ascend_obj)
+set(TEST_OBJECTS test_obj)
 
 # Create the da executable.
-add_executable(da lang/cli/main.cc lang/cli/options.cc)
-target_link_libraries(da ${OBJECTS} stdc++fs)
+# add_executable(da lang/cli/main.cc lang/cli/options.cc)
+# target_link_libraries(da ${OBJECTS} stdc++fs)
 
 # Create shared library.
 target_compile_options(inferrt PRIVATE -fPIC)
 add_library(inferrt SHARED lang/api/c_api.cc)
diff --git a/inferrt/src/common/common.h b/inferrt/src/common/common.h
index 78c58f9101ff7bcdf902433f619692be71459e43..16cbf7cf6fafebf5938621e1fccc72274573f049 100644
--- a/inferrt/src/common/common.h
+++ b/inferrt/src/common/common.h
@@ -23,7 +23,6 @@
 #include <iostream>
 #include <limits>
 #include <cstring>
-
 #include "common/logger.h"
 
 #define ENDL '\n'
@@ -48,6 +47,146 @@
   ; \
   ;
 
+#define DISABLE_COPY_AND_ASSIGN(ClassType)   \
+  ClassType(const ClassType &) = delete;     \
+  ClassType &operator=(const ClassType &) = delete;
+
+inline uint32_t LongToUint(int64_t u) {
+  if (u < 0) {
+    LOG_ERROR << "The int64_t value(" << u << ") is less than 0.";
+  }
+  if (u > static_cast<int64_t>((std::numeric_limits<uint32_t>::max)())) {
+    LOG_ERROR << "The int64_t value(" << u << ") exceeds the maximum value of uint32_t.";
+  }
+  return static_cast<uint32_t>(u);
+}
+
+inline size_t FloatToSize(float u) {
+  if (u < 0) {
+    LOG_ERROR << "The float value(" << u << ") is less than 0.";
+  }
+
+  if (u > static_cast<float>((std::numeric_limits<size_t>::max)())) {
+    LOG_ERROR << "The float value(" << u << ") exceeds the maximum value of size_t.";
+  }
+  return static_cast<size_t>(u);
+}
+
+inline float IntToFloat(int32_t v) { return static_cast<float>(v); }
+
+inline size_t LongToSize(int64_t u) {
+  if (u < 0) {
+    LOG_ERROR << "The int64_t value(" << u << ") is less than 0.";
+  }
+  return static_cast<size_t>(u);
+}
+
+inline int FloatToInt(float u) {
+  if (u > static_cast<float>((std::numeric_limits<int>::max)())) {
+    LOG_ERROR << "The float value(" << u << ") exceeds the maximum value of int.";
+  }
+  return static_cast<int>(u);
+}
+
+inline int64_t FloatToLong(float u) {
+  if (u > static_cast<float>((std::numeric_limits<int64_t>::max)())) {
+    LOG_ERROR << "The float value(" << u << ") exceeds the maximum value of int64_t.";
+  }
+  return static_cast<int64_t>(u);
+}
+
+inline int64_t DoubleToLong(double u) {
+  if (u > static_cast<double>((std::numeric_limits<int64_t>::max)())) {
+    LOG_ERROR << "The double value(" << u << ") exceeds the maximum value of int64_t.";
+  }
+  return static_cast<int64_t>(u);
+}
+
+inline float SizeToFloat(size_t v) { return static_cast<float>(v); }
+
+inline uint64_t SizeToUlong(size_t u) { return static_cast<uint64_t>(u); }
+
+inline int SizeToInt(size_t u) {
+  if (u > static_cast<size_t>((std::numeric_limits<int>::max)())) {
+    LOG_ERROR << "The size_t value(" << u << ") exceeds the maximum value of int.";
+  }
+  return static_cast<int>(u);
+}
+
+inline uint32_t SizeToUint(size_t u) {
+  if (u > static_cast<size_t>((std::numeric_limits<uint32_t>::max)())) {
+    LOG_ERROR << "The size_t value(" << u << ") exceeds the maximum value of uint32_t.";
+  }
+  return static_cast<uint32_t>(u);
+}
LOG_ERROR << "The size_t value(" << u << ") exceeds the maximum value of int64_t."; + } + return static_cast(u); +} + +inline double LongToDouble(int64_t v) { return static_cast(v); } + +inline float LongToFloat(int64_t v) { return static_cast(v); } + +inline double FloatToDouble(float v) { return static_cast(v); } + +inline uint32_t IntToUint(int32_t u) { + if (u < 0) { + LOG_ERROR << "The int32_t value(" << u << ") is less than 0."; + } + return static_cast(u); +} + +inline int32_t UintToInt(uint32_t u) { + if (u > static_cast((std::numeric_limits::max)())) { + LOG_ERROR << "The uint32_t value(" << u << ") exceeds the maximum value of int32_t."; + } + return static_cast(u); +} + +inline uint64_t LongToUlong(int64_t u) { + if (u < 0) { + LOG_ERROR << "The int64_t value(" << u << ") is less than 0."; + } + return static_cast(u); +} + +inline int32_t LongToInt(int64_t u) { + if (u > static_cast((std::numeric_limits::max)())) { + LOG_ERROR << "The size_t value(" << u << ") exceeds the maximum value of int."; + } + return static_cast(u); +} + +inline int64_t IntToLong(int32_t v) { return static_cast(v); } + +inline int64_t UlongToLong(uint64_t u) { + if (u > static_cast((std::numeric_limits::max)())) { + LOG_ERROR << "The uint64_t value(" << u << ") exceeds the maximum value of int64_t."; + } + return static_cast(u); +} + +inline unsigned int UlongToUint(uint64_t u) { + if (u > static_cast((std::numeric_limits::max)())) { + LOG_ERROR << "The size_t value(" << u << ") exceeds the maximum value of unsigned int."; + } + return static_cast(u); +} + +inline uint8_t *AddressOffset(void *address, size_t offset) { + CHECK_IF_NULL(address); + return static_cast(address) + offset; +} + +inline size_t CalAddressOffset(void *dst_address, void *ori_address) { + CHECK_IF_NULL(dst_address); + CHECK_IF_NULL(ori_address); + return static_cast(dst_address) - static_cast(ori_address); +} + static inline void CompileMessage(const std::string &filename, const int line, const int col, const std::string &msg) { std::cout << filename << ':' << line << ':' << (col + 1) << ": " << msg << '\n'; } @@ -73,6 +212,16 @@ static inline size_t SkipWhiteSpace(const char *str) { return pos; } +#ifndef MS_UNLIKELY +#ifdef _MSC_VER +#define MS_UNLIKELY(x) (x) +#define MS_LIKELY(x) (x) +#else +#define MS_LIKELY(x) __builtin_expect(!!(x), 1) +#define MS_UNLIKELY(x) __builtin_expect(!!(x), 0) +#endif +#endif + template int FindNameIndex(const char *str, T *table, size_t tableSize) { const auto strLen = strlen(str); diff --git a/inferrt/src/hardware/CMakeLists.txt b/inferrt/src/hardware/CMakeLists.txt index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..684141638e951733347e4c69a168c2553358c012 100644 --- a/inferrt/src/hardware/CMakeLists.txt +++ b/inferrt/src/hardware/CMakeLists.txt @@ -0,0 +1,5 @@ + +add_subdirectory(hardware_abstract) +add_subdirectory(ascend) +add_dependencies(hardware_ascend_obj hardware_abstract_obj) +add_subdirectory(tests) diff --git a/inferrt/src/hardware/ascend/CMakeLists.txt b/inferrt/src/hardware/ascend/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..d2d98dc376af0179525a2ec27a380f060450445e --- /dev/null +++ b/inferrt/src/hardware/ascend/CMakeLists.txt @@ -0,0 +1,21 @@ +check_debug_log_out() + +if(DEFINED ENV{ASCEND_CUSTOM_PATH}) + set(ASCEND_PATH $ENV{ASCEND_CUSTOM_PATH}) +else() + set(ASCEND_PATH /usr/local/Ascend) +endif() + +message("Note compile ascend path: ${ASCEND_PATH}") +include_directories(${ASCEND_PATH}/latest/include/) + +find_package(Python3 COMPONENTS 
+find_package(Python3 COMPONENTS Interpreter Development REQUIRED)
+include_directories(${Python3_INCLUDE_DIRS})
+
+set(depname "pybind11")
+set(PYBIND11_PATH "${PROJECT_SOURCE_DIR}/${depname}-src")
+include_directories(${PYBIND11_PATH}/include)
+
+file(GLOB_RECURSE HARDWARE_ASCEND_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
+add_library(hardware_ascend_obj STATIC ${HARDWARE_ASCEND_SRC_FILES})
\ No newline at end of file
diff --git a/inferrt/src/hardware/ascend/ascend_device_context.cc b/inferrt/src/hardware/ascend/ascend_device_context.cc
new file mode 100644
index 0000000000000000000000000000000000000000..6e50a7d4d3d471d0939030a4840c9a8b5b31f1c5
--- /dev/null
+++ b/inferrt/src/hardware/ascend/ascend_device_context.cc
@@ -0,0 +1,129 @@
+/**
+ * Copyright 2022-2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hardware/ascend/ascend_device_context.h"
+#include <unistd.h>
+#include <map>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <utility>
+#include "hardware/hardware_abstract/common.h"
+#include "hardware/hardware_abstract/device_context_manager.h"
+#include "hardware/ascend/res_manager/symbol_interface/acl_base_symbol.h"
+#include "hardware/ascend/res_manager/symbol_interface/acl_rt_symbol.h"
+#include "hardware/ascend/res_manager/symbol_interface/symbol_utils.h"
+#include "hardware/ascend/res_manager/symbol_interface/acl_compiler_symbol.h"
+#include "common/common.h"
+
+namespace mindspore {
+namespace device {
+namespace ascend {
+namespace {
+constexpr auto kSaturationMode = "Saturation";
+constexpr auto kINFNANMode = "INFNAN";
+const char kAscendDevice[] = "Ascend";
+}  // namespace
+
+void AscendDeviceContext::InitializeForAclop() const {
+  if (initialized_aclop_) {
+    return;
+  }
+
+  LOG_OUT << "Start initializing for acl.";
+  LoadAscendApiSymbols();
+  // if (!UseSimulationApi()) {
+  //   auto ms_context = MsContext::GetInstance();
+  //   CHECK_IF_NULL(ms_context);
+  //   auto device_id = ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
+  //   device::DeviceContextKey host_key = {device::GetDeviceNameByType(device::DeviceType::kAscend), device_id};
+  //   device::DeviceContext *host_context =
+  //     device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext(host_key);
+  //   CHECK_IF_NULL(host_context);
+  //   CHECK_IF_NULL(host_context->device_res_manager_);
+  //   auto ascend_res_manager = dynamic_cast<AscendResManager *>(host_context->device_res_manager_.get());
+  //   ascend_res_manager->InitializeForGe();
+  // }
+
+  // initialized_aclop_ = true;
+  LOG_OUT << "End initializing for acl.";
+}
+
+void AscendDeviceContext::Initialize() {
+  GilReleaseWithCheck gil_release;
+  std::lock_guard<std::mutex> lock(init_mutex_);
+  if (initialized_) {
+    return;
+  }
+
+  LOG_OUT << "Start initializing device context.";
+  LoadAscendApiSymbols();
+  // Set the overflow mode.
+  // auto ms_context = MsContext::GetInstance();
+  // CHECK_IF_NULL(ms_context);
+  // const auto &soc_version = ms_context->ascend_soc_version();
+  // if (soc_version == "ascend910b" || soc_version == "ascend910_93") {
+  //   bool is_sat = (common::GetEnv("MS_ASCEND_CHECK_OVERFLOW_MODE") == "SATURATION_MODE");
+  //   auto mode = (is_sat) ? aclrtFloatOverflowMode::ACL_RT_OVERFLOW_MODE_SATURATION
+  //                        : aclrtFloatOverflowMode::ACL_RT_OVERFLOW_MODE_INFNAN;
+  //   auto overflow_mode = (is_sat) ? kSaturationMode : kINFNANMode;
+  //   MS_LOG(INFO) << "The current overflow detection mode is " << overflow_mode << ".";
+  //   auto ret = CALL_ASCEND_API(aclrtSetDeviceSatMode, mode);
+  //   if (ret != ACL_SUCCESS) {
+  //     MS_LOG(EXCEPTION) << "Set " << overflow_mode << " mode failed.";
+  //   }
+  // }
+
+  CHECK_IF_NULL(device_res_manager_);
+  device_res_manager_->Initialize();
+
+  // Set MS_CTX_ENABLE_GE_HETEROGENOUS true according to the heterogeneous mode.
+  // ms_context->set_param<bool>(MS_CTX_ENABLE_GE_HETEROGENOUS, false);
+
+  // if (ms_context->GetBackend() == kBackendGE) {
+  //   InitializeForAclop();
+  // }
+
+  initialized_ = true;
+  pid_ = getpid();  // Set the pid at first initialization.
+  LOG_OUT << "End initializing device context.";
+}
+
+void AscendDeviceContext::Destroy() {
+  if (pid_ != getpid()) {
+    // Check whether the device context needs to be released.
+    // The device context is copied into the independent dataset process by fork,
+    // but it does not need to be released in that process.
+    LOG_OUT << "The device context is not initialized by current process, it doesn't need to be destroyed.";
+    return;
+  }
+
+  if (device_res_manager_ == nullptr) {
+    return;
+  }
+  // The device resource manager must be destroyed before 'FinalizeGe'; otherwise some runtime APIs throw exceptions.
+  // For GE it has already been destroyed in graph_executor->finalize.
+  device_res_manager_->Destroy();
+  // device::ascend::AclnnFinalize();
+
+  initialized_ = false;
+}
+
+MS_REGISTER_DEVICE(kAscendDevice, AscendDeviceContext);
+}  // namespace ascend
+}  // namespace device
+}  // namespace mindspore
diff --git a/inferrt/src/hardware/ascend/ascend_device_context.h b/inferrt/src/hardware/ascend/ascend_device_context.h
new file mode 100644
index 0000000000000000000000000000000000000000..2a2b3c6f976369134520079f6f164bf254f2daf8
--- /dev/null
+++ b/inferrt/src/hardware/ascend/ascend_device_context.h
@@ -0,0 +1,53 @@
+/**
+ * Copyright 2022-2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_RUNTIME_HARDWARE_ASCEND_ASCEND_DEVICE_CONTEXT_H_
+#define MINDSPORE_CCSRC_RUNTIME_HARDWARE_ASCEND_ASCEND_DEVICE_CONTEXT_H_
+
+#include <sys/types.h>
+#include <memory>
+#include <string>
+#include "common/common.h"
+#include "hardware/hardware_abstract/device_context.h"
+#include "hardware/hardware_abstract/memory_manager.h"
+#include "hardware/ascend/res_manager/ascend_res_manager.h"
+
+namespace mindspore {
+namespace device {
+namespace ascend {
+class AscendResManager;
+
+class AscendDeviceContext : public DeviceInterface {
+ public:
+  explicit AscendDeviceContext(const DeviceContextKey &device_context_key) : DeviceInterface(device_context_key) {}
+  ~AscendDeviceContext() override = default;
+
+  void Initialize() override;
+
+  void InitializeForAclop() const;
+
+  void Destroy() override;
+
+ private:
+  DISABLE_COPY_AND_ASSIGN(AscendDeviceContext);
+
+  mutable bool initialized_aclop_{false};
+  pid_t pid_{0};  // Indicates the process id which creates the context.
+};
+}  // namespace ascend
+}  // namespace device
+}  // namespace mindspore
+
+#endif  // MINDSPORE_CCSRC_RUNTIME_HARDWARE_ASCEND_ASCEND_DEVICE_CONTEXT_H_
diff --git a/inferrt/src/hardware/ascend/res_manager/ascend_event.cc b/inferrt/src/hardware/ascend/res_manager/ascend_event.cc
new file mode 100644
index 0000000000000000000000000000000000000000..29d645a16d6d72aa654ae87f66ca9bf759df975b
--- /dev/null
+++ b/inferrt/src/hardware/ascend/res_manager/ascend_event.cc
@@ -0,0 +1,204 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hardware/ascend/res_manager/ascend_event.h"
+#include <memory>
+#include <string>
+#include "hardware/ascend/res_manager/ascend_stream_manager.h"
+
+#include "common/common.h"
+#include "hardware/ascend/res_manager/symbol_interface/acl_rt_symbol.h"
+#include "hardware/ascend/res_manager/symbol_interface/symbol_utils.h"
+
+namespace mindspore::device::ascend {
+AscendEvent::AscendEvent() {
+  auto ret = CALL_ASCEND_API(aclrtCreateEvent, &event_);
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "aclrtCreateEvent failed, ret:" << ret;
+    event_ = nullptr;
+  }
+}
+
+AscendEvent::AscendEvent(uint32_t flag, bool use_extensional_api) {
+  aclError ret;
+  if (use_extensional_api) {
+    ret = CALL_ASCEND_API(aclrtCreateEventExWithFlag, &event_, flag);
+  } else {
+    ret = CALL_ASCEND_API(aclrtCreateEventWithFlag, &event_, flag);
+  }
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << (use_extensional_api ? "aclrtCreateEventExWithFlag" : "aclrtCreateEventWithFlag")
+              << " failed, ret:" << ret;
+    event_ = nullptr;
+  }
+  has_flag_ = true;
+  LOG_OUT << "Create ascend event success, flag : " << flag << ".";
+}
+
+AscendTimeEvent::AscendTimeEvent() {
+  auto ret = CALL_ASCEND_API(aclrtCreateEventWithFlag, &event_, ACL_EVENT_TIME_LINE);
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "aclrtCreateEventWithFlag failed, ret:" << ret;
+    event_ = nullptr;
+  }
+}
+
+AscendEvent::~AscendEvent() {
+  if (!event_destroyed_) {
+    auto ret = CALL_ASCEND_API(aclrtDestroyEvent, event_);
+    if (ret != ACL_SUCCESS) {
+      LOG_ERROR << "aclrtDestroyEvent failed, ret:" << ret;
+    }
+  }
+
+  event_ = nullptr;
+  wait_stream_ = nullptr;
+  record_stream_ = nullptr;
+}
+
+bool AscendEvent::IsReady() const { return event_ != nullptr; }
+
+void AscendEvent::RecordEvent() {
+  CHECK_IF_NULL(event_);
+  CHECK_IF_NULL(record_stream_);
+  auto ret = CALL_ASCEND_API(aclrtRecordEvent, event_, record_stream_);
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "aclrtRecordEvent failed, ret:" << ret;
+  }
+  need_wait_ = true;
+}
+
+void AscendEvent::RecordEvent(uint32_t stream_id) {
+  LOG_OUT << "Ascend record event on stream id : " << stream_id << ".";
+  CHECK_IF_NULL(event_);
+  record_stream_ = AscendStreamMng::GetInstance().GetStream(stream_id);
+  CHECK_IF_NULL(record_stream_);
+  auto ret = CALL_ASCEND_API(aclrtRecordEvent, event_, record_stream_);
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "aclrtRecordEvent failed, ret:" << ret;
+  }
+  need_wait_ = true;
+}
+
+void AscendEvent::WaitEvent() {
+  CHECK_IF_NULL(event_);
+  CHECK_IF_NULL(wait_stream_);
+  auto ret = CALL_ASCEND_API(aclrtStreamWaitEvent, wait_stream_, event_);
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "aclrtStreamWaitEvent failed, ret:" << ret;
+  }
+  if (!has_flag_) {
+    // An event created by aclrtCreateEventExWithFlag does not support
+    // aclrtResetEvent/aclrtQueryEvent/aclrtQueryEventWaitStatus.
+    LOG_OUT << "Reset Event";
+    ret = CALL_ASCEND_API(aclrtResetEvent, event_, wait_stream_);
+    if (ret != ACL_SUCCESS) {
+      LOG_ERROR << "aclrtResetEvent failed, ret:" << ret;
+    }
+  }
+  need_wait_ = false;
+}
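+// Overload that resolves the wait stream from a stream id before waiting; the event is
+// reset afterwards (when the event type supports it) so that it can be reused.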
+bool AscendEvent::WaitEvent(uint32_t stream_id) {
+  LOG_OUT << "Ascend wait event on stream id : " << stream_id << ".";
+  wait_stream_ = AscendStreamMng::GetInstance().GetStream(stream_id);
+  auto ret = CALL_ASCEND_API(aclrtStreamWaitEvent, wait_stream_, event_);
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "aclrtStreamWaitEvent failed, ret:" << ret;
+  }
+  if (!has_flag_) {
+    // Reset the event after waiting so that the event can be reused.
+    ret = CALL_ASCEND_API(aclrtResetEvent, event_, wait_stream_);
+    if (ret != ACL_SUCCESS) {
+      LOG_ERROR << "aclrtResetEvent failed, ret:" << ret;
+    }
+  }
+  need_wait_ = false;
+  return true;
+}
+
+void AscendEvent::WaitEventWithoutReset() {
+  CHECK_IF_NULL(event_);
+  CHECK_IF_NULL(wait_stream_);
+  // The query result will be reset after aclrtResetEvent is called.
+  auto ret = CALL_ASCEND_API(aclrtStreamWaitEvent, wait_stream_, event_);
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "aclrtStreamWaitEvent failed, ret:" << ret;
+  }
+  need_wait_ = false;
+}
+
+void AscendEvent::WaitEventWithoutReset(uint32_t stream_id) {
+  wait_stream_ = AscendStreamMng::GetInstance().GetStream(stream_id);
+  WaitEventWithoutReset();
+}
+
+void AscendEvent::ResetEvent() {
+  CHECK_IF_NULL(event_);
+  CHECK_IF_NULL(wait_stream_);
+
+  LOG_OUT << "Reset Event";
+  auto ret = CALL_ASCEND_API(aclrtResetEvent, event_, wait_stream_);
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "aclrtResetEvent failed, ret:" << ret;
+  }
+}
+
+void AscendEvent::ResetEvent(uint32_t stream_id) {
+  wait_stream_ = AscendStreamMng::GetInstance().GetStream(stream_id);
+  ResetEvent();
+}
+
+void AscendEvent::SyncEvent() {
+  CHECK_IF_NULL(event_);
+  auto ret = CALL_ASCEND_API(aclrtSynchronizeEvent, event_);
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "aclrtSynchronizeEvent failed, ret:" << ret;
+  }
+}
+
+bool AscendEvent::QueryEvent() {
+  CHECK_IF_NULL(event_);
+  aclrtEventRecordedStatus status;
+  auto ret = CALL_ASCEND_API(aclrtQueryEventStatus, event_, &status);
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "aclrtQueryEventStatus failed, ret:" << ret;
+  }
+  return status == ACL_EVENT_RECORDED_STATUS_COMPLETE;
+}
+
+void AscendEvent::ElapsedTime(float *cost_time, const DeviceEvent *other) {
+  CHECK_IF_NULL(event_);
+  auto ascend_other = static_cast<const AscendEvent *>(other);
+  CHECK_IF_NULL(ascend_other);
+  CHECK_IF_NULL(ascend_other->event_);
+  auto ret = CALL_ASCEND_API(aclrtEventElapsedTime, cost_time, event_, ascend_other->event_);
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "aclrtEventElapsedTime failed, ret:" << ret;
+  }
+}
+
+bool AscendEvent::NeedWait() { return need_wait_; }
+
+bool AscendEvent::DestroyEvent() {
+  CHECK_IF_NULL(event_);
+  auto ret = CALL_ASCEND_API(aclrtDestroyEvent, event_);
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "aclrtDestroyEvent failed, ret:" << ret;
+  }
+  event_destroyed_ = true;
+  return true;
+}
+}  // namespace mindspore::device::ascend
diff --git a/inferrt/src/hardware/ascend/res_manager/ascend_event.h b/inferrt/src/hardware/ascend/res_manager/ascend_event.h
new file mode 100644
index 0000000000000000000000000000000000000000..8dc5f87d04ed1ab113732ce8a995186904dfa0f4
--- /dev/null
+++ b/inferrt/src/hardware/ascend/res_manager/ascend_event.h
@@ -0,0 +1,67 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_ASCEND_EVENT_H
+#define MINDSPORE_ASCEND_EVENT_H
+
+#include "hardware/hardware_abstract/device_event.h"
+#include "acl/acl_rt.h"
+#include "hardware/hardware_abstract/visible.h"
+
+namespace mindspore::device::ascend {
+constexpr uint32_t ACL_EVENT_DEFAULT = 0x0000000Eu;
+
+class HARDWARE_EXPORT AscendEvent : public DeviceEvent {
+ public:
+  AscendEvent();
+  explicit AscendEvent(uint32_t flag, bool use_extensional_api = true);
+  ~AscendEvent() override;
+
+  bool IsReady() const override;
+  void WaitEvent() override;
+  bool WaitEvent(uint32_t stream_id) override;
+  void WaitEventWithoutReset() override;
+  void WaitEventWithoutReset(uint32_t stream_id) override;
+
+  void ResetEvent() override;
+  void ResetEvent(uint32_t stream_id) override;
+
+  void RecordEvent() override;
+  void RecordEvent(uint32_t stream_id) override;
+  bool NeedWait() override;
+  void SyncEvent() override;
+  bool QueryEvent() override;
+  void ElapsedTime(float *cost_time, const DeviceEvent *other) override;
+  bool DestroyEvent() override;
+  void set_wait_stream(aclrtStream wait_stream) override { wait_stream_ = wait_stream; }
+  void set_record_stream(aclrtStream record_stream) override { record_stream_ = record_stream; }
+
+ protected:
+  aclrtEvent event_{nullptr};
+  aclrtStream wait_stream_{nullptr};
+  aclrtStream record_stream_{nullptr};
+  bool need_wait_{false};
+  bool event_destroyed_{false};
+  bool has_flag_{false};
+};
+
+class HARDWARE_EXPORT AscendTimeEvent : public AscendEvent {
+ public:
+  AscendTimeEvent();
+  ~AscendTimeEvent() override = default;
+};
+}  // namespace mindspore::device::ascend
+#endif  // MINDSPORE_ASCEND_EVENT_H
diff --git a/inferrt/src/hardware/ascend/res_manager/ascend_hal_manager.cc b/inferrt/src/hardware/ascend/res_manager/ascend_hal_manager.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ffd7129ddc0d2c7ac1a6e4c5904a3dafcd08118f
--- /dev/null
+++ b/inferrt/src/hardware/ascend/res_manager/ascend_hal_manager.cc
@@ -0,0 +1,209 @@
+/**
+ * Copyright 2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hardware/ascend/res_manager/ascend_hal_manager.h"
+
+#include <map>
+#include <set>
+#include <string>
+#include "common/common.h"
+#include "acl/acl_rt.h"
+
+#include "hardware/ascend/res_manager/symbol_interface/acl_rt_symbol.h"
+#include "hardware/ascend/res_manager/symbol_interface/acl_symbol.h"
+#include "hardware/ascend/res_manager/symbol_interface/symbol_utils.h"
+
+namespace mindspore {
+namespace device {
+namespace ascend {
+namespace {
+constexpr auto kSaturationMode = "Saturation";
+constexpr auto kINFNANMode = "INFNAN";
+
+// std::string GenerateAclInitJsonPath() {
+//   const pid_t pid = getpid();
+//   std::string rankid_str = common::GetEnv("RANK_ID");
+//   if (mindspore::DistributedMeta::GetInstance()->initialized()) {
+//     rankid_str = std::to_string(mindspore::DistributedMeta::GetInstance()->global_rank_id());
+//   }
+//   constexpr size_t random_len = 12;
+//   auto rand_str = Common::GetRandomStr(random_len);
+//   return "/tmp/aclinit_" + rankid_str + "_" + std::to_string(pid) + "_" + rand_str + ".json";
+// }
+}  // namespace
+static thread_local aclrtContext thread_local_rt_context{nullptr};
+
+AscendHalManager AscendHalManager::instance_{};
+AscendHalManager &AscendHalManager::GetInstance() { return instance_; }
+
+void AscendHalManager::InitDevice(uint32_t device_id) {
+  LOG_OUT << "Enter SetRtDevice, current initialized device number:" << initialized_device_set_.size();
+  if (initialized_device_set_.find(device_id) != initialized_device_set_.end()) {
+    LOG_OUT << "Device " << device_id << " has been set";
+    return;
+  }
+
+  auto ret = CALL_ASCEND_API(aclrtSetDevice, UintToInt(device_id));
+  if (ret != ACL_SUCCESS) {
+    auto device_count = GetDeviceCount();
+    LOG_ERROR << "Call aclrtSetDevice failed, ret[" << static_cast<int>(ret) << "]. Got device count[" << device_count
+              << "] and device id[" << device_id << "], please check if device id is valid.";
+  }
+
+  aclrtContext rt_context;
+  ret = CALL_ASCEND_API(aclrtGetCurrentContext, &rt_context);
+  if (ret != ACL_SUCCESS || rt_context == nullptr) {
+    LOG_ERROR << "Call aclrtGetCurrentContext failed, ret[" << ret << "]";
+    return;
+  }
+
+  default_device_context_map_[device_id] = rt_context;
+  (void)initialized_device_set_.insert(device_id);
+}
+
+void AscendHalManager::ResetDevice(uint32_t device_id) {
+  if (initialized_device_set_.find(device_id) != initialized_device_set_.end()) {
+    auto ret = CALL_ASCEND_API(aclrtResetDevice, UintToInt(device_id));
+    if (ret != ACL_SUCCESS) {
+      LOG_ERROR << "Call aclrtResetDevice failed, ret[" << ret << "]";
+    }
+    default_device_context_map_[device_id] = nullptr;
+    (void)initialized_device_set_.erase(device_id);
+  }
+}
+
+uint32_t AscendHalManager::GetDeviceCount() {
+  uint32_t device_count = 0;
+  auto ret = CALL_ASCEND_API(aclrtGetDeviceCount, &device_count);
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "Call aclrtGetDeviceCount failed, ret[" << static_cast<int>(ret) << "]";
+  }
+  return device_count;
+}
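+// Saturation mode clamps overflowed floating-point results to the type's extremes, while
+// INF/NAN mode keeps IEEE-754 semantics; which modes are available depends on the SoC.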
+void AscendHalManager::SetDeviceSatMode(const aclrtFloatOverflowMode &overflow_mode) {
+  auto overflow_mode_str =
+    (overflow_mode == aclrtFloatOverflowMode::ACL_RT_OVERFLOW_MODE_SATURATION) ? kSaturationMode : kINFNANMode;
+  LOG_OUT << "The current overflow detection mode is " << overflow_mode_str << ".";
+  auto ret = CALL_ASCEND_API(aclrtSetDeviceSatMode, overflow_mode);
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "Set " << overflow_mode_str << " mode failed.";
+  }
+}
+
+void AscendHalManager::SetOpWaitTimeout(uint32_t op_wait_timeout) {
+  LOG_OUT << "Set op wait timeout: " << op_wait_timeout << " s";
+  auto acl_ret = CALL_ASCEND_API(aclrtSetOpWaitTimeout, op_wait_timeout);
+  if (acl_ret != ACL_SUCCESS) {
+    LOG_ERROR << "Set op wait timeout failed, error: " << acl_ret;
+  }
+}
+
+void AscendHalManager::SetOpExecuteTimeOut(uint32_t op_execute_timeout) {
+  LOG_OUT << "Set op execute timeout: " << op_execute_timeout << " s";
+  auto acl_ret = CALL_ASCEND_API(aclrtSetOpExecuteTimeOut, op_execute_timeout);
+  if (acl_ret != ACL_SUCCESS) {
+    LOG_ERROR << "Set op execute timeout failed, error: " << acl_ret;
+  }
+}
+
+aclrtContext AscendHalManager::CreateContext(uint32_t device_id) {
+  aclrtContext rt_context;
+  auto ret = CALL_ASCEND_API(aclrtCreateContext, &rt_context, device_id);
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "Call aclrtCreateContext failed, ret: " << ret;
+  }
+  rt_contexts_.insert(rt_context);
+  return rt_context;
+}
+
+void AscendHalManager::ResetContext(uint32_t device_id) {
+  aclrtContext rt_context = CreateContext(device_id);
+  default_device_context_map_[device_id] = rt_context;
+}
+
+void AscendHalManager::DestroyContext(aclrtContext context) {
+  auto ret = CALL_ASCEND_API(aclrtDestroyContext, context);
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "Failed to destroy context, ret = " << ret << ".";
+  }
+  rt_contexts_.erase(context);
+}
+
+void AscendHalManager::DestroyAllContext() {
+  for (auto context : rt_contexts_) {
+    auto ret = CALL_ASCEND_API(aclrtDestroyContext, context);
+    if (ret != ACL_SUCCESS) {
+      LOG_ERROR << "Failed to destroy context, ret = " << ret << ".";
+    }
+  }
+  rt_contexts_.clear();
+}
+
+void AscendHalManager::SetContextForce(uint32_t device_id) {
+  if (default_device_context_map_[device_id] == nullptr) {
+    return;
+  }
+  auto ret = CALL_ASCEND_API(aclrtSetCurrentContext, default_device_context_map_[device_id]);
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "Call aclrtSetCurrentContext failed, ret[" << ret << "]";
+  }
+}
+
+void AscendHalManager::SetContext(uint32_t device_id) {
+  if (default_device_context_map_[device_id] == nullptr) {
+    return;
+  }
+  if (thread_local_rt_context == default_device_context_map_[device_id]) {
+    return;
+  }
+  auto ret = CALL_ASCEND_API(aclrtSetCurrentContext, default_device_context_map_[device_id]);
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "Call aclrtSetCurrentContext failed, ret[" << ret << "]";
+  }
+  thread_local_rt_context = default_device_context_map_[device_id];
+}
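+// aclInit is currently a no-op here: the json-based initialization from the original
+// implementation is kept below for reference but disabled.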
+void AscendHalManager::InitializeAcl() {
+  // std::lock_guard<std::mutex> lock(acl_init_mutex_);
+  // if (acl_initialized_) {
+  //   return;
+  // }
+  // acl_initialized_ = true;
+  // std::string file_name = GenerateAclInitJsonPath();
+  // std::string json_str;
+  // auto realpath = Common::CreatePrefixPath(file_name);
+  // if (!realpath.has_value()) {
+  //   MS_LOG(WARNING) << "Failed to get real path: [" << file_name << "] in generate aclInit json file path.";
+  //   return;
+  // }
+  // if (!OpDebugConf::GetInstance()->GenerateAclInitJson(realpath.value(), &json_str)) {
+  //   MS_LOG(WARNING) << "Failed to generate aclinit json, the file path is " << realpath.value() << ".";
+  //   return;
+  // }
+  // aclError ret = CALL_ASCEND_API(aclInit, realpath.value().c_str());
+  // TempFileManager::GetInstance().RemoveFile(realpath.value());
+  // if (ret != ACL_SUCCESS) {
+  //   MS_LOG(WARNING) << "Call aclInit failed, the error number is " << ret << ", json is " << json_str;
+  // } else {
+  //   MS_LOG(INFO) << "Call aclInit successfully, json is " << json_str;
+  // }
+}
+
+bool AscendHalManager::EnableLccl() { return false; }
+}  // namespace ascend
+}  // namespace device
+}  // namespace mindspore
diff --git a/inferrt/src/hardware/ascend/res_manager/ascend_hal_manager.h b/inferrt/src/hardware/ascend/res_manager/ascend_hal_manager.h
new file mode 100644
index 0000000000000000000000000000000000000000..c6722bac702c1bbffd6492b0b84de8f838c1e8fb
--- /dev/null
+++ b/inferrt/src/hardware/ascend/res_manager/ascend_hal_manager.h
@@ -0,0 +1,72 @@
+/**
+ * Copyright 2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_PLUGIN_RES_MANAGER_ASCEND_HAL_MANAGER_ASCEND_HAL_MANAGER_H_
+#define MINDSPORE_CCSRC_PLUGIN_RES_MANAGER_ASCEND_HAL_MANAGER_ASCEND_HAL_MANAGER_H_
+
+#include <map>
+#include <mutex>
+#include <set>
+#include "acl/acl_rt.h"
+#include "hardware/hardware_abstract/visible.h"
+
+namespace mindspore {
+namespace device {
+namespace ascend {
+class HARDWARE_EXPORT AscendHalManager {
+ public:
+  static AscendHalManager &GetInstance();
+
+  ~AscendHalManager() {}
+
+  // Device management.
+  uint32_t GetDeviceCount();
+  void InitDevice(uint32_t device_id);
+  void ResetDevice(uint32_t device_id);
+  void SetDeviceSatMode(const aclrtFloatOverflowMode &overflow_mode);
+  void SetOpWaitTimeout(uint32_t op_wait_timeout);
+  void SetOpExecuteTimeOut(uint32_t op_execute_timeout);
+  void InitializeAcl();
+  bool EnableLccl();
+
+  // Context management.
+  aclrtContext CreateContext(uint32_t device_id);
+  // Reset the default context of device_id.
+  void ResetContext(uint32_t device_id);
+  void SetContext(uint32_t device_id);
+  void SetContextForce(uint32_t device_id);
+  void DestroyContext(aclrtContext context);
+  void DestroyAllContext();
+
+ private:
+  static AscendHalManager instance_;
+  std::set<uint32_t> initialized_device_set_{};
+  // The default <device_id, aclrtContext> pairs.
+  std::map<uint32_t, aclrtContext> default_device_context_map_;
+
+  // Contexts created by aclrtCreateContext, to be destroyed at teardown.
+  std::set<aclrtContext> rt_contexts_;
+
+  bool acl_initialized_ = false;
+  std::mutex acl_init_mutex_;
+};
+
+}  // namespace ascend
+}  // namespace device
+}  // namespace mindspore
+
+#endif  // MINDSPORE_CCSRC_PLUGIN_RES_MANAGER_ASCEND_HAL_MANAGER_ASCEND_HAL_MANAGER_H_
diff --git a/inferrt/src/hardware/ascend/res_manager/ascend_res_manager.cc b/inferrt/src/hardware/ascend/res_manager/ascend_res_manager.cc
new file mode 100644
index 0000000000000000000000000000000000000000..72ac05495a0d488054a939819a29f74994c0e535
--- /dev/null
+++ b/inferrt/src/hardware/ascend/res_manager/ascend_res_manager.cc
@@ -0,0 +1,461 @@
+/**
+ * Copyright 2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hardware/ascend/res_manager/ascend_res_manager.h"
+#ifndef _WIN32
+#include <dlfcn.h>
+#include <libgen.h>
+#endif
+#include <algorithm>
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+#include "hardware/hardware_abstract/dlopen_macro.h"
+#include "hardware/ascend/res_manager/mem_manager/ascend_memory_manager.h"
+#include "hardware/ascend/res_manager/mem_manager/ascend_vmm_adapter.h"
+#include "hardware/ascend/res_manager/ascend_event.h"
+#include "hardware/ascend/res_manager/symbol_interface/acl_compiler_symbol.h"
+#include "hardware/ascend/res_manager/symbol_interface/acl_rt_symbol.h"
+#include "hardware/ascend/res_manager/symbol_interface/symbol_utils.h"
+#include "acl/acl_rt.h"
+#include "hardware/hardware_abstract/device_context.h"
+#include "hardware/hardware_abstract/device_context_manager.h"
+#include "hardware/ascend/res_manager/ascend_hal_manager.h"
+#include "common/common.h"
+
+namespace mindspore {
+namespace device {
+namespace ascend {
+namespace {
+constexpr uint32_t kDefaultHcclExecTimeout = 1800;
+
+using Callback = std::function<void(void)>;
+std::mutex set_opt_mutex;
+
+void AclrtLaunchCallback(void *user_data) {
+  Callback *callback_func = reinterpret_cast<Callback *>(user_data);
+  (*callback_func)();
+  delete callback_func;
+}
+}  // namespace
+
+void AscendResManager::Initialize() {
+  // Use device 0 temporarily.
+  device_id_ = 0;
+  if (initialized_) {
+    AscendHalManager::GetInstance().SetContextForce(device_id_);
+    return;
+  }
+  // Init device.
+  AscendHalManager::GetInstance().InitDevice(device_id_);
+  AscendStreamMng::GetInstance().CreateDefaultStream();
+  mem_manager_ = std::make_shared<AscendMemoryManager>();
+  CHECK_IF_NULL(mem_manager_);
+  mem_manager_->Initialize();
+  initialized_ = true;
+}
+
+void AscendResManager::Destroy() {
+  if (!initialized_) {
+    AscendHalManager::GetInstance().SetContextForce(device_id_);
+    return;
+  }
+  // To avoid calling aclrtProcessReport after process exit, clear all callback threads first.
+  AscendStreamMng::GetInstance().Clear();
+
+  (void)DestroyAllEvents();
+
+  AscendStreamMng::GetInstance().DestroyAllRtEvents();
+  if (!AscendStreamMng::GetInstance().DestroyAllStreams()) {
+    LOG_ERROR << "Failed to destroy all streams when resetting device.";
+  }
+  // Release memory.
+  if (mem_manager_ != nullptr) {
+    mem_manager_->Finalize();
+    mem_manager_ = nullptr;
+  }
+
+  // All unmap/free operations will fail after calling aclrtResetDevice in ResetDevice,
+  // so ClearAllMemory must be called before that.
+  AscendVmmAdapter::GetInstance().ClearAllMemory();
+  AscendHalManager::GetInstance().ResetDevice(device_id_);
+
+  initialized_ = false;
+}
+
+bool AscendResManager::IsEnableVmm() const { return AscendVmmAdapter::GetInstance().IsEnabled(); }
+
+void *AscendResManager::AllocateMemory(size_t size, uint32_t stream_id) const {
+  AscendHalManager::GetInstance().SetContext(device_id_);
+  CHECK_IF_NULL(mem_manager_);
+  return mem_manager_->MallocMemFromMemPool(size, false, false, stream_id);
+}
+
+void *AscendResManager::AllocateStaticMemory(size_t size, uint32_t stream_id) const {
+  AscendHalManager::GetInstance().SetContext(device_id_);
+  CHECK_IF_NULL(mem_manager_);
+  return mem_manager_->MallocMemFromMemPool(size, true, false, stream_id);
+}
+
+size_t AscendResManager::GetMaxUsedMemorySize() const {
+  CHECK_IF_NULL(mem_manager_);
+  return mem_manager_->GetMaxUsedMemorySize();
+}
+
+void AscendResManager::FreeMemory(void *ptr) const {
+  CHECK_IF_NULL(ptr);
+  CHECK_IF_NULL(mem_manager_);
+  mem_manager_->FreeMemFromMemPool(ptr);
+}
+
+void AscendResManager::FreePartMemorys(const std::vector<void *> &free_addrs, const std::vector<void *> &keep_addrs,
+                                       const std::vector<size_t> &keep_addr_sizes) const {
+  AscendMemoryPool::GetInstance().FreePartTensorMems(free_addrs, keep_addrs, keep_addr_sizes);
+}
+
+void AscendResManager::DefragMemory() { AscendMemoryPool::GetInstance().DefragMemory(); }
+
+// Relevant functions to manage memory statistics.
+size_t AscendResManager::GetTotalMemStatistics() const {
+  CHECK_IF_NULL(mem_manager_);
+  return mem_manager_->GetTotalMemStatistics();
+}
+
+size_t AscendResManager::GetTotalUsedMemStatistics() const {
+  CHECK_IF_NULL(mem_manager_);
+  return mem_manager_->GetTotalUsedMemStatistics();
+}
+
+size_t AscendResManager::GetTotalIdleMemStatistics() const {
+  CHECK_IF_NULL(mem_manager_);
+  return mem_manager_->GetTotalIdleMemStatistics();
+}
+
+size_t AscendResManager::GetTotalEagerFreeMemStatistics() const {
+  CHECK_IF_NULL(mem_manager_);
+  return mem_manager_->GetTotalEagerFreeMemStatistics();
+}
+
+size_t AscendResManager::GetUsedMemPeakStatistics() const {
+  CHECK_IF_NULL(mem_manager_);
+  return mem_manager_->GetUsedMemPeakStatistics();
+}
+
+size_t AscendResManager::GetReservedMemPeakStatistics() const {
+  CHECK_IF_NULL(mem_manager_);
+  return mem_manager_->GetReservedMemPeakStatistics();
+}
+
+std::unordered_map<std::string, std::size_t> AscendResManager::GetBlockCountsStatistics() const {
+  CHECK_IF_NULL(mem_manager_);
+  return mem_manager_->GetBlockCountsStatistics();
+}
+
+std::unordered_map<std::string, std::size_t> AscendResManager::GetBlockUnitSizeStatistics() const {
+  CHECK_IF_NULL(mem_manager_);
+  return mem_manager_->GetBlockUnitSizeStatistics();
+}
+
+DeviceMemInfo AscendResManager::GetCommonMemBlocksInfoStatistics() const {
+  CHECK_IF_NULL(mem_manager_);
+  return mem_manager_->GetCommonMemBlocksInfoStatistics();
+}
+
+DeviceMemInfo AscendResManager::GetPersistentMemBlocksInfoStatistics() const {
+  CHECK_IF_NULL(mem_manager_);
+  return mem_manager_->GetPersistentMemBlocksInfoStatistics();
+}
+
+void AscendResManager::ResetMaxMemoryReserved() {
+  CHECK_IF_NULL(mem_manager_);
+  auto memory_pool = mem_manager_->GetMemoryPool();
+  CHECK_IF_NULL(memory_pool);
+  memory_pool->ResetMaxMemReserved();
+}
+
+void AscendResManager::ResetMaxMemoryAllocated() {
+  CHECK_IF_NULL(mem_manager_);
+  auto memory_pool = mem_manager_->GetMemoryPool();
+  CHECK_IF_NULL(memory_pool);
+  memory_pool->ResetMaxMemAllocated();
+}
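+// Releases cached, currently-unused blocks from the memory pool back to the device and
+// returns the freed size in bytes (behavior assumed from the pool interface).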
+size_t AscendResManager::EmptyCache() {
+  CHECK_IF_NULL(mem_manager_);
+  auto memory_pool = mem_manager_->GetMemoryPool();
+  CHECK_IF_NULL(memory_pool);
+  return memory_pool->EmptyCache();
+}
+
+std::vector<void *> AscendResManager::AllocateContinuousMemory(const std::vector<size_t> &size_list,
+                                                               uint32_t stream_id) const {
+  AscendHalManager::GetInstance().SetContext(device_id_);
+
+  CHECK_IF_NULL(mem_manager_);
+  std::vector<size_t> aligned_size_list;
+  for (auto size : size_list) {
+    auto align_size = device::MemoryManager::GetCommonAlignSize(size);
+    aligned_size_list.emplace_back(align_size);
+  }
+  return mem_manager_->MallocContinuousMemFromMemPool(aligned_size_list, stream_id);
+}
+
+bool AscendResManager::BindDeviceToCurrentThread(bool force_bind) const {
+  static thread_local std::once_flag is_set;
+  std::call_once(is_set, [this]() {
+    auto ret = CALL_ASCEND_API(aclrtSetDevice, static_cast<int32_t>(device_id_));
+    if (ret != ACL_SUCCESS) {
+      LOG_ERROR << "Device " << device_id_ << " call aclrtSetDevice failed, ret:" << static_cast<int>(ret);
+    }
+  });
+
+  if (force_bind) {
+    AscendHalManager::GetInstance().SetContextForce(device_id_);
+  } else {
+    AscendHalManager::GetInstance().SetContext(device_id_);
+  }
+
+  return true;
+}
+
+bool AscendResManager::CreateStream(size_t *stream_id) const {
+  if (!BindDeviceToCurrentThread(false)) {
+    LOG_ERROR << "Bind context to current thread failed";
+    return false;
+  }
+  AscendStreamMng::GetInstance().CreateStream(stream_id);
+  return true;
+}
+
+bool AscendResManager::CreateStreamWithPriority(size_t *stream_id, int32_t priority) const {
+  if (!BindDeviceToCurrentThread(false)) {
+    LOG_ERROR << "Bind context to current thread failed";
+    return false;
+  }
+  AscendStreamMng::GetInstance().CreateStreamWithFlags(stream_id, ACL_STREAM_FAST_LAUNCH | ACL_STREAM_FAST_SYNC,
+                                                       static_cast<int32_t>(priority));
+  return true;
+}
+
+bool AscendResManager::DestroyStream(size_t stream_id) const {
+  if (!BindDeviceToCurrentThread(false)) {
+    LOG_ERROR << "Bind context to current thread failed";
+    return false;
+  }
+  AscendStreamMng::GetInstance().DestroyStream(stream_id);
+  return true;
+}
+
+size_t AscendResManager::QueryStreamSize() const { return AscendStreamMng::GetInstance().QueryStreamSize(); }
+
+std::vector<uint32_t> AscendResManager::GetStreamIds() const { return AscendStreamMng::GetInstance().GetStreamIds(); }
+
+bool AscendResManager::single_op_multi_stream_enable() const {
+  return AscendStreamMng::GetInstance().single_op_multi_stream_enable();
+}
+
+void AscendResManager::set_single_op_multi_stream_enable(bool single_op_multi_stream_enable) {
+  return AscendStreamMng::GetInstance().set_single_op_multi_stream_enable(single_op_multi_stream_enable);
+}
+
+void *AscendResManager::GetStream(size_t stream_id) const {
+  if (!BindDeviceToCurrentThread(false)) {
+    LOG_ERROR << "Bind context to current thread failed";
+    return nullptr;
+  }
+  return AscendStreamMng::GetInstance().GetStream(stream_id);
+}
+
+void AscendResManager::SetCurrentStreamId(size_t stream_id) {
+  if (!BindDeviceToCurrentThread(false)) {
+    LOG_ERROR << "Bind context to current thread failed";
+    return;
+  }
+  AscendStreamMng::GetInstance().set_current_stream(stream_id);
+}
+
+size_t AscendResManager::GetCurrentStreamId() const {
+  if (!BindDeviceToCurrentThread(false)) {
+    LOG_ERROR << "Bind context to current thread failed";
+    return SIZE_MAX;
+  }
+  return AscendStreamMng::GetInstance().current_stream();
+}
+
+bool AscendResManager::QueryStream(size_t stream_id) const {
+  if (!BindDeviceToCurrentThread(false)) {
+    LOG_ERROR << "Bind context to current thread failed";
+    return false;
+  }
+  return AscendStreamMng::GetInstance().QueryStream(stream_id);
+}
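+// The Sync* helpers below block the calling thread until the given stream (or every
+// stream) has drained its queued work.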
+bool AscendResManager::SyncStream(size_t stream_id) const {
+  if (!BindDeviceToCurrentThread(false)) {
+    LOG_ERROR << "Bind context to current thread failed";
+    return false;
+  }
+  return AscendStreamMng::GetInstance().SyncStream(stream_id);
+}
+
+bool AscendResManager::SyncAllStreams(bool sync_device) const {
+  AscendHalManager::GetInstance().SetContext(device_id_);
+  return AscendStreamMng::GetInstance().SyncAllStreams(sync_device);
+}
+
+bool AscendResManager::SyncNotDefaultStreams() const {
+  if (!BindDeviceToCurrentThread(false)) {
+    LOG_ERROR << "Bind context to current thread failed";
+    return false;
+  }
+  return AscendStreamMng::GetInstance().SyncNotDefaultStreams();
+}
+
+size_t AscendResManager::DefaultStream() const {
+  if (!BindDeviceToCurrentThread(false)) {
+    LOG_ERROR << "Bind context to current thread failed";
+    return SIZE_MAX;
+  }
+  return AscendStreamMng::GetInstance().default_stream_id();
+}
+
+// ACL_EVENT_TIME_LINE: the number of events that can be created is unlimited, and the events can be used
+// to compute the elapsed time between events, which may cause some performance loss.
+// ACL_EVENT_SYNC: the number of events that can be created is limited, and the events can be used for
+// synchronization between multiple streams.
+// ACL_EVENT_CAPTURE_STREAM_PROGRESS: the number of events that can be created is unlimited and performance
+// is high, but the events cannot be used for timing or synchronization.
+DeviceEventPtr AscendResManager::CreateRuntimeEvent(bool enable_blocking, bool enable_record_wait) {
+  if (!enable_blocking && !enable_record_wait) {
+    LOG_ERROR << "Bad parameters, enable_blocking is false and enable_record_wait is false.";
+  }
+
+  uint32_t flag = 0;
+  if (enable_blocking) {
+    flag |= ACL_EVENT_SYNC;
+  }
+  if (enable_record_wait) {
+    flag |= ACL_EVENT_CAPTURE_STREAM_PROGRESS;
+  }
+  return std::make_shared<AscendEvent>(flag);
+}
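+// A typical timing use, sketched under the assumption that both events are recorded on
+// the same stream:
+//   auto start = res_mgr->CreateEventWithFlag(true, false, true);
+//   auto end = res_mgr->CreateEventWithFlag(true, false, true);
+//   start->RecordEvent(stream_id); /* ... launch kernels ... */ end->RecordEvent(stream_id);
+//   float ms = 0.0f;
+//   end->ElapsedTime(&ms, start.get());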
+DeviceEventPtr AscendResManager::CreateEventWithFlag(bool enable_timing, bool blocking, bool use_extensional_api) {
+  auto flag = enable_timing ? (ACL_EVENT_TIME_LINE | ACL_EVENT_SYNC) : ACL_EVENT_SYNC;
+  auto event = std::make_shared<AscendEvent>(flag, use_extensional_api);
+  CHECK_IF_NULL(event);
+  std::lock_guard<std::mutex> lock(device_events_mutex_);
+  device_events_.push_back(event);
+  return event;
+}
+
+bool AscendResManager::DestroyEvent(const DeviceEventPtr &event) {
+  CHECK_IF_NULL(event);
+  if (!event->DestroyEvent()) {
+    LOG_ERROR << "Destroy Event failed.";
+    return false;
+  }
+  std::lock_guard<std::mutex> lock(device_events_mutex_);
+  const auto &iter = std::find(device_events_.begin(), device_events_.end(), event);
+  if (iter == device_events_.end()) {
+    LOG_OUT << "Can't find specified device event.";
+    return false;
+  }
+  (void)device_events_.erase(iter);
+  return true;
+}
+
+bool AscendResManager::DestroyAllEvents() {
+  DeviceEventPtrList device_events_inner;
+  {
+    std::lock_guard<std::mutex> lock(device_events_mutex_);
+    device_events_inner = device_events_;
+    device_events_.clear();
+  }
+  (void)std::for_each(device_events_inner.begin(), device_events_inner.end(), [this](const auto &event) {
+    CHECK_IF_NULL(event);
+    if (!event->DestroyEvent()) {
+      LOG_ERROR << "Destroy Event failed.";
+    }
+  });
+  return true;
+}
+
+void *AscendResManager::GetCopyDataStream() const {
+  auto copy_out_data_stream = AscendStreamMng::GetInstance().GetCopyOutStream();
+  if (copy_out_data_stream == nullptr) {
+    size_t copy_stream_id;
+    AscendStreamMng::GetInstance().CreateStream(&copy_stream_id);
+    LOG_OUT << "Create ascend copy data stream, stream id: " << copy_stream_id;
+    copy_out_data_stream = AscendStreamMng::GetInstance().GetStream(copy_stream_id);
+    AscendStreamMng::GetInstance().SetCopyOutStream(copy_out_data_stream);
+  }
+  return copy_out_data_stream;
+}
+
+bool AscendResManager::RecordEvent(int64_t task_id_on_stream, uint32_t user_stream_id,
+                                   const std::vector<std::pair<uint32_t, void *>> &memory_stream_addresses,
+                                   const DeviceEventPtr &input_event) {
+  return mem_manager_->RecordEvent(task_id_on_stream, user_stream_id, memory_stream_addresses, input_event);
+}
+
+bool AscendResManager::WaitEvent(int64_t task_id_on_stream, uint32_t user_stream_id, uint32_t memory_stream_id) {
+  return mem_manager_->WaitEvent(task_id_on_stream, user_stream_id, memory_stream_id);
+}
+
+bool AscendResManager::WaitEvent(int64_t task_id_on_stream, uint32_t user_stream_id) {
+  return mem_manager_->WaitEvent(task_id_on_stream, user_stream_id);
+}
+
+bool AscendResManager::SyncAllEvents() { return mem_manager_->SyncAllEvents(); }
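+// Launches a host callback on the given stream. If ACL rejects the launch, the fallback
+// below synchronizes the stream and runs the callback inline on the host thread.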
+bool AscendResManager::LaunchCallback(std::function<void(void)> callback_func, size_t stream_id, bool is_block) const {
+  auto stream = AscendStreamMng::GetInstance().GetStream(stream_id);
+  if (stream == nullptr) {
+    stream = AscendStreamMng::GetInstance().default_stream();
+  }
+  CHECK_IF_NULL(stream);
+  auto block_type =
+    is_block ? aclrtCallbackBlockType::ACL_CALLBACK_BLOCK : aclrtCallbackBlockType::ACL_CALLBACK_NO_BLOCK;
+  auto callback_func_ptr = new Callback(callback_func);
+  aclError ret = CALL_ASCEND_API(aclrtLaunchCallback, AclrtLaunchCallback, callback_func_ptr, block_type, stream);
+  LOG_OUT << "Launch callback for stream_id : " << stream_id << ", ret : " << ret << ".";
+  if (ret != ACL_SUCCESS) {
+    delete callback_func_ptr;
+    LOG_ERROR << "Launch callback for stream_id : " << stream_id << " failed, ret : " << ret << ".";
+    if (SyncStream(stream_id)) {
+      callback_func();
+      return true;
+    }
+
+    ResetStreamAndCtx();
+    return false;
+  }
+  return true;
+}
+
+void AscendResManager::ResetStreamAndCtx() const {
+  AscendStreamMng::GetInstance().DestroyAllStreams();
+  AscendHalManager::GetInstance().ResetContext(device_id_);
+  AscendStreamMng::GetInstance().CreateDefaultStream();
+}
+
+}  // namespace ascend
+}  // namespace device
+}  // namespace mindspore
diff --git a/inferrt/src/hardware/ascend/res_manager/ascend_res_manager.h b/inferrt/src/hardware/ascend/res_manager/ascend_res_manager.h
new file mode 100644
index 0000000000000000000000000000000000000000..a75ad983972e905337140eed217318e868aa2be5
--- /dev/null
+++ b/inferrt/src/hardware/ascend/res_manager/ascend_res_manager.h
@@ -0,0 +1,133 @@
+/**
+ * Copyright 2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_PLUGIN_RES_MANAGER_ASCEND_ASCEND_RES_MANAGER_H_
+#define MINDSPORE_CCSRC_PLUGIN_RES_MANAGER_ASCEND_ASCEND_RES_MANAGER_H_
+
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include "acl/acl_rt.h"
+#include "hardware/ascend/res_manager/ascend_stream_manager.h"
+#include "hardware/hardware_abstract/device_event.h"
+#include "hardware/hardware_abstract/device_context.h"
+#include "hardware/hardware_abstract/visible.h"
+
+namespace mindspore {
+namespace device {
+namespace ascend {
+std::string GetCurrentDir();
+
+using DeviceMemInfo = std::unordered_map<std::string, std::unordered_map<std::string, size_t>>;
+class HARDWARE_EXPORT AscendResManager : public DeviceResManager {
+ public:
+  AscendResManager() = default;
+  ~AscendResManager() override = default;
+
+  void Initialize() override;
+
+  void Destroy() override;
+
+  std::shared_ptr<MemoryManager> mem_manager() const override { return mem_manager_; }
+
+  std::vector<void *> AllocateContinuousMemory(const std::vector<size_t> &size_list,
+                                               uint32_t stream_id = kDefaultStreamIndex) const override;
+  bool IsEnableVmm() const override;
+
+  bool BindDeviceToCurrentThread(bool force_bind) const override;
+  void *GetStream() const override { return AscendStreamMng::GetInstance().default_stream(); }
+  void *GetCopyDataStream() const;
+
+  void *AllocateStaticMemory(size_t size, uint32_t stream_id = kDefaultStreamIndex) const;
+  void *AllocateMemory(size_t size, uint32_t stream_id = kDefaultStreamIndex) const override;
+  void FreeMemory(void *ptr) const override;
+  void FreePartMemorys(const std::vector<void *> &free_addrs, const std::vector<void *> &keep_addrs,
+                       const std::vector<size_t> &keep_addr_sizes) const override;
+  void DefragMemory() override;
+
+  size_t GetMaxUsedMemorySize() const override;
+
+  // Relevant functions to manage memory statistics.
+  size_t GetTotalMemStatistics() const override;
+  size_t GetTotalUsedMemStatistics() const override;
+  size_t GetTotalIdleMemStatistics() const override;
+  size_t GetTotalEagerFreeMemStatistics() const override;
+  size_t GetUsedMemPeakStatistics() const override;
+  size_t GetReservedMemPeakStatistics() const override;
+  std::unordered_map<std::string, std::size_t> GetBlockCountsStatistics() const override;
+  std::unordered_map<std::string, std::size_t> GetBlockUnitSizeStatistics() const override;
+  std::unordered_map<std::string, std::unordered_map<std::string, size_t>> GetCommonMemBlocksInfoStatistics()
+    const override;
+  std::unordered_map<std::string, std::unordered_map<std::string, size_t>>
+  GetPersistentMemBlocksInfoStatistics() const override;
+  void ResetMaxMemoryReserved() override;
+  void ResetMaxMemoryAllocated() override;
+
+  size_t EmptyCache() override;
+
+  bool CreateStream(size_t *stream_id) const override;
+  bool CreateStreamWithPriority(size_t *stream_id, int32_t priority) const override;
+  bool DestroyStream(size_t stream_id) const override;
+  size_t QueryStreamSize() const override;
+  std::vector<uint32_t> GetStreamIds() const override;
+  void *GetStream(size_t stream_id) const override;
+  void SetCurrentStreamId(size_t stream_id) override;
+  size_t GetCurrentStreamId() const override;
+  bool QueryStream(size_t stream_id) const override;
+  bool SyncStream(size_t stream_id = 0) const override;
+  bool SyncAllStreams(bool sync_device = true) const override;
+  bool SyncNotDefaultStreams() const override;
+  size_t DefaultStream() const override;
+
+  DeviceEventPtr CreateRuntimeEvent(bool enable_blocking, bool enable_record_wait) override;
+  DeviceEventPtr CreateEventWithFlag(bool enable_timing, bool blocking, bool use_extensional_api) override;
+  bool DestroyEvent(const DeviceEventPtr &event) override;
+  bool DestroyAllEvents() override;
+
+  bool single_op_multi_stream_enable() const override;
+  void set_single_op_multi_stream_enable(bool single_op_multi_stream_enable) override;
+  // Only used in graph_mode with MS_DISABLE_REF_MODE; delete it when MS_DISABLE_REF_MODE is removed.
+  void SetCPUMemManager();
+
+  // Override interface for multi stream event control.
+  bool RecordEvent(int64_t task_id_on_stream, uint32_t user_stream_id,
+                   const std::vector<std::pair<uint32_t, void *>> &memory_stream_addresses,
+                   const DeviceEventPtr &input_event) override;
+
+  bool WaitEvent(int64_t task_id_on_stream, uint32_t user_stream_id, uint32_t memory_stream_id) override;
+
+  bool WaitEvent(int64_t task_id_on_stream, uint32_t user_stream_id) override;
+
+  bool SyncAllEvents() override;
+
+  bool LaunchCallback(std::function<void(void)> callback_func, size_t stream_id, bool is_block = false) const override;
+
+  void ResetStreamAndCtx() const override;
+
+ private:
+  bool initialized_ = false;
+  std::shared_ptr<MemoryManager> mem_manager_{nullptr};
+  DeviceEventPtrList device_events_{};
+  std::mutex device_events_mutex_;
+  uint32_t device_id_{0};
+  bool enable_memory_tracker_{false};
+};
+}  // namespace ascend
+}  // namespace device
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_PLUGIN_RES_MANAGER_ASCEND_ASCEND_RES_MANAGER_H_
diff --git a/inferrt/src/hardware/ascend/res_manager/ascend_stream_manager.cc b/inferrt/src/hardware/ascend/res_manager/ascend_stream_manager.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ce6aeb19196527ab2dec390a35ee38c494a8cabe
--- /dev/null
+++ b/inferrt/src/hardware/ascend/res_manager/ascend_stream_manager.cc
@@ -0,0 +1,408 @@
+/**
+ * Copyright 2022-2023 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hardware/ascend/res_manager/ascend_stream_manager.h"
+
+#include <algorithm>
+#include "common/common.h"
+#include "hardware/hardware_abstract/common.h"
+#include "acl/error_codes/rt_error_codes.h"
+#include "hardware/ascend/res_manager/mem_manager/ascend_gmem_adapter.h"
+#include "hardware/ascend/res_manager/symbol_interface/acl_rt_symbol.h"
+#include "hardware/ascend/res_manager/symbol_interface/symbol_utils.h"
+
+namespace mindspore {
+namespace device {
+namespace ascend {
+namespace {
+constexpr size_t kIndex0 = 0;
+}
+AscendStreamMng &AscendStreamMng::GetInstance() {
+  static AscendStreamMng instance{};
+  return instance;
+}
+
+void AscendStreamMng::DestroyAllRtEvents() {
+  for (size_t i = 0; i < events_.size(); ++i) {
+    if (events_[i] != nullptr) {
+      auto rt_ret = CALL_ASCEND_API(aclrtDestroyEvent, events_[i]);
+      if (rt_ret != ACL_SUCCESS) {
+        LOG_ERROR << "Call aclrtDestroyEvent failed, ret:" << rt_ret;
+      }
+    }
+  }
+  events_.clear();
+}
+
+void AscendStreamMng::DeleteEvent() {
+  if (cur_event_num_ == 0) {
+    LOG_OUT << "Total event num is 0, no event to delete";
+  } else {
+    --cur_event_num_;
+  }
+}
+
+void AscendStreamMng::DeleteStream() {
+  if (cur_stream_num_ == 0) {
+    LOG_OUT << "Total stream num is 0, no stream to delete";
+  } else {
+    --cur_stream_num_;
+  }
+}
+
+uint32_t AscendStreamMng::GetCurAllocStreamId() const {
+  if (cur_stream_num_ == 0) {
+    LOG_ERROR << "Stream num is 0, there is no stream id to get";
+  }
+  return cur_stream_num_ - 1;
+}
+
+void AscendStreamMng::CreateStream(aclrtStream *stream, int32_t priority) {
+  std::lock_guard<std::mutex> lock_streams(stream_mutex_);
+  auto ret = CALL_ASCEND_API(aclrtCreateStreamWithConfig, stream, IntToUint(priority),
+                             (ACL_STREAM_FAST_LAUNCH | ACL_STREAM_FAST_SYNC));
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "Create stream failed, ret:" << ret;
+  }
+  ret = CALL_ASCEND_API(aclrtSetStreamFailureMode, *stream, ACL_STOP_ON_FAILURE);
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "aclrtSetStreamFailureMode failed, ret:" << ret;
+  }
+  (void)streams_.emplace_back(*stream);
+}
+
+void AscendStreamMng::CreateStream(size_t *stream_id, int32_t priority) {
+  std::lock_guard<std::mutex> lock_streams(stream_mutex_);
+  aclrtStream stream;
+  auto ret = CALL_ASCEND_API(aclrtCreateStreamWithConfig, &stream, IntToUint(priority),
+                             (ACL_STREAM_FAST_LAUNCH | ACL_STREAM_FAST_SYNC));
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "Create stream failed, ret:" << ret;
+  }
+  ret = CALL_ASCEND_API(aclrtSetStreamFailureMode, stream, ACL_STOP_ON_FAILURE);
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "aclrtSetStreamFailureMode failed, ret:" << ret;
+  }
+  *stream_id = streams_.size();
+  (void)streams_.emplace_back(stream);
+}
+
+void AscendStreamMng::CreateStreamWithFlags(aclrtStream *stream, uint32_t flags, int32_t priority) {
+  std::lock_guard<std::mutex> lock_streams(stream_mutex_);
+  auto ret = CALL_ASCEND_API(aclrtCreateStreamWithConfig, stream, IntToUint(priority), flags);
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "Create stream failed, ret:" << ret;
+  }
+  ret = CALL_ASCEND_API(aclrtSetStreamFailureMode, *stream, ACL_STOP_ON_FAILURE);
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "aclrtSetStreamFailureMode failed, ret:" << ret;
+  }
+  (void)streams_.emplace_back(*stream);
+}
+
+void AscendStreamMng::CreateStreamWithFlags(size_t *stream_id, uint32_t flags, int32_t priority) {
+  std::lock_guard<std::mutex> lock_streams(stream_mutex_);
+  aclrtStream stream;
+  auto ret = CALL_ASCEND_API(aclrtCreateStreamWithConfig, &stream, IntToUint(priority), flags);
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "Create stream failed, ret:" << ret;
+  }
+  ret = CALL_ASCEND_API(aclrtSetStreamFailureMode, stream, ACL_STOP_ON_FAILURE);
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "aclrtSetStreamFailureMode failed, ret:" << ret;
+  }
+  *stream_id = streams_.size();
+  (void)streams_.emplace_back(stream);
+}
+
+aclrtEvent AscendStreamMng::ApplyRtEvent() {
+  aclrtEvent rt_event = nullptr;
+  // Use the ex API of event so that there is no limit on the total number of events.
+  uint32_t flag = ACL_EVENT_SYNC;
+  auto ret = CALL_ASCEND_API(aclrtCreateEventExWithFlag, &rt_event, flag);
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "aclrtCreateEventExWithFlag failed, ret : " << ret << ".";
+  }
+  (void)events_.emplace_back(rt_event);
+  return rt_event;
+}
+
+bool AscendStreamMng::DestroyStream(size_t stream_id) {
+  std::lock_guard<std::mutex> lock_streams(stream_mutex_);
+  if (stream_id >= streams_.size()) {
+    LOG_ERROR << "Ascend stream not found for stream id " << stream_id;
+    return false;
+  }
+  if (streams_.at(stream_id) == nullptr) {
+    LOG_OUT << "Ascend stream has been destroyed for stream id " << stream_id;
+    return true;
+  }
+  const auto ret = CALL_ASCEND_API(aclrtDestroyStream, streams_.at(stream_id));
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "Call aclrtDestroyStream failed, ret[" << ret << "]";
+  }
+  streams_[stream_id] = nullptr;
+  if (communication_stream_id_ == stream_id) {
+    communication_stream_ = nullptr;
+  }
+  if (default_stream_id_ == stream_id) {
+    default_stream_ = nullptr;
+  }
+
+  return true;
+}
+
+bool AscendStreamMng::ForceDestroyAllStreams() {
+  std::lock_guard<std::mutex> lock_streams(stream_mutex_);
+  for (const auto &stream : streams_) {
+    if (stream == nullptr) {
+      continue;
+    }
+    const auto ret = CALL_ASCEND_API(aclrtDestroyStreamForce, stream);
+    if (ret != ACL_SUCCESS) {
+      LOG_ERROR << "Call aclrtDestroyStreamForce failed, ret[" << ret << "]";
+    }
+  }
+  streams_.clear();
+  default_stream_ = nullptr;
+  communication_stream_ = nullptr;
+  return true;
+}
+
+bool AscendStreamMng::DestroyAllStreams() {
+  std::lock_guard<std::mutex> lock_streams(stream_mutex_);
+  for (const auto &stream : streams_) {
+    if (stream == nullptr) {
+      continue;
+    }
+    const auto ret = CALL_ASCEND_API(aclrtDestroyStream, stream);
+    if (ret != ACL_SUCCESS) {
+      LOG_ERROR << "Call aclrtDestroyStream failed, ret[" << ret << "]";
+    }
+  }
+  streams_.clear();
+  default_stream_ = nullptr;
+  communication_stream_ = nullptr;
+  return true;
+}
+
+aclrtStream AscendStreamMng::GetStream(size_t stream_id) const {
+  if (stream_id >= streams_.size()) {
+    LOG_OUT << "Stream for stream id[" << stream_id << "] not found, return nullptr.";
+    return nullptr;
+  }
+  return streams_[stream_id];
+}
+
+bool AscendStreamMng::SyncStream(size_t stream_id) const {
+  if (stream_id >= streams_.size()) {
+    LOG_ERROR << "Stream for stream id[" << stream_id << "] has not been created.";
+  }
+  const auto stream = streams_[stream_id];
+  if (stream == nullptr) {
+    LOG_OUT << "Stream for stream id[" << stream_id << "] has been destroyed.";
+    return false;
+  }
+  return SyncStream(stream);
+}
+
+bool AscendStreamMng::SyncStream(aclrtStream stream) const {
+  CHECK_IF_NULL(stream);
+  LOG_OUT << "Sync stream: " << stream;
+  auto RET = ACL_SUCCESS;
+  try {
+    GilReleaseWithCheck gil_release;
+    RET = CALL_ASCEND_API(aclrtSynchronizeStreamWithTimeout, stream, -1);
+    if (RET != ACL_SUCCESS && RET != ACL_ERROR_RT_AICORE_OVER_FLOW) {  // Overflow is tolerated here.
+      LOG_ERROR << "Call runtime aclrtSynchronizeStreamWithTimeout error."
+                << "Please do the following three things to confirm whether it is caused by the "
+                << "execution failure of a certain operator.\n"
+                << " 1. Set mindspore.runtime.launch_blocking() at the beginning of your python script.\n"
+                << " 2. Run your python script again.\n"
+                << " 3. Grep 'Sync run failed' in your logs; it always stays at the end of your logs.\n"
+                << "Then you will get detailed information about the failing operator.";
+      return false;
+    }
+  } catch (const std::exception &e) {
+    LOG_ERROR << "Sync stream failed. " << e.what()
+              << "Please do the following three things to confirm whether it is caused by the "
+              << "execution failure of a certain operator.\n"
+              << " 1. Set mindspore.runtime.launch_blocking() at the beginning of your python script.\n"
+              << " 2. Run your python script again.\n"
+              << " 3. Grep 'Sync run failed' in your logs; it always stays at the end of your logs.\n"
+              << "Then you will get detailed information about the failing operator.";
+    return false;
+  }
+  if (RET == ACL_ERROR_RT_AICORE_OVER_FLOW) {
+    LOG_OUT << "Call runtime aclrtSynchronizeStreamWithTimeout, the stream got an overflow.";
+  }
+  return true;
+}
+
+bool AscendStreamMng::SyncAllStreams(bool sync_device) const {
+  auto RET = ACL_ERROR_NONE;
+  try {
+    GilReleaseWithCheck gil_release;
+    if (sync_device) {
+      // According to CANN, we need to set the timeout to 2 hours for aclrtSynchronizeDeviceWithTimeout.
+      int timeout = 7200000;
+      RET = CALL_ASCEND_API(aclrtSynchronizeDeviceWithTimeout, timeout);
+      if (RET != ACL_ERROR_NONE && RET != ACL_ERROR_RT_AICORE_OVER_FLOW) {
+        LOG_ERROR << "Call runtime aclrtSynchronizeDeviceWithTimeout error."
+                  << "Please do the following three things to confirm whether it is caused by the "
+                  << "execution failure of a certain operator.\n"
+                  << " 1. Set mindspore.runtime.launch_blocking() at the beginning of your python script.\n"
+                  << " 2. Run your python script again.\n"
+                  << " 3. Grep 'Sync run failed' in your logs; it always stays at the end of your logs.\n"
+                  << "Then you will get detailed information about the failing operator.";
+        return false;
+      }
+    } else {
+      for (size_t i = 0; i < streams_.size(); i++) {
+        const auto stream = streams_[i];
+        if (stream != nullptr && !SyncStream(stream)) {
+          LOG_ERROR << "SyncStream for stream id " << i << " failed.";
+          return false;
+        }
+      }
+    }
+  } catch (const std::exception &e) {
+    std::string sync_method = sync_device ? "aclrtSynchronizeDeviceWithTimeout" : "aclrtSynchronizeStreamWithTimeout";
+    LOG_ERROR << sync_method << " failed. " << e.what()
+              << "Please do the following three things to confirm whether it is caused by the "
+              << "execution failure of a certain operator.\n"
+              << " 1. Set mindspore.runtime.launch_blocking() at the beginning of your python script.\n"
+              << " 2. Run your python script again.\n"
+              << " 3. Grep 'Sync run failed' in your logs; it always stays at the end of your logs.\n"
+              << "Then you will get detailed information about the failing operator.";
+    return false;
+  }
+  if (RET == ACL_ERROR_RT_AICORE_OVER_FLOW) {
+    std::string sync_method = sync_device ? "aclrtSynchronizeDeviceWithTimeout" : "aclrtSynchronizeStreamWithTimeout";
+    LOG_OUT << "Call runtime " << sync_method << ", the stream got an overflow."
+            << "Please do the following three things to confirm whether it is caused by the "
+            << "execution failure of a certain operator.\n"
+            << " 1. Set mindspore.runtime.launch_blocking() at the beginning of your python script.\n"
+            << " 2. Run your python script again.\n"
+            << " 3. Grep 'Sync run failed' in your logs; it always stays at the end of your logs.\n"
+            << "Then you will get detailed information about the failing operator.";
+  }
+  return true;
+}
+
+bool AscendStreamMng::SyncNotDefaultStreams() const {
+  bool res = true;
+  for (size_t i = 0; i < streams_.size(); i++) {
+    if (i != default_stream_id_ && !SyncStream(i)) {
+      LOG_ERROR << "Failed to sync for ascend stream id: " << i;
+      res = false;
+    }
+  }
+  return res;
+}
+
+bool AscendStreamMng::SyncExceptStreamsInList(const std::set<aclrtStream> &except_streams) const {
+  bool res = true;
+  for (size_t i = 0; i < streams_.size(); i++) {
+    if (except_streams.count(streams_[i]) > 0) {
+      LOG_OUT << "Stream id:" << i << " is in the except list, skip synchronization.";
+      continue;
+    }
+    if (!SyncStream(i)) {
+      LOG_ERROR << "Failed to sync for ascend stream id: " << i;
+      res = false;
+    }
+  }
+  return res;
+}
+
+size_t AscendStreamMng::QueryStreamSize() const { return streams_.size(); }
+
+bool AscendStreamMng::QueryStream(size_t stream_id) {
+  if (stream_id >= streams_.size()) {
+    LOG_ERROR << "Stream for stream id[" << stream_id << "] has not been created.";
+  }
+  const auto stream = streams_[stream_id];
+  if (stream == nullptr) {
+    LOG_OUT << "Stream for stream id[" << stream_id << "] has been destroyed.";
+    return false;
+  }
+
+  aclrtStreamStatus status;
+  auto ret = CALL_ASCEND_API(aclrtStreamQuery, stream, &status);
+  if (ret != ACL_SUCCESS) {
+    LOG_ERROR << "Failed to query completion status for stream id: " << stream_id;
+  }
+  return status == ACL_STREAM_STATUS_COMPLETE;
+}
+
+size_t AscendStreamMng::GetStreamId(void *stream_ptr) {
+  auto iter = std::find(streams_.begin(), streams_.end(), stream_ptr);
+  if (iter == streams_.end()) {
+    LOG_ERROR << "Failed to find stream_ptr in streams_, stream_ptr:" << stream_ptr;
+  }
+
+  return LongToSize(std::distance(streams_.begin(), iter));
+}
+
+std::vector<uint32_t> AscendStreamMng::GetStreamIds() const {
+  std::vector<uint32_t> stream_ids;
+  for (size_t i = 0; i < streams_.size(); i++) {
+    if (streams_[i] != nullptr) {
+      (void)stream_ids.emplace_back(static_cast<uint32_t>(i));
+    }
+  }
+  return stream_ids;
+}
+
+void AscendStreamMng::CreateDefaultStream() {
+  if (default_stream_ == nullptr) {
+    CreateStream(&default_stream_id_);
+    LOG_OUT << "Create ascend default stream, stream id: " << default_stream_id_;
+    default_stream_ = GetStream(default_stream_id_);
+    CHECK_IF_NULL(default_stream_);
+  } else {
+    LOG_OUT << "The default compute stream is already created, skip.";
+  }
+
+  if (communication_stream_ == nullptr) {
+    CreateStream(&communication_stream_id_);
+    LOG_OUT << "Create ascend communication stream, stream id: " << communication_stream_id_;
+    communication_stream_ = GetStream(communication_stream_id_);
+    CHECK_IF_NULL(communication_stream_);
+  } else {
+    LOG_OUT << "The default communication stream is already created, skip.";
+  }
+}
+
+size_t AscendStreamMng::default_stream_id() const {
+  if (default_stream_ == nullptr) {
+    LOG_ERROR << "The default stream is not created";
+  }
+  return default_stream_id_;
+}
+size_t AscendStreamMng::communication_stream_id() const {
+  if (communication_stream_ == nullptr) {
+    LOG_ERROR << "The communication stream is not created";
+  }
+  return communication_stream_id_;
+}
+aclrtStream AscendStreamMng::default_stream() const {
return default_stream_; } +aclrtStream AscendStreamMng::communication_stream() const { return communication_stream_; } + +} // namespace ascend +} // namespace device +} // namespace mindspore diff --git a/inferrt/src/hardware/ascend/res_manager/ascend_stream_manager.h b/inferrt/src/hardware/ascend/res_manager/ascend_stream_manager.h new file mode 100644 index 0000000000000000000000000000000000000000..b738b3284b9055b427def13135ad30aab703ed1c --- /dev/null +++ b/inferrt/src/hardware/ascend/res_manager/ascend_stream_manager.h @@ -0,0 +1,149 @@ +/** + * Copyright 2021-2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_PLUGIN_RES_MANAGER_ASCEND_STREAM_MANAGER_ASCEND_STREAM_MANAGER_H_ +#define MINDSPORE_CCSRC_PLUGIN_RES_MANAGER_ASCEND_STREAM_MANAGER_ASCEND_STREAM_MANAGER_H_ + +#include +#include +#include +#include + +#include "acl/acl_rt.h" +#include "hardware/hardware_abstract/visible.h" + +namespace mindspore { +namespace device { +namespace ascend { +class HARDWARE_EXPORT AscendStreamMng { + public: + static AscendStreamMng &GetInstance(); + + ~AscendStreamMng() = default; + void Clear() { + } + + void ResetResource() { + cur_stream_num_ = 0; + cur_event_num_ = 0; + } + + uint32_t ApplyNewStream() { return cur_stream_num_++; } + + uint32_t ApplyNewEvent() { return cur_event_num_++; } + + aclrtEvent ApplyRtEvent(); + aclrtEvent ApplyRtEventWithFlag(uint32_t flag); + uint32_t GetRtEventId(const aclrtEvent &event) const; + void DestroyAllRtEvents(); + + void DeleteEvent(); + + void DeleteStream(); + + uint32_t GetCurAllocStreamId() const; + + uint32_t cur_stream_num() const { return cur_stream_num_; } + + uint32_t cur_event_num() const { return cur_event_num_; } + + void CreateStream(aclrtStream *stream, int32_t priority = 0); + void CreateStream(size_t *stream_id, int32_t priority = 0); + void RegCallback(aclrtStream stream); + void UnRegCallback(aclrtStream stream, bool delete_item = true); + void CreateStreamWithFlags(aclrtStream *stream, uint32_t flags, int32_t priority = 0); + void CreateStreamWithFlags(size_t *stream_id, uint32_t flags, int32_t priority = 0); + bool DestroyStream(size_t stream_id); + bool DestroyAllStreams(); + bool ForceDestroyAllStreams(); + aclrtStream GetStream(size_t stream_id) const; + bool SyncStream(size_t stream_id) const; + bool SyncStream(aclrtStream stream) const; + // 'sync_device' means whether calling 'aclrtSynchronizeDeviceWithTimeout' or 'aclrtSynchronizeStreamWithTimeout'. + bool SyncAllStreams(bool sync_device = true) const; + bool SyncNotDefaultStreams() const; + // Sync all streams except the streams in except_streams. 
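+  // Illustrative caller sketch (hypothetical, not part of this patch): keep the
+  // copy-in stream running while draining every other stream:
+  //   std::set<aclrtStream> keep{AscendStreamMng::GetInstance().GetCopyInStream()};
+  //   AscendStreamMng::GetInstance().SyncExceptStreamsInList(keep);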
+  bool SyncExceptStreamsInList(const std::set<aclrtStream> &except_streams) const;
+  size_t QueryStreamSize() const;
+  bool QueryStream(size_t stream_id);
+  size_t GetStreamId(void *stream_ptr);
+  std::vector<uint32_t> GetStreamIds() const;
+  void SetBusyStreamNum(uint32_t stream_num) { busy_stream_num_ = stream_num; }
+  uint32_t GetBusyStreamNum() const { return busy_stream_num_; }
+  void SetCopyInStream(aclrtStream stream) { copy_in_stream_ = stream; }
+  void SetCopyOutStream(aclrtStream stream) { copy_out_stream_ = stream; }
+  void SetForwardSendStream(aclrtStream stream) { forward_send_stream_ = stream; }
+  void SetBackwardSendStream(aclrtStream stream) { backward_send_stream_ = stream; }
+  void SetForwardRecvStream(aclrtStream stream) { forward_recv_stream_ = stream; }
+  void SetBackwardRecvStream(aclrtStream stream) { backward_recv_stream_ = stream; }
+  aclrtStream GetCopyInStream() const { return copy_in_stream_; }
+  aclrtStream GetCopyOutStream() const { return copy_out_stream_; }
+  aclrtStream GetForwardSendStream() const { return forward_send_stream_; }
+  aclrtStream GetBackwardSendStream() const { return backward_send_stream_; }
+  aclrtStream GetForwardRecvStream() const { return forward_recv_stream_; }
+  aclrtStream GetBackwardRecvStream() const { return backward_recv_stream_; }
+
+  void set_current_stream(size_t stream_id) { current_stream_id_ = stream_id; }
+  size_t current_stream() const { return current_stream_id_; }
+
+  void CreateDefaultStream();
+  size_t default_stream_id() const;
+  size_t communication_stream_id() const;
+  aclrtStream default_stream() const;
+  aclrtStream communication_stream() const;
+
+  bool single_op_multi_stream_enable() const { return single_op_multi_stream_enable_; }
+  void set_single_op_multi_stream_enable(bool single_op_multi_stream_enable) {
+    single_op_multi_stream_enable_ = single_op_multi_stream_enable;
+  }
+
+ private:
+  // Count of streams and events in the task sink scenario.
+  uint32_t cur_stream_num_{0};
+  uint32_t cur_event_num_{0};
+
+  // The max stream num on device at a time.
+  uint32_t busy_stream_num_{0};
+
+  // Ensure the thread safety for creating and destroying stream.
+  std::mutex stream_mutex_;
+  aclrtStream copy_in_stream_{nullptr};
+  aclrtStream copy_out_stream_{nullptr};
+  aclrtStream forward_send_stream_{nullptr};
+  aclrtStream backward_send_stream_{nullptr};
+  aclrtStream forward_recv_stream_{nullptr};
+  aclrtStream backward_recv_stream_{nullptr};
+
+  // All Ascend ACL streams, including default_stream_.
+  std::vector<aclrtStream> streams_;
+  std::vector<aclrtEvent> events_{};
+
+  // Currently used stream id.
+  size_t current_stream_id_{0};
+
+  // Default stream. We consider the first stream created as the default stream.
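+  // Per CreateDefaultStream() in the .cc above, the compute stream is created
+  // before the communication stream, so on a fresh manager default_stream_id_
+  // is expected to be 0 and communication_stream_id_ to be 1.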
+ aclrtStream default_stream_{nullptr}; + size_t default_stream_id_{0}; + aclrtStream communication_stream_{nullptr}; + size_t communication_stream_id_{0}; + + bool single_op_multi_stream_enable_{false}; +}; +} // namespace ascend +} // namespace device +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PLUGIN_RES_MANAGER_ASCEND_STREAM_MANAGER_ASCEND_STREAM_MANAGER_H_ diff --git a/inferrt/src/hardware/ascend/res_manager/mem_manager/CMakeLists.txt b/inferrt/src/hardware/ascend/res_manager/mem_manager/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..6c2ef17c933d5b2fa295cc86126ba79dce143d0c --- /dev/null +++ b/inferrt/src/hardware/ascend/res_manager/mem_manager/CMakeLists.txt @@ -0,0 +1,6 @@ +if(ENABLE_D OR ENABLE_ACL) + file(GLOB _ASCEND_MEM_MANAGER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") + set_property(SOURCE ${_ASCEND_MEM_MANAGER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS + SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE) + add_library(_mindspore_ascend_mem_manager_obj OBJECT ${_ASCEND_MEM_MANAGER_SRC_LIST}) +endif() \ No newline at end of file diff --git a/inferrt/src/hardware/ascend/res_manager/mem_manager/abstract_ascend_memory_pool_support.cc b/inferrt/src/hardware/ascend/res_manager/mem_manager/abstract_ascend_memory_pool_support.cc new file mode 100644 index 0000000000000000000000000000000000000000..978b2569e1085f62db73a2176f7074c1b2a6af68 --- /dev/null +++ b/inferrt/src/hardware/ascend/res_manager/mem_manager/abstract_ascend_memory_pool_support.cc @@ -0,0 +1,189 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "hardware/ascend/res_manager/mem_manager/abstract_ascend_memory_pool_support.h" + +#include +#include + +#include "hardware/ascend/res_manager/mem_manager/ascend_memory_adapter.h" +#include "hardware/ascend/res_manager/mem_manager/ascend_gmem_adapter.h" +#include "hardware/ascend/res_manager/mem_manager/ascend_vmm_adapter.h" +#include "hardware/ascend/res_manager/ascend_stream_manager.h" +#include "common/common.h" +#include "hardware/ascend/res_manager/symbol_interface/acl_rt_symbol.h" +#include "hardware/ascend/res_manager/symbol_interface/symbol_utils.h" + +namespace mindspore { +namespace device { +namespace ascend { +// The minimum unit size (8MB) of memory block used for dynamic extend in graph run mode. +static const size_t ASCEND_COMMON_POOL_ALLOC_UNIT_SIZE_FOR_GRAPH_RUN_MODE = 8 << 20; +constexpr char kGlobalOverflowWorkspace[] = "GLOBAL_OVERFLOW_WORKSPACE"; + +void AbstractAscendMemoryPoolSupport::SetMemPoolBlockSize(size_t available_device_mem_size) { + // set by default configuration + SetMemAllocUintSize(kDynamicMemAllocUnitSize, kDynamicMemAllocUnitSize); +} + +namespace { +bool NoAdditionalMemory() { + // use default temporarily. 
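+  // Note: returning true makes CalMemBlockAllocSize() below clamp the grown
+  // block back to the requested size (unless need_recycle is set), so the pool
+  // asks the device for no extra headroom.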
+ return true; +} +} // namespace + +size_t AbstractAscendMemoryPoolSupport::CalMemBlockAllocSize(size_t size, bool from_persistent_mem, bool need_recycle) { + auto device_free_mem_size = free_mem_size(); + if (device_free_mem_size < size) { + LOG_OUT << "The device memory is not enough, the free memory size is " << device_free_mem_size + << ", but the alloc size is " << size; + LOG_OUT << "The dynamic memory pool total size is " << TotalMemStatistics() / kMBToByte << "M, total used size is " + << TotalUsedMemStatistics() / kMBToByte << "M, used peak size is " << UsedMemPeakStatistics() / kMBToByte + << "M."; + LOG_OUT << "Memory Statistics:" << AscendMemAdapter::GetInstance()->DevMemStatistics(); + return 0; + } + + size_t alloc_mem_size; + SetMemPoolBlockSize(device_free_mem_size); + auto alloc_mem_unit_size = MemAllocUnitSize(from_persistent_mem); + if (need_recycle) { + alloc_mem_unit_size = kDynamicMemAllocUnitSize; + } + LOG_OUT << "Get unit block size " << alloc_mem_unit_size; + alloc_mem_size = alloc_mem_unit_size; + + const bool is_graph_run_mode = true; + if (is_graph_run_mode) { + // Growing at adding alloc unit size + while (alloc_mem_size < size) { + alloc_mem_size = alloc_mem_size + alloc_mem_unit_size; + } + } else { + // Growing at twice of alloc unit size + constexpr size_t kDouble = 2; + while (alloc_mem_size < size) { + alloc_mem_size = alloc_mem_size * kDouble; + } + } + + alloc_mem_size = std::min(alloc_mem_size, device_free_mem_size); + if (NoAdditionalMemory() && !need_recycle) { + alloc_mem_size = std::min(alloc_mem_size, size); + } + return alloc_mem_size; +} + +size_t AbstractAscendMemoryPoolSupport::AllocDeviceMem(size_t size, DeviceMemPtr *addr) { + LOG_OUT << "Malloc Memory for Pool, size: " << size; + if (size == 0) { + LOG_ERROR << "Failed to alloc memory pool resource, the size is zero!"; + } + *addr = AscendMemAdapter::GetInstance()->MallocStaticDevMem(size); + if (*addr == nullptr) { + LOG_ERROR << "Alloc device memory pool address is nullptr, failed to alloc memory pool resource!"; + } + return size; +} + +size_t AbstractAscendMemoryPoolSupport::GetMaxUsedMemSize() const { + void *min_used_addr = GetMinUsingMemoryAddr(); + if (min_used_addr == nullptr) { + return 0; + } + return AscendMemAdapter::GetInstance()->GetDynamicMemUpperBound(min_used_addr); +} + +size_t AbstractAscendMemoryPoolSupport::GetVmmUsedMemSize() const { + if (IsEnableVmm()) { + return AscendVmmAdapter::GetInstance().GetAllocatedSize(); + } + return 0; +} + +const bool AbstractAscendMemoryPoolSupport::IsEnableEagerFree() const { + return AscendGmemAdapter::GetInstance().is_eager_free_enabled(); +} + +const bool AbstractAscendMemoryPoolSupport::SyncAllStreams() { return AscendStreamMng::GetInstance().SyncAllStreams(); } + +size_t AbstractAscendMemoryPoolSupport::AllocDeviceMemByEagerFree(size_t size, DeviceMemPtr *addr) { + if (IsEnableVmm()) { + return AscendVmmAdapter::GetInstance().AllocDeviceMem(size, addr); + } else if (IsEnableEagerFree()) { + return AscendGmemAdapter::GetInstance().AllocDeviceMem(size, addr); + } else { + LOG_ERROR << "Eager free and VMM are both disabled."; + return 0; + } +} + +size_t AbstractAscendMemoryPoolSupport::FreeDeviceMemByEagerFree(const DeviceMemPtr addr, const size_t size) { + if (IsEnableVmm()) { + return AscendVmmAdapter::GetInstance().EagerFreeDeviceMem(addr, size); + } else if (IsEnableEagerFree()) { + return AscendGmemAdapter::GetInstance().EagerFreeDeviceMem(addr, size); + } else { + LOG_ERROR << "Eager free and VMM are both disabled."; + 
return 0;
+  }
+}
+
+size_t AbstractAscendMemoryPoolSupport::EmptyCache() { return AscendVmmAdapter::GetInstance().EmptyCache(); }
+
+size_t AbstractAscendMemoryPoolSupport::MmapDeviceMem(const size_t size, const DeviceMemPtr addr) {
+  if (IsEnableVmm()) {
+    return AscendVmmAdapter::GetInstance().MmapDeviceMem(size, addr, total_mem_size());
+  } else if (IsEnableEagerFree()) {
+    auto ret = AscendGmemAdapter::GetInstance().MmapMemory(size, addr);
+    if (ret == nullptr) {
+      LOG_ERROR << "Mmap memory failed.";
+    }
+    return size;
+  }
+  LOG_ERROR << "Eager free and VMM are both disabled.";
+  return 0;
+}
+
+bool AbstractAscendMemoryPoolSupport::FreeDeviceMem(const DeviceMemPtr &addr) {
+  CHECK_IF_NULL(addr);
+  int64_t max_actual = ActualPeakStatistics();
+  LOG_OUT << "Max actual used memory size is " << max_actual;
+  AscendMemAdapter::GetInstance()->UpdateActualPeakMemory(max_actual);
+  int64_t max_peak = UsedMemPeakStatistics();
+  LOG_OUT << "Max peak used memory size is " << max_peak;
+  AscendMemAdapter::GetInstance()->UpdateUsedPeakMemory(max_peak);
+  // With ge kernel disabled, the two-pointer mem adapter is used, which does not support free.
+  // if (!IsEnableVmm() && !IsEnableEagerFree() && !IsDisableGeKernel()) {
+  //   return AscendMemAdapter::GetInstance()->FreeStaticDevMem(addr);
+  // }
+  return true;
+}
+
+void AbstractAscendMemoryPoolSupport::ResetIdleMemBuf() const {
+  // Warning: this method is not in use currently; it will be removed in the next release.
+}
+
+size_t AbstractAscendMemoryPoolSupport::free_mem_size() { return AscendMemAdapter::GetInstance()->FreeDevMemSize(); }
+
+uint64_t AbstractAscendMemoryPoolSupport::total_mem_size() const {
+  return AscendMemAdapter::GetInstance()->MaxHbmSizeForMs();
+}
+}  // namespace ascend
+}  // namespace device
+}  // namespace mindspore
diff --git a/inferrt/src/hardware/ascend/res_manager/mem_manager/abstract_ascend_memory_pool_support.h b/inferrt/src/hardware/ascend/res_manager/mem_manager/abstract_ascend_memory_pool_support.h
new file mode 100644
index 0000000000000000000000000000000000000000..f5183ef4f5cffa870f3c9d27a62e657611f44816
--- /dev/null
+++ b/inferrt/src/hardware/ascend/res_manager/mem_manager/abstract_ascend_memory_pool_support.h
@@ -0,0 +1,71 @@
+/**
+ * Copyright 2024 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_ABSTRACT_ASCEND_ASCEND_MEMORY_POOL_SUPPORT_H_
+#define MINDSPORE_CCSRC_RUNTIME_DEVICE_ABSTRACT_ASCEND_ASCEND_MEMORY_POOL_SUPPORT_H_
+
+#include <memory>
+
+#include "hardware/hardware_abstract/memory/dynamic_mem_pool.h"
+#include "hardware/hardware_abstract/visible.h"
+
+namespace mindspore {
+namespace device {
+namespace ascend {
+// Definition for the abstract Ascend memory pool support class; wraps the Ascend device interface.
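+// Dispatch order used by the eager-free entry points in the .cc above (sketch):
+//   IsEnableVmm()            -> AscendVmmAdapter route
+//   else IsEnableEagerFree() -> AscendGmemAdapter route
+//   else                     -> error: both mechanisms disabled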
+class HARDWARE_EXPORT AbstractAscendMemoryPoolSupport : virtual public DynamicMemPool { + public: + ~AbstractAscendMemoryPoolSupport() override = default; + + size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) override; + + bool FreeDeviceMem(const DeviceMemPtr &addr) override; + + size_t MmapDeviceMem(const size_t size, const DeviceMemPtr addr) override; + + size_t GetMaxUsedMemSize() const override; + + size_t GetVmmUsedMemSize() const override; + + size_t free_mem_size() override; + + uint64_t total_mem_size() const override; + + // Set mem pool block size + void SetMemPoolBlockSize(size_t available_device_mem_size) override; + + virtual void ResetIdleMemBuf() const; + + // Calculate memory block required alloc size when adding the memory block. + size_t CalMemBlockAllocSize(size_t size, bool from_persistent_mem, bool need_recycle) override; + + // The related interface of device memory eager free. + const bool IsEnableEagerFree() const override; + + const bool SyncAllStreams() override; + + size_t AllocDeviceMemByEagerFree(size_t size, DeviceMemPtr *addr) override; + + size_t FreeDeviceMemByEagerFree(const DeviceMemPtr addr, const size_t size) override; + + size_t EmptyCache() override; +}; +using AbstractAscendMemoryPoolSupportPtr = std::shared_ptr; +} // namespace ascend +} // namespace device +} // namespace mindspore + +#endif // #define MINDSPORE_CCSRC_RUNTIME_DEVICE_ABSTRACT_ASCEND_ASCEND_MEMORY_POOL_SUPPORT_H_ diff --git a/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_dynamic_mem_adapter.cc b/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_dynamic_mem_adapter.cc new file mode 100644 index 0000000000000000000000000000000000000000..497763187ade5f3fd192a29531a573715034d4a2 --- /dev/null +++ b/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_dynamic_mem_adapter.cc @@ -0,0 +1,127 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "hardware/ascend/res_manager/mem_manager/ascend_dynamic_mem_adapter.h"
+#include <memory>
+#include <sstream>
+#include "common/common.h"
+
+#include "hardware/ascend/res_manager/mem_manager/ascend_gmem_adapter.h"
+#include "hardware/ascend/res_manager/mem_manager/ascend_vmm_adapter.h"
+#include "hardware/ascend/res_manager/symbol_interface/acl_rt_symbol.h"
+#include "hardware/ascend/res_manager/symbol_interface/symbol_utils.h"
+
+namespace mindspore {
+namespace device {
+namespace ascend {
+constexpr size_t kMBToByte = 1024 << 10;
+
+uint8_t *AscendDynamicMemAdapter::MallocStaticDevMem(size_t size, const std::string &tag) {
+  std::lock_guard<std::mutex> locker(mutex_);
+  if (has_alloc_size + size > LongToSize(max_available_ms_hbm_size_)) {
+    LOG_ERROR << "Not enough memory to allocate, has_alloc_size:" << has_alloc_size << ", size:" << size
+              << ", max_available_ms_moc_size:" << max_available_ms_hbm_size_;
+  }
+  auto addr = MallocFromRts(size);
+  if (addr != nullptr) {
+    has_alloc_size += size;
+    (void)static_memory_blocks_.emplace(addr, std::make_shared<MemoryBlock>(addr, size, tag));
+    LOG_OUT << "MallocStaticDevMem success, size:" << size << ", tag:" << tag;
+  }
+  return addr;
+}
+
+bool AscendDynamicMemAdapter::FreeStaticDevMem(void *addr) {
+  LOG_OUT << "FreeStaticDevMem addr:" << addr << ".";
+  std::lock_guard<std::mutex> locker(mutex_);
+  if (addr == nullptr) {
+    LOG_ERROR << "addr is nullptr.";
+    return false;
+  }
+  auto &&iter = static_memory_blocks_.find(addr);
+  if (iter == static_memory_blocks_.end()) {
+    LOG_ERROR << "addr is not in static memory blocks, addr:" << addr << ".";
+    return false;
+  }
+  auto mem_block = iter->second;
+  auto ret = FreeToRts(mem_block->mem_ptr, mem_block->mem_size);
+  if (!ret) {
+    LOG_ERROR << "Free memory failed.";
+    return false;
+  }
+  LOG_OUT << "Free memory success, addr:" << addr << ", size:" << mem_block->mem_size << ".";
+  has_alloc_size -= mem_block->mem_size;
+  static_memory_blocks_.erase(addr);
+  return true;
+}
+
+bool AscendDynamicMemAdapter::Initialize() {
+  if (initialized_) {
+    return true;
+  }
+  (void)AscendMemAdapter::Initialize();
+  initialized_ = true;
+  LOG_OUT << "Ascend Memory Adapter initialize success, Memory Statistics:" << DevMemStatistics();
+  return true;
+}
+
+bool AscendDynamicMemAdapter::DeInitialize() {
+  for (const auto &[addr, blk] : static_memory_blocks_) {
+    if (blk->mem_ptr != nullptr) {
+      auto ret = FreeToRts(blk->mem_ptr, blk->mem_size);
+      if (!ret) {
+        LOG_ERROR << "Free memory failed.";
+        return false;
+      }
+      LOG_OUT << "Free memory success, addr:" << addr << ", size:" << blk->mem_size << ", tag:" << blk->mem_tag;
+    }
+  }
+  (void)AscendMemAdapter::DeInitialize();
+  has_alloc_size = 0;
+  static_memory_blocks_.clear();
+  initialized_ = false;
+  return true;
+}
+
+uint64_t AscendDynamicMemAdapter::FreeDevMemSize() const { return max_available_ms_hbm_size_ - has_alloc_size; }
+
+uint8_t *AscendDynamicMemAdapter::MallocDynamicDevMem(size_t size, const std::string &) {
+  LOG_ERROR << "MallocDynamicDevMem is disabled.";
+  return nullptr;
+}
+
+void AscendDynamicMemAdapter::ResetDynamicMemory() { LOG_ERROR << "ResetDynamicMemory is disabled."; }
+
+std::string AscendDynamicMemAdapter::DevMemStatistics() const {
+  std::ostringstream oss;
+  oss << "\nDevice MOC memory size: " << device_hbm_total_size_ / kMBToByte << "M";
+  oss << "\nMindSpore Used memory size: " << ms_used_hbm_size_ / kMBToByte << "M";
+  auto print_actual_peak_memory = AscendVmmAdapter::GetInstance().IsEnabled()
+                                    ?
AscendVmmAdapter::GetInstance().GetAllocatedSize() + : actual_peak_memory_; + oss << "\nUsed peak memory usage (without fragments): " << used_peak_memory_ / kMBToByte << "M"; + oss << "\nActual peak memory usage (with fragments): " << print_actual_peak_memory / kMBToByte << "M"; + oss << std::endl; + return oss.str(); +} + +size_t AscendDynamicMemAdapter::GetDynamicMemUpperBound(void *min_static_addr) const { + LOG_ERROR << "GetDynamicMemUpperBound is disabled."; + return 0; +} +} // namespace ascend +} // namespace device +} // namespace mindspore diff --git a/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_dynamic_mem_adapter.h b/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_dynamic_mem_adapter.h new file mode 100644 index 0000000000000000000000000000000000000000..7f7eeef077735bde4f9bedd1b6b5dca54143bbf6 --- /dev/null +++ b/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_dynamic_mem_adapter.h @@ -0,0 +1,47 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_DYNAMIC_MEM_ADAPTER_H_ +#define MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_DYNAMIC_MEM_ADAPTER_H_ + +#include "hardware/ascend/res_manager/mem_manager/ascend_memory_adapter.h" +#include +#include +#include + +namespace mindspore { +namespace device { +namespace ascend { +class AscendDynamicMemAdapter : public AscendMemAdapter { + public: + bool Initialize() override; + bool DeInitialize() override; + uint8_t *MallocStaticDevMem(size_t size, const std::string &tag = "") override; + bool FreeStaticDevMem(void *addr) override; + uint8_t *MallocDynamicDevMem(size_t size, const std::string &tag = "") override; + void ResetDynamicMemory() override; + std::string DevMemStatistics() const override; + size_t GetDynamicMemUpperBound(void *min_static_addr) const override; + [[nodiscard]] uint64_t FreeDevMemSize() const override; + + private: + size_t has_alloc_size = 0; + std::map> static_memory_blocks_; +}; +} // namespace ascend +} // namespace device +} // namespace mindspore +#endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_DYNAMIC_MEM_ADAPTER_H_ diff --git a/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_gmem_adapter.cc b/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_gmem_adapter.cc new file mode 100644 index 0000000000000000000000000000000000000000..5a7dfab491d94ceaba9e1ccbd61813aab2fbdd08 --- /dev/null +++ b/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_gmem_adapter.cc @@ -0,0 +1,127 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "hardware/ascend/res_manager/mem_manager/ascend_gmem_adapter.h" +#include +#include +#include +#include +#include + +#include +#include "common/common.h" +#include "hardware/ascend/res_manager/ascend_stream_manager.h" + +namespace mindspore { +namespace device { +namespace ascend { +static constexpr const char kGMemLibName[] = "libgmem.so"; +static constexpr const char kMsEnableGmem[] = "MS_ENABLE_GMEM"; +constexpr uint64_t kAscendMmapAlignSize = 1 << 21; +constexpr int kMapPeerShared = 0x8000000; + +const size_t AscendGmemAdapter::GetRoundUpAlignSize(size_t input_size) const { + return (input_size + kAscendMmapAlignSize - 1) & ~(kAscendMmapAlignSize - 1); +} + +const size_t AscendGmemAdapter::GetRoundDownAlignSize(size_t input_size) const { + return input_size & ~(kAscendMmapAlignSize - 1); +} + +size_t AscendGmemAdapter::AllocDeviceMem(size_t size, DeviceMemPtr *addr) const { + size_t align_size = GetRoundUpAlignSize(size); + uint8_t *alloc_addr = MmapMemory(align_size, nullptr); + if (alloc_addr == nullptr) { + LOG_OUT << "Malloc memory failed."; + return 0; + } + *addr = alloc_addr; + return align_size; +} + +size_t AscendGmemAdapter::EagerFreeDeviceMem(const DeviceMemPtr addr, const size_t size) const { + CHECK_IF_NULL(addr); + LOG_OUT << "Enter ascend eager free device mem, addr : " << addr << ", size : " << size << "."; + if (size == 0) { + LOG_OUT << "Eager free device mem, addr : " << addr << ", size is zero."; + return 0; + } + size_t addr_size_t = reinterpret_cast(addr); + // Adjust addr -> round up addr, size -> round down size. + size_t from_addr = GetRoundUpAlignSize(addr_size_t); + size_t end_addr = GetRoundDownAlignSize(addr_size_t + size); + if (end_addr <= from_addr) { + LOG_OUT << "End addr : " << end_addr << " is not bigger than from_addr : " << from_addr << "."; + return 0; + } + size_t real_size = end_addr - from_addr; + int ret = free_eager_(from_addr, SizeToUlong(real_size), nullptr); + return ret != 0 ? 
0 : real_size;
+}
+
+uint8_t *AscendGmemAdapter::MmapMemory(size_t size, void *addr) const {
+  LOG_OUT << "Enter mmap memory, size : " << size << ".";
+  if (size == 0) {
+    LOG_ERROR << "Mmap memory, addr : " << addr << ", size is zero.";
+    return nullptr;
+  }
+
+  int flags = MAP_PRIVATE | MAP_ANONYMOUS | kMapPeerShared;
+  int prot = PROT_READ | PROT_WRITE;
+  void *mapped_addr = mmap(addr, size, prot, flags, -1, 0);
+  if (mapped_addr == MAP_FAILED) {
+    LOG_ERROR << "Mmap failed.";
+  }
+  return static_cast<uint8_t *>(mapped_addr);
+}
+
+bool AscendGmemAdapter::MunmapMemory(void *addr, const size_t size) const {
+  LOG_OUT << "Enter munmap memory, addr : " << addr << ", size : " << size << ".";
+  auto ret = munmap(addr, size);
+  return ret != -1;
+}
+
+void AscendGmemAdapter::LoadGMemLib() noexcept {
+  LOG_OUT << "MS_ENABLE_GMEM is set, try to open gmem.";
+  gmem_handle_ = dlopen(kGMemLibName, RTLD_NOW);
+  if (gmem_handle_ != nullptr) {
+    LOG_OUT << "Open GMem lib success, mindspore will use gmem to optimize memory usage.";
+    LIB_FUNC(GMEM_FREE_EAGER) gmem_free_eager = DlsymFuncObj(gmemFreeEager, gmem_handle_);
+    if (gmem_free_eager != nullptr) {
+      is_eager_free_enabled_ = true;
+      free_eager_ = gmem_free_eager;
+    } else {
+      LOG_OUT << "Load gmem free eager failed.";
+      if (dlclose(gmem_handle_) != 0) {
+        LOG_ERROR << "Close GMem lib failed, detail : " << dlerror() << ".";
+      }
+    }
+  } else {
+    LOG_OUT << "Open GMem lib failed.";
+  }
+}
+
+void AscendGmemAdapter::UnloadGMemLib() noexcept {
+  if (gmem_handle_ != nullptr) {
+    LOG_OUT << "Close GMem lib.";
+    if (dlclose(gmem_handle_) != 0) {
+      LOG_ERROR << "Close GMem lib failed, detail : " << dlerror() << ".";
+    }
+    gmem_handle_ = nullptr;
+  }
+}
+}  // namespace ascend
+}  // namespace device
+}  // namespace mindspore
diff --git a/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_gmem_adapter.h b/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_gmem_adapter.h
new file mode 100644
index 0000000000000000000000000000000000000000..3edb8bd61ade23694fab1434fd44ece3342fa96a
--- /dev/null
+++ b/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_gmem_adapter.h
@@ -0,0 +1,76 @@
+/**
+ * Copyright 2023 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_GMEM_ADAPTER_H_
+#define MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_GMEM_ADAPTER_H_
+
+#include <cstddef>
+#include <cstdint>
+
+#include "acl/acl.h"
+#include "hardware/hardware_abstract/dlopen_macro.h"
+
+namespace mindspore {
+namespace device {
+namespace ascend {
+#define CONCAT(l, r) l##r
+// Function object definition macro.
+#define LIB_FUNC(func_name) CONCAT(func_name, FunObj)
+// Function definition macro; afterwards `LIB_FUNC(func_name)` can be used.
+#define DEFINE_LIB_METHOD(func_name, ...) ORIGIN_METHOD(func_name, __VA_ARGS__)
+
+// GMem mem free eager function name. The original name is needed when exporting the symbol from the lib.
+#define GMEM_FREE_EAGER gmemFreeEager
+// Definition for the GMem lib function : GMEM_FREE_EAGER.
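+// Assuming ORIGIN_METHOD follows the usual dlopen-macro pattern from
+// dlopen_macro.h, the next line declares a callable type named
+// gmemFreeEagerFunObj with signature size_t(uint64_t, size_t, void *);
+// free_eager_ stores an instance of it once DlsymFuncObj resolves the symbol.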
+DEFINE_LIB_METHOD(GMEM_FREE_EAGER, size_t, uint64_t, size_t, void *); + +using DeviceMemPtr = void(*); +class AscendGmemAdapter { + public: + static AscendGmemAdapter &GetInstance() { + static AscendGmemAdapter instance{}; + return instance; + } + + AscendGmemAdapter() { LoadGMemLib(); } + ~AscendGmemAdapter() { UnloadGMemLib(); } + + public: + const size_t GetRoundUpAlignSize(size_t input_size) const; + const size_t GetRoundDownAlignSize(size_t input_size) const; + + size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) const; + size_t EagerFreeDeviceMem(const DeviceMemPtr addr, const size_t size) const; + + uint8_t *MmapMemory(size_t size, void *addr) const; + bool MunmapMemory(void *addr, const size_t size) const; + + inline const bool is_eager_free_enabled() const { return is_eager_free_enabled_; } + + private: + void LoadGMemLib() noexcept; + void UnloadGMemLib() noexcept; + + bool is_eager_free_enabled_{false}; + void *gmem_handle_{nullptr}; + // Function for eager free. + LIB_FUNC(GMEM_FREE_EAGER) free_eager_; +}; +} // namespace ascend +} // namespace device +} // namespace mindspore + +#endif diff --git a/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_memory_adapter.cc b/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_memory_adapter.cc new file mode 100644 index 0000000000000000000000000000000000000000..a848e12bdc6790721699d588f242e3e946d6e584 --- /dev/null +++ b/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_memory_adapter.cc @@ -0,0 +1,295 @@ +/** + * Copyright 2021-2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "hardware/ascend/res_manager/mem_manager/ascend_memory_adapter.h" +#include "hardware/ascend/res_manager/mem_manager/ascend_dynamic_mem_adapter.h" +#include "hardware/ascend/res_manager/mem_manager/ascend_gmem_adapter.h" +#include "hardware/ascend/res_manager/mem_manager/ascend_vmm_adapter.h" +#include "hardware/ascend/res_manager/symbol_interface/acl_rt_symbol.h" +#include "hardware/ascend/res_manager/symbol_interface/symbol_utils.h" +#include "common/common.h" + +namespace mindspore { +namespace device { +namespace ascend { +namespace { +constexpr size_t kMBToByte = 1024 << 10; +constexpr size_t kGBToByte = 1024 << 20; +constexpr uint64_t kAscendMemAlignSize = 512; +constexpr double kHalfRatio = 0.5; +constexpr double kMSMemoryRatio = 0.9375; // 15/16 +constexpr double kReservedMemoryRatio = 0.0625; // 1/16 +constexpr size_t kPerHugePageMemorySize = 2097152; // 2mb +constexpr size_t kExtraReservedMemory = 10485760; // 10mb +constexpr size_t kSimuHBMTotalMemSizeGB = 64; +} // namespace +AscendMemAdapterPtr AscendMemAdapter::instance_ = nullptr; + +AscendMemAdapterPtr AscendMemAdapter::GetInstance() { + if (instance_ == nullptr) { + instance_ = std::make_shared(); + } + return instance_; +} + +size_t AscendMemAdapter::GetRoundDownAlignSize(size_t input_size) { + return (input_size / kAscendMemAlignSize) * kAscendMemAlignSize; +} + +size_t AscendMemAdapter::GetRoundUpAlignSize(size_t input_size) { + return ((input_size + kAscendMemAlignSize - 1) / kAscendMemAlignSize) * kAscendMemAlignSize; +} + +size_t AscendMemAdapter::GetDeviceMemSizeFromContext() const { + size_t size_from_context; + float total_device_memory = 32.0f; + auto max_device_memory = total_device_memory; + // if (context->ascend_soc_version() == kAscendVersion910b || context->ascend_soc_version() == kAscendVersion910_93) { + // total_device_memory = 64.0f; + // } + // if (context->ascend_soc_version() == kAscendVersion310p) { + // total_device_memory = 43.0f; + // } + LOG_OUT << "context max_device_memory:" << max_device_memory; + size_from_context = FloatToSize(max_device_memory * kGBToByte); + + return size_from_context; +} + +bool AscendMemAdapter::Initialize() { + if (initialized_) { + return true; + } + + // use 0 temporarily. + float huge_page_reserve_size = 0; + device_hbm_huge_page_reserved_size_ = static_cast(huge_page_reserve_size * kGBToByte); + if (AscendVmmAdapter::IsEnabled() && device_hbm_huge_page_reserved_size_ > 0) { + LOG_OUT << "Reserve huge page feature is not available when VMM is enabled."; + } + LOG_OUT << "Config huge_page_reserve_size : " << huge_page_reserve_size + << ", device_hbm_huge_page_reserved_size_ : " << device_hbm_huge_page_reserved_size_; + + auto ret = CALL_ASCEND_API(aclrtGetMemInfo, ACL_HBM_MEM, &device_hbm_free_size_, &device_hbm_total_size_); + if (ret != ACL_SUCCESS || device_hbm_total_size_ == 0) { + LOG_ERROR << "Internal Error: Get Device MOC memory size failed, ret = " << ret + << ", total MOC size :" << device_hbm_total_size_; + } + + if (device_hbm_free_size_ < LongToSize(DoubleToLong(device_hbm_total_size_ * kHalfRatio))) { + // use 0 temporarily. + unsigned int device_id = 0; + LOG_OUT << "Free memory size is less " + "than half of total memory size." 
+ << "Device " << device_id << " Device MOC total size:" << device_hbm_total_size_ + << " Device MOC free size:" << device_hbm_free_size_ + << " may be other processes occupying this card, check as: ps -ef|grep python"; + } + + // get user define max backend memory + auto user_define_ms_size = GetDeviceMemSizeFromContext(); + auto recommend_mem_size_for_others = LongToSize(DoubleToLong(device_hbm_free_size_ * kReservedMemoryRatio)); + size_t reserved_mem_size_for_others; + if (user_define_ms_size == 0) { + ms_used_hbm_size_ = DoubleToLong(device_hbm_free_size_ * kMSMemoryRatio); + // sub the extra reserved 10mb after rounding down the 2mb + ms_used_hbm_size_ = (ms_used_hbm_size_ / kPerHugePageMemorySize) * kPerHugePageMemorySize - kExtraReservedMemory; + reserved_mem_size_for_others = device_hbm_free_size_ - SizeToLong(ms_used_hbm_size_); + } else { + if (user_define_ms_size >= device_hbm_free_size_) { + LOG_ERROR << "#umsg#Framework Error Message:#umsg#The Free Device Memory Size is " + << (SizeToFloat(device_hbm_free_size_) / kGBToByte) << " GB, max_device_memory should be in range (0-" + << (SizeToFloat(device_hbm_free_size_) / kMBToByte) << "]MB, but got " + << (SizeToFloat(user_define_ms_size) / kMBToByte) + << "MB, please set the context key max_device_memory in valid range."; + } + ms_used_hbm_size_ = SizeToLong(user_define_ms_size); + + reserved_mem_size_for_others = device_hbm_total_size_ - LongToSize(ms_used_hbm_size_); + if (reserved_mem_size_for_others < recommend_mem_size_for_others) { + LOG_OUT << "Reserved memory size for other components(" << reserved_mem_size_for_others + << ") is less than recommend size(" << recommend_mem_size_for_others + << "), It may lead to Out Of Memory in HCCL or other components, Please double check context key " + "'variable_memory_max_size'/'max_device_memory'"; + } + } + + if (AscendVmmAdapter::GetInstance().IsEnabled()) { + ms_used_hbm_size_ = SizeToLong(AscendVmmAdapter::GetInstance().GetRoundDownAlignSize(ms_used_hbm_size_)); + } else if (AscendGmemAdapter::GetInstance().is_eager_free_enabled()) { + ms_used_hbm_size_ = SizeToLong(AscendGmemAdapter::GetInstance().GetRoundDownAlignSize(ms_used_hbm_size_)); + } else { + ms_used_hbm_size_ = SizeToLong(GetRoundDownAlignSize(ms_used_hbm_size_)); + } + max_available_ms_hbm_size_ = ms_used_hbm_size_; + + auto get_init_info = [this, &reserved_mem_size_for_others, &recommend_mem_size_for_others, + &user_define_ms_size]() -> std::string { + std::ostringstream oss; + oss << "Device MOC Size:" << device_hbm_total_size_ / kMBToByte + << "M, Device free MOC Size:" << device_hbm_free_size_ / kMBToByte + << "M, Reserved MOC size for Other Components(HCCL/rts/etc.):" << reserved_mem_size_for_others / kMBToByte + << "M, Recommend Reserved MOC size for Other Components:" << recommend_mem_size_for_others / kMBToByte + << "M, User define MindSpore MOC Size:" << user_define_ms_size / kGBToByte + << "G, MindSpore Used MOC Size:" << ms_used_hbm_size_ / kMBToByte << "M."; + return oss.str(); + }; + + LOG_OUT << get_init_info(); + initialized_ = true; + return true; +} + +void AscendMemAdapter::SimulationInitialize() { + device_hbm_total_size_ = kSimuHBMTotalMemSizeGB * kGBToByte; + device_hbm_free_size_ = device_hbm_total_size_; + size_t reserved_mem_size_for_others; + auto user_define_ms_size = GetDeviceMemSizeFromContext(); + if (user_define_ms_size == 0) { + ms_used_hbm_size_ = DoubleToLong(device_hbm_free_size_ * kMSMemoryRatio); + ms_used_hbm_size_ = (ms_used_hbm_size_ / kPerHugePageMemorySize) * 
kPerHugePageMemorySize - kExtraReservedMemory; + reserved_mem_size_for_others = device_hbm_free_size_ - SizeToLong(ms_used_hbm_size_); + } else { + ms_used_hbm_size_ = SizeToLong(user_define_ms_size); + if (user_define_ms_size > device_hbm_total_size_) { + device_hbm_total_size_ = user_define_ms_size; + } + reserved_mem_size_for_others = device_hbm_total_size_ - user_define_ms_size; + } + + LOG_OUT << "Simulation Device MOC Size:" << device_hbm_total_size_ / kMBToByte + << "M, Device free MOC Size:" << device_hbm_free_size_ / kMBToByte + << "M, Reserved MOC size for Other Components(HCCL/rts/etc.):" << reserved_mem_size_for_others / kMBToByte + << "M, User define MindSpore MOC Size:" << user_define_ms_size / kGBToByte + << "G, MindSpore Used MOC Size:" << ms_used_hbm_size_ / kMBToByte << "M."; + max_available_ms_hbm_size_ = ms_used_hbm_size_; + initialized_ = true; +} + +bool AscendMemAdapter::DeInitialize() { + if (!initialized_) { + LOG_OUT << "DeInitialize Ascend Memory Adapter when it is not initialize"; + return false; + } + std::ostringstream oss_buf; + oss_buf << "Ascend Memory Adapter deinitialize success, statistics:" << DevMemStatistics(); + LOG_OUT << oss_buf.str(); + device_hbm_total_size_ = 0; + device_hbm_free_size_ = 0; + ms_used_hbm_size_ = 0; + max_available_ms_hbm_size_ = 0; + initialized_ = false; + return true; +} + +namespace { +struct HugeMemReserver { + HugeMemReserver(size_t size, size_t reserver_size) { + LOG_OUT << "Allocate size : " << size << ", reserve_size : " << reserver_size << "."; + if (reserver_size < kMBToByte) { + return; + } + size_t free_size = 0; + size_t total_size = 0; + auto ret = CALL_ASCEND_API(aclrtGetMemInfo, ACL_HBM_MEM_HUGE, &free_size, &total_size); + LOG_OUT << "Huge mem reserve free_size : " << free_size << ", total_size : " << total_size << "."; + if (ret == ACL_SUCCESS) { + if (free_size < reserver_size + size) { + LOG_OUT << "Free size of huge page mem[" << free_size + << "] is less than the sum of reserver_size and allocate size. 
Reserve size " << reserver_size + << ", allocate size : " << size << ", total ACL_HBM_MEM_HUGE size : " << total_size << "."; + if (free_size < reserver_size) { + LOG_ERROR << "Free size of huge page mem[" << free_size << "] is less than reserver_size : " << reserver_size + << ", change reserve operation with free size."; + reserver_size = free_size; + } + ret = CALL_ASCEND_API(aclrtMalloc, reinterpret_cast(&addr_), reserver_size, ACL_MEM_MALLOC_HUGE_ONLY); + if (ret != ACL_RT_SUCCESS) { + addr_ = nullptr; + LOG_ERROR << "aclrtMalloc mem size[" << reserver_size << "] fail, ret[" << ret << "]"; + } else { + LOG_OUT << "Huge mem reserve success, addr : " << addr_ << ", size : " << reserver_size << "."; + } + } + } else { + LOG_OUT << "aclrtGetMemInfo mem size[" << size << "] fail, ret[" << ret << "]"; + } + } + + ~HugeMemReserver() { + if (addr_ != nullptr) { + auto ret = CALL_ASCEND_API(aclrtFree, addr_); + if (ret != ACL_SUCCESS) { + LOG_ERROR << "aclrtFree mem [" << addr_ << "] fail, ret[" << ret << "]"; + } else { + LOG_OUT << "Huge mem reserve success, free : " << addr_ << "."; + } + } + } + + void *addr_{nullptr}; +}; +} // namespace + +uint8_t *AscendMemAdapter::MallocFromRts(size_t size) const { + uint8_t *ptr = nullptr; + if (AscendVmmAdapter::GetInstance().IsEnabled()) { + return nullptr; + } + if (AscendGmemAdapter::GetInstance().is_eager_free_enabled()) { + return AscendGmemAdapter::GetInstance().MmapMemory(size, reinterpret_cast(ptr)); + } + + HugeMemReserver huge_mem_reserver(size, device_hbm_huge_page_reserved_size_); + auto ret = CALL_ASCEND_API(aclrtMalloc, reinterpret_cast(&ptr), size, ACL_MEM_TYPE_HIGH_BAND_WIDTH); + if (ret != ACL_RT_SUCCESS) { + if (ret == ACL_ERROR_RT_MEMORY_ALLOCATION) { + // use 0 temporarily. + unsigned int device_id = 0; + size_t free_size = 0; + size_t total = 0; + (void)CALL_ASCEND_API(aclrtGetMemInfo, ACL_HBM_MEM, &free_size, &total); + LOG_ERROR << "#umsg#Framework Error Message:#umsg#Malloc device memory failed, size[" << size << "], ret[" << ret + << "], " + << "Device " << device_id << " Available MOC size:" << total << " free size:" << free_size + << " may be other processes occupying this card, check as: ps -ef|grep python"; + } else { + LOG_ERROR << "rtMalloc mem size[" << size << "] fail, ret[" << ret << "]"; + } + } else { + LOG_OUT << "Call rtMalloc to allocate device memory Success, size: " << size + << " bytes, address start: " << reinterpret_cast(ptr) + << " end: " << reinterpret_cast(ptr + size); + } + return ptr; +} + +bool AscendMemAdapter::FreeToRts(void *devPtr, const size_t size) const { + if (devPtr != nullptr) { + if (AscendGmemAdapter::GetInstance().is_eager_free_enabled()) { + return AscendGmemAdapter::GetInstance().MunmapMemory(devPtr, size); + } + auto ret = CALL_ASCEND_API(aclrtFree, devPtr); + if (ret != ACL_SUCCESS) { + LOG_ERROR << "aclrtFree mem [" << devPtr << "] fail, ret[" << ret << "]"; + return false; + } + } + return true; +} +} // namespace ascend +} // namespace device +} // namespace mindspore diff --git a/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_memory_adapter.h b/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_memory_adapter.h new file mode 100644 index 0000000000000000000000000000000000000000..507798b1b693ad46bdd832208a4000d419d57eac --- /dev/null +++ b/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_memory_adapter.h @@ -0,0 +1,106 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you 
may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_MEMORY_ADAPTER_H_ +#define MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_MEMORY_ADAPTER_H_ + +#include +#include +#include +#include +#include +#include + +#include "common/common.h" +#include "hardware/hardware_abstract/visible.h" + +namespace mindspore { +namespace device { +namespace ascend { +struct MemoryBlock { + MemoryBlock(void *ptr, const size_t size, const std::string &tag) { + mem_ptr = ptr; + mem_size = size; + mem_tag = tag; + } + + void *mem_ptr{nullptr}; + size_t mem_size{0}; + std::string mem_tag; +}; + +class AscendMemAdapter; +using AscendMemAdapterPtr = std::shared_ptr; + +class HARDWARE_EXPORT AscendMemAdapter { + public: + virtual ~AscendMemAdapter() = default; + static AscendMemAdapterPtr GetInstance(); + + virtual bool Initialize(); + virtual bool DeInitialize(); + + virtual uint8_t *MallocStaticDevMem(size_t size, const std::string &tag = "") = 0; + virtual bool FreeStaticDevMem(void *addr) = 0; + virtual uint8_t *MallocDynamicDevMem(size_t size, const std::string &tag = "") = 0; + virtual void ResetDynamicMemory() = 0; + virtual std::string DevMemStatistics() const = 0; + virtual size_t GetDynamicMemUpperBound(void *min_static_addr) const = 0; + [[nodiscard]] virtual uint64_t FreeDevMemSize() const = 0; + + virtual void SimulationInitialize(); + + int64_t GetActualPeakMemory() const { return actual_peak_memory_; } + int64_t GetUsedPeakMemory() const { return used_peak_memory_; } + void UpdateActualPeakMemory(int64_t memory) { actual_peak_memory_ = std::max(actual_peak_memory_, memory); } + void UpdateUsedPeakMemory(int64_t memory) { used_peak_memory_ = std::max(used_peak_memory_, memory); } + [[nodiscard]] uint64_t MaxHbmSizeForMs() const { return max_available_ms_hbm_size_; } + [[nodiscard]] int64_t GetMsUsedHbmSize() const { return ms_used_hbm_size_; } + static size_t GetRoundUpAlignSize(size_t input_size); + static size_t GetRoundDownAlignSize(size_t input_size); + + protected: + AscendMemAdapter() = default; + uint8_t *MallocFromRts(size_t size) const; + bool FreeToRts(void *devPtr, const size_t size) const; + + bool initialized_{false}; + // Support multi-thread. 
+ std::mutex mutex_; + + // Actual peak memory usage (with fragments) + int64_t actual_peak_memory_{0}; + // Used peak memory usage (without fragments) + int64_t used_peak_memory_{0}; + + // rts Memory INFO + size_t device_hbm_total_size_{0}; + size_t device_hbm_free_size_{0}; + size_t device_hbm_huge_page_reserved_size_{0}; + + int64_t ms_used_hbm_size_{0}; + int64_t max_available_ms_hbm_size_{0}; + + private: + DISABLE_COPY_AND_ASSIGN(AscendMemAdapter) + size_t GetDeviceMemSizeFromContext() const; + static AscendMemAdapterPtr instance_; +}; +} // namespace ascend +} // namespace device +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_MEMORY_ADAPTER_H_ diff --git a/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_memory_manager.cc b/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_memory_manager.cc new file mode 100644 index 0000000000000000000000000000000000000000..18f41585266f45b65b1b2e881a33e1a692eb1e77 --- /dev/null +++ b/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_memory_manager.cc @@ -0,0 +1,191 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "hardware/ascend/res_manager/mem_manager/ascend_memory_manager.h" + +#include +#include +#include +#include +#include + +#include "hardware/ascend/res_manager/mem_manager/ascend_memory_adapter.h" +#include "hardware/ascend/res_manager/ascend_stream_manager.h" +#include "hardware/ascend/res_manager/symbol_interface/acl_rt_symbol.h" +#include "hardware/ascend/res_manager/symbol_interface/symbol_utils.h" +#include "common/common.h" + +namespace mindspore { +namespace device { +namespace ascend { +void AscendMemoryManager::Initialize() { (void)AscendMemAdapter::GetInstance()->Initialize(); } + +void AscendMemoryManager::Finalize() { + AscendMemoryPool::GetInstance().ReleaseDeviceRes(); + (void)AscendMemAdapter::GetInstance()->DeInitialize(); +} + +void AscendMemoryManager::ResetDynamicMemory() { AscendMemAdapter::GetInstance()->ResetDynamicMemory(); } + +void AscendMemoryManager::ClearGlobalIdleMem() { AscendMemoryPool::GetInstance().ResetIdleMemBuf(); } + +uint64_t AscendMemoryManager::GetMsMaxMemSize() const { return AscendMemAdapter::GetInstance()->MaxHbmSizeForMs(); } + +uint64_t AscendMemoryManager::GetMsUsedHbmSize() const { return AscendMemAdapter::GetInstance()->GetMsUsedHbmSize(); } + +void *AscendMemoryManager::MallocMemFromMemPool(size_t size, bool from_persistent_mem, bool need_recycle, + uint32_t stream_id) { + auto align_size = GetCommonAlignSize(size); + return AscendMemoryPool::GetInstance().AllocTensorMem(align_size, from_persistent_mem, need_recycle, stream_id); +} + +void AscendMemoryManager::FreeMemFromMemPool(void *device_ptr) { + AscendMemoryPool::GetInstance().FreeTensorMem(device_ptr); +} + +size_t AscendMemoryManager::GetMaxUsedMemorySize() const { return AscendMemoryPool::GetInstance().GetMaxUsedMemSize(); } + +// Relevant function to manage memory statistics +size_t 
AscendMemoryManager::GetTotalMemStatistics() const { + return AscendMemoryPool::GetInstance().TotalMemStatistics(); +} +size_t AscendMemoryManager::GetTotalUsedMemStatistics() const { + return AscendMemoryPool::GetInstance().TotalUsedMemStatistics(); +} +size_t AscendMemoryManager::GetTotalIdleMemStatistics() const { + return AscendMemoryPool::GetInstance().TotalIdleMemStatistics(); +} +size_t AscendMemoryManager::GetTotalEagerFreeMemStatistics() const { + return AscendMemoryPool::GetInstance().TotalEagerFreeMemStatistics(); +} +size_t AscendMemoryManager::GetUsedMemPeakStatistics() const { + return AscendMemoryPool::GetInstance().MaxMemAllocatedStatistics(); +} +size_t AscendMemoryManager::GetReservedMemPeakStatistics() const { + return AscendMemoryPool::GetInstance().MaxMemReservedStatistics(); +} +std::unordered_map AscendMemoryManager::GetBlockCountsStatistics() const { + return AscendMemoryPool::GetInstance().BlockCountsStatistics(); +} +std::unordered_map AscendMemoryManager::GetBlockUnitSizeStatistics() const { + return AscendMemoryPool::GetInstance().BlockUnitSizeStatistics(); +} +std::unordered_map> +AscendMemoryManager::GetCommonMemBlocksInfoStatistics() const { + return AscendMemoryPool::GetInstance().CommonMemBlocksInfoStatistics(); +} +std::unordered_map> +AscendMemoryManager::GetPersistentMemBlocksInfoStatistics() const { + return AscendMemoryPool::GetInstance().PersistentMemBlocksInfoStatistics(); +} +void AscendMemoryManager::ResetMaxMemoryReserved() { AscendMemoryPool::GetInstance().ResetMaxMemReserved(); } +void AscendMemoryManager::ResetMaxMemoryAllocated() { AscendMemoryPool::GetInstance().ResetMaxMemAllocated(); } +size_t AscendMemoryManager::EmptyCache() { return AscendMemoryPool::GetInstance().EmptyCache(); } + +uint8_t *AscendMemoryManager::MallocStaticMem(size_t size, bool communication_mem, uint32_t graph_id) { + size_t align_size = 0; + if (communication_mem) { + align_size = GetCommunicationAlignSize(size); + } else { + align_size = GetCommonAlignSize(size); + } + LOG_OUT << "Malloc Memory for Static: size[" << align_size << "] communication_mem:" << communication_mem; + + uint8_t *alloc_address = reinterpret_cast(AscendMemoryPool::GetInstance().AllocTensorMem(align_size)); + if (alloc_address != nullptr) { + // create protect area [kMemAlignSize -- data -- kMemAlignSize] for communication node memory + return communication_mem ? alloc_address + kMemAlignSize : alloc_address; + } + LOG_ERROR << "#umsg#Framework Error Message:#umsg#Fail to alloc memory, size: " << align_size + << "B, memory statistics:" << AscendMemAdapter::GetInstance()->DevMemStatistics(); + return 0; +} + +uint8_t *AscendMemoryManager::MallocDynamicMem(size_t size, bool communication_mem) { + size_t align_size = 0; + if (communication_mem) { + align_size = GetCommunicationAlignSize(size); + } else { + align_size = GetCommonAlignSize(size); + } + LOG_OUT << "Malloc Memory for Dynamic: size[" << align_size << "] communication_mem: " << communication_mem; + + uint8_t *alloc_address = + reinterpret_cast(AscendMemAdapter::GetInstance()->MallocDynamicDevMem(align_size)); + CHECK_IF_NULL(alloc_address); + // create protect area [kMemAlignSize -- data -- kMemAlignSize] for communication node memory + return communication_mem ? 
alloc_address + kMemAlignSize : alloc_address;
+}
+
+size_t AscendMemoryManager::GetAvailableMemSize() {
+  auto available_mem_size = AscendMemoryPool::GetInstance().free_mem_size() +
+                            AscendMemoryPool::GetInstance().TotalMemStatistics() -
+                            AscendMemoryPool::GetInstance().TotalUsedMemStatistics();
+  return available_mem_size;
+}
+
+DynamicMemPool *AscendMemoryManager::GetMemoryPool() {
+  if (MS_UNLIKELY(memory_pool_ == nullptr)) {
+    memory_pool_ = &(AscendMemoryPool::GetInstance());
+  }
+  return memory_pool_;
+}
+
+void EnhancedAscendMemoryManager::Initialize() {
+  AscendMemoryManager::Initialize();
+  LOG_OUT << "EnhancedAscendMemoryManager initialize.";
+  alloc_costs_.clear();
+}
+
+void EnhancedAscendMemoryManager::Finalize() {
+  AscendMemoryManager::Finalize();
+  LOG_OUT << "EnhancedAscendMemoryManager finalize.";
+  std::sort(alloc_costs_.begin(), alloc_costs_.end());
+  // Calculate mean and median, then print them.
+  auto total_size = alloc_costs_.size();
+  if (total_size == 0) {
+    LOG_OUT << "No memory operation.";
+    return;
+  }
+  // The costs are sorted: an odd count takes the middle element, an even count
+  // averages the two middle elements.
+  double median = 0;
+  if (total_size & 1) {
+    median = alloc_costs_[total_size >> 1];
+  } else {
+    median = (alloc_costs_[(total_size >> 1) - 1] + alloc_costs_[total_size >> 1]) / 2.0;
+  }
+  LOG_OUT << "EnhancedAscendMemoryManager median : " << median << "ns.";
+
+  double sum = std::accumulate(alloc_costs_.begin(), alloc_costs_.end(), 0.0);
+  double mean = sum / total_size;
+  LOG_OUT << "EnhancedAscendMemoryManager mean : " << mean << "ns.";
+
+  const double cost_high_water = 1800;
+  if (median > cost_high_water || mean > cost_high_water) {
+    LOG_OUT << "EnhancedAscendMemoryManager check failed, median : " << median << ", mean : " << mean;
+  }
+}
+
+void *EnhancedAscendMemoryManager::MallocMemFromMemPool(size_t size, bool from_persistent_mem, bool need_recycle,
+                                                        uint32_t stream_id) {
+  auto start_tick = GetCurrentTick();
+  auto ret = AscendMemoryManager::MallocMemFromMemPool(size, from_persistent_mem, need_recycle, stream_id);
+  auto cost = GetCurrentTick() - start_tick;
+  (void)alloc_costs_.emplace_back(cost);
+  LOG_OUT << "Malloc memory cost : " << cost << "ns.";
+  return ret;
+}
+} // namespace ascend
+} // namespace device
+} // namespace mindspore
diff --git a/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_memory_manager.h b/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_memory_manager.h
new file mode 100644
index 0000000000000000000000000000000000000000..38cdba324b422f88e6031cb822dc35212a9f64f2
--- /dev/null
+++ b/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_memory_manager.h
@@ -0,0 +1,99 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_MEMORY_MANAGER_H_ +#define MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_MEMORY_MANAGER_H_ + +#include +#include +#include + +#include +#include "hardware/hardware_abstract/memory_manager.h" +#include "hardware/ascend/res_manager/mem_manager/ascend_memory_pool.h" + +namespace mindspore { +namespace device { +namespace ascend { +class HARDWARE_EXPORT AscendMemoryManager : public MemoryManager { + public: + AscendMemoryManager() = default; + ~AscendMemoryManager() override = default; + + void Initialize() override; + void Finalize() override; + void ResetDynamicMemory() override; + void ClearGlobalIdleMem() override; + void *MallocMemFromMemPool(size_t size, bool from_persistent_mem, bool need_recycle = false, + uint32_t stream_id = kDefaultStreamIndex) override; + void FreeMemFromMemPool(void *device_ptr) override; + size_t GetMaxUsedMemorySize() const override; + uint64_t GetMsMaxMemSize() const; + std::vector MallocContinuousMemFromMemPool(const std::vector &size_list, + uint32_t stream_id = kDefaultStreamIndex) override { + return AscendMemoryPool::GetInstance().AllocContinuousTensorMem(size_list, stream_id); + } + + size_t GetAvailableMemSize() override; + uint64_t GetMsUsedHbmSize() const; + + // Relevant function to manage memory statistics + size_t GetTotalMemStatistics() const override; + size_t GetTotalUsedMemStatistics() const override; + size_t GetTotalIdleMemStatistics() const override; + size_t GetTotalEagerFreeMemStatistics() const override; + size_t GetUsedMemPeakStatistics() const override; + size_t GetReservedMemPeakStatistics() const override; + std::unordered_map GetBlockCountsStatistics() const override; + std::unordered_map GetBlockUnitSizeStatistics() const override; + std::unordered_map> GetCommonMemBlocksInfoStatistics() + const override; + std::unordered_map> + GetPersistentMemBlocksInfoStatistics() const override; + void ResetMaxMemoryReserved() override; + void ResetMaxMemoryAllocated() override; + size_t EmptyCache() override; + + DynamicMemPool *GetMemoryPool() override; + + protected: + uint8_t *MallocStaticMem(size_t size, bool communication_mem, uint32_t graph_id) override; + uint8_t *MallocDynamicMem(size_t size, bool communication_mem) override; +}; + +class HARDWARE_EXPORT EnhancedAscendMemoryManager : public AscendMemoryManager { + public: + EnhancedAscendMemoryManager() = default; + ~EnhancedAscendMemoryManager() override = default; + + void Initialize() override; + + void Finalize() override; + + void *MallocMemFromMemPool(size_t size, bool from_persistent_mem, bool need_recycle, uint32_t stream_id) override; + + private: + inline uint64_t GetCurrentTick() { + auto &&ts = std::chrono::system_clock::now(); + return static_cast(std::chrono::duration_cast(ts.time_since_epoch()).count()); + } + + std::vector alloc_costs_; +}; +} // namespace ascend +} // namespace device +} // namespace mindspore +#endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_MEMORY_MANAGER_H_ diff --git a/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_memory_pool.cc b/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_memory_pool.cc new file mode 100644 index 0000000000000000000000000000000000000000..c47013b143988d02907d5692668de98b93d634c9 --- /dev/null +++ b/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_memory_pool.cc @@ -0,0 +1,331 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file 
except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "hardware/ascend/res_manager/mem_manager/ascend_memory_pool.h" + +#include +#include +#include + +#include +#include + +#include "common/common.h" +#include "hardware/ascend/res_manager/mem_manager/ascend_vmm_adapter.h" +#include "hardware/ascend/res_manager/ascend_stream_manager.h" + +namespace mindspore { +namespace device { +namespace ascend { +constexpr size_t kByteOffset = 8; +constexpr uint32_t kDefaultDispatchThreadsNum = 5; +constexpr uint32_t kDefaultOpThreadsNum = 25; +constexpr float kDefaultMemInitSize = 2.0; +constexpr float kDefaultMemBlockIncreaseSize = 1.0; +constexpr float kDefaultMemMaxSize = 1024.0; + +DefaultAscendMemoryPool::DefaultAscendMemoryPool() { + LOG_OUT << "DefaultAscendMemoryPool constructed."; + SetEnableVmm(AscendVmmAdapter::GetInstance().IsEnabled()); +} + +size_t DefaultAscendMemoryPool::EmptyCache() { + LockGuard lock(AbstractDynamicMemPool::lock()); + AbstractEnhancedDynamicMemPool::WaitPipelineHelper(); + AbstractAscendMemoryPoolSupport::SyncAllStreams(); + size_t release_free_size = 0; + if (MS_UNLIKELY(!customized_allocators_.empty())) { + release_free_size += ReleaseCustomFreeBlocks(); + } + if (IsEnableVmm()) { + AbstractEnhancedDynamicMemPool::FreeIdleMemsByEagerFree(); + release_free_size += AbstractAscendMemoryPoolSupport::EmptyCache(); + return release_free_size; + } else if (IsEnableEagerFree()) { + auto ret = AbstractEnhancedDynamicMemPool::FreeIdleMemsByEagerFree(); + LOG_OUT << "Eager free memory size is " << ret.second << "."; + release_free_size += ret.second; + return release_free_size; + } + + LOG_OUT << "Vmm is not enabled, try to release free blocks."; + // // disable ge kernel use two pointer mem adapter, not support free. 
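+  // (The disabled branch below is kept for reference: per the note above, the
+  // two-pointer mem adapter used when GE kernels are disabled cannot release
+  // free blocks, so EmptyCache would have to bail out with 0 in that mode.)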
+ // if (IsDisableGeKernel()) { + // return 0L; + // } + release_free_size += ReleaseFreeBlocks(); + return release_free_size; +} + +void DefaultAscendMemoryPool::EnablePluggableAllocator(std::function alloc_fn, + std::function free_fn) { + custom_alloc_fn_ = alloc_fn; + custom_free_fn_ = free_fn; + enable_custom_allocator_ = true; +} + +void DefaultAscendMemoryPool::DisablePluggableAllocator() { + enable_custom_allocator_ = false; + return; +} + +DefaultEnhancedAscendMemoryPool::DefaultEnhancedAscendMemoryPool(const DefaultAscendMemoryPoolPtr &instance) + : instance_(instance) { + LOG_OUT << "DefaultEnhancedAscendMemoryPool constructed."; + instance_->SetEnableVmm(AscendVmmAdapter::GetInstance().IsEnabled()); +} + +void DefaultEnhancedAscendMemoryPool::ReleaseDeviceRes() { + LOG_OUT << "Start release device res."; + instance_->ReleaseDeviceRes(); +} + +DeviceMemPtr DefaultEnhancedAscendMemoryPool::AllocTensorMem(size_t size, bool from_persistent_mem, bool need_recycle, + uint32_t stream_id) { + size_t align_size = AlignMemorySize(size); + LOG_OUT << "Allocate tensor mem, size : " << size << ", align_size : " << align_size + << ", need_recycle : " << need_recycle << "."; + LockGuard lock(instance_->lock()); + const auto [mem_buf, allocator] = instance_->AllocMemBuf(align_size, from_persistent_mem, stream_id); + if (mem_buf == nullptr) { + LOG_OUT << "Allocate tensor mem, return nullptr."; + // Dump mem pool state info and debug info when alloc tensor failed. + DumpDynamicMemPoolStateInfo(); + DumpDynamicMemPoolDebugInfo(); + return nullptr; + } + + mem_buf->SetDebugInfo(); + instance_->addr_mem_buf_allocators().emplace(mem_buf->addr_, std::make_pair(mem_buf, allocator)); + auto device_addr = mem_buf->addr_; + + instance_->ReportMemoryPoolInfo(); + instance_->ReportMemoryPoolMallocInfoToMstx(device_addr, align_size); + + LOG_OUT << "Allocate tensor mem, return : " << mem_buf->ToJson() << ", stat info : " << instance_->mem_stat().ToJson() + << "."; + return device_addr; +} + +std::vector DefaultEnhancedAscendMemoryPool::AllocContinuousTensorMem( + const std::vector &size_list, uint32_t stream_id) { + LOG_OUT << "Alloc continuous tensor mem, stream id : " << stream_id << "."; + const auto &continuous_addrs = instance_->AllocContinuousTensorMem(size_list, stream_id); + if (continuous_addrs.size() != size_list.size()) { + return continuous_addrs; + } + if (continuous_addrs.size() == 1 && continuous_addrs[0] == nullptr) { + return continuous_addrs; + } + return continuous_addrs; +} + +void DefaultEnhancedAscendMemoryPool::FreeTensorMem(const DeviceMemPtr &device_addr) { + LOG_OUT << "Free tensor mem, device addr : " << device_addr << "."; + LockGuard lock(instance_->lock()); + DoFreeTensorMem(device_addr); +} + +bool DefaultEnhancedAscendMemoryPool::DoFreeTensorMem(const DeviceMemPtr &device_addr) { + void *enhanced_device_addr = device_addr; + bool ret = instance_->DoFreeTensorMem(device_addr); + LOG_OUT << "Do free tensor mem : " << enhanced_device_addr << ", return : " << ret << "."; + return ret; +} + +void DefaultEnhancedAscendMemoryPool::FreePartTensorMems(const std::vector &free_addrs, + const std::vector &keep_addrs, + const std::vector &keep_addr_sizes) { + LOG_OUT << "Free part tensor mems."; + LockGuard lock(instance_->lock()); + + const auto keep_mem_bufs = instance_->DoFreePartTensorMems(free_addrs, keep_addrs, keep_addr_sizes); +} + +void DefaultEnhancedAscendMemoryPool::DefragMemory() { + if (last_vmm_used_size_ == 0) { + last_vmm_used_size_ = GetVmmUsedMemSize(); + } else { + 
size_t vmm_used_size = GetVmmUsedMemSize(); + if (vmm_used_size > last_vmm_used_size_) { + LOG_OUT << "Current vmm used size : " << vmm_used_size + << " is bigger than last vmm used size : " << last_vmm_used_size_ << "."; + last_vmm_used_size_ = vmm_used_size; + } + } + + instance_->DefragMemory(); +} + +void DefaultEnhancedAscendMemoryPool::DumpDynamicMemPoolStateInfo() { instance_->DumpDynamicMemPoolStateInfo(); } + +const std::pair DefaultEnhancedAscendMemoryPool::FreeIdleMemsByEagerFree() { + const auto [eager_free_size, real_free_size] = instance_->FreeIdleMemsByEagerFree(); + return {eager_free_size, real_free_size}; +} + +bool DefaultEnhancedAscendMemoryPool::WaitEvent(int64_t task_id_on_stream, uint32_t user_stream_id, + uint32_t memory_stream_id) { + LockGuard lock(instance_->lock()); + auto key = std::make_pair(user_stream_id, memory_stream_id); + auto iter = instance_->stream_pair_mem_bufs().find(key); + if (iter == instance_->stream_pair_mem_bufs().end()) { + return false; + } + + auto mem_bufs_ = iter->second; + for (const auto &mem_buf : mem_bufs_) { + LOG_OUT << "Wait event for : " << mem_buf->ToJson() << "."; + mem_buf->WaitEvent(task_id_on_stream, user_stream_id); + // Remove event and try to free memory. + if (mem_buf->IsEventNotUsed()) { + instance_->mem_stat().used_by_event_size_ -= mem_buf->size_; + // Force clear all mem bufs. + for (auto &stream_pair_mem_bufs : instance_->stream_pair_mem_bufs()) { + (void)stream_pair_mem_bufs.second.erase(mem_buf); + } + if (mem_buf->status_ == DynamicMemBufStatus::kMemBufUsedByEvent) { + (void)DoFreeTensorMem(mem_buf->addr_); + } + } + } + return true; +} + +bool DefaultEnhancedAscendMemoryPool::WaitEvent(int64_t task_id_on_stream, uint32_t memory_stream_id) { + LockGuard lock(instance_->lock()); + for (auto &stream_pair_mem_bufs : instance_->stream_pair_mem_bufs()) { + const auto &[user_stream, memory_stream] = stream_pair_mem_bufs.first; + if (memory_stream != memory_stream_id) { + continue; + } + auto mem_bufs = stream_pair_mem_bufs.second; + for (const auto &mem_buf : mem_bufs) { + LOG_OUT << "Wait event for : " << mem_buf->ToJson() << "."; + mem_buf->WaitEvent(task_id_on_stream, user_stream); + // Remove event and try to free memory. + if (mem_buf->IsEventNotUsed()) { + instance_->mem_stat().used_by_event_size_ -= mem_buf->size_; + // Force clear all mem bufs. 
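+        // A mem_buf can be registered under several (user_stream, memory_stream)
+        // pairs, so it must be erased from every set before it can be freed.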
+        for (auto &kv : instance_->stream_pair_mem_bufs()) {
+          (void)kv.second.erase(mem_buf);
+        }
+        if (mem_buf->status_ == DynamicMemBufStatus::kMemBufUsedByEvent) {
+          (void)DoFreeTensorMem(mem_buf->addr_);
+        }
+      }
+    }
+  }
+  return true;
+}
+
+bool DefaultEnhancedAscendMemoryPool::SyncAllEvents() {
+  LockGuard lock(instance_->lock());
+  if (instance_->stream_pair_mem_bufs().empty()) {
+    return false;
+  }
+
+  std::set carry_event_mem_bufs;
+  for (const auto &stream_pair_mem_buf : instance_->stream_pair_mem_bufs()) {
+    for (const auto &mem_buf : stream_pair_mem_buf.second) {
+      (void)carry_event_mem_bufs.emplace(mem_buf);
+    }
+  }
+  for (auto &mem_buf : carry_event_mem_bufs) {
+    if (mem_buf->SyncAllEvents() && mem_buf->status_ == DynamicMemBufStatus::kMemBufUsedByEvent) {
+      (void)DoFreeTensorMem(mem_buf->addr_);
+    }
+  }
+
+  instance_->stream_pair_mem_bufs().clear();
+  return true;
+}
+
+void DefaultEnhancedAscendMemoryPool::SetRankIdGetter(const std::function &rank_id_getter) {
+  instance_->SetRankIdGetter(rank_id_getter);
+  if (rank_id_getter != nullptr) {
+    rank_id_getter_ = rank_id_getter;
+  }
+}
+
+BestFitAscendMemoryPool::BestFitAscendMemoryPool() {
+  LOG_OUT << "BestFitAscendMemoryPool constructed, older memory allocator is enabled.";
+  SetEnableVmm(AscendVmmAdapter::GetInstance().IsEnabled());
+}
+
+size_t BestFitAscendMemoryPool::EmptyCache() {
+  LOG_OUT << "Best fit memory pool does not support empty cache.";
+  return 0L;
+}
+
+// Initialize static members in AscendMemoryPool.
+AbstractAscendMemoryPoolSupportPtr AscendMemoryPool::pool_ = nullptr;
+
+AbstractAscendMemoryPoolSupportPtr AscendMemoryPool::instance_ = nullptr;
+
+AbstractAscendMemoryPoolSupportPtr AscendMemoryPool::enhanced_instance_ = nullptr;
+
+AbstractAscendMemoryPoolSupport &AscendMemoryPool::GetInstance() {
+  static std::once_flag flag;
+  std::call_once(flag, [&]() {
+    if (UseOldMemoryPool()) {
+      instance_ = std::make_shared();
+      enhanced_instance_ = instance_;
+    } else {
+      const auto &memory_pool = std::make_shared();
+      instance_ = memory_pool;
+      enhanced_instance_ = std::make_shared(memory_pool);
+    }
+    // Initialize instance and set ptr.
+    float init_size = kDefaultMemInitSize;
+    size_t init_size_byte = FloatToSize(init_size * kGBToByte);
+    float increase_size = kDefaultMemBlockIncreaseSize;
+    size_t increase_size_byte = FloatToSize(increase_size * kGBToByte);
+    float max_size = kDefaultMemMaxSize;
+    size_t max_size_byte = FloatToSize(max_size * kGBToByte);
+    instance_->Initialize(init_size_byte, increase_size_byte, max_size_byte);
+    // Select the pool exposed to callers.
+    if (!UseEnhancedMemoryPool()) {
+      pool_ = instance_;
+    } else {
+      pool_ = enhanced_instance_;
+    }
+  });
+  return *pool_;
+}
+
+void AscendMemoryPool::SetEnhancedMemoryPool(bool enable) {
+  LOG_OUT << "Set enhanced memory pool : " << enable << ".";
+  if (enable) {
+    pool_ = enhanced_instance_;
+  } else {
+    pool_ = instance_;
+  }
+}
+
+bool AscendMemoryPool::UseOldMemoryPool() {
+  return false;
+  // if (memory::mem_pool::IsDisableAllocConfig(memory::mem_pool::kAllocMemoryPool)) {
+  //   return false;
+  // }
+  // return IsDisableGeKernel() || memory::mem_pool::IsEnableAllocConfig(memory::mem_pool::kAllocMemoryPool);
+}
+
+// Use enhanced memory pool when enable debug, enable log, enable prof, dry run and so on.
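+// Hard-coded to false for now, so the plain pool is selected at first use;
+// callers can still switch to the enhanced pool at runtime through
+// AscendMemoryPool::SetEnhancedMemoryPool(true).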
+bool AscendMemoryPool::UseEnhancedMemoryPool() { return false; } +} // namespace ascend +} // namespace device +} // namespace mindspore diff --git a/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_memory_pool.h b/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_memory_pool.h new file mode 100644 index 0000000000000000000000000000000000000000..cbe3b44500e9e54d26faa1bb983dbc03720c1b0c --- /dev/null +++ b/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_memory_pool.h @@ -0,0 +1,300 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_MEMORY_POOL_H_ +#define MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_MEMORY_POOL_H_ + +#include +#include +#include +#include +#include + +#include "hardware/hardware_abstract/memory/abstract_dynamic_mem_pool.h" +#include "hardware/hardware_abstract/visible.h" +#include "hardware/ascend/res_manager/mem_manager/abstract_ascend_memory_pool_support.h" + +namespace mindspore { +namespace device { +namespace ascend { + +class HARDWARE_EXPORT DefaultAscendMemoryPool : public AbstractAscendMemoryPoolSupport, + public AbstractEnhancedDynamicMemPool { + public: + DefaultAscendMemoryPool(); + DefaultAscendMemoryPool(const DefaultAscendMemoryPool &) = delete; + DefaultAscendMemoryPool &operator=(const DefaultAscendMemoryPool &) = delete; + ~DefaultAscendMemoryPool() override = default; + + std::string GetMemoryPoolType() const override { return "DefaultAscendMemoryPool"; } + + void SetMemPoolBlockSize(size_t available_device_mem_size) override { + return AbstractAscendMemoryPoolSupport::SetMemPoolBlockSize(available_device_mem_size); + } + + size_t CalMemBlockAllocSize(size_t size, bool from_persistent_mem, bool need_recycle = false) override { + return AbstractAscendMemoryPoolSupport::CalMemBlockAllocSize(size, from_persistent_mem, need_recycle); + } + + const bool IsEnableEagerFree() const override { return AbstractAscendMemoryPoolSupport::IsEnableEagerFree(); } + + size_t EmptyCache() override; + + void EnablePluggableAllocator(std::function alloc_fn, std::function free_fn) override; + void DisablePluggableAllocator() override; +}; +using DefaultAscendMemoryPoolPtr = std::shared_ptr; + +class HARDWARE_EXPORT DefaultEnhancedAscendMemoryPool : public DefaultAscendMemoryPool { + public: + explicit DefaultEnhancedAscendMemoryPool(const DefaultAscendMemoryPoolPtr &instance); + DefaultEnhancedAscendMemoryPool(const DefaultEnhancedAscendMemoryPool &) = delete; + DefaultEnhancedAscendMemoryPool &operator=(const DefaultEnhancedAscendMemoryPool &) = delete; + ~DefaultEnhancedAscendMemoryPool() override = default; + + // Wrap enhanced function. 
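+  // DefaultEnhancedAscendMemoryPool is a logging/diagnostics decorator: each
+  // override below either adds bookkeeping (locking, state dumps, mstx
+  // reporting) around the wrapped call or delegates directly to instance_.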
+ void Initialize(size_t init_size, size_t increase_size, size_t max_size) override { + instance_->Initialize(init_size, increase_size, max_size); + } + + void ReleaseDeviceRes() override; + + DeviceMemPtr AllocTensorMem(size_t size, bool from_persistent_mem = false, bool need_recycle = false, + uint32_t stream_id = kDefaultStreamIndex) override; + + std::vector AllocContinuousTensorMem(const std::vector &size_list, + uint32_t stream_id = kDefaultStreamIndex) override; + + void FreeTensorMem(const DeviceMemPtr &device_addr) override; + + bool DoFreeTensorMem(const DeviceMemPtr &device_addr) override; + + void FreePartTensorMems(const std::vector &free_addrs, const std::vector &keep_addrs, + const std::vector &keep_addr_sizes) override; + + std::vector DoFreePartTensorMems(const std::vector &free_addrs, + const std::vector &keep_addrs, + const std::vector &keep_addr_sizes) override { + return instance_->DoFreePartTensorMems(free_addrs, keep_addrs, keep_addr_sizes); + } + + void DefragMemory() override; + + void DumpDynamicMemPoolStateInfo() override; + + const std::pair FreeIdleMemsByEagerFree() override; + + size_t ReleaseFreeBlocks() override { return instance_->ReleaseFreeBlocks(); } + + // Proxy wrapper for AbstractAscendMemoryPoolSupport + void ResetIdleMemBuf() const override { instance_->ResetIdleMemBuf(); } + + bool RecordEvent(int64_t task_id_on_stream, uint32_t user_stream_id, + const std::vector> &memory_stream_addresses, + const DeviceEventPtr &event) override { + return instance_->RecordEvent(task_id_on_stream, user_stream_id, memory_stream_addresses, event); + } + + bool WaitEvent(int64_t task_id_on_stream, uint32_t user_stream_id, uint32_t memory_stream_id) override; + + bool WaitEvent(int64_t task_id_on_stream, uint32_t memory_stream_id) override; + + bool SyncAllEvents() override; + + void EnablePluggableAllocator(std::function alloc_fn, std::function free_fn) override { + return instance_->EnablePluggableAllocator(alloc_fn, free_fn); + } + + void DisablePluggableAllocator() override { return instance_->DisablePluggableAllocator(); } + + size_t AlignMemorySize(size_t size) const override { return instance_->AlignMemorySize(size); } + + size_t CalMemBlockAllocSize(size_t size, bool from_persistent_mem, bool need_recycle = false) override { + return instance_->CalMemBlockAllocSize(size, from_persistent_mem, need_recycle); + } + + void SetMemPoolBlockSize(size_t available_device_mem_size) override { + instance_->SetMemPoolBlockSize(available_device_mem_size); + } + + size_t MemAllocUnitSize(bool from_persistent_mem) const override { + return instance_->MemAllocUnitSize(from_persistent_mem); + } + + void SetMemAllocUintSize(size_t common_size, size_t persist_size = kDynamicMemAllocUnitSize) override { + instance_->SetMemAllocUintSize(common_size, persist_size); + } + + void *GetMinUsingMemoryAddr() const override { return instance_->GetMinUsingMemoryAddr(); } + + size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) override { return instance_->AllocDeviceMem(size, addr); } + + bool FreeDeviceMem(const DeviceMemPtr &addr) override { return instance_->FreeDeviceMem(addr); } + + size_t free_mem_size() override { return instance_->free_mem_size(); } + + uint64_t total_mem_size() const override { return instance_->total_mem_size(); } + + size_t GetMaxUsedMemSize() const override { return instance_->GetMaxUsedMemSize(); } + + size_t GetVmmUsedMemSize() const override { return instance_->GetVmmUsedMemSize(); } + + void DumpDynamicMemPoolDebugInfo() override { 
instance_->DumpDynamicMemPoolDebugInfo(); } + + size_t TotalMemStatistics() const override { return instance_->TotalMemStatistics(); } + + size_t TotalUsedMemStatistics() const override { return instance_->TotalUsedMemStatistics(); } + + size_t TotalUsedByEventMemStatistics() const override { return instance_->TotalUsedByEventMemStatistics(); } + + size_t TotalIdleMemStatistics() const override { return instance_->TotalIdleMemStatistics(); } + + size_t TotalEagerFreeMemStatistics() const override { return instance_->TotalEagerFreeMemStatistics(); } + + size_t UsedMemPeakStatistics() const override { return instance_->UsedMemPeakStatistics(); } + + size_t MaxMemAllocatedStatistics() const override { return instance_->MaxMemAllocatedStatistics(); } + + size_t MaxMemReservedStatistics() const override { return instance_->MaxMemReservedStatistics(); } + + size_t ActualPeakStatistics() const override { return instance_->ActualPeakStatistics(); } + + std::unordered_map BlockCountsStatistics() const override { + return std::move(instance_->BlockCountsStatistics()); + } + + std::unordered_map BlockUnitSizeStatistics() const override { + return std::move(instance_->BlockUnitSizeStatistics()); + } + + std::unordered_map> CommonMemBlocksInfoStatistics() + const override { + return std::move(instance_->CommonMemBlocksInfoStatistics()); + } + + std::unordered_map> PersistentMemBlocksInfoStatistics() + const override { + return std::move(instance_->PersistentMemBlocksInfoStatistics()); + } + + void ResetMaxMemReserved() override { instance_->ResetMaxMemReserved(); } + + void ResetMaxMemAllocated() override { instance_->ResetMaxMemAllocated(); } + + const bool IsEnableEagerFree() const override { return instance_->IsEnableEagerFree(); } + + const bool IsEnableVmm() const override { return instance_->IsEnableVmm(); } + + void SetEnableVmm(bool enable_vmm) override { instance_->SetEnableVmm(enable_vmm); } + + const bool SyncAllStreams() override { return instance_->SyncAllStreams(); } + + size_t AllocDeviceMemByEagerFree(size_t size, DeviceMemPtr *addr) override { + return instance_->AllocDeviceMemByEagerFree(size, addr); + } + + size_t FreeDeviceMemByEagerFree(const DeviceMemPtr addr, const size_t size) override { + return instance_->FreeDeviceMemByEagerFree(addr, size); + } + + size_t MmapDeviceMem(size_t size, DeviceMemPtr addr) override { return instance_->MmapDeviceMem(size, addr); } + + std::string GetMemoryPoolType() const override { return "DefaultEnhancedAscendMemoryPool"; } + + void ReportMemoryPoolInfo() override { instance_->ReportMemoryPoolInfo(); } + + void ReportMemoryPoolMallocInfoToMstx(void *ptr, size_t size) override { + instance_->ReportMemoryPoolMallocInfoToMstx(ptr, size); + } + + void ReportMemoryPoolFreeInfoToMstx(void *ptr) override { instance_->ReportMemoryPoolFreeInfoToMstx(ptr); } + + bool IsEnableTimeEvent() override { return instance_->IsEnableTimeEvent(); } + + void SetEnableTimeEvent(bool enable_time_event) override { instance_->SetEnableTimeEvent(enable_time_event); } + + MemoryTimeEventPtr GenAllocateMemoryTimeEvent(const void *addr, size_t size, uint32_t stream_id, bool from_persistent, + bool is_persistent) override { + return instance_->GenAllocateMemoryTimeEvent(addr, size, stream_id, from_persistent, is_persistent); + } + + MemoryTimeEventPtr GenFreeMemoryTimeEvent(const void *addr) override { + return instance_->GenFreeMemoryTimeEvent(addr); + } + + size_t EmptyCache() override { return instance_->EmptyCache(); } + + protected: + void SetRankIdGetter(const 
std::function &rank_id_getter) override; + + private: + DefaultAscendMemoryPoolPtr instance_; + size_t last_vmm_used_size_{0}; +}; + +class HARDWARE_EXPORT BestFitAscendMemoryPool : public AbstractAscendMemoryPoolSupport { + public: + BestFitAscendMemoryPool(); + BestFitAscendMemoryPool(const BestFitAscendMemoryPool &) = delete; + BestFitAscendMemoryPool &operator=(const BestFitAscendMemoryPool &) = delete; + ~BestFitAscendMemoryPool() override = default; + + void SetMemPoolBlockSize(size_t available_device_mem_size) override { + return AbstractAscendMemoryPoolSupport::SetMemPoolBlockSize(available_device_mem_size); + } + + size_t CalMemBlockAllocSize(size_t size, bool from_persistent_mem, bool need_recycle = false) override { + return AbstractAscendMemoryPoolSupport::CalMemBlockAllocSize(size, from_persistent_mem, need_recycle); + } + + const bool IsEnableEagerFree() const override { return AbstractAscendMemoryPoolSupport::IsEnableEagerFree(); } + + std::string GetMemoryPoolType() const override { return "BestFitAscendMemoryPool"; } + + size_t EmptyCache() override; +}; + +class HARDWARE_EXPORT AscendMemoryPool { + public: + AscendMemoryPool(const AscendMemoryPool &) = delete; + AscendMemoryPool &operator=(const AscendMemoryPool &) = delete; + + static AbstractAscendMemoryPoolSupport &GetInstance(); + + static void SetEnhancedMemoryPool(bool enable); + + private: + AscendMemoryPool() {} + + static bool UseOldMemoryPool(); + + // Use enhanced memory pool when enable debug, enable log, enable prof, dry run and so on. + static bool UseEnhancedMemoryPool(); + + // Reference to memory pool. + static AbstractAscendMemoryPoolSupportPtr pool_; + + // Basic memory pool instance with high performance. + static AbstractAscendMemoryPoolSupportPtr instance_; + + // Memory pool support profiling and debugging. + static AbstractAscendMemoryPoolSupportPtr enhanced_instance_; +}; +} // namespace ascend +} // namespace device +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_MEMORY_POOL_H_ diff --git a/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_vmm_adapter.cc b/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_vmm_adapter.cc new file mode 100644 index 0000000000000000000000000000000000000000..104429256b574af256d4a608b69a279ab4217d0e --- /dev/null +++ b/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_vmm_adapter.cc @@ -0,0 +1,272 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "hardware/ascend/res_manager/mem_manager/ascend_vmm_adapter.h" +#include +#include +#include + +#include "hardware/ascend/res_manager/symbol_interface/symbol_utils.h" +#include "hardware/ascend/res_manager/symbol_interface/acl_rt_symbol.h" +#include "common/common.h" + +namespace mindspore { +namespace device { +namespace ascend { +size_t AscendVmmAdapter::GetRoundUpAlignSize(size_t input_size) const { + return ((input_size + vmm_align_size_ - 1) / vmm_align_size_) * vmm_align_size_; +} + +size_t AscendVmmAdapter::GetRoundDownAlignSize(size_t input_size) const { + return (input_size / vmm_align_size_) * vmm_align_size_; +} + +size_t AscendVmmAdapter::GetHandleSize(size_t input_size) { + if (input_size % vmm_align_size_ != 0) { + LOG_ERROR << "Input size must be multiple of 2MB, but got " << input_size; + } + return input_size / vmm_align_size_; +} + +DeviceMemPtr AscendVmmAdapter::FindVmmSegment(const DeviceMemPtr addr) { + auto it = vmm_map_.upper_bound(addr); + if (it == vmm_map_.begin()) { + return nullptr; + } else { + --it; + return it->first; + } + return nullptr; +} + +void AscendVmmAdapter::ClearAllMemory() { + for (auto &kv : vmm_map_) { + if (kv.second == nullptr) { + continue; + } + auto ret = CALL_ASCEND_API(aclrtUnmapMem, kv.first); + if (ret != ACL_SUCCESS) { + LOG_ERROR << "Unmap memory failed."; + } + ret = CALL_ASCEND_API(aclrtFreePhysical, kv.second); + if (ret != ACL_SUCCESS) { + LOG_ERROR << "Free physical memory failed."; + } + } + while (!cached_handle_sets_.empty()) { + auto handle = *cached_handle_sets_.begin(); + cached_handle_sets_.erase(cached_handle_sets_.begin()); + auto ret = CALL_ASCEND_API(aclrtFreePhysical, handle); + if (ret != ACL_SUCCESS) { + LOG_ERROR << "Free physical memory failed."; + } + } + for (auto &addr : all_reserve_mems_) { + CALL_ASCEND_API(aclrtReleaseMemAddress, addr); + } + all_reserve_mems_.clear(); + vmm_map_.clear(); +} + +namespace { +void MoveBackMappedHandle(std::map *mapped_vmm_handle, + std::map *vmm_map, + std::set *cached_handle_sets_) { + for (const auto [address, handle] : *mapped_vmm_handle) { + auto ret = CALL_ASCEND_API(aclrtUnmapMem, address); + if (ret != ACL_SUCCESS) { + LOG_ERROR << "Unmap memory failed, address : " << address << "."; + } else { + auto iter = vmm_map->find(address); + if (iter == vmm_map->end()) { + LOG_ERROR << "Find vmm map address : " << address << " failed."; + } else { + iter->second = nullptr; + cached_handle_sets_->insert(handle); + } + } + } +} +}; // namespace + +size_t AscendVmmAdapter::MmapDeviceMem(const size_t size, const DeviceMemPtr addr, const size_t max_size) { + CHECK_IF_NULL(addr); + LOG_OUT << "VMM MmapDeviceMem size:" << size << ", addr:" << addr + << ", cached_handle_sets_ size : " << cached_handle_sets_.size() << "."; + // use 0 temporarily + auto device_id = 0; + + auto vmm_start_addr = FindVmmSegment(addr); + if (vmm_start_addr == nullptr) { + LOG_ERROR << "Can not find the vmm segment."; + return 0; + } + aclrtPhysicalMemProp prop = {}; + prop.handleType = ACL_MEM_HANDLE_TYPE_NONE; + prop.allocationType = ACL_MEM_ALLOCATION_TYPE_PINNED; + prop.memAttr = ACL_HBM_MEM_HUGE; + prop.location.type = ACL_MEM_LOCATION_TYPE_DEVICE; + prop.location.id = device_id; + prop.reserve = 0; + auto start_offset = CalAddressOffset(addr, vmm_start_addr); + auto align_size = GetRoundUpAlignSize(size + start_offset); + auto handle_size = GetHandleSize(align_size); + auto iter = vmm_map_.find(vmm_start_addr); + + std::map mapped_vmm_handle; + for (size_t i = 0; i < handle_size; ++i) { + 
auto new_addr = AddressOffset(vmm_start_addr, i * vmm_align_size_); + if (iter == vmm_map_.end() || iter->first != new_addr) { + LOG_ERROR << "Can not find the vmm segment."; + return 0; + } + if (iter->second != nullptr) { + iter++; + continue; + } + aclrtDrvMemHandle handle = nullptr; + if (!cached_handle_sets_.empty()) { + handle = *cached_handle_sets_.begin(); + cached_handle_sets_.erase(cached_handle_sets_.begin()); + } else { + if (physical_handle_size_ * vmm_align_size_ >= max_size) { + LOG_OUT << "Mapped too much memory, physical_handle_size_ : " << physical_handle_size_ + << ", max_size : " << max_size << ", addr : " << addr << ", size : " << size << "."; + MoveBackMappedHandle(&mapped_vmm_handle, &vmm_map_, &cached_handle_sets_); + return 0; + } + + auto ret = CALL_ASCEND_API(aclrtMallocPhysical, &handle, vmm_align_size_, &prop, 0); + if (ret != ACL_SUCCESS) { + size_t used_handle_size = 0; + for (const auto &[k, v] : vmm_map_) { + if (v != nullptr) { + LOG_OUT << "Inuse handle address : " << k << ", handle : " << v << "."; + used_handle_size += 1; + } + } + used_handle_size += cached_handle_sets_.size(); + // This may be a normal case at the memory usage boundary. + LOG_OUT << "Malloc physical memory failed, inuse physical memory handle size : " << used_handle_size + << ", physical_handle_size_ size : " << physical_handle_size_ << "."; + MoveBackMappedHandle(&mapped_vmm_handle, &vmm_map_, &cached_handle_sets_); + return 0; + } else { + physical_handle_size_++; + if (physical_handle_size_ * vmm_align_size_ >= max_size) { + LOG_OUT << "Mapped too much memory, physical_handle_size_ : " << physical_handle_size_ + << ", max_size : " << max_size << "."; + } + } + } + + auto ret = CALL_ASCEND_API(aclrtMapMem, new_addr, vmm_align_size_, 0, handle, 0); + if (ret != ACL_SUCCESS) { + LOG_ERROR << "Map memory failed."; + cached_handle_sets_.insert(handle); + MoveBackMappedHandle(&mapped_vmm_handle, &vmm_map_, &cached_handle_sets_); + return 0; + } + mapped_vmm_handle[iter->first] = handle; + iter->second = handle; + iter++; + } + + return size; +} + +size_t AscendVmmAdapter::AllocDeviceMem(size_t size, DeviceMemPtr *addr) { + CHECK_IF_NULL(addr); + size_t align_size = GetRoundUpAlignSize(size); + LOG_OUT << "VMM AllocDeviceMem size:" << size << ", align_size:" << align_size; + auto ret = CALL_ASCEND_API(aclrtReserveMemAddress, addr, align_size, 0, nullptr, 1); + if (ret != ACL_SUCCESS) { + LOG_ERROR << "Reserve memory address failed."; + return 0; + } + all_reserve_mems_.push_back(*addr); + auto handle_size = GetHandleSize(align_size); + for (size_t i = 0; i < handle_size; i++) { + auto new_addr = AddressOffset(*addr, i * vmm_align_size_); + vmm_map_[new_addr] = nullptr; + } + return align_size; +} + +size_t AscendVmmAdapter::EagerFreeDeviceMem(const DeviceMemPtr addr, const size_t size) { + CHECK_IF_NULL(addr); + LOG_OUT << "Eager free device mem addr :" << addr << ", size :" << size + << ", cached_handle_sets_ size : " << cached_handle_sets_.size() << "."; + size_t ret_size = 0; + auto iter = vmm_map_.lower_bound(addr); + if (iter == vmm_map_.end()) { + // Memory less than 2MB may be at the end of a vmm segment, and it's a normal case. 
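+    // e.g. with the default 2MB vmm_align_size_, eager-freeing a 1MB tail at
+    // the end of a segment lands here with lower_bound() == end() and is
+    // treated as a no-op rather than an error.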
+    if (size >= vmm_align_size_) {
+      LOG_ERROR << "Can not find the vmm segment.";
+    }
+    return 0;
+  }
+  auto vmm_start_addr = iter->first;
+  auto free_end_addr = AddressOffset(addr, size);
+  while (true) {
+    auto vmm_end_addr = AddressOffset(vmm_start_addr, vmm_align_size_);
+    if (vmm_end_addr > free_end_addr) {
+      break;
+    }
+    if (iter == vmm_map_.end() || iter->first != vmm_start_addr) {
+      LOG_ERROR << "Can not find the vmm segment.";
+      return 0;
+    }
+    if (iter->second == nullptr) {
+      iter++;
+      vmm_start_addr = vmm_end_addr;
+      // The run of already-unmapped (nullptr) entries may be long, so skip logging here.
+      continue;
+    }
+    auto ret = CALL_ASCEND_API(aclrtUnmapMem, vmm_start_addr);
+    if (ret != ACL_SUCCESS) {
+      LOG_ERROR << "Unmap memory failed.";
+      return 0;
+    }
+    cached_handle_sets_.insert(iter->second);
+    iter->second = nullptr;
+    iter++;
+    vmm_start_addr = vmm_end_addr;
+    ret_size += vmm_align_size_;
+  }
+  LOG_OUT << "After eager free, cached_handle_sets_ size : " << cached_handle_sets_.size()
+          << ", expected free size : " << size << ", real size : " << ret_size << ".";
+  return ret_size;
+}
+
+size_t AscendVmmAdapter::EmptyCache() {
+  size_t empty_size = 0L;
+  for (auto iter = cached_handle_sets_.begin(); iter != cached_handle_sets_.end(); iter++) {
+    auto ret = CALL_ASCEND_API(aclrtFreePhysical, *iter);
+    if (ret != ACL_SUCCESS) {
+      LOG_ERROR << "Free physical memory failed.";
+    }
+  }
+
+  size_t cache_handle_size = cached_handle_sets_.size();
+  physical_handle_size_ -= cache_handle_size;
+  empty_size += cache_handle_size * vmm_align_size_;
+  cached_handle_sets_.clear();
+  LOG_OUT << "Empty cache size: " << empty_size << ", freed cached handle count: " << cache_handle_size << ".";
+  return empty_size;
+}
+} // namespace ascend
+} // namespace device
+} // namespace mindspore
diff --git a/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_vmm_adapter.h b/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_vmm_adapter.h
new file mode 100644
index 0000000000000000000000000000000000000000..4a32f3818d08eb2fe92d8fcd4355f56a02968c33
--- /dev/null
+++ b/inferrt/src/hardware/ascend/res_manager/mem_manager/ascend_vmm_adapter.h
@@ -0,0 +1,167 @@
+/**
+ * Copyright 2024 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_VMM_ADAPTER_H_ +#define MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_VMM_ADAPTER_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "acl/acl.h" +#include "hardware/hardware_abstract/dlopen_macro.h" +#include "common/common.h" + +#include "hardware/hardware_abstract/visible.h" + +namespace mindspore { +namespace device { +namespace ascend { +using DeviceMemPtr = void(*); +class HARDWARE_EXPORT AscendVmmAdapter { + public: + static AscendVmmAdapter &GetInstance() { + static AscendVmmAdapter instance{}; + return instance; + } + + AscendVmmAdapter() { + vmm_align_size_ = kDefaultAlignSize; + + LOG_OUT << "VMM align size is " << vmm_align_size_; + } + ~AscendVmmAdapter() = default; + + public: + size_t GetRoundUpAlignSize(size_t input_size) const; + size_t GetRoundDownAlignSize(size_t input_size) const; + + void ClearAllMemory(); + size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr); + size_t MmapDeviceMem(const size_t size, const DeviceMemPtr addr, const size_t max_size); + size_t EagerFreeDeviceMem(const DeviceMemPtr addr, const size_t size); + size_t GetAllocatedSize() { return physical_handle_size_ * vmm_align_size_; } + + size_t EmptyCache(); + + static const bool IsEnabled() { + static bool is_enable_vmm = IsVmmEnabled(); + return is_enable_vmm; + } + + private: + static const bool IsVmmEnabled() { + if (!CheckVmmDriverVersion()) { + return false; + } + + LOG_OUT << "VMM is enabled."; + return true; + } + + private: + uint64_t vmm_align_size_; + DeviceMemPtr FindVmmSegment(const DeviceMemPtr addr); + size_t GetHandleSize(size_t input_size); + std::atomic physical_handle_size_{0}; + std::map vmm_map_; + std::vector all_reserve_mems_; + std::set cached_handle_sets_; + static constexpr uint64_t kMB = 1024 * 1024; + static constexpr uint64_t kDefaultAlignSize = 2 * kMB; + static int StringToMB(const std::string &str) { + std::stringstream ss(str); + int num; + std::string unit; + if (!(ss >> num)) { + LOG_ERROR << "No valid number could be extracted from the string, " << str; + } + if (!(ss >> unit) || unit != "MB") { + LOG_ERROR << "The unit of the string is not MB, " << str; + } + if (ss.rdbuf()->in_avail() > 0) { + LOG_ERROR << "The string has extra characters, " << str; + } + return num; + } + static bool CheckVmmDriverVersion() { + // Get driver version + constexpr auto ascend_install_info = "/etc/ascend_install.info"; + const std::string DRIVER_INSTALL_PATH_PARAM = "Driver_Install_Path_Param="; + std::string driver_path = "/usr/local/Ascend"; + + std::ifstream ascend_install_file(ascend_install_info); + if (!ascend_install_file.is_open()) { + LOG_OUT << "Open file " << ascend_install_info << " failed."; + } else { + std::string line; + while (std::getline(ascend_install_file, line)) { + size_t pos = line.find(DRIVER_INSTALL_PATH_PARAM); + if (pos != std::string::npos) { + // Extract the path after "Driver_Install_Path_Param=" + driver_path = line.substr(pos + DRIVER_INSTALL_PATH_PARAM.length()); + LOG_OUT << "Driver path is " << driver_path; + break; + } + } + } + + auto splitString = [](const std::string &str, char delimiter) -> std::vector { + std::vector tokens; + std::string token; + std::istringstream tokenStream(str); + while (std::getline(tokenStream, token, delimiter)) { + tokens.push_back(token); + } + return tokens; + }; + + auto driver_version_info = driver_path + "/driver/version.info"; + const std::string DRIVER_VERSION_PARAM = "Version="; + std::ifstream 
driver_version_file(driver_version_info);
+    if (!driver_version_file.is_open()) {
+      LOG_OUT << "Open file " << driver_version_info << " failed.";
+    } else {
+      std::string line;
+      while (std::getline(driver_version_file, line)) {
+        size_t pos = line.find(DRIVER_VERSION_PARAM);
+        if (pos != std::string::npos) {
+          // Extract the version after "Version="
+          std::string driver_version = line.substr(pos + DRIVER_VERSION_PARAM.length());
+          auto split_version = splitString(driver_version, '.');
+          LOG_OUT << "Driver version is " << driver_version << ", major version is " << split_version[0];
+          if (split_version[0] < "24") {
+            LOG_OUT << "Driver version is less than 24.0.0, vmm is disabled by default, driver_version: "
+                    << driver_version;
+            return false;
+          }
+          break;
+        }
+      }
+    }
+    return true;
+  }
+};
+} // namespace ascend
+} // namespace device
+} // namespace mindspore
+
+#endif
diff --git a/inferrt/src/hardware/ascend/res_manager/CMakeLists.txt b/inferrt/src/hardware/ascend/res_manager/symbol_interface/CMakeLists.txt
similarity index 100%
rename from inferrt/src/hardware/ascend/res_manager/CMakeLists.txt
rename to inferrt/src/hardware/ascend/res_manager/symbol_interface/CMakeLists.txt
diff --git a/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_base_symbol.cc b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_base_symbol.cc
new file mode 100644
index 0000000000000000000000000000000000000000..d1888495c4f855509ff9c0c8a5fa6a5cbc27c96d
--- /dev/null
+++ b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_base_symbol.cc
@@ -0,0 +1,85 @@
+/**
+ * Copyright 2024 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +#include "acl_base_symbol.h" +#include +#include "symbol_utils.h" + +namespace mindspore::device::ascend { +aclCreateDataBufferFunObj aclCreateDataBuffer_ = nullptr; +aclCreateTensorDescFunObj aclCreateTensorDesc_ = nullptr; +aclDataTypeSizeFunObj aclDataTypeSize_ = nullptr; +aclDestroyDataBufferFunObj aclDestroyDataBuffer_ = nullptr; +aclDestroyTensorDescFunObj aclDestroyTensorDesc_ = nullptr; +aclGetTensorDescDimV2FunObj aclGetTensorDescDimV2_ = nullptr; +aclGetTensorDescNumDimsFunObj aclGetTensorDescNumDims_ = nullptr; +aclSetTensorConstFunObj aclSetTensorConst_ = nullptr; +aclSetTensorDescNameFunObj aclSetTensorDescName_ = nullptr; +aclSetTensorFormatFunObj aclSetTensorFormat_ = nullptr; +aclSetTensorPlaceMentFunObj aclSetTensorPlaceMent_ = nullptr; +aclSetTensorShapeFunObj aclSetTensorShape_ = nullptr; +aclrtGetSocNameFunObj aclrtGetSocName_ = nullptr; +aclUpdateDataBufferFunObj aclUpdateDataBuffer_ = nullptr; +aclGetDataBufferAddrFunObj aclGetDataBufferAddr_ = nullptr; +aclGetTensorDescSizeFunObj aclGetTensorDescSize_ = nullptr; +aclGetRecentErrMsgFunObj aclGetRecentErrMsg_ = nullptr; + +void LoadAclBaseApiSymbol(const std::string &ascend_path) { + std::string aclbase_plugin_path = "lib64/libascendcl.so"; + auto base_handler = GetLibHandler(ascend_path + aclbase_plugin_path); + if (base_handler == nullptr) { + LOG_OUT << "Dlopen " << aclbase_plugin_path << " failed!" << dlerror(); + return; + } + aclCreateDataBuffer_ = DlsymAscendFuncObj(aclCreateDataBuffer, base_handler); + aclCreateTensorDesc_ = DlsymAscendFuncObj(aclCreateTensorDesc, base_handler); + aclDataTypeSize_ = DlsymAscendFuncObj(aclDataTypeSize, base_handler); + aclDestroyDataBuffer_ = DlsymAscendFuncObj(aclDestroyDataBuffer, base_handler); + aclDestroyTensorDesc_ = DlsymAscendFuncObj(aclDestroyTensorDesc, base_handler); + aclGetTensorDescDimV2_ = DlsymAscendFuncObj(aclGetTensorDescDimV2, base_handler); + aclGetTensorDescNumDims_ = DlsymAscendFuncObj(aclGetTensorDescNumDims, base_handler); + aclSetTensorConst_ = DlsymAscendFuncObj(aclSetTensorConst, base_handler); + aclSetTensorDescName_ = DlsymAscendFuncObj(aclSetTensorDescName, base_handler); + aclSetTensorFormat_ = DlsymAscendFuncObj(aclSetTensorFormat, base_handler); + aclSetTensorPlaceMent_ = DlsymAscendFuncObj(aclSetTensorPlaceMent, base_handler); + aclSetTensorShape_ = DlsymAscendFuncObj(aclSetTensorShape, base_handler); + aclrtGetSocName_ = DlsymAscendFuncObj(aclrtGetSocName, base_handler); + aclUpdateDataBuffer_ = DlsymAscendFuncObj(aclUpdateDataBuffer, base_handler); + aclGetDataBufferAddr_ = DlsymAscendFuncObj(aclGetDataBufferAddr, base_handler); + aclGetTensorDescSize_ = DlsymAscendFuncObj(aclGetTensorDescSize, base_handler); + aclGetRecentErrMsg_ = DlsymAscendFuncObj(aclGetRecentErrMsg, base_handler); + LOG_OUT << "Load acl base api success!"; +} + +void LoadSimulationAclBaseApi() { + ASSIGN_SIMU(aclCreateDataBuffer); + ASSIGN_SIMU(aclCreateTensorDesc); + ASSIGN_SIMU(aclDataTypeSize); + ASSIGN_SIMU(aclDestroyDataBuffer); + ASSIGN_SIMU(aclDestroyTensorDesc); + ASSIGN_SIMU(aclGetTensorDescDimV2); + ASSIGN_SIMU(aclGetTensorDescNumDims); + ASSIGN_SIMU(aclSetTensorConst); + ASSIGN_SIMU(aclSetTensorDescName); + ASSIGN_SIMU(aclSetTensorFormat); + ASSIGN_SIMU(aclSetTensorPlaceMent); + ASSIGN_SIMU(aclSetTensorShape); + ASSIGN_SIMU(aclUpdateDataBuffer); + ASSIGN_SIMU(aclrtGetSocName); + ASSIGN_SIMU(aclGetDataBufferAddr); + ASSIGN_SIMU(aclGetTensorDescSize); + ASSIGN_SIMU(aclGetRecentErrMsg); +} +} // namespace mindspore::device::ascend diff --git 
+
+void LoadSimulationAclBaseApi() {
+  ASSIGN_SIMU(aclCreateDataBuffer);
+  ASSIGN_SIMU(aclCreateTensorDesc);
+  ASSIGN_SIMU(aclDataTypeSize);
+  ASSIGN_SIMU(aclDestroyDataBuffer);
+  ASSIGN_SIMU(aclDestroyTensorDesc);
+  ASSIGN_SIMU(aclGetTensorDescDimV2);
+  ASSIGN_SIMU(aclGetTensorDescNumDims);
+  ASSIGN_SIMU(aclSetTensorConst);
+  ASSIGN_SIMU(aclSetTensorDescName);
+  ASSIGN_SIMU(aclSetTensorFormat);
+  ASSIGN_SIMU(aclSetTensorPlaceMent);
+  ASSIGN_SIMU(aclSetTensorShape);
+  ASSIGN_SIMU(aclUpdateDataBuffer);
+  ASSIGN_SIMU(aclrtGetSocName);
+  ASSIGN_SIMU(aclGetDataBufferAddr);
+  ASSIGN_SIMU(aclGetTensorDescSize);
+  ASSIGN_SIMU(aclGetRecentErrMsg);
+}
+} // namespace mindspore::device::ascend
diff --git a/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_base_symbol.h b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_base_symbol.h
new file mode 100644
index 0000000000000000000000000000000000000000..8e2d8fe3258954bf28570904c7d58f4b35014b45
--- /dev/null
+++ b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_base_symbol.h
@@ -0,0 +1,45 @@
+/**
+ * Copyright 2024 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_TRANSFORM_SYMBOL_ACL_BASE_SYMBOL_H_
+#define MINDSPORE_CCSRC_TRANSFORM_SYMBOL_ACL_BASE_SYMBOL_H_
+#include <string>
+#include "acl/acl_base.h"
+#include "hardware/hardware_abstract/dlopen_macro.h"
+
+namespace mindspore::device::ascend {
+ORIGIN_METHOD_WITH_SIMU(aclCreateDataBuffer, aclDataBuffer *, void *, size_t)
+ORIGIN_METHOD_WITH_SIMU(aclCreateTensorDesc, aclTensorDesc *, aclDataType, int, const int64_t *, aclFormat)
+ORIGIN_METHOD_WITH_SIMU(aclDataTypeSize, size_t, aclDataType)
+ORIGIN_METHOD_WITH_SIMU(aclDestroyDataBuffer, aclError, const aclDataBuffer *)
+ORIGIN_METHOD_WITH_SIMU(aclDestroyTensorDesc, void, const aclTensorDesc *)
+ORIGIN_METHOD_WITH_SIMU(aclGetTensorDescDimV2, aclError, const aclTensorDesc *, size_t, int64_t *)
+ORIGIN_METHOD_WITH_SIMU(aclGetTensorDescNumDims, size_t, const aclTensorDesc *)
+ORIGIN_METHOD_WITH_SIMU(aclSetTensorConst, aclError, aclTensorDesc *, void *, size_t)
+ORIGIN_METHOD_WITH_SIMU(aclSetTensorDescName, void, aclTensorDesc *, const char *)
+ORIGIN_METHOD_WITH_SIMU(aclSetTensorFormat, aclError, aclTensorDesc *, aclFormat)
+ORIGIN_METHOD_WITH_SIMU(aclSetTensorPlaceMent, aclError, aclTensorDesc *, aclMemType)
+ORIGIN_METHOD_WITH_SIMU(aclSetTensorShape, aclError, aclTensorDesc *, int, const int64_t *)
+ACLRT_GET_SOC_NAME_WITH_SIMU(aclrtGetSocName, const char *)
+ORIGIN_METHOD_WITH_SIMU(aclUpdateDataBuffer, aclError, aclDataBuffer *, void *, size_t)
+ORIGIN_METHOD_WITH_SIMU(aclGetDataBufferAddr, void *, const aclDataBuffer *)
+ORIGIN_METHOD_WITH_SIMU(aclGetTensorDescSize, size_t, const aclTensorDesc *)
+ORIGIN_METHOD_WITH_SIMU(aclGetRecentErrMsg, const char *)
+
+void LoadAclBaseApiSymbol(const std::string &ascend_path);
+void LoadSimulationAclBaseApi();
+} // namespace mindspore::device::ascend
+
+#endif // MINDSPORE_CCSRC_TRANSFORM_SYMBOL_ACL_BASE_SYMBOL_H_
diff --git a/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_compiler_symbol.cc b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_compiler_symbol.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f3d36395a4c1936a16e4b9f1a4c3340a5f832849
--- /dev/null
+++ b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_compiler_symbol.cc
@@ -0,0 +1,49 @@
+/**
+ * Copyright 2024 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "acl_compiler_symbol.h"
+#include <string>
+#include "symbol_utils.h"
+
+namespace mindspore::device::ascend {
+aclopCompileAndExecuteFunObj aclopCompileAndExecute_ = nullptr;
+aclopCompileAndExecuteV2FunObj aclopCompileAndExecuteV2_ = nullptr;
+aclSetCompileoptFunObj aclSetCompileopt_ = nullptr;
+aclopSetCompileFlagFunObj aclopSetCompileFlag_ = nullptr;
+aclGenGraphAndDumpForOpFunObj aclGenGraphAndDumpForOp_ = nullptr;
+
+void LoadAclOpCompilerApiSymbol(const std::string &ascend_path) {
+  std::string compiler_plugin_path = ascend_path + "lib64/libacl_op_compiler.so";
+  auto handler = GetLibHandler(compiler_plugin_path);
+  if (handler == nullptr) {
+    LOG_OUT << "Dlopen " << compiler_plugin_path << " failed!" << dlerror();
+    return;
+  }
+  aclopCompileAndExecute_ = DlsymAscendFuncObj(aclopCompileAndExecute, handler);
+  aclopCompileAndExecuteV2_ = DlsymAscendFuncObj(aclopCompileAndExecuteV2, handler);
+  aclSetCompileopt_ = DlsymAscendFuncObj(aclSetCompileopt, handler);
+  aclopSetCompileFlag_ = DlsymAscendFuncObj(aclopSetCompileFlag, handler);
+  aclGenGraphAndDumpForOp_ = DlsymAscendFuncObj(aclGenGraphAndDumpForOp, handler);
+  LOG_OUT << "Load acl op compiler api success!";
+}
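+
+// Illustrative call site (hypothetical, for documentation only): once LoadAclOpCompilerApiSymbol()
+// has run, callers invoke the resolved wrapper instead of linking against libacl_op_compiler.so at
+// build time. ACL_PRECISION_MODE / "allow_fp32_to_fp16" below are one example compile option pair
+// from acl_op_compiler.h:
+//
+//   if (aclSetCompileopt_ != nullptr) {
+//     (void)aclSetCompileopt_(ACL_PRECISION_MODE, "allow_fp32_to_fp16");
+//   }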
+
+void LoadSimulationAclOpCompilerApi() {
+  ASSIGN_SIMU(aclopCompileAndExecute);
+  ASSIGN_SIMU(aclopCompileAndExecuteV2);
+  ASSIGN_SIMU(aclSetCompileopt);
+  ASSIGN_SIMU(aclopSetCompileFlag);
+  ASSIGN_SIMU(aclGenGraphAndDumpForOp);
+}
+} // namespace mindspore::device::ascend
diff --git a/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_compiler_symbol.h b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_compiler_symbol.h
new file mode 100644
index 0000000000000000000000000000000000000000..bc9b7cd46fc15264613ce78053e53fff02da9460
--- /dev/null
+++ b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_compiler_symbol.h
@@ -0,0 +1,40 @@
+/**
+ * Copyright 2024 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_TRANSFORM_SYMBOL_ACL_COMPILER_SYMBOL_H_
+#define MINDSPORE_CCSRC_TRANSFORM_SYMBOL_ACL_COMPILER_SYMBOL_H_
+#include <string>
+#include "acl/acl_op_compiler.h"
+#include "hardware/hardware_abstract/dlopen_macro.h"
+
+namespace mindspore::device::ascend {
+
+ORIGIN_METHOD_WITH_SIMU(aclopCompileAndExecute, aclError, const char *, int, const aclTensorDesc *const[],
+                        const aclDataBuffer *const[], int, const aclTensorDesc *const[], aclDataBuffer *const[],
+                        const aclopAttr *, aclopEngineType, aclopCompileType, const char *, aclrtStream);
+ORIGIN_METHOD_WITH_SIMU(aclopCompileAndExecuteV2, aclError, const char *, int, aclTensorDesc *[], aclDataBuffer *[],
+                        int, aclTensorDesc *[], aclDataBuffer *[], aclopAttr *, aclopEngineType, aclopCompileType,
+                        const char *, aclrtStream);
+ORIGIN_METHOD_WITH_SIMU(aclSetCompileopt, aclError, aclCompileOpt, const char *);
+ORIGIN_METHOD_WITH_SIMU(aclopSetCompileFlag, aclError, aclOpCompileFlag);
+ORIGIN_METHOD_WITH_SIMU(aclGenGraphAndDumpForOp, aclError, const char *, int, const aclTensorDesc *const[],
+                        const aclDataBuffer *const[], int, const aclTensorDesc *const[], aclDataBuffer *const[],
+                        const aclopAttr *, aclopEngineType, const char *, const aclGraphDumpOption *);
+
+void LoadAclOpCompilerApiSymbol(const std::string &ascend_path);
+void LoadSimulationAclOpCompilerApi();
+} // namespace mindspore::device::ascend
+
+#endif // MINDSPORE_CCSRC_TRANSFORM_SYMBOL_ACL_COMPILER_SYMBOL_H_
diff --git a/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_mdl_symbol.cc b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_mdl_symbol.cc
new file mode 100644
index 0000000000000000000000000000000000000000..c4bddd8e822d8c1520a1974aa0def1eb7650477a
--- /dev/null
+++ b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_mdl_symbol.cc
@@ -0,0 +1,185 @@
+/**
+ * Copyright 2024 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +#include "acl_mdl_symbol.h" +#include +#include "symbol_utils.h" + +namespace mindspore::device::ascend { +aclmdlAddDatasetBufferFunObj aclmdlAddDatasetBuffer_ = nullptr; +aclmdlCreateDatasetFunObj aclmdlCreateDataset_ = nullptr; +aclmdlCreateDescFunObj aclmdlCreateDesc_ = nullptr; +aclmdlDestroyDatasetFunObj aclmdlDestroyDataset_ = nullptr; +aclmdlDestroyDescFunObj aclmdlDestroyDesc_ = nullptr; +aclmdlExecuteFunObj aclmdlExecute_ = nullptr; +aclmdlFinalizeDumpFunObj aclmdlFinalizeDump_ = nullptr; +aclmdlGetCurOutputDimsFunObj aclmdlGetCurOutputDims_ = nullptr; +aclmdlGetDatasetBufferFunObj aclmdlGetDatasetBuffer_ = nullptr; +aclmdlGetDatasetNumBuffersFunObj aclmdlGetDatasetNumBuffers_ = nullptr; +aclmdlGetDescFunObj aclmdlGetDesc_ = nullptr; +aclmdlGetInputDataTypeFunObj aclmdlGetInputDataType_ = nullptr; +aclmdlGetInputDimsFunObj aclmdlGetInputDims_ = nullptr; +aclmdlGetInputIndexByNameFunObj aclmdlGetInputIndexByName_ = nullptr; +aclmdlGetInputNameByIndexFunObj aclmdlGetInputNameByIndex_ = nullptr; +aclmdlGetInputSizeByIndexFunObj aclmdlGetInputSizeByIndex_ = nullptr; +aclmdlGetNumInputsFunObj aclmdlGetNumInputs_ = nullptr; +aclmdlGetNumOutputsFunObj aclmdlGetNumOutputs_ = nullptr; +aclmdlGetOutputDataTypeFunObj aclmdlGetOutputDataType_ = nullptr; +aclmdlGetOutputDimsFunObj aclmdlGetOutputDims_ = nullptr; +aclmdlGetOutputNameByIndexFunObj aclmdlGetOutputNameByIndex_ = nullptr; +aclmdlGetOutputSizeByIndexFunObj aclmdlGetOutputSizeByIndex_ = nullptr; +aclmdlInitDumpFunObj aclmdlInitDump_ = nullptr; +aclmdlLoadFromMemFunObj aclmdlLoadFromMem_ = nullptr; +aclmdlSetDumpFunObj aclmdlSetDump_ = nullptr; +aclmdlSetDynamicBatchSizeFunObj aclmdlSetDynamicBatchSize_ = nullptr; +aclmdlUnloadFunObj aclmdlUnload_ = nullptr; +aclmdlQuerySizeFromMemFunObj aclmdlQuerySizeFromMem_ = nullptr; +aclmdlBundleGetModelIdFunObj aclmdlBundleGetModelId_ = nullptr; +aclmdlBundleLoadFromMemFunObj aclmdlBundleLoadFromMem_ = nullptr; +aclmdlBundleUnloadFunObj aclmdlBundleUnload_ = nullptr; +aclmdlLoadFromMemWithMemFunObj aclmdlLoadFromMemWithMem_ = nullptr; +aclmdlSetDatasetTensorDescFunObj aclmdlSetDatasetTensorDesc_ = nullptr; +aclmdlGetInputFormatFunObj aclmdlGetInputFormat_ = nullptr; +aclmdlGetDatasetTensorDescFunObj aclmdlGetDatasetTensorDesc_ = nullptr; +aclmdlSetInputDynamicDimsFunObj aclmdlSetInputDynamicDims_ = nullptr; +aclmdlGetOutputFormatFunObj aclmdlGetOutputFormat_ = nullptr; +aclmdlGetInputDimsV2FunObj aclmdlGetInputDimsV2_ = nullptr; +aclmdlGetDynamicHWFunObj aclmdlGetDynamicHW_ = nullptr; +aclmdlGetInputDynamicDimsFunObj aclmdlGetInputDynamicDims_ = nullptr; +aclmdlGetInputDynamicGearCountFunObj aclmdlGetInputDynamicGearCount_ = nullptr; +aclmdlGetDynamicBatchFunObj aclmdlGetDynamicBatch_ = nullptr; +aclmdlSetDynamicHWSizeFunObj aclmdlSetDynamicHWSize_ = nullptr; +#if defined(__linux__) && defined(WITH_BACKEND) +aclmdlRICaptureBeginFunObj aclmdlRICaptureBegin_ = nullptr; +aclmdlRICaptureGetInfoFunObj aclmdlRICaptureGetInfo_ = nullptr; +aclmdlRICaptureEndFunObj aclmdlRICaptureEnd_ = nullptr; +aclmdlRIExecuteAsyncFunObj aclmdlRIExecuteAsync_ = nullptr; +aclmdlRIDestroyFunObj aclmdlRIDestroy_ = nullptr; +#endif + +void LoadAclMdlApiSymbol(const std::string &ascend_path) { + std::string aclmdl_plugin_path = ascend_path + "lib64/libascendcl.so"; + auto handler = GetLibHandler(aclmdl_plugin_path); + if (handler == nullptr) { + LOG_OUT << "Dlopen " << aclmdl_plugin_path << " failed!" 
<< dlerror(); + return; + } + aclmdlAddDatasetBuffer_ = DlsymAscendFuncObj(aclmdlAddDatasetBuffer, handler); + aclmdlCreateDataset_ = DlsymAscendFuncObj(aclmdlCreateDataset, handler); + aclmdlCreateDesc_ = DlsymAscendFuncObj(aclmdlCreateDesc, handler); + aclmdlDestroyDataset_ = DlsymAscendFuncObj(aclmdlDestroyDataset, handler); + aclmdlDestroyDesc_ = DlsymAscendFuncObj(aclmdlDestroyDesc, handler); + aclmdlExecute_ = DlsymAscendFuncObj(aclmdlExecute, handler); + aclmdlFinalizeDump_ = DlsymAscendFuncObj(aclmdlFinalizeDump, handler); + aclmdlGetCurOutputDims_ = DlsymAscendFuncObj(aclmdlGetCurOutputDims, handler); + aclmdlGetDatasetBuffer_ = DlsymAscendFuncObj(aclmdlGetDatasetBuffer, handler); + aclmdlGetDatasetNumBuffers_ = DlsymAscendFuncObj(aclmdlGetDatasetNumBuffers, handler); + aclmdlGetDesc_ = DlsymAscendFuncObj(aclmdlGetDesc, handler); + aclmdlGetInputDataType_ = DlsymAscendFuncObj(aclmdlGetInputDataType, handler); + aclmdlGetInputDims_ = DlsymAscendFuncObj(aclmdlGetInputDims, handler); + aclmdlGetInputIndexByName_ = DlsymAscendFuncObj(aclmdlGetInputIndexByName, handler); + aclmdlGetInputNameByIndex_ = DlsymAscendFuncObj(aclmdlGetInputNameByIndex, handler); + aclmdlGetInputSizeByIndex_ = DlsymAscendFuncObj(aclmdlGetInputSizeByIndex, handler); + aclmdlGetNumInputs_ = DlsymAscendFuncObj(aclmdlGetNumInputs, handler); + aclmdlGetNumOutputs_ = DlsymAscendFuncObj(aclmdlGetNumOutputs, handler); + aclmdlGetOutputDataType_ = DlsymAscendFuncObj(aclmdlGetOutputDataType, handler); + aclmdlGetOutputDims_ = DlsymAscendFuncObj(aclmdlGetOutputDims, handler); + aclmdlQuerySizeFromMem_ = DlsymAscendFuncObj(aclmdlQuerySizeFromMem, handler); + aclmdlGetOutputNameByIndex_ = DlsymAscendFuncObj(aclmdlGetOutputNameByIndex, handler); + aclmdlGetOutputSizeByIndex_ = DlsymAscendFuncObj(aclmdlGetOutputSizeByIndex, handler); + aclmdlInitDump_ = DlsymAscendFuncObj(aclmdlInitDump, handler); + aclmdlLoadFromMem_ = DlsymAscendFuncObj(aclmdlLoadFromMem, handler); + aclmdlSetDump_ = DlsymAscendFuncObj(aclmdlSetDump, handler); + aclmdlSetDynamicBatchSize_ = DlsymAscendFuncObj(aclmdlSetDynamicBatchSize, handler); + aclmdlUnload_ = DlsymAscendFuncObj(aclmdlUnload, handler); + aclmdlBundleGetModelId_ = DlsymAscendFuncObj(aclmdlBundleGetModelId, handler); + aclmdlBundleLoadFromMem_ = DlsymAscendFuncObj(aclmdlBundleLoadFromMem, handler); + aclmdlBundleUnload_ = DlsymAscendFuncObj(aclmdlBundleUnload, handler); + aclmdlLoadFromMemWithMem_ = DlsymAscendFuncObj(aclmdlLoadFromMemWithMem, handler); + aclmdlSetDatasetTensorDesc_ = DlsymAscendFuncObj(aclmdlSetDatasetTensorDesc, handler); + aclmdlGetInputFormat_ = DlsymAscendFuncObj(aclmdlGetInputFormat, handler); + aclmdlGetDatasetTensorDesc_ = DlsymAscendFuncObj(aclmdlGetDatasetTensorDesc, handler); + aclmdlSetInputDynamicDims_ = DlsymAscendFuncObj(aclmdlSetInputDynamicDims, handler); + aclmdlGetOutputFormat_ = DlsymAscendFuncObj(aclmdlGetOutputFormat, handler); + aclmdlGetInputDimsV2_ = DlsymAscendFuncObj(aclmdlGetInputDimsV2, handler); + aclmdlGetDynamicHW_ = DlsymAscendFuncObj(aclmdlGetDynamicHW, handler); + aclmdlGetInputDynamicDims_ = DlsymAscendFuncObj(aclmdlGetInputDynamicDims, handler); + aclmdlGetInputDynamicGearCount_ = DlsymAscendFuncObj(aclmdlGetInputDynamicGearCount, handler); + aclmdlGetDynamicBatch_ = DlsymAscendFuncObj(aclmdlGetDynamicBatch, handler); + aclmdlSetDynamicHWSize_ = DlsymAscendFuncObj(aclmdlSetDynamicHWSize, handler); +#if defined(__linux__) && defined(WITH_BACKEND) + aclmdlRICaptureBegin_ = DlsymAscendFuncObj(aclmdlRICaptureBegin, handler); + 
aclmdlRICaptureGetInfo_ = DlsymAscendFuncObj(aclmdlRICaptureGetInfo, handler); + aclmdlRICaptureEnd_ = DlsymAscendFuncObj(aclmdlRICaptureEnd, handler); + aclmdlRIExecuteAsync_ = DlsymAscendFuncObj(aclmdlRIExecuteAsync, handler); + aclmdlRIDestroy_ = DlsymAscendFuncObj(aclmdlRIDestroy, handler); +#endif + + LOG_OUT << "Load acl mdl api success!"; +} + +void LoadSimulationAclMdlApi() { + ASSIGN_SIMU(aclmdlAddDatasetBuffer); + ASSIGN_SIMU(aclmdlCreateDataset); + ASSIGN_SIMU(aclmdlCreateDesc); + ASSIGN_SIMU(aclmdlDestroyDataset); + ASSIGN_SIMU(aclmdlDestroyDesc); + ASSIGN_SIMU(aclmdlExecute); + ASSIGN_SIMU(aclmdlFinalizeDump); + ASSIGN_SIMU(aclmdlGetCurOutputDims); + ASSIGN_SIMU(aclmdlGetDatasetBuffer); + ASSIGN_SIMU(aclmdlGetDatasetNumBuffers); + ASSIGN_SIMU(aclmdlGetDesc); + ASSIGN_SIMU(aclmdlGetInputDataType); + ASSIGN_SIMU(aclmdlGetInputDims); + ASSIGN_SIMU(aclmdlGetInputIndexByName); + ASSIGN_SIMU(aclmdlGetInputNameByIndex); + ASSIGN_SIMU(aclmdlGetInputSizeByIndex); + ASSIGN_SIMU(aclmdlGetNumInputs); + ASSIGN_SIMU(aclmdlGetNumOutputs); + ASSIGN_SIMU(aclmdlGetOutputDataType); + ASSIGN_SIMU(aclmdlGetOutputDims); + ASSIGN_SIMU(aclmdlGetOutputNameByIndex); + ASSIGN_SIMU(aclmdlGetOutputSizeByIndex); + ASSIGN_SIMU(aclmdlInitDump); + ASSIGN_SIMU(aclmdlLoadFromMem); + ASSIGN_SIMU(aclmdlSetDump); + ASSIGN_SIMU(aclmdlSetDynamicBatchSize); + ASSIGN_SIMU(aclmdlUnload); + ASSIGN_SIMU(aclmdlQuerySizeFromMem); + ASSIGN_SIMU(aclmdlBundleGetModelId); + ASSIGN_SIMU(aclmdlBundleLoadFromMem); + ASSIGN_SIMU(aclmdlBundleUnload); + ASSIGN_SIMU(aclmdlLoadFromMemWithMem); + ASSIGN_SIMU(aclmdlSetDatasetTensorDesc); + ASSIGN_SIMU(aclmdlGetInputFormat); + ASSIGN_SIMU(aclmdlGetDatasetTensorDesc); + ASSIGN_SIMU(aclmdlSetInputDynamicDims); + ASSIGN_SIMU(aclmdlGetOutputFormat); + ASSIGN_SIMU(aclmdlGetInputDimsV2); + ASSIGN_SIMU(aclmdlGetDynamicHW); + ASSIGN_SIMU(aclmdlGetInputDynamicDims); + ASSIGN_SIMU(aclmdlGetInputDynamicGearCount); + ASSIGN_SIMU(aclmdlGetDynamicBatch); + ASSIGN_SIMU(aclmdlSetDynamicHWSize); +#if defined(__linux__) && defined(WITH_BACKEND) + ASSIGN_SIMU(aclmdlRICaptureBegin); + ASSIGN_SIMU(aclmdlRICaptureGetInfo); + ASSIGN_SIMU(aclmdlRICaptureEnd); + ASSIGN_SIMU(aclmdlRIExecuteAsync); + ASSIGN_SIMU(aclmdlRIDestroy); +#endif +} +} // namespace mindspore::device::ascend diff --git a/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_mdl_symbol.h b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_mdl_symbol.h new file mode 100644 index 0000000000000000000000000000000000000000..19abf85f91a0372fa0390ae5a58afab789454a46 --- /dev/null +++ b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_mdl_symbol.h @@ -0,0 +1,130 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_TRANSFORM_SYMBOL_ACL_MDL_SYMBOL_H_ +#define MINDSPORE_CCSRC_TRANSFORM_SYMBOL_ACL_MDL_SYMBOL_H_ +#include +#include "acl/acl_mdl.h" +#include "hardware/hardware_abstract/dlopen_macro.h" + +namespace mindspore::device::ascend { +ORIGIN_METHOD_WITH_SIMU(aclmdlAddDatasetBuffer, aclError, aclmdlDataset *, aclDataBuffer *) +ORIGIN_METHOD_WITH_SIMU(aclmdlCreateDataset, aclmdlDataset *); +ORIGIN_METHOD_WITH_SIMU(aclmdlCreateDesc, aclmdlDesc *) +ORIGIN_METHOD_WITH_SIMU(aclmdlDestroyDataset, aclError, const aclmdlDataset *) +ORIGIN_METHOD_WITH_SIMU(aclmdlDestroyDesc, aclError, aclmdlDesc *) +ORIGIN_METHOD_WITH_SIMU(aclmdlExecute, aclError, uint32_t, const aclmdlDataset *, aclmdlDataset *) +ORIGIN_METHOD_WITH_SIMU(aclmdlFinalizeDump, aclError) +ORIGIN_METHOD_WITH_SIMU(aclmdlGetCurOutputDims, aclError, const aclmdlDesc *, size_t, aclmdlIODims *) +ORIGIN_METHOD_WITH_SIMU(aclmdlGetDatasetBuffer, aclDataBuffer *, const aclmdlDataset *, size_t) +ORIGIN_METHOD_WITH_SIMU(aclmdlGetDatasetNumBuffers, size_t, const aclmdlDataset *) +ORIGIN_METHOD_WITH_SIMU(aclmdlGetDesc, aclError, aclmdlDesc *, uint32_t) +ORIGIN_METHOD_WITH_SIMU(aclmdlGetInputDataType, aclDataType, const aclmdlDesc *, size_t) +ORIGIN_METHOD_WITH_SIMU(aclmdlGetInputDims, aclError, const aclmdlDesc *, size_t, aclmdlIODims *) +ORIGIN_METHOD_WITH_SIMU(aclmdlGetInputIndexByName, aclError, const aclmdlDesc *, const char *, size_t *) +ORIGIN_METHOD_WITH_SIMU(aclmdlGetInputNameByIndex, const char *, const aclmdlDesc *, size_t) +ORIGIN_METHOD_WITH_SIMU(aclmdlGetInputSizeByIndex, size_t, aclmdlDesc *, size_t index) +ORIGIN_METHOD_WITH_SIMU(aclmdlGetNumInputs, size_t, aclmdlDesc *) +ORIGIN_METHOD_WITH_SIMU(aclmdlGetNumOutputs, size_t, aclmdlDesc *) +ORIGIN_METHOD_WITH_SIMU(aclmdlGetOutputDataType, aclDataType, const aclmdlDesc *, size_t) +ORIGIN_METHOD_WITH_SIMU(aclmdlGetOutputDims, aclError, const aclmdlDesc *, size_t, aclmdlIODims *) +ORIGIN_METHOD_WITH_SIMU(aclmdlGetOutputNameByIndex, const char *, const aclmdlDesc *, size_t) +ORIGIN_METHOD_WITH_SIMU(aclmdlGetOutputSizeByIndex, size_t, aclmdlDesc *, size_t) +ORIGIN_METHOD_WITH_SIMU(aclmdlInitDump, aclError) +ORIGIN_METHOD_WITH_SIMU(aclmdlLoadFromMem, aclError, const void *, size_t, uint32_t *) +ORIGIN_METHOD_WITH_SIMU(aclmdlSetDump, aclError, const char *) +ORIGIN_METHOD_WITH_SIMU(aclmdlSetDynamicBatchSize, aclError, uint32_t, aclmdlDataset *, size_t, uint64_t) +ORIGIN_METHOD_WITH_SIMU(aclmdlUnload, aclError, uint32_t) +ORIGIN_METHOD_WITH_SIMU(aclmdlQuerySizeFromMem, aclError, const void *, size_t, size_t *, size_t *) +ORIGIN_METHOD_WITH_SIMU(aclmdlBundleGetModelId, aclError, uint32_t, size_t, uint32_t *) +ORIGIN_METHOD_WITH_SIMU(aclmdlBundleLoadFromMem, aclError, const void *, size_t, uint32_t *) +ORIGIN_METHOD_WITH_SIMU(aclmdlBundleUnload, aclError, uint32_t) +ORIGIN_METHOD_WITH_SIMU(aclmdlLoadFromMemWithMem, aclError, const void *, size_t, uint32_t *, void *, size_t, void *, + size_t) +ORIGIN_METHOD_WITH_SIMU(aclmdlSetDatasetTensorDesc, aclError, aclmdlDataset *, aclTensorDesc *, size_t) +ORIGIN_METHOD_WITH_SIMU(aclmdlGetInputFormat, aclFormat, const aclmdlDesc *, size_t) +ORIGIN_METHOD_WITH_SIMU(aclmdlGetDatasetTensorDesc, aclTensorDesc *, const aclmdlDataset *, size_t) +ORIGIN_METHOD_WITH_SIMU(aclmdlSetInputDynamicDims, aclError, uint32_t, aclmdlDataset *, size_t, const aclmdlIODims *) +ORIGIN_METHOD_WITH_SIMU(aclmdlGetOutputFormat, aclFormat, const aclmdlDesc *, size_t) +ORIGIN_METHOD_WITH_SIMU(aclmdlGetInputDimsV2, aclError, const aclmdlDesc *, size_t, aclmdlIODims 
*) +ORIGIN_METHOD_WITH_SIMU(aclmdlGetDynamicHW, aclError, const aclmdlDesc *, size_t, aclmdlHW *) +ORIGIN_METHOD_WITH_SIMU(aclmdlGetInputDynamicDims, aclError, const aclmdlDesc *, size_t, aclmdlIODims *, size_t) +ORIGIN_METHOD_WITH_SIMU(aclmdlGetInputDynamicGearCount, aclError, const aclmdlDesc *, size_t, size_t *) +ORIGIN_METHOD_WITH_SIMU(aclmdlGetDynamicBatch, aclError, const aclmdlDesc *, aclmdlBatch *) +ORIGIN_METHOD_WITH_SIMU(aclmdlSetDynamicHWSize, aclError, uint32_t, aclmdlDataset *, size_t, uint64_t, uint64_t) +#if defined(__linux__) && defined(WITH_BACKEND) +ORIGIN_METHOD_WITH_SIMU(aclmdlRICaptureBegin, aclError, aclrtStream, aclmdlRICaptureMode) +ORIGIN_METHOD_WITH_SIMU(aclmdlRICaptureGetInfo, aclError, aclrtStream, aclmdlRICaptureStatus *, aclmdlRI *) +ORIGIN_METHOD_WITH_SIMU(aclmdlRICaptureEnd, aclError, aclrtStream, aclmdlRI *) +ORIGIN_METHOD_WITH_SIMU(aclmdlRIExecuteAsync, aclError, aclmdlRI, aclrtStream) +ORIGIN_METHOD_WITH_SIMU(aclmdlRIDestroy, aclError, aclmdlRI) +#endif + +extern aclmdlAddDatasetBufferFunObj aclmdlAddDatasetBuffer_; +extern aclmdlCreateDatasetFunObj aclmdlCreateDataset_; +extern aclmdlCreateDescFunObj aclmdlCreateDesc_; +extern aclmdlDestroyDatasetFunObj aclmdlDestroyDataset_; +extern aclmdlDestroyDescFunObj aclmdlDestroyDesc_; +extern aclmdlExecuteFunObj aclmdlExecute_; +extern aclmdlFinalizeDumpFunObj aclmdlFinalizeDump_; +extern aclmdlGetCurOutputDimsFunObj aclmdlGetCurOutputDims_; +extern aclmdlGetDatasetBufferFunObj aclmdlGetDatasetBuffer_; +extern aclmdlGetDatasetNumBuffersFunObj aclmdlGetDatasetNumBuffers_; +extern aclmdlGetDescFunObj aclmdlGetDesc_; +extern aclmdlGetInputDataTypeFunObj aclmdlGetInputDataType_; +extern aclmdlGetInputDimsFunObj aclmdlGetInputDims_; +extern aclmdlGetInputIndexByNameFunObj aclmdlGetInputIndexByName_; +extern aclmdlGetInputNameByIndexFunObj aclmdlGetInputNameByIndex_; +extern aclmdlGetInputSizeByIndexFunObj aclmdlGetInputSizeByIndex_; +extern aclmdlGetNumInputsFunObj aclmdlGetNumInputs_; +extern aclmdlGetNumOutputsFunObj aclmdlGetNumOutputs_; +extern aclmdlGetOutputDataTypeFunObj aclmdlGetOutputDataType_; +extern aclmdlGetOutputDimsFunObj aclmdlGetOutputDims_; +extern aclmdlGetOutputNameByIndexFunObj aclmdlGetOutputNameByIndex_; +extern aclmdlGetOutputSizeByIndexFunObj aclmdlGetOutputSizeByIndex_; +extern aclmdlInitDumpFunObj aclmdlInitDump_; +extern aclmdlLoadFromMemFunObj aclmdlLoadFromMem_; +extern aclmdlSetDumpFunObj aclmdlSetDump_; +extern aclmdlSetDynamicBatchSizeFunObj aclmdlSetDynamicBatchSize_; +extern aclmdlUnloadFunObj aclmdlUnload_; +extern aclmdlQuerySizeFromMemFunObj aclmdlQuerySizeFromMem_; +extern aclmdlBundleGetModelIdFunObj aclmdlBundleGetModelId_; +extern aclmdlBundleLoadFromMemFunObj aclmdlBundleLoadFromMem_; +extern aclmdlBundleUnloadFunObj aclmdlBundleUnload_; +extern aclmdlLoadFromMemWithMemFunObj aclmdlLoadFromMemWithMem_; +extern aclmdlSetDatasetTensorDescFunObj aclmdlSetDatasetTensorDesc_; +extern aclmdlGetInputFormatFunObj aclmdlGetInputFormat_; +extern aclmdlGetDatasetTensorDescFunObj aclmdlGetDatasetTensorDesc_; +extern aclmdlSetInputDynamicDimsFunObj aclmdlSetInputDynamicDims_; +extern aclmdlGetOutputFormatFunObj aclmdlGetOutputFormat_; +extern aclmdlGetInputDimsV2FunObj aclmdlGetInputDimsV2_; +extern aclmdlGetDynamicHWFunObj aclmdlGetDynamicHW_; +extern aclmdlGetInputDynamicDimsFunObj aclmdlGetInputDynamicDims_; +extern aclmdlGetInputDynamicGearCountFunObj aclmdlGetInputDynamicGearCount_; +extern aclmdlGetDynamicBatchFunObj aclmdlGetDynamicBatch_; +extern aclmdlSetDynamicHWSizeFunObj 
aclmdlSetDynamicHWSize_; +#if defined(__linux__) && defined(WITH_BACKEND) +extern aclmdlRICaptureBeginFunObj aclmdlRICaptureBegin_; +extern aclmdlRICaptureGetInfoFunObj aclmdlRICaptureGetInfo_; +extern aclmdlRICaptureEndFunObj aclmdlRICaptureEnd_; +extern aclmdlRIExecuteAsyncFunObj aclmdlRIExecuteAsync_; +extern aclmdlRIDestroyFunObj aclmdlRIDestroy_; +#endif + +void LoadAclMdlApiSymbol(const std::string &ascend_path); +void LoadSimulationAclMdlApi(); +} // namespace mindspore::device::ascend + +#endif // MINDSPORE_CCSRC_TRANSFORM_SYMBOL_ACL_MDL_SYMBOL_H_ diff --git a/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_op_symbol.cc b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_op_symbol.cc new file mode 100644 index 0000000000000000000000000000000000000000..2355e0705c4af92a9709bc2997496ecb12342fd6 --- /dev/null +++ b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_op_symbol.cc @@ -0,0 +1,73 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "acl_op_symbol.h" +#include +#include "symbol_utils.h" + +namespace mindspore::device::ascend { +aclopCreateAttrFunObj aclopCreateAttr_ = nullptr; +aclopSetAttrBoolFunObj aclopSetAttrBool_ = nullptr; +aclopSetAttrDataTypeFunObj aclopSetAttrDataType_ = nullptr; +aclopSetAttrFloatFunObj aclopSetAttrFloat_ = nullptr; +aclopSetAttrIntFunObj aclopSetAttrInt_ = nullptr; +aclopSetAttrListBoolFunObj aclopSetAttrListBool_ = nullptr; +aclopSetAttrListDataTypeFunObj aclopSetAttrListDataType_ = nullptr; +aclopSetAttrListFloatFunObj aclopSetAttrListFloat_ = nullptr; +aclopSetAttrListIntFunObj aclopSetAttrListInt_ = nullptr; +aclopSetAttrListListIntFunObj aclopSetAttrListListInt_ = nullptr; +aclopSetAttrListStringFunObj aclopSetAttrListString_ = nullptr; +aclopSetAttrStringFunObj aclopSetAttrString_ = nullptr; +aclopSetModelDirFunObj aclopSetModelDir_ = nullptr; + +void LoadAclOpApiSymbol(const std::string &ascend_path) { + std::string ascendcl_plugin_path = ascend_path + "lib64/libascendcl.so"; + auto handler = GetLibHandler(ascendcl_plugin_path); + if (handler == nullptr) { + LOG_OUT << "Dlopen " << ascendcl_plugin_path << " failed!" 
<< dlerror(); + return; + } + aclopCreateAttr_ = DlsymAscendFuncObj(aclopCreateAttr, handler); + aclopSetAttrBool_ = DlsymAscendFuncObj(aclopSetAttrBool, handler); + aclopSetAttrDataType_ = DlsymAscendFuncObj(aclopSetAttrDataType, handler); + aclopSetAttrFloat_ = DlsymAscendFuncObj(aclopSetAttrFloat, handler); + aclopSetAttrInt_ = DlsymAscendFuncObj(aclopSetAttrInt, handler); + aclopSetAttrListBool_ = DlsymAscendFuncObj(aclopSetAttrListBool, handler); + aclopSetAttrListDataType_ = DlsymAscendFuncObj(aclopSetAttrListDataType, handler); + aclopSetAttrListFloat_ = DlsymAscendFuncObj(aclopSetAttrListFloat, handler); + aclopSetAttrListInt_ = DlsymAscendFuncObj(aclopSetAttrListInt, handler); + aclopSetAttrListListInt_ = DlsymAscendFuncObj(aclopSetAttrListListInt, handler); + aclopSetAttrListString_ = DlsymAscendFuncObj(aclopSetAttrListString, handler); + aclopSetAttrString_ = DlsymAscendFuncObj(aclopSetAttrString, handler); + aclopSetModelDir_ = DlsymAscendFuncObj(aclopSetModelDir, handler); + LOG_OUT << "Load ascend op api success!"; +} + +void LoadSimulationAclOpApi() { + ASSIGN_SIMU(aclopCreateAttr); + ASSIGN_SIMU(aclopSetAttrBool); + ASSIGN_SIMU(aclopSetAttrDataType); + ASSIGN_SIMU(aclopSetAttrFloat); + ASSIGN_SIMU(aclopSetAttrInt); + ASSIGN_SIMU(aclopSetAttrListBool); + ASSIGN_SIMU(aclopSetAttrListDataType); + ASSIGN_SIMU(aclopSetAttrListFloat); + ASSIGN_SIMU(aclopSetAttrListInt); + ASSIGN_SIMU(aclopSetAttrListListInt); + ASSIGN_SIMU(aclopSetAttrListString); + ASSIGN_SIMU(aclopSetAttrString); + ASSIGN_SIMU(aclopSetModelDir); +} +} // namespace mindspore::device::ascend diff --git a/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_op_symbol.h b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_op_symbol.h new file mode 100644 index 0000000000000000000000000000000000000000..af7adb3a71c6eb32d19ab140464cb4cd2e0a46d3 --- /dev/null +++ b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_op_symbol.h @@ -0,0 +1,57 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_TRANSFORM_SYMBOL_ACL_OP_SYMBOL_H_ +#define MINDSPORE_CCSRC_TRANSFORM_SYMBOL_ACL_OP_SYMBOL_H_ +#include +#include "acl/acl_op.h" +#include "hardware/hardware_abstract/dlopen_macro.h" + +namespace mindspore::device::ascend { + +ORIGIN_METHOD_WITH_SIMU(aclopCreateAttr, aclopAttr *) +ORIGIN_METHOD_WITH_SIMU(aclopSetAttrBool, aclError, aclopAttr *, const char *, uint8_t) +ORIGIN_METHOD_WITH_SIMU(aclopSetAttrDataType, aclError, aclopAttr *, const char *, aclDataType) +ORIGIN_METHOD_WITH_SIMU(aclopSetAttrFloat, aclError, aclopAttr *, const char *, float) +ORIGIN_METHOD_WITH_SIMU(aclopSetAttrInt, aclError, aclopAttr *, const char *, int64_t) +ORIGIN_METHOD_WITH_SIMU(aclopSetAttrListBool, aclError, aclopAttr *, const char *, int, const uint8_t *) +ORIGIN_METHOD_WITH_SIMU(aclopSetAttrListDataType, aclError, aclopAttr *, const char *, int, const aclDataType[]) +ORIGIN_METHOD_WITH_SIMU(aclopSetAttrListFloat, aclError, aclopAttr *, const char *, int, const float *) +ORIGIN_METHOD_WITH_SIMU(aclopSetAttrListInt, aclError, aclopAttr *, const char *, int, const int64_t *) +ORIGIN_METHOD_WITH_SIMU(aclopSetAttrListListInt, aclError, aclopAttr *, const char *, int, const int *, + const int64_t *const[]) +ORIGIN_METHOD_WITH_SIMU(aclopSetAttrListString, aclError, aclopAttr *, const char *, int, const char **) +ORIGIN_METHOD_WITH_SIMU(aclopSetAttrString, aclError, aclopAttr *, const char *, const char *) +ORIGIN_METHOD_WITH_SIMU(aclopSetModelDir, aclError, const char *) + +extern aclopCreateAttrFunObj aclopCreateAttr_; +extern aclopSetAttrBoolFunObj aclopSetAttrBool_; +extern aclopSetAttrDataTypeFunObj aclopSetAttrDataType_; +extern aclopSetAttrFloatFunObj aclopSetAttrFloat_; +extern aclopSetAttrIntFunObj aclopSetAttrInt_; +extern aclopSetAttrListBoolFunObj aclopSetAttrListBool_; +extern aclopSetAttrListDataTypeFunObj aclopSetAttrListDataType_; +extern aclopSetAttrListFloatFunObj aclopSetAttrListFloat_; +extern aclopSetAttrListIntFunObj aclopSetAttrListInt_; +extern aclopSetAttrListListIntFunObj aclopSetAttrListListInt_; +extern aclopSetAttrListStringFunObj aclopSetAttrListString_; +extern aclopSetAttrStringFunObj aclopSetAttrString_; +extern aclopSetModelDirFunObj aclopSetModelDir_; + +void LoadAclOpApiSymbol(const std::string &ascend_path); +void LoadSimulationAclOpApi(); +} // namespace mindspore::device::ascend + +#endif // MINDSPORE_CCSRC_TRANSFORM_SYMBOL_ACL_OP_SYMBOL_H_ diff --git a/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_rt_allocator_symbol.cc b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_rt_allocator_symbol.cc new file mode 100644 index 0000000000000000000000000000000000000000..885886059d26a5a73a819e3af51dc1c8c37bc25b --- /dev/null +++ b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_rt_allocator_symbol.cc @@ -0,0 +1,62 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "acl_rt_allocator_symbol.h" +#include +#include "symbol_utils.h" + +namespace mindspore::device::ascend { +aclrtAllocatorCreateDescFunObj aclrtAllocatorCreateDesc_ = nullptr; +aclrtAllocatorDestroyDescFunObj aclrtAllocatorDestroyDesc_ = nullptr; +aclrtAllocatorRegisterFunObj aclrtAllocatorRegister_ = nullptr; +aclrtAllocatorSetAllocAdviseFuncToDescFunObj aclrtAllocatorSetAllocAdviseFuncToDesc_ = nullptr; +aclrtAllocatorSetAllocFuncToDescFunObj aclrtAllocatorSetAllocFuncToDesc_ = nullptr; +aclrtAllocatorSetFreeFuncToDescFunObj aclrtAllocatorSetFreeFuncToDesc_ = nullptr; +aclrtAllocatorSetGetAddrFromBlockFuncToDescFunObj aclrtAllocatorSetGetAddrFromBlockFuncToDesc_ = nullptr; +aclrtAllocatorSetObjToDescFunObj aclrtAllocatorSetObjToDesc_ = nullptr; +aclrtAllocatorUnregisterFunObj aclrtAllocatorUnregister_ = nullptr; + +void LoadAclAllocatorApiSymbol(const std::string &ascend_path) { + std::string allocator_plugin_path = ascend_path + "lib64/libascendcl.so"; + auto handler = GetLibHandler(allocator_plugin_path); + if (handler == nullptr) { + LOG_OUT << "Dlopen " << allocator_plugin_path << " failed!" << dlerror(); + return; + } + aclrtAllocatorCreateDesc_ = DlsymAscendFuncObj(aclrtAllocatorCreateDesc, handler); + aclrtAllocatorDestroyDesc_ = DlsymAscendFuncObj(aclrtAllocatorDestroyDesc, handler); + aclrtAllocatorRegister_ = DlsymAscendFuncObj(aclrtAllocatorRegister, handler); + aclrtAllocatorSetAllocAdviseFuncToDesc_ = DlsymAscendFuncObj(aclrtAllocatorSetAllocAdviseFuncToDesc, handler); + aclrtAllocatorSetAllocFuncToDesc_ = DlsymAscendFuncObj(aclrtAllocatorSetAllocFuncToDesc, handler); + aclrtAllocatorSetFreeFuncToDesc_ = DlsymAscendFuncObj(aclrtAllocatorSetFreeFuncToDesc, handler); + aclrtAllocatorSetGetAddrFromBlockFuncToDesc_ = + DlsymAscendFuncObj(aclrtAllocatorSetGetAddrFromBlockFuncToDesc, handler); + aclrtAllocatorSetObjToDesc_ = DlsymAscendFuncObj(aclrtAllocatorSetObjToDesc, handler); + aclrtAllocatorUnregister_ = DlsymAscendFuncObj(aclrtAllocatorUnregister, handler); + LOG_OUT << "Load acl allocator api success!"; +} + +void LoadSimulationAclAllocatorApi() { + ASSIGN_SIMU(aclrtAllocatorCreateDesc); + ASSIGN_SIMU(aclrtAllocatorDestroyDesc); + ASSIGN_SIMU(aclrtAllocatorRegister); + ASSIGN_SIMU(aclrtAllocatorSetAllocAdviseFuncToDesc); + ASSIGN_SIMU(aclrtAllocatorSetAllocFuncToDesc); + ASSIGN_SIMU(aclrtAllocatorSetFreeFuncToDesc); + ASSIGN_SIMU(aclrtAllocatorSetGetAddrFromBlockFuncToDesc); + ASSIGN_SIMU(aclrtAllocatorSetObjToDesc); + ASSIGN_SIMU(aclrtAllocatorUnregister); +} +} // namespace mindspore::device::ascend diff --git a/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_rt_allocator_symbol.h b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_rt_allocator_symbol.h new file mode 100644 index 0000000000000000000000000000000000000000..e9d29334059ebb9ca1798253c00a66f12d82298c --- /dev/null +++ b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_rt_allocator_symbol.h @@ -0,0 +1,49 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_TRANSFORM_SYMBOL_ACL_RT_ALLOCATOR_SYMBOL_H_ +#define MINDSPORE_CCSRC_TRANSFORM_SYMBOL_ACL_RT_ALLOCATOR_SYMBOL_H_ +#include +#include "acl/acl_rt_allocator.h" +#include "hardware/hardware_abstract/dlopen_macro.h" + +namespace mindspore::device::ascend { +ORIGIN_METHOD_WITH_SIMU(aclrtAllocatorCreateDesc, aclrtAllocatorDesc) +ORIGIN_METHOD_WITH_SIMU(aclrtAllocatorDestroyDesc, aclError, aclrtAllocatorDesc) +ORIGIN_METHOD_WITH_SIMU(aclrtAllocatorRegister, aclError, aclrtStream, aclrtAllocatorDesc) +ORIGIN_METHOD_WITH_SIMU(aclrtAllocatorSetAllocAdviseFuncToDesc, aclError, aclrtAllocatorDesc, + aclrtAllocatorAllocAdviseFunc) +ORIGIN_METHOD_WITH_SIMU(aclrtAllocatorSetAllocFuncToDesc, aclError, aclrtAllocatorDesc, aclrtAllocatorAllocFunc) +ORIGIN_METHOD_WITH_SIMU(aclrtAllocatorSetFreeFuncToDesc, aclError, aclrtAllocatorDesc, aclrtAllocatorFreeFunc) +ORIGIN_METHOD_WITH_SIMU(aclrtAllocatorSetGetAddrFromBlockFuncToDesc, aclError, aclrtAllocatorDesc, + aclrtAllocatorGetAddrFromBlockFunc) +ORIGIN_METHOD_WITH_SIMU(aclrtAllocatorSetObjToDesc, aclError, aclrtAllocatorDesc, aclrtAllocator) +ORIGIN_METHOD_WITH_SIMU(aclrtAllocatorUnregister, aclError, aclrtStream) + +extern aclrtAllocatorCreateDescFunObj aclrtAllocatorCreateDesc_; +extern aclrtAllocatorDestroyDescFunObj aclrtAllocatorDestroyDesc_; +extern aclrtAllocatorRegisterFunObj aclrtAllocatorRegister_; +extern aclrtAllocatorSetAllocAdviseFuncToDescFunObj aclrtAllocatorSetAllocAdviseFuncToDesc_; +extern aclrtAllocatorSetAllocFuncToDescFunObj aclrtAllocatorSetAllocFuncToDesc_; +extern aclrtAllocatorSetFreeFuncToDescFunObj aclrtAllocatorSetFreeFuncToDesc_; +extern aclrtAllocatorSetGetAddrFromBlockFuncToDescFunObj aclrtAllocatorSetGetAddrFromBlockFuncToDesc_; +extern aclrtAllocatorSetObjToDescFunObj aclrtAllocatorSetObjToDesc_; +extern aclrtAllocatorUnregisterFunObj aclrtAllocatorUnregister_; + +void LoadAclAllocatorApiSymbol(const std::string &ascend_path); +void LoadSimulationAclAllocatorApi(); +} // namespace mindspore::device::ascend + +#endif // MINDSPORE_CCSRC_TRANSFORM_SYMBOL_ACL_RT_ALLOCATOR_SYMBOL_H_ diff --git a/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_rt_symbol.cc b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_rt_symbol.cc new file mode 100644 index 0000000000000000000000000000000000000000..5ddc52205ee05278cc6d1bab942c68d8f61d1633 --- /dev/null +++ b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_rt_symbol.cc @@ -0,0 +1,228 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "acl_rt_symbol.h" +#include +#include "symbol_utils.h" + +int (*aclrt_get_last_error)(int) = nullptr; +const char *(*acl_get_recent_err_msg)() = nullptr; +namespace mindspore::device::ascend { +aclrtCreateContextFunObj aclrtCreateContext_ = nullptr; +aclrtCreateEventFunObj aclrtCreateEvent_ = nullptr; +aclrtCreateEventWithFlagFunObj aclrtCreateEventWithFlag_ = nullptr; +aclrtCreateEventExWithFlagFunObj aclrtCreateEventExWithFlag_ = nullptr; +aclrtCreateStreamWithConfigFunObj aclrtCreateStreamWithConfig_ = nullptr; +aclrtDestroyContextFunObj aclrtDestroyContext_ = nullptr; +aclrtDestroyEventFunObj aclrtDestroyEvent_ = nullptr; +aclrtDestroyStreamFunObj aclrtDestroyStream_ = nullptr; +aclrtDestroyStreamForceFunObj aclrtDestroyStreamForce_ = nullptr; +aclrtEventElapsedTimeFunObj aclrtEventElapsedTime_ = nullptr; +aclrtFreeFunObj aclrtFree_ = nullptr; +aclrtFreeHostFunObj aclrtFreeHost_ = nullptr; +aclrtGetCurrentContextFunObj aclrtGetCurrentContext_ = nullptr; +aclrtGetDeviceFunObj aclrtGetDevice_ = nullptr; +aclrtGetDeviceCountFunObj aclrtGetDeviceCount_ = nullptr; +aclrtGetDeviceIdFromExceptionInfoFunObj aclrtGetDeviceIdFromExceptionInfo_ = nullptr; +aclrtGetErrorCodeFromExceptionInfoFunObj aclrtGetErrorCodeFromExceptionInfo_ = nullptr; +aclrtGetMemInfoFunObj aclrtGetMemInfo_ = nullptr; +aclrtGetRunModeFunObj aclrtGetRunMode_ = nullptr; +aclrtGetStreamIdFromExceptionInfoFunObj aclrtGetStreamIdFromExceptionInfo_ = nullptr; +aclrtGetTaskIdFromExceptionInfoFunObj aclrtGetTaskIdFromExceptionInfo_ = nullptr; +aclrtGetThreadIdFromExceptionInfoFunObj aclrtGetThreadIdFromExceptionInfo_ = nullptr; +aclrtLaunchCallbackFunObj aclrtLaunchCallback_ = nullptr; +aclrtMallocFunObj aclrtMalloc_ = nullptr; +aclrtMallocHostFunObj aclrtMallocHost_ = nullptr; +aclrtMemcpyFunObj aclrtMemcpy_ = nullptr; +aclrtMemcpyAsyncFunObj aclrtMemcpyAsync_ = nullptr; +aclrtMemsetFunObj aclrtMemset_ = nullptr; +aclrtMemsetAsyncFunObj aclrtMemsetAsync_ = nullptr; +aclrtProcessReportFunObj aclrtProcessReport_ = nullptr; +aclrtQueryEventStatusFunObj aclrtQueryEventStatus_ = nullptr; +aclrtRecordEventFunObj aclrtRecordEvent_ = nullptr; +aclrtResetDeviceFunObj aclrtResetDevice_ = nullptr; +aclrtResetEventFunObj aclrtResetEvent_ = nullptr; +aclrtSetCurrentContextFunObj aclrtSetCurrentContext_ = nullptr; +aclrtSetDeviceFunObj aclrtSetDevice_ = nullptr; +aclrtSetDeviceSatModeFunObj aclrtSetDeviceSatMode_ = nullptr; +aclrtSetExceptionInfoCallbackFunObj aclrtSetExceptionInfoCallback_ = nullptr; +aclrtSetOpExecuteTimeOutFunObj aclrtSetOpExecuteTimeOut_ = nullptr; +aclrtSetOpWaitTimeoutFunObj aclrtSetOpWaitTimeout_ = nullptr; +aclrtSetStreamFailureModeFunObj aclrtSetStreamFailureMode_ = nullptr; +aclrtStreamQueryFunObj aclrtStreamQuery_ = nullptr; +aclrtStreamWaitEventFunObj aclrtStreamWaitEvent_ = nullptr; +aclrtSubscribeReportFunObj aclrtSubscribeReport_ = nullptr; +aclrtSynchronizeEventFunObj aclrtSynchronizeEvent_ = nullptr; +aclrtSynchronizeStreamFunObj aclrtSynchronizeStream_ = nullptr; +aclrtSynchronizeStreamWithTimeoutFunObj aclrtSynchronizeStreamWithTimeout_ = nullptr; +aclrtSynchronizeDeviceWithTimeoutFunObj aclrtSynchronizeDeviceWithTimeout_ = nullptr; +aclrtUnmapMemFunObj aclrtUnmapMem_ = nullptr; +aclrtReserveMemAddressFunObj aclrtReserveMemAddress_ = nullptr; +aclrtMallocPhysicalFunObj aclrtMallocPhysical_ = nullptr; +aclrtMapMemFunObj aclrtMapMem_ = nullptr; +aclrtFreePhysicalFunObj aclrtFreePhysical_ = nullptr; +aclrtReleaseMemAddressFunObj aclrtReleaseMemAddress_ = nullptr; +aclrtCtxSetSysParamOptFunObj 
aclrtCtxSetSysParamOpt_ = nullptr; +aclrtGetMemUceInfoFunObj aclrtGetMemUceInfo_ = nullptr; +aclrtDeviceTaskAbortFunObj aclrtDeviceTaskAbort_ = nullptr; +aclrtMemUceRepairFunObj aclrtMemUceRepair_ = nullptr; +aclrtEventGetTimestampFunObj aclrtEventGetTimestamp_ = nullptr; +aclrtDeviceGetBareTgidFunObj aclrtDeviceGetBareTgid_ = nullptr; +aclrtMemExportToShareableHandleFunObj aclrtMemExportToShareableHandle_ = nullptr; +aclrtMemSetPidToShareableHandleFunObj aclrtMemSetPidToShareableHandle_ = nullptr; +aclrtMemImportFromShareableHandleFunObj aclrtMemImportFromShareableHandle_ = nullptr; +aclrtGetLastErrorFunObj aclrtGetLastError_ = nullptr; + +void LoadAclRtApiSymbol(const std::string &ascend_path) { + std::string aclrt_plugin_path = ascend_path + "lib64/libascendcl.so"; + auto handler = GetLibHandler(aclrt_plugin_path); + if (handler == nullptr) { + LOG_OUT << "Dlopen " << aclrt_plugin_path << " failed!" << dlerror(); + return; + } + aclrtCreateContext_ = DlsymAscendFuncObj(aclrtCreateContext, handler); + aclrtCreateEvent_ = DlsymAscendFuncObj(aclrtCreateEvent, handler); + aclrtCreateEventWithFlag_ = DlsymAscendFuncObj(aclrtCreateEventWithFlag, handler); + aclrtCreateEventExWithFlag_ = DlsymAscendFuncObj(aclrtCreateEventExWithFlag, handler); + aclrtCreateStreamWithConfig_ = DlsymAscendFuncObj(aclrtCreateStreamWithConfig, handler); + aclrtDestroyContext_ = DlsymAscendFuncObj(aclrtDestroyContext, handler); + aclrtDestroyEvent_ = DlsymAscendFuncObj(aclrtDestroyEvent, handler); + aclrtDestroyStream_ = DlsymAscendFuncObj(aclrtDestroyStream, handler); + aclrtDestroyStreamForce_ = DlsymAscendFuncObj(aclrtDestroyStreamForce, handler); + aclrtEventElapsedTime_ = DlsymAscendFuncObj(aclrtEventElapsedTime, handler); + aclrtFree_ = DlsymAscendFuncObj(aclrtFree, handler); + aclrtFreeHost_ = DlsymAscendFuncObj(aclrtFreeHost, handler); + aclrtGetCurrentContext_ = DlsymAscendFuncObj(aclrtGetCurrentContext, handler); + aclrtGetDevice_ = DlsymAscendFuncObj(aclrtGetDevice, handler); + aclrtGetDeviceCount_ = DlsymAscendFuncObj(aclrtGetDeviceCount, handler); + aclrtGetDeviceIdFromExceptionInfo_ = DlsymAscendFuncObj(aclrtGetDeviceIdFromExceptionInfo, handler); + aclrtGetErrorCodeFromExceptionInfo_ = DlsymAscendFuncObj(aclrtGetErrorCodeFromExceptionInfo, handler); + aclrtGetMemInfo_ = DlsymAscendFuncObj(aclrtGetMemInfo, handler); + aclrtGetRunMode_ = DlsymAscendFuncObj(aclrtGetRunMode, handler); + aclrtGetStreamIdFromExceptionInfo_ = DlsymAscendFuncObj(aclrtGetStreamIdFromExceptionInfo, handler); + aclrtGetTaskIdFromExceptionInfo_ = DlsymAscendFuncObj(aclrtGetTaskIdFromExceptionInfo, handler); + aclrtGetThreadIdFromExceptionInfo_ = DlsymAscendFuncObj(aclrtGetThreadIdFromExceptionInfo, handler); + aclrtLaunchCallback_ = DlsymAscendFuncObj(aclrtLaunchCallback, handler); + aclrtMalloc_ = DlsymAscendFuncObj(aclrtMalloc, handler); + aclrtMallocHost_ = DlsymAscendFuncObj(aclrtMallocHost, handler); + aclrtMemcpy_ = DlsymAscendFuncObj(aclrtMemcpy, handler); + aclrtMemcpyAsync_ = DlsymAscendFuncObj(aclrtMemcpyAsync, handler); + aclrtMemset_ = DlsymAscendFuncObj(aclrtMemset, handler); + aclrtMemsetAsync_ = DlsymAscendFuncObj(aclrtMemsetAsync, handler); + aclrtProcessReport_ = DlsymAscendFuncObj(aclrtProcessReport, handler); + aclrtQueryEventStatus_ = DlsymAscendFuncObj(aclrtQueryEventStatus, handler); + aclrtRecordEvent_ = DlsymAscendFuncObj(aclrtRecordEvent, handler); + aclrtResetDevice_ = DlsymAscendFuncObj(aclrtResetDevice, handler); + aclrtResetEvent_ = DlsymAscendFuncObj(aclrtResetEvent, handler); + 
aclrtSetCurrentContext_ = DlsymAscendFuncObj(aclrtSetCurrentContext, handler); + aclrtSetDevice_ = DlsymAscendFuncObj(aclrtSetDevice, handler); + aclrtSetDeviceSatMode_ = DlsymAscendFuncObj(aclrtSetDeviceSatMode, handler); + aclrtSetExceptionInfoCallback_ = DlsymAscendFuncObj(aclrtSetExceptionInfoCallback, handler); + aclrtSetOpExecuteTimeOut_ = DlsymAscendFuncObj(aclrtSetOpExecuteTimeOut, handler); + aclrtSetOpWaitTimeout_ = DlsymAscendFuncObj(aclrtSetOpWaitTimeout, handler); + aclrtSetStreamFailureMode_ = DlsymAscendFuncObj(aclrtSetStreamFailureMode, handler); + aclrtStreamQuery_ = DlsymAscendFuncObj(aclrtStreamQuery, handler); + aclrtStreamWaitEvent_ = DlsymAscendFuncObj(aclrtStreamWaitEvent, handler); + aclrtSubscribeReport_ = DlsymAscendFuncObj(aclrtSubscribeReport, handler); + aclrtSynchronizeEvent_ = DlsymAscendFuncObj(aclrtSynchronizeEvent, handler); + aclrtSynchronizeStream_ = DlsymAscendFuncObj(aclrtSynchronizeStream, handler); + aclrtSynchronizeStreamWithTimeout_ = DlsymAscendFuncObj(aclrtSynchronizeStreamWithTimeout, handler); + aclrtSynchronizeDeviceWithTimeout_ = DlsymAscendFuncObj(aclrtSynchronizeDeviceWithTimeout, handler); + aclrtUnmapMem_ = DlsymAscendFuncObj(aclrtUnmapMem, handler); + aclrtReserveMemAddress_ = DlsymAscendFuncObj(aclrtReserveMemAddress, handler); + aclrtMallocPhysical_ = DlsymAscendFuncObj(aclrtMallocPhysical, handler); + aclrtMapMem_ = DlsymAscendFuncObj(aclrtMapMem, handler); + aclrtFreePhysical_ = DlsymAscendFuncObj(aclrtFreePhysical, handler); + aclrtReleaseMemAddress_ = DlsymAscendFuncObj(aclrtReleaseMemAddress, handler); + aclrtCtxSetSysParamOpt_ = DlsymAscendFuncObj(aclrtCtxSetSysParamOpt, handler); + aclrtGetMemUceInfo_ = DlsymAscendFuncObj(aclrtGetMemUceInfo, handler); + aclrtDeviceTaskAbort_ = DlsymAscendFuncObj(aclrtDeviceTaskAbort, handler); + aclrtMemUceRepair_ = DlsymAscendFuncObj(aclrtMemUceRepair, handler); + aclrtEventGetTimestamp_ = DlsymAscendFuncObj(aclrtEventGetTimestamp, handler); + aclrtDeviceGetBareTgid_ = DlsymAscendFuncObj(aclrtDeviceGetBareTgid, handler); + aclrtMemExportToShareableHandle_ = DlsymAscendFuncObj(aclrtMemExportToShareableHandle, handler); + aclrtMemSetPidToShareableHandle_ = DlsymAscendFuncObj(aclrtMemSetPidToShareableHandle, handler); + aclrtMemImportFromShareableHandle_ = DlsymAscendFuncObj(aclrtMemImportFromShareableHandle, handler); + aclrtGetLastError_ = DlsymAscendFuncObj(aclrtGetLastError, handler); + LOG_OUT << "Load acl rt api success!"; +} + +void LoadSimulationRtApi() { + ASSIGN_SIMU(aclrtCreateContext); + ASSIGN_SIMU(aclrtCreateEvent); + ASSIGN_SIMU(aclrtCreateEventWithFlag); + ASSIGN_SIMU(aclrtCreateEventExWithFlag); + ASSIGN_SIMU(aclrtCreateStreamWithConfig); + ASSIGN_SIMU(aclrtDestroyContext); + ASSIGN_SIMU(aclrtDestroyEvent); + ASSIGN_SIMU(aclrtDestroyStream); + ASSIGN_SIMU(aclrtDestroyStreamForce); + ASSIGN_SIMU(aclrtEventElapsedTime); + ASSIGN_SIMU(aclrtFree); + ASSIGN_SIMU(aclrtFreeHost); + ASSIGN_SIMU(aclrtGetCurrentContext); + ASSIGN_SIMU(aclrtGetDevice); + ASSIGN_SIMU(aclrtGetDeviceCount); + ASSIGN_SIMU(aclrtGetDeviceIdFromExceptionInfo); + ASSIGN_SIMU(aclrtGetErrorCodeFromExceptionInfo); + ASSIGN_SIMU(aclrtGetMemInfo); + ASSIGN_SIMU(aclrtGetRunMode); + ASSIGN_SIMU(aclrtGetStreamIdFromExceptionInfo); + ASSIGN_SIMU(aclrtGetTaskIdFromExceptionInfo); + ASSIGN_SIMU(aclrtGetThreadIdFromExceptionInfo); + ASSIGN_SIMU(aclrtLaunchCallback); + ASSIGN_SIMU(aclrtMalloc); + ASSIGN_SIMU(aclrtMallocHost); + ASSIGN_SIMU(aclrtMemcpy); + ASSIGN_SIMU(aclrtMemcpyAsync); + ASSIGN_SIMU(aclrtMemset); + 
ASSIGN_SIMU(aclrtMemsetAsync); + ASSIGN_SIMU(aclrtProcessReport); + ASSIGN_SIMU(aclrtQueryEventStatus); + ASSIGN_SIMU(aclrtRecordEvent); + ASSIGN_SIMU(aclrtResetDevice); + ASSIGN_SIMU(aclrtResetEvent); + ASSIGN_SIMU(aclrtSetCurrentContext); + ASSIGN_SIMU(aclrtSetDevice); + ASSIGN_SIMU(aclrtSetDeviceSatMode); + ASSIGN_SIMU(aclrtSetExceptionInfoCallback); + ASSIGN_SIMU(aclrtSetOpExecuteTimeOut); + ASSIGN_SIMU(aclrtSetOpWaitTimeout); + ASSIGN_SIMU(aclrtSetStreamFailureMode); + ASSIGN_SIMU(aclrtStreamQuery); + ASSIGN_SIMU(aclrtStreamWaitEvent); + ASSIGN_SIMU(aclrtSubscribeReport); + ASSIGN_SIMU(aclrtSynchronizeEvent); + ASSIGN_SIMU(aclrtSynchronizeStream); + ASSIGN_SIMU(aclrtSynchronizeStreamWithTimeout); + ASSIGN_SIMU(aclrtSynchronizeDeviceWithTimeout); + ASSIGN_SIMU(aclrtUnmapMem); + ASSIGN_SIMU(aclrtReserveMemAddress); + ASSIGN_SIMU(aclrtMallocPhysical); + ASSIGN_SIMU(aclrtMapMem); + ASSIGN_SIMU(aclrtFreePhysical); + ASSIGN_SIMU(aclrtReleaseMemAddress); + ASSIGN_SIMU(aclrtCtxSetSysParamOpt); + ASSIGN_SIMU(aclrtGetMemUceInfo); + ASSIGN_SIMU(aclrtDeviceTaskAbort); + ASSIGN_SIMU(aclrtMemUceRepair); + ASSIGN_SIMU(aclrtEventGetTimestamp); + ASSIGN_SIMU(aclrtDeviceGetBareTgid); + ASSIGN_SIMU(aclrtMemExportToShareableHandle); + ASSIGN_SIMU(aclrtMemSetPidToShareableHandle); + ASSIGN_SIMU(aclrtMemImportFromShareableHandle); + ASSIGN_SIMU(aclrtGetLastError); +} +} // namespace mindspore::device::ascend diff --git a/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_rt_symbol.h b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_rt_symbol.h new file mode 100644 index 0000000000000000000000000000000000000000..e53dae76e782fb7fc8f43b65c638e2e208ced791 --- /dev/null +++ b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_rt_symbol.h @@ -0,0 +1,94 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_TRANSFORM_SYMBOL_ACL_RT_SYMBOL_H_ +#define MINDSPORE_CCSRC_TRANSFORM_SYMBOL_ACL_RT_SYMBOL_H_ +#include +#include "acl/acl_rt.h" +#include "hardware/hardware_abstract/dlopen_macro.h" + +namespace mindspore::device::ascend { +ORIGIN_METHOD_WITH_SIMU_CREATE(aclrtCreateContext, aclError, aclrtContext *, int32_t) +ORIGIN_METHOD_WITH_SIMU_CREATE(aclrtCreateEvent, aclError, aclrtEvent *) +ORIGIN_METHOD_WITH_SIMU_CREATE(aclrtCreateEventWithFlag, aclError, aclrtEvent *, uint32_t) +ORIGIN_METHOD_WITH_SIMU_CREATE(aclrtCreateEventExWithFlag, aclError, aclrtEvent *, uint32_t) +ORIGIN_METHOD_WITH_SIMU_CREATE(aclrtCreateStreamWithConfig, aclError, aclrtStream *, uint32_t, uint32_t) +ORIGIN_METHOD_WITH_SIMU(aclrtDestroyContext, aclError, aclrtContext) +ORIGIN_METHOD_WITH_SIMU(aclrtDestroyEvent, aclError, aclrtEvent) +ORIGIN_METHOD_WITH_SIMU(aclrtDestroyStream, aclError, aclrtStream) +ORIGIN_METHOD_WITH_SIMU(aclrtDestroyStreamForce, aclError, aclrtStream) +ORIGIN_METHOD_WITH_SIMU_CREATE(aclrtEventElapsedTime, aclError, float *, aclrtEvent, aclrtEvent) +ORIGIN_METHOD_WITH_SIMU(aclrtFree, aclError, void *) +ORIGIN_METHOD_WITH_SIMU(aclrtFreeHost, aclError, void *) +ORIGIN_METHOD_WITH_SIMU_CREATE(aclrtGetCurrentContext, aclError, aclrtContext *) +ORIGIN_METHOD_WITH_SIMU_CREATE(aclrtGetDevice, aclError, int32_t *) +ORIGIN_METHOD_WITH_SIMU_CREATE(aclrtGetDeviceCount, aclError, uint32_t *) +ORIGIN_METHOD_WITH_SIMU(aclrtGetDeviceIdFromExceptionInfo, uint32_t, const aclrtExceptionInfo *) +ORIGIN_METHOD_WITH_SIMU(aclrtGetErrorCodeFromExceptionInfo, uint32_t, const aclrtExceptionInfo *) +ORIGIN_METHOD_WITH_SIMU(aclrtGetMemInfo, aclError, aclrtMemAttr, size_t *, size_t *) +ORIGIN_METHOD_WITH_SIMU_CREATE(aclrtGetRunMode, aclError, aclrtRunMode *) +ORIGIN_METHOD_WITH_SIMU(aclrtGetStreamIdFromExceptionInfo, uint32_t, const aclrtExceptionInfo *) +ORIGIN_METHOD_WITH_SIMU(aclrtGetTaskIdFromExceptionInfo, uint32_t, const aclrtExceptionInfo *) +ORIGIN_METHOD_WITH_SIMU(aclrtGetThreadIdFromExceptionInfo, uint32_t, const aclrtExceptionInfo *) +ORIGIN_METHOD_WITH_SIMU(aclrtLaunchCallback, aclError, aclrtCallback, void *, aclrtCallbackBlockType, aclrtStream) +ORIGIN_METHOD_WITH_SIMU_CREATE(aclrtMalloc, aclError, void **, size_t, aclrtMemMallocPolicy) +ORIGIN_METHOD_WITH_SIMU_CREATE(aclrtMallocHost, aclError, void **, size_t) +ORIGIN_METHOD_WITH_SIMU(aclrtMemcpy, aclError, void *, size_t, const void *, size_t, aclrtMemcpyKind) +ORIGIN_METHOD_WITH_SIMU(aclrtMemcpyAsync, aclError, void *, size_t, const void *, size_t, aclrtMemcpyKind, aclrtStream) +ORIGIN_METHOD_WITH_SIMU(aclrtMemset, aclError, void *, size_t, int32_t, size_t) +ORIGIN_METHOD_WITH_SIMU(aclrtMemsetAsync, aclError, void *, size_t, int32_t, size_t, aclrtStream) +ORIGIN_METHOD_WITH_SIMU(aclrtProcessReport, aclError, int32_t) +ORIGIN_METHOD_WITH_SIMU(aclrtQueryEventStatus, aclError, aclrtEvent, aclrtEventRecordedStatus *) +ORIGIN_METHOD_WITH_SIMU(aclrtRecordEvent, aclError, aclrtEvent, aclrtStream) +ORIGIN_METHOD_WITH_SIMU(aclrtResetDevice, aclError, int32_t) +ORIGIN_METHOD_WITH_SIMU(aclrtResetEvent, aclError, aclrtEvent, aclrtStream) +ORIGIN_METHOD_WITH_SIMU(aclrtSetCurrentContext, aclError, aclrtContext) +ORIGIN_METHOD_WITH_SIMU(aclrtSetDevice, aclError, int32_t) +ORIGIN_METHOD_WITH_SIMU(aclrtSetDeviceSatMode, aclError, aclrtFloatOverflowMode) +ORIGIN_METHOD_WITH_SIMU(aclrtSetExceptionInfoCallback, aclError, aclrtExceptionInfoCallback) +ORIGIN_METHOD_WITH_SIMU(aclrtSetOpExecuteTimeOut, aclError, uint32_t) 
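+
+// Editorial note: the exact expansion of ORIGIN_METHOD_WITH_SIMU (and its _CREATE variant) lives in
+// hardware/hardware_abstract/dlopen_macro.h and is not shown in this patch. A plausible sketch of
+// what ORIGIN_METHOD_WITH_SIMU(aclrtSetDevice, aclError, int32_t) generates is roughly:
+//
+//   using aclrtSetDeviceFunObj = std::function<aclError(int32_t)>;       // callable wrapper type
+//   extern aclrtSetDeviceFunObj aclrtSetDevice_;                         // bound by LoadAclRtApiSymbol
+//   inline aclError aclrtSetDeviceSimu(int32_t) { return ACL_SUCCESS; }  // no-op simulation stub
+//
+// so DlsymAscendFuncObj(aclrtSetDevice, handler) fills aclrtSetDevice_ from the real library, while
+// ASSIGN_SIMU(aclrtSetDevice) points it at the stub for dry runs without an Ascend device.
+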
+ORIGIN_METHOD_WITH_SIMU(aclrtSetOpWaitTimeout, aclError, uint32_t) +ORIGIN_METHOD_WITH_SIMU(aclrtSetStreamFailureMode, aclError, aclrtStream, uint64_t) +ORIGIN_METHOD_WITH_SIMU(aclrtStreamQuery, aclError, aclrtStream, aclrtStreamStatus *) +ORIGIN_METHOD_WITH_SIMU(aclrtStreamWaitEvent, aclError, aclrtStream, aclrtEvent) +ORIGIN_METHOD_WITH_SIMU(aclrtSubscribeReport, aclError, uint64_t, aclrtStream) +ORIGIN_METHOD_WITH_SIMU(aclrtSynchronizeEvent, aclError, aclrtEvent) +ORIGIN_METHOD_WITH_SIMU(aclrtSynchronizeStream, aclError, aclrtStream) +ORIGIN_METHOD_WITH_SIMU(aclrtSynchronizeStreamWithTimeout, aclError, aclrtStream, int32_t) +ORIGIN_METHOD_WITH_SIMU(aclrtSynchronizeDeviceWithTimeout, aclError, int32_t) +ORIGIN_METHOD_WITH_SIMU(aclrtUnmapMem, aclError, void *) +ORIGIN_METHOD_WITH_SIMU(aclrtReserveMemAddress, aclError, void **, size_t, size_t, void *, uint64_t) +ORIGIN_METHOD_WITH_SIMU(aclrtMallocPhysical, aclError, aclrtDrvMemHandle *, size_t, const aclrtPhysicalMemProp *, + uint64_t) +ORIGIN_METHOD_WITH_SIMU(aclrtMapMem, aclError, void *, size_t, size_t, aclrtDrvMemHandle, uint64_t) +ORIGIN_METHOD_WITH_SIMU(aclrtFreePhysical, aclError, aclrtDrvMemHandle) +ORIGIN_METHOD_WITH_SIMU(aclrtReleaseMemAddress, aclError, void *) +ORIGIN_METHOD_WITH_SIMU(aclrtCtxSetSysParamOpt, aclError, aclSysParamOpt, int64_t) +ORIGIN_METHOD_WITH_SIMU(aclrtGetMemUceInfo, aclError, int32_t, aclrtMemUceInfo *, size_t, size_t *) +ORIGIN_METHOD_WITH_SIMU(aclrtDeviceTaskAbort, aclError, int32_t, uint32_t) +ORIGIN_METHOD_WITH_SIMU(aclrtMemUceRepair, aclError, int32_t, aclrtMemUceInfo *, size_t) +ORIGIN_METHOD_WITH_SIMU(aclrtEventGetTimestamp, aclError, aclrtEvent, uint64_t *) +ORIGIN_METHOD_WITH_SIMU(aclrtDeviceGetBareTgid, aclError, int32_t *) +ORIGIN_METHOD_WITH_SIMU(aclrtMemExportToShareableHandle, aclError, aclrtDrvMemHandle, aclrtMemHandleType, uint64_t, + uint64_t *) +ORIGIN_METHOD_WITH_SIMU(aclrtMemSetPidToShareableHandle, aclError, uint64_t, int32_t *, size_t) +ORIGIN_METHOD_WITH_SIMU(aclrtMemImportFromShareableHandle, aclError, uint64_t, int32_t, aclrtDrvMemHandle *) +ORIGIN_METHOD_WITH_SIMU(aclrtGetLastError, aclError, aclrtLastErrLevel) + +void LoadAclRtApiSymbol(const std::string &ascend_path); +void LoadSimulationRtApi(); +} // namespace mindspore::device::ascend + +#endif // MINDSPORE_CCSRC_TRANSFORM_SYMBOL_ACL_RT_SYMBOL_H_ diff --git a/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_symbol.cc b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_symbol.cc new file mode 100644 index 0000000000000000000000000000000000000000..706b4325a6f69dcec4cc75db5a0fb1d3b8aa2c4a --- /dev/null +++ b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_symbol.cc @@ -0,0 +1,41 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+#include "acl_symbol.h"
+#include <string>
+#include "symbol_utils.h"
+
+namespace mindspore::device::ascend {
+
+aclInitFunObj aclInit_ = nullptr;
+aclFinalizeFunObj aclFinalize_ = nullptr;
+
+void LoadAclApiSymbol(const std::string &ascend_path) {
+  std::string acl_plugin_path = ascend_path + "lib64/libascendcl.so";
+  auto base_handler = GetLibHandler(acl_plugin_path);
+  if (base_handler == nullptr) {
+    LOG_OUT << "Dlopen " << acl_plugin_path << " failed! " << dlerror();
+    return;
+  }
+  aclInit_ = DlsymAscendFuncObj(aclInit, base_handler);
+  aclFinalize_ = DlsymAscendFuncObj(aclFinalize, base_handler);
+  LOG_OUT << "Load acl base api success!";
+}
+
+void LoadSimulationAclApi() {
+  ASSIGN_SIMU(aclInit);
+  ASSIGN_SIMU(aclFinalize);
+}
+}  // namespace mindspore::device::ascend
diff --git a/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_symbol.h b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_symbol.h
new file mode 100644
index 0000000000000000000000000000000000000000..a38f0bbeab8cb11bf510f2f6ac2b38ed02b90230
--- /dev/null
+++ b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_symbol.h
@@ -0,0 +1,35 @@
+/**
+ * Copyright 2024 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_TRANSFORM_SYMBOL_ACL_SYMBOL_H_
+#define MINDSPORE_CCSRC_TRANSFORM_SYMBOL_ACL_SYMBOL_H_
+#include <string>
+#include "acl/acl_rt_allocator.h"
+#include "hardware/hardware_abstract/dlopen_macro.h"
+#include "hardware/ascend/res_manager/symbol_interface/acl_base_symbol.h"
+
+namespace mindspore::device::ascend {
+
+ORIGIN_METHOD_WITH_SIMU(aclInit, aclError, const char *)
+ORIGIN_METHOD_WITH_SIMU(aclFinalize, aclError)
+
+extern aclInitFunObj aclInit_;
+extern aclFinalizeFunObj aclFinalize_;
+
+void LoadAclApiSymbol(const std::string &ascend_path);
+void LoadSimulationAclApi();
+}  // namespace mindspore::device::ascend
+
+#endif  // MINDSPORE_CCSRC_TRANSFORM_SYMBOL_ACL_SYMBOL_H_
+ */
+#include "acl_tdt_symbol.h"
+#include <string>
+#include <vector>
+#include "symbol_utils.h"
+
+namespace mindspore::device::ascend {
+
+acltdtAddDataItemFunObj acltdtAddDataItem_ = nullptr;
+acltdtCleanChannelFunObj acltdtCleanChannel_ = nullptr;
+acltdtCreateChannelFunObj acltdtCreateChannel_ = nullptr;
+acltdtCreateChannelWithCapacityFunObj acltdtCreateChannelWithCapacity_ = nullptr;
+acltdtCreateDataItemFunObj acltdtCreateDataItem_ = nullptr;
+acltdtCreateDatasetFunObj acltdtCreateDataset_ = nullptr;
+acltdtDestroyChannelFunObj acltdtDestroyChannel_ = nullptr;
+acltdtDestroyDataItemFunObj acltdtDestroyDataItem_ = nullptr;
+acltdtDestroyDatasetFunObj acltdtDestroyDataset_ = nullptr;
+acltdtGetDataAddrFromItemFunObj acltdtGetDataAddrFromItem_ = nullptr;
+acltdtGetDataItemFunObj acltdtGetDataItem_ = nullptr;
+acltdtGetDatasetNameFunObj acltdtGetDatasetName_ = nullptr;
+acltdtGetDatasetSizeFunObj acltdtGetDatasetSize_ = nullptr;
+acltdtGetDataSizeFromItemFunObj acltdtGetDataSizeFromItem_ = nullptr;
+acltdtGetDataTypeFromItemFunObj acltdtGetDataTypeFromItem_ = nullptr;
+acltdtGetDimNumFromItemFunObj acltdtGetDimNumFromItem_ = nullptr;
+acltdtGetDimsFromItemFunObj acltdtGetDimsFromItem_ = nullptr;
+acltdtGetTensorTypeFromItemFunObj acltdtGetTensorTypeFromItem_ = nullptr;
+acltdtGetSliceInfoFromItemFunObj acltdtGetSliceInfoFromItem_ = nullptr;
+acltdtQueryChannelSizeFunObj acltdtQueryChannelSize_ = nullptr;
+acltdtReceiveTensorFunObj acltdtReceiveTensor_ = nullptr;
+acltdtSendTensorFunObj acltdtSendTensor_ = nullptr;
+acltdtStopChannelFunObj acltdtStopChannel_ = nullptr;
+
+void LoadAcltdtApiSymbol(const std::string &ascend_path) {
+  const std::vector<std::string> depend_libs = {"libacl_tdt_queue.so"};
+  for (const auto &dep_lib : depend_libs) {
+    (void)GetLibHandler(ascend_path + "lib64/" + dep_lib);
+  }
+
+  std::string aclrt_tdt_path = ascend_path + "lib64/libacl_tdt_channel.so";
+  auto handler = GetLibHandler(aclrt_tdt_path);
+  if (handler == nullptr) {
+    LOG_OUT << "Dlopen " << aclrt_tdt_path << " failed! "
<< dlerror(); + return; + } + acltdtAddDataItem_ = DlsymAscendFuncObj(acltdtAddDataItem, handler); + acltdtCleanChannel_ = DlsymAscendFuncObj(acltdtCleanChannel, handler); + acltdtCreateChannel_ = DlsymAscendFuncObj(acltdtCreateChannel, handler); + acltdtCreateChannelWithCapacity_ = DlsymAscendFuncObj(acltdtCreateChannelWithCapacity, handler); + acltdtCreateDataItem_ = DlsymAscendFuncObj(acltdtCreateDataItem, handler); + acltdtCreateDataset_ = DlsymAscendFuncObj(acltdtCreateDataset, handler); + acltdtDestroyChannel_ = DlsymAscendFuncObj(acltdtDestroyChannel, handler); + acltdtDestroyDataItem_ = DlsymAscendFuncObj(acltdtDestroyDataItem, handler); + acltdtDestroyDataset_ = DlsymAscendFuncObj(acltdtDestroyDataset, handler); + acltdtGetDataAddrFromItem_ = DlsymAscendFuncObj(acltdtGetDataAddrFromItem, handler); + acltdtGetDataItem_ = DlsymAscendFuncObj(acltdtGetDataItem, handler); + acltdtGetDatasetName_ = DlsymAscendFuncObj(acltdtGetDatasetName, handler); + acltdtGetDatasetSize_ = DlsymAscendFuncObj(acltdtGetDatasetSize, handler); + acltdtGetDataSizeFromItem_ = DlsymAscendFuncObj(acltdtGetDataSizeFromItem, handler); + acltdtGetDataTypeFromItem_ = DlsymAscendFuncObj(acltdtGetDataTypeFromItem, handler); + acltdtGetDimNumFromItem_ = DlsymAscendFuncObj(acltdtGetDimNumFromItem, handler); + acltdtGetDimsFromItem_ = DlsymAscendFuncObj(acltdtGetDimsFromItem, handler); + acltdtGetTensorTypeFromItem_ = DlsymAscendFuncObj(acltdtGetTensorTypeFromItem, handler); + acltdtGetSliceInfoFromItem_ = DlsymAscendFuncObj(acltdtGetSliceInfoFromItem, handler); + acltdtQueryChannelSize_ = DlsymAscendFuncObj(acltdtQueryChannelSize, handler); + acltdtReceiveTensor_ = DlsymAscendFuncObj(acltdtReceiveTensor, handler); + acltdtSendTensor_ = DlsymAscendFuncObj(acltdtSendTensor, handler); + acltdtStopChannel_ = DlsymAscendFuncObj(acltdtStopChannel, handler); + LOG_OUT << "Load acl tdt api success!"; +} + +void LoadSpecialSimulationTdtApi() { + acltdtQueryChannelSize_ = [](const acltdtChannelHandle *handle, size_t *ret_size_ptr) { + if (handle == nullptr) { + LOG_OUT << "Empty handle!"; + } + if (ret_size_ptr != nullptr) { + *ret_size_ptr = 1; + } + return ACL_SUCCESS; + }; +} + +void LoadSimulationTdtApi() { + ASSIGN_SIMU(acltdtAddDataItem); + ASSIGN_SIMU(acltdtCleanChannel); + ASSIGN_SIMU(acltdtCreateChannel); + ASSIGN_SIMU(acltdtCreateChannelWithCapacity); + ASSIGN_SIMU(acltdtCreateDataItem); + ASSIGN_SIMU(acltdtCreateDataset); + ASSIGN_SIMU(acltdtDestroyChannel); + ASSIGN_SIMU(acltdtDestroyDataItem); + ASSIGN_SIMU(acltdtDestroyDataset); + ASSIGN_SIMU(acltdtGetDataAddrFromItem); + ASSIGN_SIMU(acltdtGetDataItem); + ASSIGN_SIMU(acltdtGetDatasetName); + ASSIGN_SIMU(acltdtGetDatasetSize); + ASSIGN_SIMU(acltdtGetDataSizeFromItem); + ASSIGN_SIMU(acltdtGetDataTypeFromItem); + ASSIGN_SIMU(acltdtGetDimNumFromItem); + ASSIGN_SIMU(acltdtGetDimsFromItem); + ASSIGN_SIMU(acltdtGetTensorTypeFromItem); + ASSIGN_SIMU(acltdtGetSliceInfoFromItem); + ASSIGN_SIMU(acltdtQueryChannelSize); + ASSIGN_SIMU(acltdtReceiveTensor); + ASSIGN_SIMU(acltdtSendTensor); + ASSIGN_SIMU(acltdtStopChannel); + LoadSpecialSimulationTdtApi(); +} +} // namespace mindspore::device::ascend diff --git a/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_tdt_symbol.h b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_tdt_symbol.h new file mode 100644 index 0000000000000000000000000000000000000000..1761f47a578d1f3bc50be855a373448fadd0b7f5 --- /dev/null +++ b/inferrt/src/hardware/ascend/res_manager/symbol_interface/acl_tdt_symbol.h @@ -0,0 +1,54 @@ 
+/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_TRANSFORM_SYMBOL_ACL_TDT_SYMBOL_H_ +#define MINDSPORE_CCSRC_TRANSFORM_SYMBOL_ACL_TDT_SYMBOL_H_ +#include +#include +#include "acl/acl_tdt.h" +#include "hardware/hardware_abstract/dlopen_macro.h" + +namespace mindspore::device::ascend { + +ORIGIN_METHOD_WITH_SIMU(acltdtAddDataItem, aclError, acltdtDataset *, acltdtDataItem *) +ORIGIN_METHOD_WITH_SIMU(acltdtCleanChannel, aclError, acltdtChannelHandle *) +ORIGIN_METHOD_WITH_SIMU(acltdtCreateChannel, acltdtChannelHandle *, uint32_t, const char *) +ORIGIN_METHOD_WITH_SIMU(acltdtCreateChannelWithCapacity, acltdtChannelHandle *, uint32_t, const char *, size_t) +ORIGIN_METHOD_WITH_SIMU(acltdtCreateDataItem, acltdtDataItem *, acltdtTensorType, const int64_t *, size_t, aclDataType, + void *, size_t) +ORIGIN_METHOD_WITH_SIMU(acltdtCreateDataset, acltdtDataset *) +ORIGIN_METHOD_WITH_SIMU(acltdtDestroyChannel, aclError, acltdtChannelHandle *) +ORIGIN_METHOD_WITH_SIMU(acltdtDestroyDataItem, aclError, acltdtDataItem *) +ORIGIN_METHOD_WITH_SIMU(acltdtDestroyDataset, aclError, acltdtDataset *) +ORIGIN_METHOD_WITH_SIMU(acltdtGetDataAddrFromItem, void *, const acltdtDataItem *) +ORIGIN_METHOD_WITH_SIMU(acltdtGetDataItem, acltdtDataItem *, const acltdtDataset *, size_t) +ORIGIN_METHOD_WITH_SIMU(acltdtGetDatasetName, const char *, const acltdtDataset *) +ORIGIN_METHOD_WITH_SIMU(acltdtGetDatasetSize, size_t, const acltdtDataset *) +ORIGIN_METHOD_WITH_SIMU(acltdtGetDataSizeFromItem, size_t, const acltdtDataItem *) +ORIGIN_METHOD_WITH_SIMU(acltdtGetDataTypeFromItem, aclDataType, const acltdtDataItem *) +ORIGIN_METHOD_WITH_SIMU(acltdtGetDimNumFromItem, size_t, const acltdtDataItem *) +ORIGIN_METHOD_WITH_SIMU(acltdtGetDimsFromItem, aclError, const acltdtDataItem *, int64_t *, size_t) +ORIGIN_METHOD_WITH_SIMU(acltdtGetTensorTypeFromItem, acltdtTensorType, const acltdtDataItem *) +ORIGIN_METHOD_WITH_SIMU(acltdtGetSliceInfoFromItem, aclError, const acltdtDataItem *, size_t *, size_t *) +ORIGIN_METHOD_WITH_SIMU(acltdtQueryChannelSize, aclError, const acltdtChannelHandle *, size_t *) +ORIGIN_METHOD_WITH_SIMU(acltdtReceiveTensor, aclError, const acltdtChannelHandle *, acltdtDataset *, int32_t) +ORIGIN_METHOD_WITH_SIMU(acltdtSendTensor, aclError, const acltdtChannelHandle *, const acltdtDataset *, int32_t) +ORIGIN_METHOD_WITH_SIMU(acltdtStopChannel, aclError, acltdtChannelHandle *) + +void LoadAcltdtApiSymbol(const std::string &ascend_path); +void LoadSimulationTdtApi(); +} // namespace mindspore::device::ascend + +#endif // MINDSPORE_CCSRC_TRANSFORM_SYMBOL_ACL_TDT_SYMBOL_H_ diff --git a/inferrt/src/hardware/ascend/res_manager/symbol_interface/symbol_utils.cc b/inferrt/src/hardware/ascend/res_manager/symbol_interface/symbol_utils.cc new file mode 100644 index 0000000000000000000000000000000000000000..52b37bf43c5045bdb75a9c194cdbe6454aaee3b4 --- /dev/null +++ b/inferrt/src/hardware/ascend/res_manager/symbol_interface/symbol_utils.cc @@ 
-0,0 +1,96 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "symbol_utils.h" +#include +#include "acl_base_symbol.h" +#include "acl_compiler_symbol.h" +#include "acl_mdl_symbol.h" +#include "acl_op_symbol.h" +#include "acl_rt_allocator_symbol.h" +#include "acl_rt_symbol.h" +#include "acl_symbol.h" +#include "acl_tdt_symbol.h" + +namespace mindspore::device::ascend { + +static bool load_ascend_api = false; +static bool load_simulation_api = false; + +void *GetLibHandler(const std::string &lib_path, bool if_global) { + void *handler = nullptr; + if (if_global) { + handler = dlopen(lib_path.c_str(), RTLD_LAZY | RTLD_GLOBAL); + } else { + handler = dlopen(lib_path.c_str(), RTLD_LAZY | RTLD_LOCAL); + } + if (handler == nullptr) { + LOG_OUT << "Dlopen " << lib_path << " failed!" << dlerror(); + } + return handler; +} + +std::string GetAscendPath() { + Dl_info info; + if (dladdr(reinterpret_cast(aclrtMalloc), &info) == 0) { + LOG_ERROR << "Get dladdr failed."; + return ""; + } + auto path_tmp = std::string(info.dli_fname); + const std::string kLatest = "latest"; + auto pos = path_tmp.rfind(kLatest); + if (pos == std::string::npos) { + LOG_ERROR << "Get ascend path failed, please check whether CANN packages are installed correctly, \n" + "and environment variables are set by source ${LOCAL_ASCEND}/ascend-toolkit/set_env.sh."; + } + return path_tmp.substr(0, pos) + kLatest + "/"; +} + +void LoadAscendApiSymbols() { + if (load_ascend_api) { + LOG_OUT << "Ascend api is already loaded."; + return; + } + std::string ascend_path = GetAscendPath(); + LoadAclBaseApiSymbol(ascend_path); + LoadAclOpCompilerApiSymbol(ascend_path); + LoadAclMdlApiSymbol(ascend_path); + LoadAclOpApiSymbol(ascend_path); + LoadAclAllocatorApiSymbol(ascend_path); + LoadAclRtApiSymbol(ascend_path); + LoadAclApiSymbol(ascend_path); + LoadAcltdtApiSymbol(ascend_path); + load_ascend_api = true; + LOG_OUT << "Load ascend api success!"; +} + +void LoadSimulationApiSymbols() { + if (load_simulation_api) { + LOG_OUT << "Simulation api is already loaded."; + return; + } + + LoadSimulationAclBaseApi(); + LoadSimulationRtApi(); + LoadSimulationTdtApi(); + LoadSimulationAclOpCompilerApi(); + LoadSimulationAclMdlApi(); + LoadSimulationAclOpApi(); + LoadSimulationAclAllocatorApi(); + LoadSimulationAclApi(); + load_simulation_api = true; + LOG_OUT << "Load simulation api success!"; +} +} // namespace mindspore::device::ascend diff --git a/inferrt/src/hardware/ascend/res_manager/symbol_interface/symbol_utils.h b/inferrt/src/hardware/ascend/res_manager/symbol_interface/symbol_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..8d6538aa1e9ea2130f5046b524be9ed4402f9221 --- /dev/null +++ b/inferrt/src/hardware/ascend/res_manager/symbol_interface/symbol_utils.h @@ -0,0 +1,85 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in 
compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_TRANSFORM_SYMBOL_SYMBOL_UTILS_H_
+#define MINDSPORE_CCSRC_TRANSFORM_SYMBOL_SYMBOL_UTILS_H_
+#include <string>
+#include <type_traits>
+#include "common/common.h"
+#include "acl/acl.h"
+#include "hardware/hardware_abstract/visible.h"
+
+extern "C" HARDWARE_EXPORT int (*aclrt_get_last_error)(int);
+
+#ifndef ACL_ERROR_RT_DEVICE_MEM_ERROR
+#define ACL_ERROR_RT_DEVICE_MEM_ERROR 507053
+#endif
+#ifndef ACL_ERROR_RT_HBM_MULTI_BIT_ECC_ERROR
+#define ACL_ERROR_RT_HBM_MULTI_BIT_ECC_ERROR 507054
+#endif
+#ifndef ACL_ERROR_RT_COMM_OP_RETRY_FAIL
+#define ACL_ERROR_RT_COMM_OP_RETRY_FAIL 507904
+#endif
+#ifndef ACL_ERROR_RT_DEVICE_TASK_ABORT
+#define ACL_ERROR_RT_DEVICE_TASK_ABORT 107022
+#endif
+const int thread_level = 0;
+
+template <typename Function, typename... Args>
+auto RunAscendApi(Function f, int line, const char *call_f, const char *func_name, Args... args) {
+  if (f == nullptr) {
+    LOG_ERROR << func_name << " is null.";
+  }
+
+  if constexpr (std::is_same_v<std::invoke_result_t<Function, Args...>, int>) {
+    auto ret = f(args...);
+    return ret;
+  } else {
+    return f(args...);
+  }
+}
+
+template <typename Function>
+auto RunAscendApi(Function f, int line, const char *call_f, const char *func_name) {
+  if (f == nullptr) {
+    LOG_ERROR << func_name << " is null.";
+  }
+  if constexpr (std::is_same_v<std::invoke_result_t<Function>, int>) {
+    auto ret = f();
+    return ret;
+  } else {
+    return f();
+  }
+}
+
+template <typename Function>
+bool HasAscendApi(Function f) {
+  return f != nullptr;
+}
+
+namespace mindspore::device::ascend {
+
+#define CALL_ASCEND_API(func_name, ...) \
+  RunAscendApi(mindspore::device::ascend::func_name##_, __LINE__, __FUNCTION__, #func_name, ##__VA_ARGS__)
+
+#define HAS_ASCEND_API(func_name) HasAscendApi(mindspore::device::ascend::func_name##_)
+
+std::string GetAscendPath();
+void *GetLibHandler(const std::string &lib_path, bool if_global = false);
+void LoadAscendApiSymbols();
+void LoadSimulationApiSymbols();
+}  // namespace mindspore::device::ascend
+
+#endif  // MINDSPORE_CCSRC_TRANSFORM_SYMBOL_SYMBOL_UTILS_H_
diff --git a/inferrt/src/hardware/cpu/CMakeLists.txt b/inferrt/src/hardware/cpu/CMakeLists.txt
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..985e282b325ed23f7f5748c170443dd7040a18bc 100644
--- a/inferrt/src/hardware/cpu/CMakeLists.txt
+++ b/inferrt/src/hardware/cpu/CMakeLists.txt
@@ -0,0 +1,4 @@
+# check_debug_log_out()
+
+# file(GLOB_RECURSE HARDWARE_CPU_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
+# add_library(hardware_cpu_obj STATIC ${HARDWARE_CPU_SRC_FILES})
\ No newline at end of file
diff --git a/inferrt/src/hardware/cpu/cpu_device_context.cc b/inferrt/src/hardware/cpu/cpu_device_context.cc
new file mode 100644
index 0000000000000000000000000000000000000000..fd1aa5dc7c0387a9023ac67d260138d4d7dcae50
--- /dev/null
+++ b/inferrt/src/hardware/cpu/cpu_device_context.cc
@@ -0,0 +1,641 @@
+/**
+ * Copyright 2021-2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hardware/cpu/cpu_device_context.h"
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "hardware/cpu/res_manager/mem_manager/cpu_memory_manager.h"
+
+namespace mindspore {
+namespace device {
+namespace cpu {
+namespace {
+const char kModelNameCPU[] = "CPU";
+const char kEventOptimizeGraph[] = "OptimizeGraph";
+const char kStageSetKernelInfo[] = "SetKernelInfo";
+
+std::pair<bool, size_t> MatchMultiDynamicKernelAttr(const kernel::KernelAttr &kernel_attr,
+                                                    const std::vector<int64_t> &dyn_input_sizes,
+                                                    const std::vector<kernel::KernelAttr> &kernel_attr_list) {
+  auto output_num = kernel_attr.GetOutputSize();
+  for (size_t index = 0; index < kernel_attr_list.size(); ++index) {
+    // Support multi dynamic inputs.
+    const auto &cur_kernel_attr = kernel_attr_list[index];
+    auto cur_input_num = cur_kernel_attr.GetInputSize();
+    if (dyn_input_sizes.size() != cur_input_num) {
+      MS_LOG(EXCEPTION) << "Kernel attr's input num: " << cur_input_num
+                        << ", is not equal to dynamic input size: " << dyn_input_sizes.size();
+    }
+    bool mis_match = false;
+    size_t input_index = 0;
+    for (size_t i = 0; i < cur_input_num; ++i) {
+      int64_t dyn_input_size = dyn_input_sizes[i];
+      if (dyn_input_size < 0) {
+        dyn_input_size = 1;
+      }
+      auto dtype = cur_kernel_attr.GetInputAttr(i).dtype;
+      for (size_t j = 0; j < LongToSize(dyn_input_size); ++j) {
+        if (kernel_attr.GetInputAttr(input_index).dtype != dtype) {
+          mis_match = true;
+          break;
+        }
+        ++input_index;
+      }
+      if (mis_match) {
+        break;
+      }
+    }
+    if (mis_match) {
+      continue;
+    }
+
+    // Only support one dynamic output. TODO: support multi dynamic output.
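+    // (Illustrative example, not from the original sources:) a candidate attr declaring inputs
+    // {f32, f32} with dyn_input_sizes = {3, -1} is matched above against three unrolled tensors
+    // for slot 0 and a single tensor for slot 1; the loop below then checks outputs the same way,
+    // comparing the node's inferred output slots against the candidate's output attrs.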
+    for (size_t i = 0; i < output_num; ++i) {
+      auto dtype = cur_kernel_attr.GetOutputAttr(i).dtype;
+      if (kernel_attr.GetOutputAttr(i).dtype != dtype) {
+        mis_match = true;
+        break;
+      }
+    }
+    if (!mis_match) {
+      return std::make_pair(true, index);
+    }
+  }
+  return std::make_pair(false, 0);
+}
+
+runtime::KernelTaskPtr GetTaskByTaskType(const runtime::KernelTaskType &task_type,
+                                         const std::shared_ptr &task_context) {
+  switch (task_type) {
+    case runtime::KernelTaskType::kCONTIGUOUS_TASK:
+      return std::make_shared(task_context);
+    case runtime::KernelTaskType::kCOPY_TASK:
+      return std::make_shared(task_context);
+    default:
+      MS_LOG(EXCEPTION) << "KernelTaskType is invalid, task_type:" << task_type;
+  }
+}
+
+void MallocMemoryForDeviceAddress(device::DeviceAddress *device_address, const device::DeviceContext *device_context) {
+  MS_EXCEPTION_IF_NULL(device_address);
+  device::tracker::CALL_MEMORY_TRACKER_WITH_FILE(AddTask, "Graph", "Contiguous", "");
+  device::tracker::CALL_MEMORY_TRACKER_WITH_FILE(AddMemInfo, "Graph", device::tracker::MemType::kPyNativeOutput,
+                                                 device_address->GetSize(), device_address);
+  if (device_address->GetPtr() == nullptr) {
+    if (!device_context->device_res_manager_->AllocateMemory(device_address)) {
+      MS_LOG(EXCEPTION) << "Allocate device memory failed!";
+    }
+  }
+}
+
+}  // namespace
+
+void SetCpuRefMapToKernelInfo(const CNodePtr &apply_kernel, const std::vector<kernel::KernelAttr> &apply_kernel_attrs) {
+  MS_EXCEPTION_IF_NULL(apply_kernel);
+  auto kernel_attrs = apply_kernel_attrs;
+  if (kernel_attrs.empty()) {
+    return;
+  }
+
+  auto build_info = AnfAlgo::GetSelectKernelBuildInfo(apply_kernel);
+  MS_EXCEPTION_IF_NULL(build_info);
+  auto kernel_attr = GetKernelAttrFromBuildInfo(build_info);
+  std::vector<int64_t> dyn_input_sizes = {};
+  if (common::AnfAlgo::HasNodeAttr(kAttrDynInputSizes, apply_kernel)) {
+    dyn_input_sizes = common::AnfAlgo::GetNodeAttr<std::vector<int64_t>>(apply_kernel, kAttrDynInputSizes);
+  }
+  std::pair<bool, size_t> match_result;
+
+  if (kernel_attrs[0].GetSkipCheck()) {
+    // If kernel skips attr check, we need to synchronize the ref map in case it's discarded.
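+    // (Editorial note:) the ref map records output->input aliasing for in-place kernels; e.g. a
+    // kernel that updates input 0 in place publishes {0 -> 0}, and dropping it would make the
+    // runtime hand the op a fresh output buffer instead of aliasing the input.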
+ SyncOutInRef(kernel_attrs[0], &kernel_attr); + kernel_attrs[0] = kernel_attr; + match_result = {true, 0}; + } else if (dyn_input_sizes.empty() || kernel_attrs[0].GetAllSame()) { + match_result = MatchKernelAttr(kernel_attr, kernel_attrs); + } else { + match_result = MatchMultiDynamicKernelAttr(kernel_attr, dyn_input_sizes, kernel_attrs); + } + + auto [is_match, index] = match_result; + if (!is_match) { + MS_LOG_WITH_NODE(EXCEPTION, apply_kernel) + << apply_kernel->fullname_with_scope() << " does not support this kernel data type: " << build_info->ToString() + << ", node debug name: " << apply_kernel->DebugString(AnfNode::DebugStringLevel::kLevel2); + } + + auto kernel_info = dynamic_cast(apply_kernel->kernel_info()); + MS_EXCEPTION_IF_NULL(kernel_info); + const auto &matched_kernel_attr = kernel_attrs[index]; + if (!matched_kernel_attr.GetOutInRefMap().empty() || matched_kernel_attr.GetAllOutInRef()) { + kernel_info->set_ref_map(matched_kernel_attr.GetAllOutInRef(), matched_kernel_attr.GetOutInRefMap()); + } +} + +using mindspore::kernel::KernelBuildInfo; + +void CPUDeviceContext::Initialize() { +#ifdef __APPLE__ + std::lock_guard spin_lock(init_lock_); +#else + std::lock_guard lock(init_mutex_); +#endif + if (initialized_) { + return; + } + MS_EXCEPTION_IF_NULL(device_res_manager_); + device_res_manager_->Initialize(); + auto ms_context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(ms_context); + if (ms_context->get_param(MS_CTX_DEVICE_TARGET) == kCPUDevice) { + // Dump json config file if dump is enabled. + uint32_t rank_id = 0; + auto &json_parser = DumpJsonParser::GetInstance(); + json_parser.Parse(); + json_parser.CopyDumpJsonToDir(rank_id); + json_parser.CopyMSCfgJsonToDir(rank_id); + } +#ifdef __linux__ + if (ms_context->IsDefaultDeviceTarget() && ms_context->get_param(MS_CTX_DEVICE_TARGET) == kCPUDevice) { + MS_LOG(INFO) + << "No device_target set, set CPU as default. You can call mindspore.set_context(device_target=\"XXX\")"; + } +#endif // __linux__ + initialized_ = true; +} + +void CPUDeviceContext::Destroy() { + MS_EXCEPTION_IF_NULL(device_res_manager_); + device_res_manager_->Destroy(); + initialized_ = false; +} + +void CPUKernelExecutor::OptimizeGraph(const FuncGraphPtr &graph) const { + MS_EXCEPTION_IF_NULL(graph); + auto kernel_graph = graph->cast(); + MS_EXCEPTION_IF_NULL(kernel_graph); + auto ms_context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(ms_context); + auto enable_lazy_inline = ms_context->CellReuseLevel() != CellReuseLevel::kNoCellReuse; + if (enable_lazy_inline) { + MS_LOG(EXCEPTION) << "CPU does not support the lazy_inline feature, " + << "please do not mark @lazy_inline in cell's __init__ func."; + } + if (kernel_graph->is_from_single_op()) { + SetOperatorInfo(kernel_graph); + SingleOpGraphOptimize(kernel_graph); + UpdateKernelRefInfo(kernel_graph); + } else { + // The passes in this function must be before ops select: SetOperatorInfo() + OptimizeMindIR(kernel_graph); + // Update Graph Dynamic Shape Attr. + opt::AddDynamicShapeAttrPass(kernel_graph); + + SetOperatorInfo(kernel_graph); + // SetOperatorInfo may generate new node, so need set kernel object type again. + kernel_graph->SetKernelObjectTypesForUnrealNodes(); +#ifdef ENABLE_DUMP_IR + if (ms_context->CanDump(kIntroductory)) { + DumpIR("hwopt_comm_after_kernel_select_" + graph->ToString() + ".ir", graph, true); + } +#endif + + OptimizeGraphImpl(kernel_graph); + + // Run final optimization. 
+ opt::CommonFinalOptimization(kernel_graph); + + // Run graph kernel fusion optimization + if (graphkernel::GraphKernelFlags::GetInstance().IsEnableGraphKernel()) { + graphkernel::GraphKernelOptimize(kernel_graph); + kernel_graph->SetExecOrderByDefault(); + } + } +} + +void CPUKernelExecutor::UpdateKernelRefInfo(const KernelGraphPtr &graph) const { + MS_EXCEPTION_IF_NULL(graph); + const std::vector &kernels = graph->execution_order(); + for (const auto &kernel : kernels) { + MS_EXCEPTION_IF_NULL(kernel); + const std::string &op_name = common::AnfAlgo::GetCNodeName(kernel); + if (IsPrimitiveCNode(kernel, prim::kPrimCustom) && + mindspore::kernel::OpLib::FindOp(op_name, kernel::OpImplyType::kImplyCPU) == nullptr) { + MS_LOG(DEBUG) << "Not find operator information for Custom operator [" << op_name << "]"; + return; + } + + auto kernel_attr_list = kernel::NativeCpuKernelMod::GetCpuSupportedList(op_name); + if (kernel_attr_list.empty()) { + MS_LOG(DEBUG) << "kernel_attr_list is empty"; + return; + } + + auto kernel_info = dynamic_cast(kernel->kernel_info()); + MS_EXCEPTION_IF_NULL(kernel_info); + kernel_info->set_ref_map(kernel_attr_list[0].GetAllOutInRef(), kernel_attr_list[0].GetOutInRefMap()); + } +} + +void CPUKernelExecutor::OptimizeMindIR(const KernelGraphPtr &graph) const { + MS_EXCEPTION_IF_NULL(graph); + auto optimizer = std::make_shared(); + auto pm = std::make_shared(); + pm->AddPass(std::make_shared("softmax_grad_fusion_cpu")); + // Match MatMul+BiasAdd+ReLU first, if no match, then match MatMul+BiasAdd + pm->AddPass(std::make_shared("matmul_biasadd_relu_fusion_cpu")); + pm->AddPass(std::make_shared()); + + // Do communication op fusion before InsertTensorMoveForCommunication pass. + // So these passes are before kernel select process, no need to generate kernel build info in them. 
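+  // (Illustrative:) fusing first means consecutive AllReduce ops can be merged into one fused op
+  // before TensorMove copies are inserted around the individual communication nodes.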
+ if (parallel::ParallelContext::GetInstance()->enable_all_reduce_fusion()) { + MS_LOG(INFO) << "Parallel comm_fusion of AllReduce is enabled."; + pm->AddPass(std::make_shared()); + } + + optimizer->AddPassManager(pm); + (void)optimizer->Optimize(graph); + graph->SetExecOrderByDefault(); +} + +void CPUKernelExecutor::OptimizeGraphImpl(const KernelGraphPtr &graph) const { + MS_EXCEPTION_IF_NULL(graph); + auto optimizer = std::make_shared(); + auto pm = std::make_shared(); + pm->AddPass(std::make_shared("insert_type_transform_op")); + pm->AddPass(std::make_shared("flatten_value_sequence_in_pyexecute")); + pm->AddPass(std::make_shared("insert_format_transform_op_cpu")); + pm->AddPass(std::make_shared("insert_cast")); + pm->AddPass(std::make_shared()); + pm->AddPass(std::make_shared()); + pm->AddPass(std::make_shared()); + pm->AddPass(std::make_shared("print_value_type")); + pm->AddPass(std::make_shared("insert_cast_for_pyexecute")); + optimizer->AddPassManager(pm); + (void)optimizer->Optimize(graph); + graph->SetExecOrderByDefault(); +} + +void CPUKernelExecutor::SingleOpGraphOptimize(const KernelGraphPtr &graph) const { + MS_EXCEPTION_IF_NULL(graph); + auto optimizer = std::make_shared(); + auto pm = std::make_shared(); + pm->AddPass(std::make_shared("insert_cast")); + optimizer->AddPassManager(pm); + (void)optimizer->Optimize(graph); + graph->SetExecOrderByDefault(); +} + +namespace { +void SetControlOpInfo(const CNodePtr &kernel_node) { + MS_EXCEPTION_IF_NULL(kernel_node); + std::vector inputs_format; + std::vector inputs_type; + size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node); + for (size_t input_index = 0; input_index < input_num; ++input_index) { + (void)inputs_format.emplace_back(kOpFormat_DEFAULT); + inputs_type.push_back(common::AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, input_index)); + } + std::vector outputs_format; + std::vector outputs_type; + size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); + for (size_t output_index = 0; output_index < output_num; ++output_index) { + (void)outputs_format.emplace_back(kOpFormat_DEFAULT); + outputs_type.push_back(common::AnfAlgo::GetOutputInferDataType(kernel_node, output_index)); + } + + auto builder = std::make_shared(); + builder->SetInputsFormat(inputs_format); + builder->SetInputsDeviceType(inputs_type); + builder->SetOutputsFormat(outputs_format); + builder->SetOutputsDeviceType(outputs_type); + + AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), kernel_node.get()); +} + +// Before creating the kernel, check whether the node has completed the operator selection. If not, the operator +// selection needs to be performed to set kernel info. +void SetKernelInfoBeforeCreateKernel(const std::vector &nodes) { + // Check whether the node has completed operator selection. + for (const auto &node : nodes) { + if (AnfAlgo::GetSelectKernelBuildInfo(node) != nullptr) { + continue; + } + + // Kernel selection process for non control op. + if (!common::AnfAlgo::IsBpropCutOpExecInBackend(node)) { + auto [msg, etype] = SetKernelInfoWithMsg(node); + if (!msg.empty()) { + MS_EXCEPTION(etype) << "#umsg#Kernel select failed:#umsg#" << msg; + } + } else { + // Kernel selection process for control op. 
+ SetControlOpInfo(node); + } + } +} +} // namespace + +void CPUKernelExecutor::SetOperatorInfo(const KernelGraphPtr &graph) const { + MS_EXCEPTION_IF_NULL(graph); + uint64_t start_time = profiler::GetClockSyscnt(); + bool do_expand = false; + auto mng = graph->manager(); + if (mng == nullptr) { + mng = Manage(graph, true); + MS_EXCEPTION_IF_NULL(mng); + graph->set_manager(mng); + } + auto &node_list = graph->execution_order(); + for (auto &node : node_list) { + if (!common::AnfAlgo::IsBpropCutOpExecInBackend(node)) { + auto [msg, etype] = SetKernelInfoWithMsg(node); + if (msg.empty()) { + continue; + } + auto f = [](const CNodePtr &n) { + auto res = SetKernelInfoWithMsg(n); + return res.first.empty(); + }; + auto expand_ret = expander::TryExpandCNode(node, f); + if (!expand_ret) { + MS_EXCEPTION(etype) << "#umsg#Kernel select failed:#umsg#" << msg + << "\nnode: " << node->DebugString(AnfNode::DebugStringLevel::kLevel2); + } + MS_LOG(INFO) << msg << " but expand success."; + do_expand = true; + } else { + SetControlOpInfo(node); + } + } + if (do_expand) { + (void)opt::BindValueToGraph().Run(graph); + graph->SetExecOrderByDefault(); + } + (void)profiler::CollectHostInfo(kModelNameCPU, kEventOptimizeGraph, kStageSetKernelInfo, start_time, + profiler::GetClockSyscnt(), 1); +} + +kernel::KernelModPtr CPUKernelExecutor::CreateKernelMod(const std::string &op_name) const { + if (kernel::IsOpPluginKernel(op_name)) { + return kernel::Factory::Instance().Create(op_name); + } + return kernel::Factory::Instance().Create(op_name); +} + +void CPUKernelExecutor::CreateKernel(const std::vector &nodes) const { + SetKernelInfoBeforeCreateKernel(nodes); + + kernel::KernelMeta *bin_map = kernel::KernelMeta::GetInstance(); + std::vector akg_nodes; + for (const auto &node : nodes) { + MS_EXCEPTION_IF_NULL(node); + if (common::AnfAlgo::IsBpropCutOpExecInBackend(node)) { + continue; + } + if (session::AnfRuntimeAlgorithm::GetKernelType(node) == KernelType::AKG_KERNEL) { + if (!bin_map->initialized()) { + bin_map->Initialize(); + } + akg_nodes.push_back(node); + continue; + } + std::string kernel_name = common::AnfAlgo::GetCNodeName(node); + + std::shared_ptr cpu_kernel = + std::dynamic_pointer_cast(CreateKernelMod(kernel_name)); + + if (cpu_kernel == nullptr) { + MS_LOG(INTERNAL_EXCEPTION) << "#dmsg#Kernel build failed:#dmsg#Build cpu operator[" << node->fullname_with_scope() + << "] failed"; + } + + auto kernel_attrs = cpu_kernel->GetOpSupport(); + SetCpuRefMapToKernelInfo(node, kernel_attrs); + auto thread_pool = kernel::GetActorMgrInnerThreadPool(); + cpu_kernel->SetThreadPool(thread_pool); + std::vector input_kernel_tensors = AnfAlgo::GetOrCreateAllInputKernelTensors(node); + std::vector output_kernel_tensors = AnfAlgo::GetOrCreateAllOutputKernelTensors(node); + auto ret = cpu_kernel->Init(common::AnfAlgo::GetCNodePrimitive(node), input_kernel_tensors, output_kernel_tensors); + if (!ret) { + MS_LOG_WITH_NODE(EXCEPTION, node) << trace::DumpSourceLines(node); + } + if (kernel::CheckResizeCondition(node)) { + if (cpu_kernel->Resize(input_kernel_tensors, output_kernel_tensors) == kernel::KRET_RESIZE_FAILED) { + MS_LOG(INTERNAL_EXCEPTION) << "#dmsg#Kernel build failed:#dmsg#CPU kernel op [" << node->fullname_with_scope() + << "] resize failed."; + } + } + + AnfAlgo::SetKernelMod(cpu_kernel, node.get()); + } +#ifdef ENABLE_AKG + kernel::AkgCpuKernelBuilder akg_cpu_kernel_builder; + (void)akg_cpu_kernel_builder.SingleOpParallelBuild(akg_nodes); +#endif +} + +std::vector 
CPUKernelExecutor::GetLaunchIgnoredInputAddressIdx(const AnfNodePtr &node) const { + MS_EXCEPTION_IF_NULL(node); + auto kernel_info = dynamic_cast(node->kernel_info()); + MS_EXCEPTION_IF_NULL(kernel_info); + auto kernel_mod = kernel_info->MutableKernelMod(); + MS_EXCEPTION_IF_NULL(kernel_mod); + return kernel_mod->GetLaunchIgnoredInputAddressIdx(); +} + +bool CPUKernelExecutor::IsLaunchIgnoredInputAddressIdx(const AnfNodePtr &node, size_t input_idx) const { + auto ignored_input_list = GetLaunchIgnoredInputAddressIdx(node); + if (std::find(ignored_input_list.begin(), ignored_input_list.end(), input_idx) != ignored_input_list.end()) { + return true; + } + return false; +} + +void CPUKernelExecutor::PreprocessBeforeRun(const FuncGraphPtr &graph) const { + MS_EXCEPTION_IF_NULL(graph); + auto kernel_graph = graph->cast(); + MS_EXCEPTION_IF_NULL(kernel_graph); + if (!kernel_graph->is_from_single_op()) { + // Remove reorder after PS feature finish adapting push/pull in auto_monad. + auto execution_order = kernel_graph->execution_order(); + common::AnfAlgo::ReorderPosteriorExecList(NOT_NULL(&execution_order)); + kernel_graph->set_execution_order(execution_order); + } + auto ms_context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(ms_context); + // somas + if (runtime::RuntimeConf::GetInstance()->mem_optimize_level() != kOptimizeO0) { + auto somas = std::make_shared(); + bool ret = somas->Assign(kernel_graph); + if (ret) { + MS_LOG(INFO) << "Somas allocate success for graph " << kernel_graph->graph_id() + << " somas size: " << kernel_graph->somas_whole_block_size(); + } else if (somas->IsSupportSomas(*kernel_graph)) { + MS_LOG(WARNING) << "Somas allocate failed for graph " << kernel_graph->graph_id(); + } + } + MS_LOG(INFO) << "Status record: end preprocess before run graph. 
graph id: " << kernel_graph->graph_id(); +} + +bool CPUKernelExecutor::LaunchKernel(const CNodePtr &kernel, const std::vector &inputs, + const std::vector &workspace, + const std::vector &outputs, KernelMod *kernel_mod, + void * /* stream */) const { + MS_EXCEPTION_IF_NULL(kernel); + + const auto &profiler_inst = profiler::cpu::CPUProfiler::GetInstance(); + MS_EXCEPTION_IF_NULL(profiler_inst); + if (profiler_inst->GetEnableFlag() && profiler_inst->GetOpTimeFlag()) { + auto ret = LaunchKernelWithProfiling(kernel, inputs, workspace, outputs, kernel_mod); + return ret; + } + auto ret = DoLaunchKernel(kernel, inputs, workspace, outputs, kernel_mod); + return ret; +} + +bool CPUKernelExecutor::ExecuteKernelTask(const runtime::KernelTaskType &task_type, + const device::DeviceAddressPtrList &input_addr_list, + const device::DeviceAddressPtrList &output_addr_list, + const size_t &stream_id) const { + auto task_context = + std::make_shared(device_context_, input_addr_list, output_addr_list, nullptr); + auto task = GetTaskByTaskType(task_type, task_context); + MS_EXCEPTION_IF_NULL(task); + + auto ret = task->RunWithRet(); + if (!ret) { + MS_LOG(EXCEPTION) << "Exec task failed, task_type:" << task_type; + } + return ret; +} + +bool CPUKernelExecutor::ExecuteKernelTask(const runtime::KernelTaskType &task_type, + const std::vector &input_addr_list, + const std::vector &output_addr_list, + const size_t &stream_id) const { + if (task_type != runtime::KernelTaskType::kCONTIGUOUS_TASK) { + MS_LOG(EXCEPTION) << "KernelTaskType not supported, task_type:" << task_type; + } + MS_LOG(DEBUG) << "Start Contiguous task"; + + const auto &input_address = input_addr_list[0]; + const auto &output_address = output_addr_list[0]; + const auto &input_storage_info = input_address->GetTensorStorageInfo(); + MS_LOG(DEBUG) << "Input_storage_info:" << (input_storage_info == nullptr ? "" : input_storage_info->ToString()) + << ", input_address size:" << input_address->GetSize() + << ", output_address size:" << output_address->GetSize(); + + MallocMemoryForDeviceAddress(input_address, device_context_); + MallocMemoryForDeviceAddress(output_address, device_context_); + + kernel::ContiguousCpuKernel contiguous_kernel; + auto ret = contiguous_kernel.LaunchContiguous(input_address->type_id(), input_address, input_storage_info, + output_address->type_id(), output_address); + if (!ret) { + MS_LOG(EXCEPTION) << "CpuContiguous failed"; + } + + MS_LOG(DEBUG) << "End Contiguous task"; + return true; +} + +bool CPUKernelExecutor::LaunchKernelWithProfiling(const CNodePtr &kernel, const std::vector &inputs, + const std::vector &workspace, + const std::vector &outputs, + KernelMod *kernel_mod) const { + MS_EXCEPTION_IF_NULL(kernel); + + auto profiler_inst = profiler::cpu::CPUProfiler::GetInstance(); + MS_EXCEPTION_IF_NULL(profiler_inst); + + uint32_t pid = IntToUint(getpid()); + // cpu support multi-thread with mindrt for profiling. 
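+  // (Editorial note:) the Begin/End producer calls below bracket DoLaunchKernel so each actor
+  // thread records its own interval for this op, keyed by fullname_with_scope() and the pid.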
+ profiler_inst->OpDataProducerBeginParallel(kernel->fullname_with_scope(), pid); + bool ret = DoLaunchKernel(kernel, inputs, workspace, outputs, kernel_mod); + profiler_inst->OpDataProducerEndParallel(kernel->fullname_with_scope()); + profiler_inst->RecordFrameWorkInfo(kernel); + return ret; +} + +bool CPUKernelExecutor::DoLaunchKernel(const CNodePtr &kernel, const std::vector &inputs, + const std::vector &workspace, + const std::vector &outputs, KernelMod *kernel_mod) const { + MS_EXCEPTION_IF_NULL(kernel); + MS_EXCEPTION_IF_NULL(kernel_mod); + uint64_t start_time = 0; + PROFILER_START(start_time); + auto ret = kernel_mod->Launch(inputs, workspace, outputs, nullptr); + PROFILER_END(start_time, runtime::ProfilerModule::kKernel, runtime::ProfilerEvent::kKernelLaunch, + kernel->fullname_with_scope(), false); + return ret; +} + +void CPUKernelExecutor::RebuildKernelSelectBackoffOp(const std::vector &nodes) const { + for (auto &node : nodes) { + MS_EXCEPTION_IF_NULL(node); + if (!AnfAlgo::IsKernelSelectBackoffOp(node)) { + continue; + } + auto [failure_info, failure_type] = AnfAlgo::GetKernelSelectBackoffInfo(node); + if (IsVmapNotSupported(node)) { + MS_EXCEPTION(failure_type) << "#umsg#Kernel select failed:#umsg#" << failure_info; + } + + // Judge whether match strictly between kernel build info and supported kernel attrs. + const auto &kernel_build_info = AnfAlgo::GetSelectKernelBuildInfo(node); + MS_EXCEPTION_IF_NULL(kernel_build_info); + const auto &kernel_attr = kernel::GetKernelAttrFromBuildInfo(kernel_build_info); + const auto &supported_kernel_attrs = + kernel::NativeCpuKernelMod::GetCpuSupportedList(common::AnfAlgo::GetCNodeName(node)); + const auto &match_result = kernel::MatchKernelAttrStrict(kernel_attr, supported_kernel_attrs); + auto attr_info = kernel::FetchPrintInfoByKernelAttr(kernel_attr); + if (!match_result.first) { + MS_LOG(INFO) << "Backoff and rebuild kernel on CPU failed for node: " << node->fullname_with_scope() + << ", node attr: " << attr_info; + MS_EXCEPTION(failure_type) << "#umsg#Kernel select failed:#umsg#" << failure_info; + } else { + // Set the CPU flag. + common::AnfAlgo::SetNodeAttr(kAttrPrimitiveTarget, MakeValue(kCPUDevice), node); + kernel_build_info->set_kernel_type(CPU_KERNEL); + kernel_build_info->set_processor(kernel::Processor::CPU); + MS_LOG(INFO) << "Backoff and rebuild kernel on CPU successfully for node: " << node->fullname_with_scope() + << ", node attr: " << attr_info; + } + + CreateKernel({node}); + } +} + +MS_REGISTER_DEVICE(kCPUDevice, CPUDeviceContext); +#ifdef WITH_BACKEND +MSCONTEXT_REGISTER_INIT_FUNC(kCPUDevice, [](MsContext *ctx) -> void { + MS_EXCEPTION_IF_NULL(ctx); + if (ctx->backend_policy() != "ms") { + (void)ctx->set_backend_policy("ms"); + } +}); +#endif + +// Register functions to _c_expression so python hal module could call CPU device interfaces. 
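+// The CPU backend currently has no stateless device functions to expose, so the binder below only
+// validates the module pointer. A backend that did expose them would add pybind defs here, e.g.
+// (hypothetical) m->def("device_count", ...).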
+void PybindCPUStatelessFunc(py::module *m) { MS_EXCEPTION_IF_NULL(m); }
+REGISTER_DEV_STATELESS_FUNC_CB(kCPUDevice, PybindCPUStatelessFunc);
+}  // namespace cpu
+}  // namespace device
+}  // namespace mindspore
diff --git a/inferrt/src/hardware/cpu/cpu_device_context.h b/inferrt/src/hardware/cpu/cpu_device_context.h
new file mode 100644
index 0000000000000000000000000000000000000000..6b02af05b46bb41b182a24d0c4d16d349624f363
--- /dev/null
+++ b/inferrt/src/hardware/cpu/cpu_device_context.h
@@ -0,0 +1,104 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_RUNTIME_HARDWARE_CPU_CPU_DEVICE_CONTEXT_H_
+#define MINDSPORE_CCSRC_RUNTIME_HARDWARE_CPU_CPU_DEVICE_CONTEXT_H_
+
+#include <map>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <vector>
+#include "hardware/hardware_abstract/device_context.h"
+#include "hardware/hardware_abstract/memory_manager.h"
+#include "hardware/cpu/res_manager/cpu_res_manager.h"
+
+namespace mindspore {
+namespace device {
+namespace cpu {
+class CPUKernelExecutor : public KernelExecutor {
+ public:
+  CPUKernelExecutor() = default;
+  ~CPUKernelExecutor() override = default;
+
+  void OptimizeGraph(const FuncGraphPtr &graph) const override;
+
+  void CreateKernel(const std::vector<CNodePtr> &nodes) const override;
+  kernel::KernelModPtr CreateKernelMod(const std::string &op_name) const override;
+
+  // Kernel that is not supported by other device can be backed off and rebuilt on the CPU.
+  // The function will set kernel info and create kernel mod.
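+  // (Illustrative:) an op selected for another backend but lacking a kernel there is retagged with
+  // kAttrPrimitiveTarget = "CPU", its build info switched to CPU_KERNEL, and CreateKernel({node})
+  // is run again; see RebuildKernelSelectBackoffOp in cpu_device_context.cc above.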
+ void RebuildKernelSelectBackoffOp(const std::vector &nodes) const; + + void PreprocessBeforeRun(const FuncGraphPtr &graph) const override; + + bool LaunchKernel(const CNodePtr &kernel, const std::vector &inputs, + const std::vector &workspace, const std::vector &outputs, + KernelMod *kernel_mod, void * /* stream */) const override; + bool LaunchKernelHP(const CNodePtr &kernel, const std::vector &inputs, + const std::vector &workspace, const std::vector &outputs, + KernelMod *kernel_mod, void *stream) const override { + return LaunchKernel(kernel, inputs, workspace, outputs, kernel_mod, stream); + } + + bool ExecuteKernelTask(const runtime::KernelTaskType &task_type, const device::DeviceAddressPtrList &input_addr_list, + const device::DeviceAddressPtrList &output_addr_list, const size_t &stream_id) const override; + bool ExecuteKernelTask(const runtime::KernelTaskType &task_type, + const std::vector &input_addr_list, + const std::vector &output_addr_list, + const size_t &stream_id) const override; + + std::vector GetLaunchIgnoredInputAddressIdx(const AnfNodePtr &node) const override; + + bool IsLaunchIgnoredInputAddressIdx(const AnfNodePtr &node, size_t input_idx) const override; + + private: + // Select the matching backend kernels according to the data type and format of input and output for all + // execution operators, and set final device data type and format information for backend kernels, device + // data type and format which replace original data type and format will use for executing kernels. + void SetOperatorInfo(const KernelGraphPtr &graph) const; + void SingleOpGraphOptimize(const KernelGraphPtr &graph) const; + void OptimizeGraphImpl(const KernelGraphPtr &graph) const; + void OptimizeMindIR(const KernelGraphPtr &graph) const; + // Launch a kernel and record the elapsed time end to end. + bool LaunchKernelWithProfiling(const CNodePtr &kernel, const std::vector &inputs, + const std::vector &workspace, + const std::vector &outputs, KernelMod *kernel_mod) const; + // Launch a kernel by 'KernelMod' of the kernel. + bool DoLaunchKernel(const CNodePtr &kernel, const std::vector &inputs, + const std::vector &workspace, const std::vector &outputs, + KernelMod *kernel_mod) const; + void UpdateKernelRefInfo(const KernelGraphPtr &graph) const; + + mutable std::mutex launch_mutex_; +}; + +class CPUDeviceContext : public DeviceInterface { + public: + explicit CPUDeviceContext(const DeviceContextKey &device_context_key) : DeviceInterface(device_context_key) {} + ~CPUDeviceContext() override = default; + + void Initialize() override; + + void Destroy() override; + + private: + DISABLE_COPY_AND_ASSIGN(CPUDeviceContext); +}; +} // namespace cpu +} // namespace device +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_RUNTIME_HARDWARE_CPU_CPU_DEVICE_CONTEXT_H_ diff --git a/inferrt/src/hardware/cpu/res_manager/cpu_res_manager.cc b/inferrt/src/hardware/cpu/res_manager/cpu_res_manager.cc new file mode 100644 index 0000000000000000000000000000000000000000..985b55ac5ddddd1b4a827c1f0128b2bbff8f327b --- /dev/null +++ b/inferrt/src/hardware/cpu/res_manager/cpu_res_manager.cc @@ -0,0 +1,130 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "hardware/cpu/res_manager/cpu_res_manager.h"
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+#include "hardware/hardware_abstract/memory_manager.h"
+
+namespace mindspore {
+namespace device {
+namespace cpu {
+void CPUResManager::Initialize() {
+  mem_manager_ = std::make_shared<CPUMemoryManager>();
+  MS_EXCEPTION_IF_NULL(mem_manager_);
+}
+
+void CPUResManager::Destroy() {
+  // Release memory.
+  if (mem_manager_ != nullptr) {
+    mem_manager_->Finalize();
+    mem_manager_ = nullptr;
+  }
+}
+
+void *CPUResManager::AllocateMemory(size_t size, uint32_t stream_id) const {
+  MS_EXCEPTION_IF_NULL(mem_manager_);
+  return mem_manager_->MallocMemFromMemPool(size, false, false, stream_id);
+}
+
+void CPUResManager::FreeMemory(void *ptr) const {
+  MS_EXCEPTION_IF_NULL(ptr);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
+  mem_manager_->FreeMemFromMemPool(ptr);
+}
+
+void CPUResManager::FreePartMemorys(const std::vector<void *> &free_addrs, const std::vector<void *> &keep_addrs,
+                                    const std::vector<size_t> &keep_addr_sizes) const {
+  CPUMemoryPool::GetInstance().FreePartTensorMems(free_addrs, keep_addrs, keep_addr_sizes);
+}
+
+std::vector<void *> CPUResManager::AllocateContinuousMemory(const std::vector<size_t> &size_list,
+                                                            uint32_t stream_id) const {
+  MS_EXCEPTION_IF_NULL(mem_manager_);
+  return mem_manager_->MallocContinuousMemFromMemPool(size_list, stream_id);
+}
+
+namespace {
+
+// clang-format off
+#define FOR_EACH_TYPE_BASE(M)                      \
+  M(kNumberTypeBool, bool)                         \
+  M(kNumberTypeUInt8, uint8_t)                     \
+  M(kNumberTypeInt4, int8_t)                       \
+  M(kNumberTypeInt8, int8_t)                       \
+  M(kNumberTypeInt16, int16_t)                     \
+  M(kNumberTypeInt32, int32_t)                     \
+  M(kNumberTypeInt64, int64_t)                     \
+  M(kNumberTypeUInt16, uint16_t)                   \
+  M(kNumberTypeUInt32, uint32_t)                   \
+  M(kNumberTypeUInt64, uint64_t)                   \
+  M(kNumberTypeFloat16, float16)                   \
+  M(kNumberTypeFloat32, float)                     \
+  M(kNumberTypeFloat64, double)                    \
+  M(kNumberTypeFloat8E4M3FN, float8_e4m3fn)        \
+  M(kNumberTypeFloat8E5M2, float8_e5m2)            \
+  M(kNumberTypeHiFloat8, hifloat8)                 \
+  M(kNumberTypeComplex64, ComplexStorage<float>)   \
+  M(kNumberTypeComplex128, ComplexStorage<double>)
+
+#ifndef KERNEL_EXECUTOR_ANDROID
+#define FOR_EACH_TYPE_EXTRA(M) M(kNumberTypeBFloat16, bfloat16)
+#else
+#define FOR_EACH_TYPE_EXTRA(M)
+#endif
+
+#define FOR_EACH_TYPE(M) \
+  FOR_EACH_TYPE_BASE(M)  \
+  FOR_EACH_TYPE_EXTRA(M)
+
+#define REGISTER_SIZE(address_type_id, address_type) { address_type_id, sizeof(address_type) },
+
+static const std::unordered_map<TypeId, size_t> kTypeSizeMap = {
+  FOR_EACH_TYPE(REGISTER_SIZE)
+};
+
+size_t GetTypeSize(TypeId tid) {
+  return kTypeSizeMap.at(tid);
+}
+
+#undef FOR_EACH_TYPE
+#undef FOR_EACH_TYPE_BASE
+#undef FOR_EACH_TYPE_EXTRA
+#undef REGISTER_SIZE
+// clang-format on
+}  // namespace
+
+bool CPUResManager::Copy(void *dst, const void *src, uint64_t size, CopyType kind, size_t stream_id) const {
+  if (size == 0) {
+    return true;
+  }
+  MS_EXCEPTION_IF_NULL(dst);
+  MS_EXCEPTION_IF_NULL(src);
+  auto ret_code = memcpy_s(dst, size, src, size);
+  if (ret_code == ERANGE) {
+    ConvertSameType(dst, src, size, kNumberTypeUInt8);
+  } else if (ret_code != EOK) {
+    MS_LOG(ERROR) << "Failed to copy tensor from ptr: " << src << " to: " << dst << " size: " <<
size;
+    return false;
+  }
+  return true;
+}
+}  // namespace cpu
+}  // namespace device
+}  // namespace mindspore
diff --git a/inferrt/src/hardware/cpu/res_manager/cpu_res_manager.h b/inferrt/src/hardware/cpu/res_manager/cpu_res_manager.h
new file mode 100644
index 0000000000000000000000000000000000000000..9330756d3a7f149e0ac476c260cc5b5aa93a6523
--- /dev/null
+++ b/inferrt/src/hardware/cpu/res_manager/cpu_res_manager.h
@@ -0,0 +1,55 @@
+/**
+ * Copyright 2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_PLUGIN_RES_MANAGER_CPU_CPU_RES_MANAGER_H_
+#define MINDSPORE_CCSRC_PLUGIN_RES_MANAGER_CPU_CPU_RES_MANAGER_H_
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+#include "hardware/hardware_abstract/device_context.h"
+#include "hardware/cpu/res_manager/mem_manager/cpu_memory_manager.h"
+
+namespace mindspore {
+namespace device {
+namespace cpu {
+class CPUResManager : public DeviceResManager {
+ public:
+  CPUResManager() { Initialize(); }
+  ~CPUResManager() override = default;
+
+  void Initialize() override;
+
+  void Destroy() override;
+
+  std::vector<void *> AllocateContinuousMemory(const std::vector<size_t> &size_list,
+                                               uint32_t stream_id = kDefaultStreamIndex) const override;
+
+  bool Copy(void *dst, const void *src, uint64_t size, CopyType kind, size_t stream_id) const override;
+
+  // Relevant functions to allocate and free device memory of raw ptr.
+  void *AllocateMemory(size_t size, uint32_t stream_id = kDefaultStreamIndex) const override;
+  void FreeMemory(void *ptr) const override;
+  void FreePartMemorys(const std::vector<void *> &free_addrs, const std::vector<void *> &keep_addrs,
+                       const std::vector<size_t> &keep_addr_sizes) const override;
+
+ private:
+  std::shared_ptr<CPUMemoryManager> mem_manager_{nullptr};
+};
+}  // namespace cpu
+}  // namespace device
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_PLUGIN_RES_MANAGER_CPU_CPU_RES_MANAGER_H_
diff --git a/inferrt/src/hardware/cpu/res_manager/mem_manager/cpu_memory_manager.cc b/inferrt/src/hardware/cpu/res_manager/mem_manager/cpu_memory_manager.cc
new file mode 100644
index 0000000000000000000000000000000000000000..727d24f694a9a836b7b0e54593845dc021c8e2ba
--- /dev/null
+++ b/inferrt/src/hardware/cpu/res_manager/mem_manager/cpu_memory_manager.cc
@@ -0,0 +1,110 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
diff --git a/inferrt/src/hardware/cpu/res_manager/mem_manager/cpu_memory_manager.cc b/inferrt/src/hardware/cpu/res_manager/mem_manager/cpu_memory_manager.cc
new file mode 100644
index 0000000000000000000000000000000000000000..727d24f694a9a836b7b0e54593845dc021c8e2ba
--- /dev/null
+++ b/inferrt/src/hardware/cpu/res_manager/mem_manager/cpu_memory_manager.cc
@@ -0,0 +1,110 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hardware/cpu/res_manager/mem_manager/cpu_memory_manager.h"
+
+namespace mindspore {
+namespace device {
+namespace cpu {
+uint8_t *CPUMemoryManager::MemMalloc(size_t size) {
+  auto block = std::make_shared<std::vector<uint8_t>>();
+  try {
+    block->resize(size, 0);
+    auto ptr = block->data();
+    mem_block_map_[ptr] = block;
+    return ptr;
+  } catch (const std::exception &e) {
+    MS_LOG(EXCEPTION) << "Malloc memory failed: size " << size;
+  }
+}
+
+uint8_t *CPUMemoryManager::MallocStaticMem(size_t size, bool, uint32_t) {
+  auto ptr = MemMalloc(size);
+  static_mem_[ptr] = size;
+  return ptr;
+}
+
+uint8_t *CPUMemoryManager::MallocDynamicMem(size_t size, bool) {
+  void *ptr = nullptr;
+  size_t min_size = 0;
+  // First find the smallest cached block which fits the size.
+  for (auto &&iter : cached_mem_) {
+    if (iter.second >= size) {
+      if (min_size == 0 || iter.second < min_size) {
+        ptr = iter.first;
+        min_size = iter.second;
+      }
+    }
+  }
+  if (ptr != nullptr) {
+    if (memset_s(ptr, size, 0, size) != EOK) {
+      // The block is owned by mem_block_map_, so it must not be free()d here.
+      MS_LOG(EXCEPTION) << "Failed to init memory.";
+    }
+    dynamic_mem_[ptr] = min_size;
+    (void)cached_mem_.erase(ptr);
+    return reinterpret_cast<uint8_t *>(ptr);
+  }
+  // If no cached block fits, malloc a new one.
+  auto new_ptr = MemMalloc(size);
+  dynamic_mem_[new_ptr] = size;
+  return new_ptr;
+}
+
+void CPUMemoryManager::ResetDynamicMemory() {
+  // Don't free the blocks; keep them cached for multi-graph reuse.
+  for (auto &&iter : dynamic_mem_) {
+    cached_mem_[iter.first] = iter.second;
+  }
+  dynamic_mem_.clear();
+}
+
+CPUMemoryManager::~CPUMemoryManager() { MemFree(); }
+
+void CPUMemoryManager::MemFree() noexcept {
+  if (mem_ptr_ != nullptr) {
+    mem_ptr_ = nullptr;
+    mem_size_ = 0;
+  }
+  static_mem_.clear();
+  dynamic_mem_.clear();
+  cached_mem_.clear();
+  mem_block_map_.clear();
+}
+
+void *CPUMemoryManager::StaticMemMalloc(size_t mem_size) {
+  auto ptr = MemMalloc(mem_size);
+  if (ptr != nullptr) {
+    static_mem_[ptr] = mem_size;
+    return ptr;
+  } else {
+    MS_LOG(EXCEPTION) << "Malloc memory failed: size " << mem_size;
+  }
+}
+
+void CPUMemoryManager::MemFree(void *ptr) {
+  auto iter = static_mem_.find(ptr);
+  if (iter != static_mem_.end()) {
+    (void)static_mem_.erase(iter);
+    auto block_iter = mem_block_map_.find(ptr);
+    if (block_iter != mem_block_map_.end()) {
+      (void)mem_block_map_.erase(block_iter);
+    }
+  }
+}
+}  // namespace cpu
+}  // namespace device
+}  // namespace mindspore
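MallocDynamicMem above performs a best-fit scan: it walks cached_mem_ linearly and keeps the smallest cached block that still fits. If the cache grows large, the same policy is available in O(log n) by indexing blocks by size; a standalone sketch (not the project's container, which is keyed by address):

#include <cstddef>
#include <map>

// Cached free blocks indexed by size. lower_bound() lands on the smallest
// block whose size is >= the request: the same best-fit choice as the linear
// scan in MallocDynamicMem, but in logarithmic time.
std::multimap<size_t, void *> size_index;

void *TakeBestFit(size_t size) {
  auto it = size_index.lower_bound(size);
  if (it == size_index.end()) {
    return nullptr;  // Nothing large enough is cached; caller allocates fresh.
  }
  void *ptr = it->second;
  size_index.erase(it);
  return ptr;
}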
diff --git a/inferrt/src/hardware/cpu/res_manager/mem_manager/cpu_memory_manager.h b/inferrt/src/hardware/cpu/res_manager/mem_manager/cpu_memory_manager.h
new file mode 100644
index 0000000000000000000000000000000000000000..1f55d4f1ca7d4364bb9ad7064ed833a34ec30007
--- /dev/null
+++ b/inferrt/src/hardware/cpu/res_manager/mem_manager/cpu_memory_manager.h
@@ -0,0 +1,79 @@
+/**
+ * Copyright 2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_PLUGIN_RES_MANAGER_CPU_CPU_MEM_MANAGER_CPU_MEMORY_MANAGER_H_
+#define MINDSPORE_CCSRC_PLUGIN_RES_MANAGER_CPU_CPU_MEM_MANAGER_CPU_MEMORY_MANAGER_H_
+#include <map>
+#include <memory>
+#include <vector>
+#include "hardware/hardware_abstract/visible.h"
+#include "hardware/hardware_abstract/memory_manager.h"
+#include "hardware/cpu/res_manager/mem_manager/cpu_memory_pool.h"
+
+namespace mindspore {
+namespace device {
+namespace cpu {
+class HARDWARE_EXPORT CPUMemoryManager : public MemoryManager {
+ public:
+  CPUMemoryManager() = default;
+  virtual ~CPUMemoryManager();
+
+  void Initialize() override {}
+  void Finalize() override { CPUMemoryPool::GetInstance().ReleaseDeviceRes(); }
+  void ResetDynamicMemory() override;
+
+  void *StaticMemMalloc(size_t mem_size);
+  void MemFree(void *ptr);
+
+  void *MallocMemFromMemPool(size_t size, bool from_persistent_mem, bool need_recycle = false,
+                             uint32_t stream_id = kDefaultStreamIndex) override {
+    return CPUMemoryPool::GetInstance().AllocTensorMem(size, from_persistent_mem, false, stream_id);
+  }
+  void FreeMemFromMemPool(void *device_ptr) override { CPUMemoryPool::GetInstance().FreeTensorMem(device_ptr); }
+  std::vector<void *> MallocContinuousMemFromMemPool(const std::vector<size_t> &size_list,
+                                                     uint32_t stream_id = kDefaultStreamIndex) override {
+    return CPUMemoryPool::GetInstance().AllocContinuousTensorMem(size_list, stream_id);
+  }
+
+  DynamicMemPool *GetMemoryPool() override {
+    if (MS_UNLIKELY(memory_pool_ == nullptr)) {
+      memory_pool_ = &(CPUMemoryPool::GetInstance());
+    }
+    return memory_pool_;
+  }
+
+  bool GetDynamicMalloc() { return dynamic_malloc_; }
+
+ protected:
+  uint8_t *MallocStaticMem(size_t size, bool communication_mem, uint32_t graph_id) override;
+  uint8_t *MallocDynamicMem(size_t size, bool communication_mem) override;
+
+ private:
+  uint8_t *MemMalloc(size_t size);
+  void MemFree() noexcept;
+
+  size_t mem_size_{0};
+  uint8_t *mem_ptr_{nullptr};
+  bool dynamic_malloc_{false};
+  std::map<void *, size_t> dynamic_mem_;
+  std::map<void *, size_t> static_mem_;
+  std::map<void *, size_t> cached_mem_;
+  std::map<void *, std::shared_ptr<std::vector<uint8_t>>> mem_block_map_;
+};
+}  // namespace cpu
+}  // namespace device
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_PLUGIN_RES_MANAGER_CPU_CPU_MEM_MANAGER_CPU_MEMORY_MANAGER_H_
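MallocContinuousMemFromMemPool takes a list of sizes and returns one address per entry, carved out of a single contiguous region, which matters for communication ops that fuse inputs and outputs. A usage sketch; it assumes the pool signals failure with a shorter (or empty) result vector, which this header does not spell out:

#include <cstddef>
#include <vector>

// Sketch: request three adjacent buffers (e.g. for a fused op).
bool AllocFusedBuffers(mindspore::device::cpu::CPUMemoryManager *pool, std::vector<void *> *out) {
  const std::vector<size_t> sizes = {256, 512, 1024};
  *out = pool->MallocContinuousMemFromMemPool(sizes);
  return out->size() == sizes.size();  // One address per requested size on success (assumed contract).
}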
diff --git a/inferrt/src/hardware/cpu/res_manager/mem_manager/cpu_memory_pool.cc b/inferrt/src/hardware/cpu/res_manager/mem_manager/cpu_memory_pool.cc
new file mode 100644
index 0000000000000000000000000000000000000000..78482d105bb4127257204f7202be955dba40f809
--- /dev/null
+++ b/inferrt/src/hardware/cpu/res_manager/mem_manager/cpu_memory_pool.cc
@@ -0,0 +1,86 @@
+/**
+ * Copyright 2021-2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hardware/cpu/res_manager/mem_manager/cpu_memory_pool.h"
+
+#include <mutex>
+
+namespace mindspore {
+namespace device {
+namespace cpu {
+namespace {
+const char kMemAvailable[] = "MemAvailable";
+}
+
+CPUMemoryPool &CPUMemoryPool::GetInstance() {
+  static CPUMemoryPool instance;
+  static std::once_flag flag;
+  std::call_once(flag, [&]() {
+    float init_size = runtime::RuntimeConf::GetInstance()->mem_init_size();
+    size_t init_size_byte = FloatToSize(init_size * kGBToByte);
+    float increase_size = runtime::RuntimeConf::GetInstance()->mem_block_increase_size();
+    size_t increase_size_byte = FloatToSize(increase_size * kGBToByte);
+    float max_size = runtime::RuntimeConf::GetInstance()->mem_max_size();
+    size_t max_size_byte = FloatToSize(max_size * kGBToByte);
+    instance.Initialize(init_size_byte, increase_size_byte, max_size_byte);
+#ifdef ENABLE_DEBUGGER
+    // Set memory profiler callback func.
+    instance.SetMemoryProfilerCallback([&]() {
+      static auto profiler_inst = profiler::Profiler::GetInstance(kCPUDevice);
+      MS_EXCEPTION_IF_NULL(profiler_inst);
+      if (profiler_inst->GetEnableFlag() && profiler_inst->GetProfileMemoryFlag()) {
+        profiler_inst->RecordMemoryPoolInfo(instance.TotalUsedMemStatistics(), instance.TotalMemStatistics(),
+                                            instance.TotalUsedByEventMemStatistics());
+      }
+    });
+#endif
+
+    instance.SetRankIdGetter([]() {
+      size_t rank_id = SIZE_MAX;
+      if (DistributedMeta::GetInstance()->initialized()) {
+        rank_id = DistributedMeta::GetInstance()->global_rank_id();
+      }
+      return rank_id;
+    });
+  });
+  return instance;
+}
+
+size_t CPUMemoryPool::AllocDeviceMem(size_t alloc_size, DeviceMemPtr *addr) {
+  if (alloc_size == 0) {
+    MS_LOG(EXCEPTION) << "The memory alloc size is 0.";
+  }
+
+  *addr = malloc(alloc_size);
+  if (*addr == nullptr) {
+    MS_LOG(ERROR) << "malloc memory failed.";
+    return 0;
+  }
+
+  total_used_memory_ += alloc_size;
+  MS_LOG(INFO) << "Current alloc size[" << alloc_size << "], total used size[" << total_used_memory_ << "].";
+
+  return alloc_size;
+}
+
+bool CPUMemoryPool::FreeDeviceMem(const DeviceMemPtr &addr) {
+  free(addr);
+  return true;
+}
+
+size_t CPUMemoryPool::free_mem_size() { return mindspore::GetSystemMemorySize(kMemAvailable); }
+}  // namespace cpu
+}  // namespace device
+}  // namespace mindspore
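GetInstance above combines a function-local static (thread-safe construction since C++11) with std::call_once, so both construction and the one-time pool configuration are race-free even when the first callers arrive concurrently. The idiom in isolation (Configure stands in for the setup shown above):

#include <mutex>

class Pool {
 public:
  static Pool &GetInstance() {
    static Pool instance;          // Construction is thread-safe (C++11 magic static).
    static std::once_flag flag;
    std::call_once(flag, [] { instance.Configure(); });  // Runs exactly once.
    return instance;
  }

 private:
  Pool() = default;
  void Configure() { /* read limits, pre-reserve memory, install callbacks... */ }
};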
diff --git a/inferrt/src/hardware/cpu/res_manager/mem_manager/cpu_memory_pool.h b/inferrt/src/hardware/cpu/res_manager/mem_manager/cpu_memory_pool.h
new file mode 100644
index 0000000000000000000000000000000000000000..fb33d652188096a30a2ebcf491375ad98e6239a1
--- /dev/null
+++ b/inferrt/src/hardware/cpu/res_manager/mem_manager/cpu_memory_pool.h
@@ -0,0 +1,50 @@
+/**
+ * Copyright 2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_PLUGIN_RES_MANAGER_CPU_CPU_MEM_MANAGER_CPU_MEMORY_POOL_H_
+#define MINDSPORE_CCSRC_PLUGIN_RES_MANAGER_CPU_CPU_MEM_MANAGER_CPU_MEMORY_POOL_H_
+
+#include <memory>
+#include <string>
+
+#include "common/common.h"
+#include "hardware/hardware_abstract/visible.h"
+
+namespace mindspore {
+namespace device {
+namespace cpu {
+class HARDWARE_EXPORT CPUMemoryPool : public DynamicMemPoolBestFit {
+ public:
+  ~CPUMemoryPool() override = default;
+
+  static CPUMemoryPool &GetInstance();
+
+  size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) override;
+  bool FreeDeviceMem(const DeviceMemPtr &addr) override;
+  size_t free_mem_size() override;
+  std::string GetMemoryPoolType() const override { return "CPU"; }
+
+ private:
+  CPUMemoryPool() = default;
+  DISABLE_COPY_AND_ASSIGN(CPUMemoryPool);
+
+  size_t total_used_memory_{0};
+};
+}  // namespace cpu
+}  // namespace device
+}  // namespace mindspore
+
+#endif  // MINDSPORE_CCSRC_PLUGIN_RES_MANAGER_CPU_CPU_MEM_MANAGER_CPU_MEMORY_POOL_H_
diff --git a/inferrt/src/hardware/hardware_abstract/CMakeLists.txt b/inferrt/src/hardware/hardware_abstract/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..41cc82cbcafc141b12261add719e19d3a29ad993
--- /dev/null
+++ b/inferrt/src/hardware/hardware_abstract/CMakeLists.txt
@@ -0,0 +1,41 @@
+check_debug_log_out()
+
+find_package(Python3 COMPONENTS Interpreter Development REQUIRED)
+message(STATUS "Python3_FOUND: ${Python3_FOUND}")
+message(STATUS "Python3_INCLUDE_DIRS: ${Python3_INCLUDE_DIRS}")
+include_directories(${Python3_INCLUDE_DIRS})
+
+if(TARGET pybind11::headers)
+    message("-- pybind11 already included")
+else()
+    # Prepare pybind11 module
+    message("-- pybind11 not included, start including")
+    set(depname "pybind11")
+    set(PYBIND11_PATH "${PROJECT_SOURCE_DIR}/${depname}-src")
+    message("-- PYBIND11_PATH: ${PYBIND11_PATH}")
+
+    # Download and copy the pybind11 project if it does not exist
+    if(NOT EXISTS ${PYBIND11_PATH})
+        message("-- Downloading ${depname} module...")
+        include(FetchContent)
+        FetchContent_Declare(
+            ${depname}
+            # Use the gitee mirror of https://github.com/pybind/pybind11
+            GIT_REPOSITORY https://gitee.com/mirrors/pybind11.git
+            GIT_TAG 58c382a8e3d7081364d2f5c62e7f429f0412743b # stable
+        )
+        FetchContent_MakeAvailable(${depname})
+        message("-- pybind11_SOURCE_DIR: ${${depname}_SOURCE_DIR}")
+        message("-- pybind11_BINARY_DIR: ${${depname}_BINARY_DIR}")
+        # Find pybind11 package location, or call find_package(pybind11 REQUIRED)
+        message("-- Copying ${${depname}_SOURCE_DIR} to ${PROJECT_SOURCE_DIR}/...")
+        file(COPY ${${depname}_SOURCE_DIR} DESTINATION ${PROJECT_SOURCE_DIR})
+    endif()
+
+    # Include pybind11 directories
+    include_directories(${PYBIND11_PATH}/include)
+
+endif()
+
+file(GLOB_RECURSE HARDWARE_ABSTRACT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
+add_library(hardware_abstract_obj STATIC ${HARDWARE_ABSTRACT_SRC_FILES})
\ No newline at end of file
diff --git a/inferrt/src/hardware/hardware_abstract/common.cc b/inferrt/src/hardware/hardware_abstract/common.cc
new file mode 100644
index 0000000000000000000000000000000000000000..31b849606fb1edc50cef850ab8ac8b43189c4b6b
--- /dev/null
+++ b/inferrt/src/hardware/hardware_abstract/common.cc
@@ -0,0 +1,27 @@
+/**
+ * Copyright 2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hardware/hardware_abstract/common.h"
+
+namespace mindspore {
+GilReleaseWithCheck::GilReleaseWithCheck() {
+  // Only release the GIL when the interpreter is alive and this thread holds it.
+  if (Py_IsInitialized() != 0 && PyGILState_Check() != 0) {
+    release_ = std::make_unique<py::gil_scoped_release>();
+  }
+}
+
+GilReleaseWithCheck::~GilReleaseWithCheck() { release_ = nullptr; }
+}  // namespace mindspore
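GilReleaseWithCheck releases the GIL only when the interpreter is initialized and the current thread actually holds it, which makes it safe to instantiate on non-Python threads. A usage sketch (BlockingDeviceSync is a hypothetical long-running device call):

// Sketch: drop the GIL (if held) around a blocking device call so Python
// threads keep running; the GIL is reacquired when the guard is destroyed.
void SyncDeviceFromPython() {
  mindspore::GilReleaseWithCheck gil_release;
  BlockingDeviceSync();  // Hypothetical long-running device call.
}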
diff --git a/inferrt/src/hardware/hardware_abstract/common.h b/inferrt/src/hardware/hardware_abstract/common.h
new file mode 100644
index 0000000000000000000000000000000000000000..8fa280efd0c3f96736f2e4ceeeddf3504703607f
--- /dev/null
+++ b/inferrt/src/hardware/hardware_abstract/common.h
@@ -0,0 +1,35 @@
+/**
+ * Copyright 2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HARDWARE_COMMON_H__
+#define HARDWARE_COMMON_H__
+
+#include <memory>
+
+#include "pybind11/pybind11.h"
+
+namespace py = pybind11;
+namespace mindspore {
+// RAII guard that releases the Python GIL if (and only if) it is currently held.
+class GilReleaseWithCheck {
+ public:
+  GilReleaseWithCheck();
+
+  ~GilReleaseWithCheck();
+
+ private:
+  std::unique_ptr<py::gil_scoped_release> release_;
+};
+}  // namespace mindspore
+
+#endif  // HARDWARE_COMMON_H__
diff --git a/inferrt/src/hardware/hardware_abstract/device_context.cc b/inferrt/src/hardware/hardware_abstract/device_context.cc
new file mode 100644
index 0000000000000000000000000000000000000000..041913e78e632873838321afd7705d1fa831c856
--- /dev/null
+++ b/inferrt/src/hardware/hardware_abstract/device_context.cc
@@ -0,0 +1,29 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "hardware/hardware_abstract/device_context.h" + +namespace mindspore { +namespace device { +DeviceResManager::DeviceResManager() { + device_context_ = nullptr; +} + +bool DeviceContext::initialized() const { + return initialized_; +} +} // namespace device +} // namespace mindspore diff --git a/inferrt/src/hardware/hardware_abstract/device_context.h b/inferrt/src/hardware/hardware_abstract/device_context.h new file mode 100644 index 0000000000000000000000000000000000000000..de1e9f685e24a4a13f93ed6f02f32262711567f0 --- /dev/null +++ b/inferrt/src/hardware/hardware_abstract/device_context.h @@ -0,0 +1,326 @@ +/** + * Copyright 2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INFERRT_SRC_HARDWARE_DEVICE_CONTEXT_H_ +#define INFERRT_SRC_HARDWARE_DEVICE_CONTEXT_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "common/common.h" +#include "hardware/hardware_abstract/visible.h" +#include "hardware/hardware_abstract/stream_util.h" +#ifdef __APPLE__ +#include "async/spinlock.h" +#endif + +namespace mindspore { +class DeviceEvent; +using DeviceEventPtr = std::shared_ptr; +namespace runtime { +enum class KernelTaskType; +} +namespace device { +constexpr size_t kSizeZero = 0; + +struct DeviceContextKey { + // device type name, such as 'GPU' 'Ascend' 'CPU'. + std::string device_name_; + uint32_t device_id_{0}; + + // Use the result of ToString() as key to look up DeviceContext + // in cache map which maintains created DeviceContext objects. + std::string ToString() const { return device_name_ + "_" + std::to_string(device_id_); } +}; + +class DeviceResManager; +class KernelExecutor; + +// DeviceContext is unified interface of interaction with device. +class HARDWARE_EXPORT DeviceContext { + public: + explicit DeviceContext(const DeviceContextKey &device_context_key) + : device_context_key_(device_context_key), initialized_(false) {} + virtual ~DeviceContext() = default; + + // Initialize the device context. + virtual void Initialize() = 0; + + // Destroy device context and release device resource. + virtual void Destroy() = 0; + + // Get device_context_key_ to obtain device name and device id. + const DeviceContextKey &device_context_key() const { return device_context_key_; } + + // Get kernel executor. + std::shared_ptr GetKernelExecutor() const { return kernel_executor_; } + + void SetKernelExecutor(const std::shared_ptr &kernel_executor) { kernel_executor_ = kernel_executor; } + + // Return whether this device context is initialized. + bool initialized() const; + + DeviceContextKey device_context_key_; + std::unique_ptr device_res_manager_; + + protected: +#ifdef __APPLE__ + // There are some problems with using mutex on Mac, use spinlocks instead. 
+ inline static SpinLock init_lock_; +#else + inline static std::mutex init_mutex_; +#endif + bool initialized_; + + private: + std::shared_ptr kernel_executor_; +}; +using DeviceContextPtr = std::shared_ptr; +class MemoryManager; +class CollectiveCommunicationLib; +class OffloadedMemPool; +using DeviceMemPtr = void *; + +class HARDWARE_EXPORT DeviceResManager { + public: + DeviceResManager(); + + virtual ~DeviceResManager() = default; + + // Initialize the device resource manager. + virtual void Initialize() {} + + virtual void SetAclDeterministic() {} + + // Destroy device resource manager and release device resource. + virtual void Destroy() {} + + // Bind device to current thread to gain device control privileges + // If force_bind is true, bind context to current thread every time; + // Otherwise, only bind context to current thread for the first time. + virtual bool BindDeviceToCurrentThread(bool force_bind) const { return true; } + virtual void ResetStreamAndCtx() const {} + + // Relevant function to allocate and free device memory of raw ptr. + virtual void *AllocateMemory(size_t size, uint32_t stream_id = kDefaultStreamIndex) const = 0; + virtual void FreeMemory(void *ptr) const = 0; + virtual void FreePartMemorys(const std::vector &free_addrs, const std::vector &keep_addrs, + const std::vector &keep_addr_sizes) const = 0; + virtual void DefragMemory() {} + virtual bool IsEnableVmm() const { return false; } + + // Interface for multi stream event control. + virtual bool RecordEvent(int64_t task_id_on_stream, uint32_t user_stream_id, + const std::vector> &memory_stream_addresses, + const DeviceEventPtr &input_event) { + return false; + } + + virtual bool WaitEvent(int64_t task_id_on_stream, uint32_t user_stream_id, uint32_t memory_stream_id) { + return false; + } + + virtual bool WaitEvent(int64_t task_id_on_stream, uint32_t user_stream_id) { return false; } + + virtual bool SyncAllEvents() { return false; } + + virtual size_t GetMaxUsedMemorySize() const { return 0; } + + // Relevant function to manage memory statistics + virtual size_t GetTotalMemStatistics() const { return 0; } + virtual size_t GetTotalUsedMemStatistics() const { return 0; } + virtual size_t GetTotalIdleMemStatistics() const { return 0; } + virtual size_t GetTotalEagerFreeMemStatistics() const { return 0; } + virtual size_t GetUsedMemPeakStatistics() const { return 0; } + virtual size_t GetReservedMemPeakStatistics() const { return 0; } + virtual std::unordered_map GetBlockCountsStatistics() const { return {}; } + virtual std::unordered_map GetBlockUnitSizeStatistics() const { return {}; } + virtual std::unordered_map> + GetCommonMemBlocksInfoStatistics() const { + return {}; + } + virtual std::unordered_map> + GetPersistentMemBlocksInfoStatistics() const { + return {}; + } + virtual void ResetMaxMemoryReserved() {} + virtual void ResetMaxMemoryAllocated() {} + + virtual size_t EmptyCache() { return -1L; } + + // Allocate host memory with raii and ref count + virtual std::shared_ptr AllocateHostMemory(size_t size) const { + return std::shared_ptr(::malloc(size), ::free); + } + virtual size_t GetAvailableMemSize() const { return 0; } + + // Allocate continuous device memory according to size list. + // Communication operators may need continuous memory for input and output + // to optimize the communication performance. 
+ virtual std::vector AllocateContinuousMemory(const std::vector &size_list, + uint32_t stream_id = kDefaultStreamIndex) const { + LOG_ERROR << "Unimplemented interface."; + return {}; + } + + // Create a stream with assigning a stream id, the assigned stream id will be written to the parameter '*stream_id'. + virtual bool CreateStream(size_t *stream_id) const { + LOG_ERROR << "Unimplemented interface: 'CreateStream'."; + *stream_id = kSizeZero; + return false; + } + + // Create a stream with priority. + virtual bool CreateStreamWithPriority(size_t *stream_id, int32_t priority) const { + *stream_id = kSizeZero; + return false; + } + + virtual size_t QueryStreamSize() const { return 0L; } + virtual std::vector GetStreamIds() const { return {}; } + + // If multi-stream used in pynative mode, other streams must be sync before the graph + // is executed. Otherwise, out-of-order occurs. Therefore this flag is added. + // This solution is a temporary solution, this flag will be removed after multi-stream is + // supported in graph mode. + virtual bool single_op_multi_stream_enable() const { return false; } + virtual void set_single_op_multi_stream_enable(bool single_op_multi_stream_enable) {} + + // Get the stream pointer by stream_id. + virtual void *GetStream(size_t stream_id) const { return nullptr; } + + // Set currently using stream id. + virtual void SetCurrentStreamId(size_t stream_id) { return; } + + // Get currently using stream id. + virtual size_t GetCurrentStreamId() const { return kSizeZero; } + + virtual void *GetStream() const { return nullptr; } + + virtual size_t GetCommunicationStreamID() const { return kDefaultStreamIndex; } + + virtual size_t GetCommunicationStreamIDByGroup(const std::string &group) const { return GetCommunicationStreamID(); } + + // Destroy a stream bound to the input parameter "stream_id". + virtual bool DestroyStream(size_t stream_id) const { return false; } + + // Query tasks' completion status of a stream. + virtual bool QueryStream(size_t stream_id) const { return true; } + + // Synchronize stream, device such as GPU and Ascend need stream to launch kernel asynchronously, + // Using 'SyncStream' to block thread and wait for completing all tasks on specific stream. + // Using 'SyncAllStream' to block thread and wait for completing all tasks on all streams. + // Devices without stream could ignore the implementation of these function. + // Since the current entry for creating streams is not unified, the implementation of the 'SyncStream' and + // "SyncAllStreams" interfaces are implemented by subclasses. + virtual bool SyncStream(size_t stream_id) const { return true; } + + // 'sync_device' is used for Ascend backend. + virtual bool SyncAllStreams(bool sync_device = true) const { return true; } + + virtual bool SyncNotDefaultStreams() const { return true; } + + // Return default stream id. Normally it's 0. + virtual size_t DefaultStream() const { return 0; } + + // Create device event for runtime. + virtual DeviceEventPtr CreateRuntimeEvent(bool enable_blocking, bool enable_record_wait) { return nullptr; } + + // Create device event with flag. + virtual DeviceEventPtr CreateEventWithFlag(bool enable_timing, bool blocking, bool use_extensional_api = true) { + return nullptr; + } + + // Destroy specified device event. + virtual bool DestroyEvent(const DeviceEventPtr &event) { return true; } + + // Destroy all device events. 
+ virtual bool DestroyAllEvents() { return true; } + + virtual std::shared_ptr mem_manager() const { return nullptr; } + + virtual bool LaunchCallback(std::function callback_func, size_t stream_id, bool is_block = false) const { + callback_func(); + return true; + } + + protected: + DeviceContext *device_context_{nullptr}; + + private: + template + friend class DeviceInterface; + void SetDeviceContext(DeviceContext *device_context) { device_context_ = device_context; } + std::shared_ptr offloaded_mem_pool_; +}; + +using CallbackFunc = std::function; + +class HARDWARE_EXPORT KernelExecutor { + public: + virtual ~KernelExecutor() = default; + + virtual void Initialize() {} + virtual void Destroy() {} + + void SetDeviceContext(DeviceContext *device_context) { device_context_ = device_context; } + + protected: + DeviceContext *device_context_{nullptr}; +}; + +template +class DeviceInterface : public DeviceContext {}; + +template <> +class DeviceInterface<> : public DeviceContext { + public: + explicit DeviceInterface(const DeviceContextKey &key) : DeviceContext(key) {} + + protected: + void CheckUnset(const void *ptr, const std::string &error_msg) const { + if (ptr != nullptr) { + LOG_ERROR << error_msg; + } + } +}; + +template +class DeviceInterface : public DeviceInterface { + public: + explicit DeviceInterface(const DeviceContextKey &key) : DeviceInterface(key) { + if constexpr (std::is_base_of_v) { + DeviceInterface::CheckUnset(reinterpret_cast(DeviceContext::device_res_manager_.get()), + "DeviceResManager has been registered!"); + DeviceContext::device_res_manager_ = std::make_unique(); + DeviceContext::device_res_manager_->SetDeviceContext(this); + } else if constexpr (std::is_base_of_v) { + DeviceInterface::CheckUnset(reinterpret_cast(DeviceContext::GetKernelExecutor().get()), + "KernelExecutor has been registered!"); + DeviceContext::SetKernelExecutor(std::make_shared()); + DeviceContext::GetKernelExecutor()->SetDeviceContext(this); + } + } +}; +} // namespace device +} // namespace mindspore +#endif // INFERRT_SRC_HARDWARE_DEVICE_CONTEXT_H_ diff --git a/inferrt/src/hardware/hardware_abstract/device_context_manager.cc b/inferrt/src/hardware/hardware_abstract/device_context_manager.cc new file mode 100644 index 0000000000000000000000000000000000000000..f53cb3d283fcb323cc8e07669c61020bea793f6d --- /dev/null +++ b/inferrt/src/hardware/hardware_abstract/device_context_manager.cc @@ -0,0 +1,191 @@ +/** + * Copyright 2021-2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "hardware/hardware_abstract/device_context_manager.h" +#if defined(_WIN32) || defined(_WIN64) +#include +#endif +#ifdef __linux__ +#include +#endif // #ifdef __linux__ +#include +#include +#include +#include +#include +#include "hardware/hardware_abstract/dlopen_macro.h" +#include "hardware/hardware_abstract/multi_stream_controller.h" +#include "common/logger.h" + +namespace mindspore { +namespace { +size_t constexpr GetStrLen(const char *const str) { + if (*str == '\0') { + return 0; + } else { + return GetStrLen(str + 1) + 1; + } +} + +constexpr auto kCudaHomeEnv = "CUDA_HOME"; +constexpr auto kNvccVersionKeyWords = "Cuda compilation tools, release "; +constexpr size_t kNvccVersionKeyWordsSize = GetStrLen(kNvccVersionKeyWords); +constexpr auto kSuccessKeyWord = "Success"; +constexpr size_t kSuccessKeyWordSize = GetStrLen(kSuccessKeyWord); +constexpr size_t kBufferSize = 999; +constexpr auto kGpuPluginName = "libmindspore_gpu"; +#if defined(_WIN32) +constexpr bool kIsWindowsPlatform = true; +#else +constexpr bool kIsWindowsPlatform = false; +#endif +} // namespace +namespace device { + +DeviceContextManager &DeviceContextManager::GetInstance() { + static DeviceContextManager instance{}; +#ifdef WITH_BACKEND + instance.LoadPlugin(); +#endif + return instance; +} + +void DeviceContextManager::Register(const std::string &device_name, DeviceContextCreator &&device_context_creator) { + LOG_OUT << "Register device context creator for device: " << device_name; + if (device_context_creators_.find(device_name) == device_context_creators_.end()) { + (void)device_context_creators_.emplace(device_name, device_context_creator); + } +} + +void DeviceContextManager::ClearDeviceContexts() { + multi_stream_controllers_.clear(); + for (auto &iter : device_contexts_) { + LOG_OUT << "Release device " << iter.first; + if (iter.second == nullptr) { + LOG_ERROR << "device context is null"; + } + iter.second->Destroy(); + } + backend_to_device_context_.clear(); + device_contexts_.clear(); +} + +void DeviceContextManager::ChildAfterFork() { + LOG_OUT << "DeviceContextManager reinitialize after fork."; + LOG_OUT << "Clear device_contexts_."; + device_contexts_.clear(); + LOG_OUT << "DeviceContextManager reinitialize after fork done."; +} + +void DeviceContextManager::BindDeviceCtx() const { + for (auto &iter : device_contexts_) { + if (iter.second == nullptr) { + LOG_ERROR << "device context is null"; + } + if (iter.second->device_res_manager_ == nullptr) { + LOG_ERROR << "device res manager is null"; + } + if (!iter.second->device_res_manager_->BindDeviceToCurrentThread(true)) { + LOG_ERROR << "Bind device failed"; + } + } +} + +DeviceContext *DeviceContextManager::GetOrCreateDeviceContext(const DeviceContextKey &device_context_key) { + std::string device_context_key_str = device_context_key.ToString(); + std::string name = device_context_key.device_name_; + + auto device_context_iter = device_contexts_.find(device_context_key_str); + if (device_context_iter != device_contexts_.end()) { + return device_context_iter->second.get(); + } + + std::shared_ptr device_context; + auto creator_iter = device_context_creators_.find(name); + if (creator_iter != device_context_creators_.end()) { + device_context = (creator_iter->second)(device_context_key); + if (device_context == nullptr) { + LOG_ERROR << "create device context failed"; + } + if (device_context->device_res_manager_ == nullptr) { + LOG_ERROR << "create device res manager failed"; + } + device_contexts_[device_context_key_str] = device_context; 
+    backend_to_device_context_[name] = device_context;
+    multi_stream_controllers_[name] =
+      std::make_shared<MultiStreamController>(device_context->device_res_manager_.get());
+  } else {
+    LOG_ERROR << "Create device context failed, please make sure target device:" << name
+              << " is available, error message of loading plugins: " << GetErrorMsg();
+  }
+  return device_context.get();
+}
+
+DeviceContextPtr DeviceContextManager::GetDeviceContext(const std::string &device_target) {
+  if (backend_to_device_context_.count(device_target) == 0) {
+    LOG_OUT << "Device context of device " << device_target << " is not created yet.";
+    return nullptr;
+  }
+  return backend_to_device_context_[device_target];
+}
+
+MultiStreamControllerPtr &DeviceContextManager::GetMultiStreamController(const std::string &device_name) {
+  auto &&iter = multi_stream_controllers_.find(device_name);
+  if (iter != multi_stream_controllers_.end()) {
+    return iter->second;
+  }
+  LOG_ERROR << "Multi stream controller for device_name : " << device_name << " not found, try to initialize it.";
+  // Use device id 0 temporarily.
+  uint32_t device_id = 0;
+  DeviceContextKey host_key = {device_name, device_id};
+  const auto &real_device_context = GetOrCreateDeviceContext(host_key);
+  if (real_device_context == nullptr) {
+    LOG_ERROR << "Get or create device context failed.";
+  }
+  auto &&iter_again = multi_stream_controllers_.find(device_name);
+  if (iter_again == multi_stream_controllers_.end()) {
+    LOG_ERROR << "Get multi stream controller failed, device_name : " << device_name << ".";
+  }
+  return iter_again->second;
+}
+
+void DeviceContextManager::WaitTaskFinishOnDevice() const {
+  for (const auto &item : device_contexts_) {
+    auto device_context = item.second;
+    try {
+      if (device_context != nullptr && !device_context->device_res_manager_->SyncAllStreams()) {
+        LOG_ERROR << "SyncStream failed";
+        return;
+      }
+    } catch (const std::exception &ex) {
+      LOG_ERROR << "SyncStream failed, exception:" << ex.what();
+      return;
+    }
+  }
+}
+
+void DeviceContextManager::SyncAllStreams() const {
+  for (const auto &item : device_contexts_) {
+    auto device_context = item.second;
+    if (device_context != nullptr && !device_context->device_res_manager_->SyncAllStreams()) {
+      LOG_ERROR << "SyncStream failed, device info: " << device_context->device_context_key().ToString();
+    }
+  }
+}
+
+std::string DeviceContextManager::GetErrorMsg() const { return dlopen_error_msg_.str(); }
+}  // namespace device
+}  // namespace mindspore
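GetOrCreateDeviceContext is a cached factory over a name-to-creator registry, the same registry that MS_REGISTER_DEVICE in the header below feeds at static-initialization time. Stripped of plugin loading and stream controllers, the core idiom reduces to roughly this sketch (all names here are illustrative):

#include <functional>
#include <map>
#include <memory>
#include <string>

struct Ctx { virtual ~Ctx() = default; };
using Creator = std::function<std::shared_ptr<Ctx>(const std::string &)>;

std::map<std::string, Creator> creators;             // Filled by static registrar objects.
std::map<std::string, std::shared_ptr<Ctx>> cache;   // At most one context per key.

std::shared_ptr<Ctx> GetOrCreate(const std::string &key) {
  if (auto it = cache.find(key); it != cache.end()) {
    return it->second;  // Reuse the previously created context.
  }
  auto creator = creators.find(key);
  if (creator == creators.end()) {
    return nullptr;  // No backend registered under this name.
  }
  return cache[key] = creator->second(key);
}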
diff --git a/inferrt/src/hardware/hardware_abstract/device_context_manager.h b/inferrt/src/hardware/hardware_abstract/device_context_manager.h
new file mode 100644
index 0000000000000000000000000000000000000000..20757d42dc7330fefc403caa65e6cf94cfff91b8
--- /dev/null
+++ b/inferrt/src/hardware/hardware_abstract/device_context_manager.h
@@ -0,0 +1,95 @@
+/**
+ * Copyright 2021-2023 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_RUNTIME_HARDWARE_DEVICE_CONTEXT_MANAGER_H_
+#define MINDSPORE_CCSRC_RUNTIME_HARDWARE_DEVICE_CONTEXT_MANAGER_H_
+
+#include <functional>
+#include <map>
+#include <memory>
+#include <set>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+#include "hardware/hardware_abstract/device_context.h"
+#include "hardware/hardware_abstract/visible.h"
+
+namespace mindspore {
+namespace device {
+class MultiStreamController;
+using DeviceContextCreator = std::function<std::shared_ptr<DeviceContext>(const DeviceContextKey &)>;
+using MultiStreamControllerPtr = std::shared_ptr<MultiStreamController>;
+
+class HARDWARE_EXPORT DeviceContextManager {
+ public:
+  ~DeviceContextManager() = default;
+  static DeviceContextManager &GetInstance();
+  void Register(const std::string &device_name, DeviceContextCreator &&device_context_creator);
+  DeviceContext *GetOrCreateDeviceContext(const DeviceContextKey &device_context_key);
+  // Return the device context of the specified device target.
+  // The difference from 'GetOrCreateDeviceContext' is that this method only queries a device context by
+  // device target (without device id), since MindSpore only supports 'single process, single device'.
+  DeviceContextPtr GetDeviceContext(const std::string &device_target);
+  MultiStreamControllerPtr &GetMultiStreamController(const std::string &device_name);
+  void ClearDeviceContexts();
+  void ChildAfterFork();
+  void WaitTaskFinishOnDevice() const;
+  void SyncAllStreams() const;
+  void UnloadPlugin();
+  std::string GetErrorMsg() const;
+  void BindDeviceCtx() const;
+
+ private:
+  DeviceContextManager() = default;
+  void LoadPlugin();
+  bool SelectGpuPlugin(const std::string &cuda_home, const std::set<std::string> &file_names);
+
+  // Plugin file name -> dlopen handle.
+  std::map<std::string, void *> plugin_maps_;
+  bool load_init_;
+  std::string plugin_path_;
+
+  // The string converted from DeviceContextKey -> DeviceContextPtr.
+  std::map<std::string, DeviceContextPtr> device_contexts_;
+  // The name of device -> DeviceContextPtr.
+  std::map<std::string, DeviceContextPtr> backend_to_device_context_;
+  // The name of device -> DeviceContextCreator.
+  std::map<std::string, DeviceContextCreator> device_context_creators_;
+  // Records error messages from dlopen; printed when creating a device context fails.
+  std::stringstream dlopen_error_msg_;
+
+  // The name of device ('GPU' 'Ascend' 'CPU') -> MultiStreamControllerPtr.
+  std::map<std::string, MultiStreamControllerPtr> multi_stream_controllers_;
+};
+
+class HARDWARE_EXPORT DeviceContextRegister {
+ public:
+  DeviceContextRegister(const std::string &device_name, DeviceContextCreator &&runtime_creator) {
+    DeviceContextManager::GetInstance().Register(device_name, std::move(runtime_creator));
+  }
+  ~DeviceContextRegister() = default;
+};
+
+#define MS_REGISTER_DEVICE(DEVICE_NAME, DEVICE_CONTEXT_CLASS)            \
+  static const DeviceContextRegister g_device_##DEVICE_NAME##_reg(       \
+    DEVICE_NAME, [](const DeviceContextKey &device_context_key) {        \
+      return std::make_shared<DEVICE_CONTEXT_CLASS>(device_context_key); \
+    })
+}  // namespace device
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_RUNTIME_HARDWARE_DEVICE_CONTEXT_MANAGER_H_
diff --git a/inferrt/src/hardware/hardware_abstract/device_event.h b/inferrt/src/hardware/hardware_abstract/device_event.h
new file mode 100644
index 0000000000000000000000000000000000000000..224c31bfda6c4e90fb8787d837bb3f64235be296
--- /dev/null
+++ b/inferrt/src/hardware/hardware_abstract/device_event.h
@@ -0,0 +1,47 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CORE_IR_DEVICE_EVENT_H +#define MINDSPORE_CORE_IR_DEVICE_EVENT_H + +#include +#include + +namespace mindspore { +class DeviceEvent { + public: + virtual ~DeviceEvent() = default; + virtual bool IsReady() const = 0; + virtual void WaitEvent() = 0; + virtual bool WaitEvent(uint32_t stream_id) = 0; + virtual void WaitEventWithoutReset() = 0; + virtual void WaitEventWithoutReset(uint32_t stream_id) {} + virtual void ResetEvent() {} + virtual void ResetEvent(uint32_t stream_id) {} + virtual void RecordEvent() = 0; + virtual void RecordEvent(uint32_t stream_id) = 0; + virtual bool NeedWait() = 0; + virtual void SyncEvent() = 0; + virtual bool QueryEvent() = 0; + virtual void ElapsedTime(float *cost_time, const DeviceEvent *other) = 0; + virtual bool DestroyEvent() = 0; + virtual void set_wait_stream(void *stream) = 0; + virtual void set_record_stream(void *stream) = 0; +}; +using DeviceEventPtr = std::shared_ptr; +using DeviceEventPtrList = std::vector; +} // namespace mindspore +#endif // MINDSPORE_CORE_IR_DEVICE_EVENT_H diff --git a/inferrt/src/hardware/hardware_abstract/dlopen_macro.h b/inferrt/src/hardware/hardware_abstract/dlopen_macro.h new file mode 100644 index 0000000000000000000000000000000000000000..e89786c59809b2fc0824c744762fd94b5679629a --- /dev/null +++ b/inferrt/src/hardware/hardware_abstract/dlopen_macro.h @@ -0,0 +1,184 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_UTILS_DLOPEN_MACRO_H +#define MINDSPORE_CCSRC_UTILS_DLOPEN_MACRO_H + +#ifndef _WIN32 +#include +#else +#include +#undef ERROR +#undef SM_DEBUG +#undef Yield +#endif +#include +#include + +#ifndef _WIN32 +#define PORTABLE_EXPORT __attribute__((visibility("default"))) +#else +#define PORTABLE_EXPORT __declspec(dllexport) +#endif + +constexpr char kSimuSocName[] = "MS_DRY_RUN"; + +template +struct SimuDataFactory { + static T Data() { + static T data{}; + return data; + } +}; + +template +struct SimuDataFactory { + static T *Data() { + static int data{}; + return reinterpret_cast(&data); + } +}; + +template +struct SimuDataFactory { + static T **Data() { + static int data{}; + static T *data_ptr = reinterpret_cast(&data); + return &data_ptr; + } +}; + +template +struct SimuCreateTypeGetter { + typedef T type; +}; + +template +struct SimuCreateTypeGetter { + typedef T type; +}; + +template +struct SimuCreateTypeGetter { + typedef T *type; +}; + +#define PLUGIN_METHOD(name, return_type, ...) 
\ + extern "C" { \ + PORTABLE_EXPORT return_type Plugin##name(__VA_ARGS__); \ + } \ + constexpr const char *k##name##Name = "Plugin" #name; \ + using name##FunObj = std::function; \ + using name##FunPtr = return_type (*)(__VA_ARGS__); + +#define ORIGIN_METHOD(name, return_type, ...) \ + extern "C" { \ + return_type name(__VA_ARGS__); \ + } \ + constexpr const char *k##name##Name = #name; \ + using name##FunObj = std::function; \ + using name##FunPtr = return_type (*)(__VA_ARGS__); + +#define ORIGIN_METHOD_WITH_SIMU(name, return_type, ...) \ + ORIGIN_METHOD(name, return_type, __VA_ARGS__) \ + template \ + inline T SimuFuncI##name(__VA_ARGS__) { \ + return SimuDataFactory::Data(); \ + } \ + \ + template <> \ + inline void SimuFuncI##name(__VA_ARGS__) {} \ + extern name##FunObj name##_; \ + inline void SimuAssignI##name() { name##_ = SimuFuncI##name; } + +#define ACLRT_GET_SOC_NAME_WITH_SIMU(name, return_type, ...) \ + ORIGIN_METHOD(name, return_type, __VA_ARGS__) \ + template \ + inline T SimuFuncI##name(__VA_ARGS__) { \ + return kSimuSocName; \ + } \ + \ + template <> \ + inline void SimuFuncI##name(__VA_ARGS__) {} \ + extern name##FunObj name##_; \ + inline void SimuAssignI##name() { name##_ = SimuFuncI##name; } + +#define ORIGIN_METHOD_WITH_SIMU_CREATE(name, return_type, create_type_ptr, ...) \ + ORIGIN_METHOD(name, return_type, create_type_ptr, ##__VA_ARGS__) \ + template \ + inline T SimuFuncI##name(U *in_ret, ##__VA_ARGS__) { \ + static U st##name{}; \ + *in_ret = st##name; \ + T ret{}; \ + return ret; \ + } \ + \ + template <> \ + inline aclError SimuFuncI##name(void **in_ret, ##__VA_ARGS__) { \ + static uintptr_t currentPointer = 0; \ + currentPointer += sizeof(void *); \ + *in_ret = reinterpret_cast(currentPointer); \ + return ACL_SUCCESS; \ + } \ + \ + template <> \ + inline void SimuFuncI##name(void **in_ret, ##__VA_ARGS__) { \ + static uintptr_t currentPointer = 0; \ + currentPointer += sizeof(void *); \ + *in_ret = reinterpret_cast(currentPointer); \ + } \ + extern name##FunObj name##_; \ + inline void SimuAssignI##name() { \ + name##_ = SimuFuncI##name::type>; \ + } + +#define ASSIGN_SIMU(name) SimuAssignI##name(); + +inline static std::string GetDlErrorMsg() { +#ifndef _WIN32 + const char *result = dlerror(); + return (result == nullptr) ? 
"Unknown" : result; +#else + return std::to_string(GetLastError()); +#endif +} + +template +static T DlsymWithCast(void *handle, const char *symbol_name) { +#ifndef _WIN32 + T symbol = reinterpret_cast(reinterpret_cast(dlsym(handle, symbol_name))); +#else + T symbol = reinterpret_cast(GetProcAddress(reinterpret_cast(handle), symbol_name)); +#endif + if (symbol == nullptr) { + std::abort(); + } + return symbol; +} + +#define DlsymFuncObj(func_name, plugin_handle) DlsymWithCast(plugin_handle, k##func_name##Name); + +template +static T DlsymAscend(void *handle, const char *symbol_name) { + T symbol = reinterpret_cast(reinterpret_cast(dlsym(handle, symbol_name))); + if (symbol == nullptr) { + std::abort(); + } + return symbol; +} + +#define DlsymAscendFuncObj(func_name, plugin_handle) DlsymAscend(plugin_handle, k##func_name##Name) +#endif // MINDSPORE_CCSRC_UTILS_DLOPEN_MACRO_H diff --git a/inferrt/src/hardware/hardware_abstract/memory/abstract_dynamic_mem_pool.cc b/inferrt/src/hardware/hardware_abstract/memory/abstract_dynamic_mem_pool.cc new file mode 100644 index 0000000000000000000000000000000000000000..108678c2c249ffc37e7694fecdaf094e43593d5e --- /dev/null +++ b/inferrt/src/hardware/hardware_abstract/memory/abstract_dynamic_mem_pool.cc @@ -0,0 +1,1186 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "hardware/hardware_abstract/memory/abstract_dynamic_mem_pool.h" + +#include + +#include +#include +#include +#include +#include +#include + +#include "hardware/hardware_abstract/memory/mem_pool_util.h" +#include "common/logger.h" +#include "common/common.h" + +namespace mindspore { +namespace device { +MemBuf::MemBuf(size_t size, void *addr, uint32_t stream_id, MemBlock *mem_block, MemBufStatus status) + : prev_(nullptr), + next_(nullptr), + size_(size), + addr_(addr), + stream_id_(stream_id), + mem_block_(mem_block), + status_(status) {} + +MemBuf::~MemBuf() {} + +MemBufAllocator::~MemBufAllocator() { + LOG_OUT << "MemBufAllocator[" << this << "] : " << BriefInfo() << " deconstruct."; + for (auto &mem_block : mem_blocks_) { + delete mem_block; + } + mem_blocks_.clear(); + for (auto mem_buf : free_mem_bufs_) { + delete mem_buf; + } + free_mem_bufs_.clear(); + for (auto mem_buf : eager_free_mem_bufs_) { + delete mem_buf; + } + eager_free_mem_bufs_.clear(); + delete search_key_; +} + +void MemBufAllocator::ReleaseDeviceRes() { + LOG_OUT << "Release device resource for allocator, " << BriefInfo() << ", mem_blocks_ size : " << mem_blocks_.size() + << "."; + for (auto mem_block : mem_blocks_) { + LOG_OUT << "Clean mem block : " << mem_block->ToJson() << "."; + (void)mem_block_cleaner_(mem_block); + } + for (auto mem_block : mem_blocks_) { + LOG_OUT << "Delete mem block : " << mem_block->ToJson() << "."; + delete mem_block; + } + mem_blocks_.clear(); + + LOG_OUT << "Free mem buf size : " << free_mem_bufs_.size() << "."; + for (auto mem_buf : free_mem_bufs_) { + delete mem_buf; + } + free_mem_bufs_.clear(); + + LOG_OUT << "Eager free mem buf size : " << eager_free_mem_bufs_.size() << "."; + for (auto mem_buf : eager_free_mem_bufs_) { + delete mem_buf; + } + eager_free_mem_bufs_.clear(); +} + +MemBuf *MemBufAllocator::Malloc(size_t size) { + // Malloc with expand block first. + if (MS_UNLIKELY(mem_blocks_.empty())) { + return MallocExpandBlock(size); + } + + search_key_->size_ = size; + auto it = free_mem_bufs_.lower_bound(search_key_); + MemBuf *candidate = nullptr; + // 1. Try to find in free mem bufs. + if (MS_LIKELY(it != free_mem_bufs_.end())) { + candidate = *it; + (void)free_mem_bufs_.erase(it); + return MapAndSplitMemBuf(candidate, size); + } + // 2. Try to search available buf, free and eager free buf. + candidate = SearchAvailableMemBuf(size); + if (MS_UNLIKELY(candidate != nullptr)) { + return candidate; + } + // 3. Try to find in eager free mem bufs. + it = eager_free_mem_bufs_.lower_bound(search_key_); + if (it != eager_free_mem_bufs_.end()) { + candidate = *it; + (void)eager_free_mem_bufs_.erase(it); + return MapAndSplitMemBuf(candidate, size); + } + + return nullptr; +} + +inline MemBuf *MemBufAllocator::SearchAvailableMemBuf(size_t size) { + if (!enable_eager_free_ || MS_UNLIKELY(is_customized_)) { + return nullptr; + } + // Search from back to front, because the free mem buf is sorted by size. + // More efficient way is to search more candidates, do it in the next version. + for (auto backward_it = free_mem_bufs_.rbegin(); backward_it != free_mem_bufs_.rend(); backward_it++) { + auto mem_buf = *backward_it; + auto next_buf = mem_buf->next_; + if (next_buf == nullptr || next_buf->status_ != MemBufStatus::kMemBufEagerFree || + mem_buf->size_ + next_buf->size_ < size) { + continue; + } + + // Located candidates, try map and split. 
+ auto need_map_size = size - mem_buf->size_; + auto mapped_size = mem_mapper_(need_map_size, next_buf->addr_); + if (mapped_size != need_map_size) { + LOG_OUT << "Map mem buf : " << mem_buf->ToJson() << ", next buf : " << next_buf->ToJson() << ", size : " << size + << ", need_map_size : " << need_map_size << ", mapped_size : " << mapped_size << " failed."; + return nullptr; + } + // Update mem buf. + free_mem_bufs_.erase(mem_buf); + mem_buf->size_ = size; + mem_buf->status_ = MemBufStatus::kMemBufUsed; + // Remove eager free buf and try update it. + eager_free_mem_bufs_.erase(next_buf); + next_buf->addr_ = static_cast(next_buf->addr_) + need_map_size; + next_buf->size_ = next_buf->size_ - need_map_size; + // If next buf is empty, remove it or update remain eager free mem buf. + if (next_buf->size_ == 0) { + mem_buf->next_ = next_buf->next_; + if (next_buf->next_ != nullptr) { + next_buf->next_->prev_ = mem_buf; + } + delete next_buf; + } else { + eager_free_mem_bufs_.insert(next_buf); + } + return mem_buf; + } + return nullptr; +} + +bool MemBufAllocator::Free(MemBuf *mem_buf, MemBufStatus target_status) { + // Change mem buf status to used by event, and wait for event to free. + if (MS_UNLIKELY(!mem_buf->IsEventNotUsed())) { + mem_buf->status_ = MemBufStatus::kMemBufUsedByEvent; + return false; + } + + mem_buf->status_ = target_status; + // Try to merge from prev. + auto prev_buf = mem_buf->prev_; + if (MS_LIKELY(prev_buf != nullptr && prev_buf->status_ == target_status)) { + // Erase prev buf pointer + auto prev = prev_buf->prev_; + mem_buf->prev_ = prev; + if (prev != nullptr) { + prev->next_ = mem_buf; + } + + mem_buf->addr_ = prev_buf->addr_; + mem_buf->size_ += prev_buf->size_; + if (target_status == MemBufStatus::kMemBufIdle) { + auto ret = free_mem_bufs_.erase(prev_buf); + if (ret == 0) { + LOG_ERROR << "Erase mem buf : " << mem_buf->ToJson() << " prev buf " << prev_buf->ToJson() << " failed."; + } + } else if (target_status == MemBufStatus::kMemBufEagerFree) { + auto ret = eager_free_mem_bufs_.erase(prev_buf); + if (ret == 0) { + LOG_ERROR << "Erase mem buf : " << mem_buf->ToJson() << " prev buf " << prev_buf->ToJson() << " failed."; + } + } + delete prev_buf; + } + // Try to merge from next. + auto next_buf = mem_buf->next_; + if (MS_LIKELY(next_buf != nullptr && next_buf->status_ == target_status)) { + // Erase next buf pointer + auto next = next_buf->next_; + mem_buf->next_ = next; + if (next != nullptr) { + next->prev_ = mem_buf; + } + + mem_buf->size_ += next_buf->size_; + if (target_status == MemBufStatus::kMemBufIdle) { + auto ret = free_mem_bufs_.erase(next_buf); + if (ret == 0) { + LOG_ERROR << "Erase next buf : " << next_buf->ToJson() << " failed."; + } + } else if (target_status == MemBufStatus::kMemBufEagerFree) { + auto ret = eager_free_mem_bufs_.erase(next_buf); + if (ret == 0) { + LOG_ERROR << "Erase next buf : " << next_buf->ToJson() << " failed."; + } + } + delete next_buf; + } + + if (target_status == MemBufStatus::kMemBufIdle) { + (void)free_mem_bufs_.emplace(mem_buf); + } else if (target_status == MemBufStatus::kMemBufEagerFree) { + (void)eager_free_mem_bufs_.emplace(mem_buf); + } + + return true; +} + +MemBuf *MemBufAllocator::MallocExpandBlock(size_t size) { + MemBlock *mem_block = ExpandBlock(size); + if (mem_block == nullptr) { + return nullptr; + } + MemBuf *candidate = new MemBuf( + mem_block->size_, mem_block->addr_, mem_block->stream_id_, mem_block, + MS_LIKELY(!is_customized_) && enable_eager_free_ ? 
MemBufStatus::kMemBufEagerFree : MemBufStatus::kMemBufIdle); + if (candidate->size_ < size) { + if (candidate->status_ == MemBufStatus::kMemBufIdle) { + (void)free_mem_bufs_.emplace(candidate); + } else { + (void)eager_free_mem_bufs_.emplace(candidate); + } + LOG_OUT << "Candidate size: " << candidate->size_ << " is less than required size : " << size << "."; + return nullptr; + } + + return MapAndSplitMemBuf(candidate, size); +} + +void MemBufAllocator::Initialize(size_t size) { + LOG_OUT << "Initialize allocator : " << BriefInfo() << " with size : " << size << "."; + if (enable_eager_free_ || MS_UNLIKELY(is_customized_)) { + LOG_OUT << "Skip initialization of allocator, since vmm is enabled."; + return; + } + MemBlock *mem_block = ExpandBlock(size); + if (mem_block == nullptr) { + LOG_OUT << "Initialize allocator failed, size : " << size << "."; + return; + } + MemBuf *mem_buf = + new MemBuf(mem_block->size_, mem_block->addr_, mem_block->stream_id_, mem_block, MemBufStatus::kMemBufIdle); + (void)free_mem_bufs_.emplace(mem_buf); +} + +const std::pair MemBufAllocator::FreeIdleMemsByEagerFree() { + // Free all idle mem bufs. + size_t eager_free_size = 0; + for (auto mem_buf : free_mem_bufs_) { + eager_free_size += mem_buf->size_; + Free(mem_buf, MemBufStatus::kMemBufEagerFree); + } + free_mem_bufs_.clear(); + // Do eager free on eager free mem bufs. + size_t real_free_size = 0; + for (auto mem_buf : eager_free_mem_bufs_) { + LOG_OUT << "Eager free mem buf : " << mem_buf << ", details : " << mem_buf->ToJson() << "."; + real_free_size += mem_eager_freer_(mem_buf->addr_, mem_buf->size_); + } + LOG_OUT << "Free idle mems by eager free, eager_free_size : " << eager_free_size + << ", real_free_size : " << real_free_size << "."; + return std::make_pair(eager_free_size, real_free_size); +} + +size_t MemBufAllocator::ReleaseFreeBlocks() { + size_t release_size = 0; + for (auto iter = mem_blocks_.begin(); iter != mem_blocks_.end();) { + auto mem_block = *iter; + MemBuf mem_buf(mem_block->size_, mem_block->addr_, mem_block->stream_id_, mem_block, MemBufStatus::kMemBufIdle); + // Judge if mem block in free mem bufs. + auto &&it = free_mem_bufs_.find(&mem_buf); + if (it == free_mem_bufs_.end()) { + iter++; + continue; + } + auto mem_buf_it = *it; + if (mem_buf_it->addr_ == mem_block->addr_ && mem_buf_it->size_ == mem_block->size_) { + LOG_OUT << "Release mem block : " << mem_block->ToJson() << "."; + bool ret = mem_block_cleaner_(mem_block); + if (!ret) { + LOG_OUT << "Clean mem block : " << mem_block->ToJson() << " failed."; + iter++; + continue; + } + free_mem_bufs_.erase(it); + delete mem_buf_it; + release_size += mem_block->size_; + delete mem_block; + iter = mem_blocks_.erase(iter); + } else { + iter++; + } + } + return release_size; +} + +inline MemBuf *MemBufAllocator::MapAndSplitMemBuf(MemBuf *candidate, size_t size) { + size_t remaining_size = candidate->size_ - size; + // Mmap memory first. + if (candidate->status_ == MemBufStatus::kMemBufEagerFree) { + size_t map_size = (remaining_size >= kDynamicMemAlignSize) ? size : candidate->size_; + auto mapped_size = mem_mapper_(map_size, candidate->addr_); + if (mapped_size != map_size) { + LOG_OUT << "Mapped_size : " << mapped_size << " is not equal to required size : " << map_size + << ", mem buf info : " << candidate->ToJson() << "."; + (void)eager_free_mem_bufs_.emplace(candidate); + return nullptr; + } + } + + bool need_split = remaining_size >= kDynamicMemAlignSize; + // Try to split mem buf. 
+ if (MS_LIKELY(need_split)) { + void *remaining_addr = static_cast(candidate->addr_) + size; + auto remaining_buf = + new MemBuf(remaining_size, remaining_addr, candidate->stream_id_, candidate->mem_block_, candidate->status_); + + auto next = candidate->next_; + if (next != nullptr) { + next->prev_ = remaining_buf; + remaining_buf->next_ = next; + } + candidate->next_ = remaining_buf; + remaining_buf->prev_ = candidate; + if (remaining_buf->status_ == MemBufStatus::kMemBufIdle) { + (void)free_mem_bufs_.emplace(remaining_buf); + } else { + (void)eager_free_mem_bufs_.emplace(remaining_buf); + } + + // Update candidate size. + candidate->size_ = size; + } + + candidate->status_ = MemBufStatus::kMemBufUsed; + // Update mem block usage. + candidate->mem_block_->UpdateBorderAddr(candidate); + + return candidate; +} + +inline MemBlock *MemBufAllocator::ExpandBlock(size_t size) { + MemBlock *mem_block = mem_block_expander_(size); + if (mem_block == nullptr) { + LOG_OUT << "Expand block failed, expand size : " << size << ", memory is not enough."; + return nullptr; + } + + if (mem_block->size_ < size) { + LOG_OUT << "Expand block failed, expand size : " << mem_block->size_ << " is less than require size : " << size + << "."; + } + + (void)mem_blocks_.emplace_back(mem_block); + return mem_block; +} + +AbstractDynamicMemPool::AbstractDynamicMemPool() {} + +void AbstractDynamicMemPool::Initialize(size_t init_size, size_t increase_size, size_t max_size) { + if (init_size == 0) { + LOG_OUT << "Skip initialization of memory pool since init size is not configured."; + return; + } + + LockGuard lock(lock_); + LOG_OUT << "Initialize dynamic memory pool, init size : " << init_size << ", increase size : " << increase_size + << ", max size : " << max_size << "."; + init_size_ = init_size >> 1; + increase_size_ = increase_size; + max_size_ = max_size; + + // Do initialization with init size. + auto persistent_allocator = GetMemBufAllocator(init_size_, true, kDefaultStreamIndex); + persistent_allocator->Initialize(AlignMemorySize(init_size_)); + auto common_allocator = GetMemBufAllocator(init_size_, false, kDefaultStreamIndex); + common_allocator->Initialize(AlignMemorySize(init_size_)); +} + +void AbstractDynamicMemPool::ReleaseDeviceRes() { + LockGuard lock(lock_); + for (const auto &iter : stream_pair_mem_bufs_) { + auto size = iter.second.size(); + LOG_OUT << "Event referred stream_pair_mem_bufs_[" << iter.first.first << "-" << iter.first.second + << "], size : " << size << "."; + } + // Clear map of address to mem buf. + for (const auto &iter : addr_mem_buf_allocators_) { + auto mem_buf = iter.second.first; + delete mem_buf; + } + addr_mem_buf_allocators_.clear(); + + LOG_OUT << "Release device resource for " << GetMemoryPoolType() << " : " << mem_stat_.ToReadableString() << "."; + for (const auto &stream_id_allocator : stream_id_allocators_) { + const auto &allocator = stream_id_allocator.second; + allocator->ReleaseDeviceRes(); + } + for (const auto &customized_allocator : customized_allocators_) { + const auto &allocator = customized_allocator.second; + allocator->ReleaseDeviceRes(); + } + stream_id_allocators_.clear(); + customized_allocators_.clear(); + stream_pair_mem_bufs_.clear(); + mem_stat_.Reset(); +} + +/** + * @brief Alloc tensor mem. 
+ * Allocation follows the steps below:
+ * 1. align the size
+ * 2. find from the current allocator; if that fails, go to 3
+ * 3. find from another allocator; if that fails, go to 4
+ * 4. do eager free and find from the current allocator again; if that fails, go to 5
+ * 5. expand a block
+ */
+DeviceMemPtr AbstractDynamicMemPool::AllocTensorMem(size_t size, bool from_persistent_mem, bool, uint32_t stream_id) {
+  size_t align_size = AlignMemorySize(size);
+  LockGuard lock(lock_);
+  auto &&mem_buf_allocator = AllocMemBuf(align_size, from_persistent_mem, stream_id);
+  if (MS_UNLIKELY(mem_buf_allocator.first == nullptr)) {
+    return nullptr;
+  }
+
+  (void)addr_mem_buf_allocators_.emplace(mem_buf_allocator.first->addr_, mem_buf_allocator);
+  return mem_buf_allocator.first->addr_;
+}
+
+/**
+ * @brief Alloc mem buf.
+ * Strategy when vmm is disabled:
+ * Persistent memory: first malloc from its own pool; if that fails, try to malloc from the common pool.
+ * Common memory: first malloc from its own pool; if that fails, try to expand the pool.
+ * If the expansion fails, try to malloc from the persistent pool.
+ */
+inline std::pair<MemBuf *, MemBufAllocator *> AbstractDynamicMemPool::AllocMemBuf(size_t align_size,
+                                                                                  bool from_persistent_mem,
+                                                                                  uint32_t stream_id) {
+  auto allocator = GetMemBufAllocator(align_size, from_persistent_mem, stream_id);
+
+  auto mem_buf = allocator->Malloc(align_size);
+  if (MS_UNLIKELY(mem_buf == nullptr)) {
+    // Enable malloc from another allocator when from_persistent_mem is true and vmm is not enabled.
+    if (!enable_vmm_ && from_persistent_mem && MS_LIKELY(!enable_custom_allocator_)) {
+      auto common_allocator = GetMemBufAllocator(align_size, false, stream_id);
+      mem_buf = common_allocator->Malloc(align_size);
+      allocator = common_allocator;
+    }
+
+    if (MS_UNLIKELY(mem_buf == nullptr)) {
+      if ((enable_vmm_ || IsEnableEagerFree()) && MS_LIKELY(!enable_custom_allocator_)) {
+        WaitPipelineHelper();
+        if (!SyncAllStreams()) {
+          LOG_ERROR << "Sync all streams failed.";
+          return std::make_pair(nullptr, nullptr);
+        }
+        (void)FreeIdleMemsByEagerFree();
+        mem_buf = allocator->Malloc(align_size);
+      }
+      if (MS_UNLIKELY(mem_buf == nullptr)) {
+        mem_buf = allocator->MallocExpandBlock(align_size);
+        if (MS_UNLIKELY(mem_buf == nullptr)) {
+          if (MS_LIKELY(!from_persistent_mem) && MS_LIKELY(!enable_custom_allocator_)) {
+            // Common pool expand block failed, try to malloc from persistent pool.
+            auto persistent_allocator = GetMemBufAllocator(align_size, true, stream_id);
+            mem_buf = persistent_allocator->Malloc(align_size);
+            if (MS_LIKELY(mem_buf != nullptr)) {
+              allocator = persistent_allocator;
+            }
+          }
+
+          if (MS_UNLIKELY(mem_buf == nullptr)) {
+            LOG_OUT << "Alloc tensor mem failed and try to sync all events to release memory.";
+            (void)DoSyncAllEvents();
+            mem_buf = allocator->Malloc(align_size);
+            if (MS_UNLIKELY(mem_buf == nullptr)) {
+              return std::make_pair(nullptr, nullptr);
+            }
+          }
+        }
+      }
+    }
+  }
+
+  // Update stat.
+  mem_stat_.used_size_ += mem_buf->size_;
+  mem_stat_.UpdatePeakSize(enable_vmm_, GetVmmUsedMemSize());
+  return std::make_pair(mem_buf, allocator);
+}
+
+std::vector<DeviceMemPtr> AbstractDynamicMemPool::AllocContinuousTensorMem(const std::vector<size_t> &size_list,
+                                                                           uint32_t stream_id) {
+  std::vector<DeviceMemPtr> device_addr_list;
+  size_t total_size = std::accumulate(size_list.begin(), size_list.end(), static_cast<size_t>(0));
+  // Pre-alloc the one whole piece memory.
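+  // A hedged sketch of the expected layout (example sizes, not taken from real callers):
+  // size_list = {512, 1024, 512} first allocates one 2048-byte buf, then the split loop
+  // below carves it into three linked mem bufs that share the same mem block:
+  //   addr + 0    : 512  bytes -> device_addr_list[0]
+  //   addr + 512  : 1024 bytes -> device_addr_list[1]
+  //   addr + 1536 : 512  bytes -> device_addr_list[2]
+  // Each piece gets its own entry in addr_mem_buf_allocators_, so it can later be freed
+  // independently through DoFreeTensorMem.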
+  auto device_addr = AbstractDynamicMemPool::AllocTensorMem(total_size, false, false, stream_id);
+  if (device_addr == nullptr) {
+    return device_addr_list;
+  }
+
+  (void)device_addr_list.emplace_back(device_addr);
+  if (size_list.size() == 1) {
+    return device_addr_list;
+  }
+
+  // Try to split mem bufs.
+  LockGuard lock(lock_);
+  auto &&it = addr_mem_buf_allocators_.find(device_addr);
+  if (it != addr_mem_buf_allocators_.end()) {
+    auto mem_buf = it->second.first;
+    auto allocator = it->second.second;
+    mem_buf->size_ = size_list[0];
+    MemBuf *prev_mem_buf = mem_buf;
+    void *next_addr = static_cast<uint8_t *>(mem_buf->addr_) + size_list[0];
+    total_size -= size_list[0];
+    for (size_t i = 1; i < size_list.size(); i++) {
+      auto new_mem_buf = new MemBuf(size_list[i], next_addr, stream_id, mem_buf->mem_block_, MemBufStatus::kMemBufUsed);
+      new_mem_buf->Link(prev_mem_buf, prev_mem_buf->next_);
+      (void)addr_mem_buf_allocators_.emplace(new_mem_buf->addr_, std::make_pair(new_mem_buf, allocator));
+      // Update result.
+      (void)device_addr_list.emplace_back(next_addr);
+      // Update next addr and prev mem buf.
+      if (i < size_list.size() - 1) {
+        next_addr = static_cast<uint8_t *>(next_addr) + size_list[i];
+        total_size -= size_list[i];
+        prev_mem_buf = new_mem_buf;
+      } else {
+        // Update the last mem buf.
+        if (total_size != size_list[i]) {
+          LOG_OUT << "Remain size : " << total_size << " is not equal to last size : " << size_list[i] << ".";
+          new_mem_buf->size_ = total_size;
+        }
+      }
+    }
+  } else {
+    // Unreachable routine.
+    LOG_ERROR << "Find addr : " << device_addr << " failed.";
+  }
+
+  return device_addr_list;
+}
+
+// The main program entry of memory free.
+void AbstractDynamicMemPool::FreeTensorMem(const DeviceMemPtr &device_addr) {
+  LockGuard lock(lock_);
+  (void)DoFreeTensorMem(device_addr);
+}
+
+// The inner implementation of memory free, called with the pool lock held.
+bool AbstractDynamicMemPool::DoFreeTensorMem(const DeviceMemPtr &device_addr) {
+  void *addr = device_addr;
+  auto &&it = addr_mem_buf_allocators_.find(device_addr);
+  if (MS_LIKELY(it != addr_mem_buf_allocators_.end())) {
+    auto allocator = it->second.second;
+    auto mem_buf = it->second.first;
+    auto free_size = mem_buf->size_;
+    if (MS_LIKELY(allocator->Free(mem_buf))) {
+      mem_stat_.used_size_ -= free_size;
+      (void)addr_mem_buf_allocators_.erase(it);
+      return true;
+    }
+  } else {
+    // This may be a normal case.
+    LOG_OUT << "Free tensor mem failed, can not find address : " << addr << ".";
+  }
+  return false;
+}
+
+inline MemBufAllocator *AbstractDynamicMemPool::GetMemBufAllocator(size_t size, bool from_persistent_mem,
+                                                                   uint32_t stream_id) {
+  // Do not use the small pool.
+  const AllocatorInfo key{stream_id, from_persistent_mem, false};
+  LOG_OUT << "Get allocator, " << key.ToString() << ".";
+
+  MemBufAllocatorPtr allocator = nullptr;
+
+  auto &&it = stream_id_allocators_.find(key);
+  if (it == stream_id_allocators_.end()) {
+    allocator = GenerateAllocator(key);
+    (void)stream_id_allocators_.emplace(key, allocator);
+  } else {
+    allocator = it->second;
+  }
+  return allocator.get();
+}
+
+// Keep addrs are within the free addrs, so find their mem bufs first.
+// Then traverse the keep addrs and split the candidates, as sketched below.
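+//
+// A minimal sketch of the intended semantics (hypothetical addresses, not from a real trace):
+//   free_addrs      = {0x1000}   // one used buf covering [0x1000, 0x2000)
+//   keep_addrs      = {0x1400}
+//   keep_addr_sizes = {0x200}
+// splits the buf into three pieces:
+//   [0x1000, 0x1400) head -> freed back to the allocator,
+//   [0x1400, 0x1600) keep -> stays registered and in used status,
+//   [0x1600, 0x2000) tail -> freed back to the allocator.
+// Only the head and tail re-enter the allocator's free sets; the keep piece is untouched.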
+void AbstractDynamicMemPool::FreePartTensorMems(const std::vector &free_addrs, + const std::vector &keep_addrs, + const std::vector &keep_addr_sizes) { + LOG_OUT << "Free part tensor mems."; + LockGuard lock(lock_); + (void)DoFreePartTensorMems(free_addrs, keep_addrs, keep_addr_sizes); +} + +std::vector AbstractDynamicMemPool::DoFreePartTensorMems(const std::vector &free_addrs, + const std::vector &keep_addrs, + const std::vector &keep_addr_sizes) { + std::vector mem_bufs; + std::map> candidates; + for (const auto &free_addr : free_addrs) { + auto &&it = addr_mem_buf_allocators_.find(free_addr); + if (it != addr_mem_buf_allocators_.end()) { + (void)candidates.emplace(it->first, it->second); + } else { + // This is illegal routine, but level0 case entered. + LOG_OUT << "Find address : " << free_addr << " failed."; + } + } + + std::set processed_keep_addrs; + for (size_t i = 0; i < keep_addrs.size(); i++) { + auto keep_addr = keep_addrs[i]; + std::uintptr_t keep_addr_to_size = reinterpret_cast(keep_addr); + if (processed_keep_addrs.count(keep_addr_to_size) > 0) { + LOG_OUT << "Duplicate keep address : " << keep_addr << "."; + continue; + } + (void)processed_keep_addrs.insert(keep_addr_to_size); + auto &&it = candidates.upper_bound(keep_addr); + if (it == candidates.begin()) { + LOG_OUT << "Locate keep addr : " << keep_addr << " failed."; + continue; + } + auto iter = --it; + auto mem_buf = iter->second.first; + auto allocator = iter->second.second; + std::uintptr_t base_start = reinterpret_cast(mem_buf->addr_); + std::uintptr_t base_end = base_start + mem_buf->size_; + std::uintptr_t keep_start = keep_addr_to_size; + std::uintptr_t keep_end = keep_start + keep_addr_sizes[i]; + // Since free part tensor mem may double free keep addr, continue for these keep addrs. + if (keep_start >= base_end) { + LOG_OUT << "Check range error, base start : " << base_start << ", base end : " << base_end + << ", keep start : " << keep_start << ", keep end : " << keep_end << "."; + continue; + } + // Split candidates. If keep start equal to base start, split mem buf into two parts, or three parts. + // First construct keep mem buf and set it into addr_mem_buf_allocators_, then process head buf and tail buf. + MemBuf *keep_mem_buf = nullptr; + if (keep_start == base_start) { + keep_mem_buf = mem_buf; + keep_mem_buf->size_ = keep_addr_sizes[i]; + // Remove keep addr since keep start equal to base start, no need to free keep addr any more. + (void)candidates.erase(mem_buf->addr_); + } else { + // Split middle mem buf. + keep_mem_buf = + new MemBuf(keep_addr_sizes[i], keep_addr, mem_buf->stream_id_, mem_buf->mem_block_, mem_buf->status_); + keep_mem_buf->Link(mem_buf, mem_buf->next_); + (void)addr_mem_buf_allocators_.emplace(keep_addr, std::make_pair(keep_mem_buf, allocator)); + std::uintptr_t prev_remain_size = keep_start - base_start; + mem_buf->size_ = prev_remain_size; + } + (void)mem_bufs.emplace_back(keep_mem_buf); + LOG_OUT << "keep_mem_buf : " << keep_mem_buf->ToJson() << "."; + // Process last mem buf. 
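+    // Continuing the running example above: any bytes between keep_end and base_end still
+    // belong to the original buf's range, so a tail buf of size (base_end - keep_end) is
+    // linked in behind the keep buf and queued as a free candidate. When keep_end equals
+    // base_end there is no tail and nothing is created.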
+ if (keep_end < base_end) { + void *last_addr = static_cast(keep_mem_buf->addr_) + keep_mem_buf->size_; + auto last_mem_buf = new MemBuf(base_end - keep_end, last_addr, keep_mem_buf->stream_id_, keep_mem_buf->mem_block_, + mem_buf->status_); + last_mem_buf->Link(keep_mem_buf, keep_mem_buf->next_); + (void)addr_mem_buf_allocators_.emplace(last_mem_buf->addr_, std::make_pair(last_mem_buf, allocator)); + if (candidates.count(last_mem_buf->addr_) > 0) { + LOG_OUT << "Duplicate address : " << last_mem_buf->addr_ << "."; + } + LOG_OUT << "last mem buf : " << last_mem_buf->ToJson() << "."; + (void)candidates.emplace(last_mem_buf->addr_, std::make_pair(last_mem_buf, allocator)); + } + } + for (const auto &candidate : candidates) { + auto mem_buf = candidate.second.first; + if (!AbstractDynamicMemPool::DoFreeTensorMem(mem_buf->addr_)) { + LOG_ERROR << "Free device address failed : " << mem_buf->addr_ << ", mem_buf : " << mem_buf->ToJson() << "."; + } + } + return mem_bufs; +} + +MemBufAllocatorPtr AbstractDynamicMemPool::GenerateAllocator(const AllocatorInfo &allocator_key) { + const auto is_persistent = allocator_key.from_persistent_mem; + const auto stream_id = allocator_key.stream_id; + const auto is_small = allocator_key.use_small_pool; + + LOG_OUT << "Generate allocator, " << allocator_key.ToString() << "."; + std::function mem_block_expander = [&, is_persistent = is_persistent, + stream_id = stream_id](size_t size) { + size_t block_size = CalMemBlockAllocSize(size, is_persistent); + MemBlock *mem_block = nullptr; + if (block_size == 0) { + LOG_OUT << "Malloc mem block failed, is enable eager free : " << IsEnableEagerFree() + << ", is enable vmm : " << IsEnableVmm() << ", size : " << size << ", block size is 0."; + return mem_block; + } + DeviceMemPtr addr = nullptr; + size_t alloc_size; + LOG_OUT << "Malloc mem block, is enable eager free : " << IsEnableEagerFree() + << ", is enable vmm : " << IsEnableVmm() << ", size : " << size << ", block size : " << block_size << "."; + if (IsEnableVmm() || IsEnableEagerFree()) { + // Virtual address is unlimited. + auto eager_free_size = std::max(block_size, static_cast(total_mem_size())); + alloc_size = AllocDeviceMemByEagerFree(eager_free_size, &addr); + mem_stat_.eager_free_size_ += alloc_size; + } else { + alloc_size = AllocDeviceMem(block_size, &addr); + if (alloc_size < block_size) { + LOG_OUT << "Alloc device mem failed, alloc size : " << alloc_size << ", block size : " << block_size << "."; + } + } + if (alloc_size == 0) { + return mem_block; + } + mem_stat_.alloc_size_ += alloc_size; + mem_block = new MemBlock(alloc_size, addr, stream_id); + LOG_OUT << "Malloc mem block : " << mem_block->ToJson() << "."; + return mem_block; + }; + + std::function mem_block_cleaner = [&](MemBlock *mem_block) { + mem_stat_.alloc_size_ -= mem_block->size_; + // Call free device mem as ascend memory pool would do stat in free operation. 
+    return FreeDeviceMem(mem_block->addr_);
+  };
+  std::function<size_t(size_t, void *)> mem_mapper = [&](size_t size, void *addr) {
+    mem_stat_.eager_free_size_ -= size;
+    return MmapDeviceMem(size, addr);
+  };
+  std::function<size_t(void *, const size_t)> mem_eager_freer = [&](void *addr, const size_t size) {
+    LOG_OUT << "Eager free addr : " << addr << ", size : " << size << ".";
+    return FreeDeviceMemByEagerFree(addr, size);
+  };
+
+  return std::make_shared<MemBufAllocator>(mem_block_expander, mem_block_cleaner, mem_mapper, mem_eager_freer,
+                                           IsEnableVmm() || IsEnableEagerFree(), is_persistent, stream_id, is_small);
+}
+
+// Element in vector : <memory_stream_id, address>
+bool AbstractDynamicMemPool::RecordEvent(int64_t task_id_on_stream, uint32_t user_stream_id,
+                                         const std::vector<std::pair<uint32_t, DeviceMemPtr>> &memory_stream_addresses,
+                                         const DeviceEventPtr &event) {
+  LOG_OUT << "Record event for task id on stream : " << task_id_on_stream << ", user stream id : " << user_stream_id
+          << ".";
+  LockGuard lock(lock_);
+  for (auto &[memory_stream_id, addr] : memory_stream_addresses) {
+    auto &&it = addr_mem_buf_allocators_.find(addr);
+    if (it != addr_mem_buf_allocators_.end()) {
+      auto mem_buf = it->second.first;
+      if (mem_buf->IsEventNotUsed()) {
+        mem_stat_.used_by_event_size_ += mem_buf->size_;
+      }
+      LOG_OUT << "Record event for : " << mem_buf->ToJson() << ".";
+      (void)mem_buf->RecordEvent(task_id_on_stream, user_stream_id, event);
+      (void)stream_pair_mem_bufs_[std::make_pair(user_stream_id, memory_stream_id)].emplace(mem_buf);
+    } else {
+      // The output of a somas sub graph may be used by an inner node of that sub graph, so the address may not be
+      // kept in the mem pool.
+      LOG_OUT << "Unknown address : " << addr << ".";
+    }
+  }
+  return true;
+}
+
+bool AbstractDynamicMemPool::WaitEvent(int64_t task_id_on_stream, uint32_t user_stream_id, uint32_t memory_stream_id) {
+  LOG_OUT << "Wait event for task id on stream : " << task_id_on_stream << ", user stream id : " << user_stream_id
+          << ", memory stream id : " << memory_stream_id << ".";
+  LockGuard lock(lock_);
+  auto key = std::make_pair(user_stream_id, memory_stream_id);
+  auto iter = stream_pair_mem_bufs_.find(key);
+  if (iter == stream_pair_mem_bufs_.end()) {
+    return false;
+  }
+
+  // Iterate over a copy, since waiting may erase bufs from the underlying set.
+  auto mem_bufs = iter->second;
+  for (const auto &mem_buf : mem_bufs) {
+    LOG_OUT << "Wait event for : " << mem_buf->ToJson() << ".";
+    mem_buf->WaitEvent(task_id_on_stream, user_stream_id);
+    // Remove event and try to free memory.
+    if (mem_buf->IsEventNotUsed()) {
+      mem_stat_.used_by_event_size_ -= mem_buf->size_;
+      // Force clear all mem bufs.
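+      // A buf recorded from several user streams appears in several
+      // (user_stream, memory_stream) sets, so it must be erased from all of them,
+      // not only from the pair being waited on, before it can be freed.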
+ for (auto &kv : stream_pair_mem_bufs_) { + (void)kv.second.erase(mem_buf); + } + if (mem_buf->status_ == DynamicMemBufStatus::kMemBufUsedByEvent) { + (void)DoFreeTensorMem(mem_buf->addr_); + } + } + } + } + return true; +} + +bool AbstractDynamicMemPool::SyncAllEvents() { + LOG_OUT << "Sync all events, stream_pair_addresses_ size : " << stream_pair_mem_bufs_.size() << "."; + LockGuard lock(lock_); + return DoSyncAllEvents(); +} + +bool AbstractDynamicMemPool::DoSyncAllEvents() { + if (stream_pair_mem_bufs_.empty()) { + return false; + } + + std::set carry_event_mem_bufs; + for (const auto &stream_pair_mem_buf : stream_pair_mem_bufs_) { + for (const auto &mem_buf : stream_pair_mem_buf.second) { + (void)carry_event_mem_bufs.emplace(mem_buf); + } + } + for (auto &mem_buf : carry_event_mem_bufs) { + if (mem_buf->SyncAllEvents() && mem_buf->status_ == DynamicMemBufStatus::kMemBufUsedByEvent) { + (void)DoFreeTensorMem(mem_buf->addr_); + } + } + + stream_pair_mem_bufs_.clear(); + return true; +} + +size_t AbstractDynamicMemPool::CalMemBlockAllocSize(size_t size, bool from_persistent_mem, bool) { + auto device_free_mem_size = free_mem_size(); + // Make sure available mem is enough. + if (device_free_mem_size < size) { + LOG_OUT << "Memory not enough: current free memory size[" << device_free_mem_size + << "] is smaller than required size[" << size << "]."; + return 0; + } + auto unit_size = MemAllocUnitSize(from_persistent_mem); + if (device_free_mem_size < unit_size) { + LOG_OUT << "Device memory size [" << device_free_mem_size << "] is smaller than unit size [" << unit_size << "]."; + } + // Calculate alloc size. + size_t alloc_size = unit_size; + if (size > unit_size) { + alloc_size = ((size + unit_size - 1) / unit_size) * unit_size; + } + return std::min(alloc_size, device_free_mem_size); +} + +void AbstractDynamicMemPool::DefragMemory() { + LOG_OUT << "Try to defrag memory."; + LockGuard lock(lock_); + + if (!enable_vmm_) { + LOG_OUT << "Skip defrag memory since vmm is not enabled."; + return; + } + + if (eager_free_count_ == 0) { + LOG_OUT << "Exit defrag memory since eager free count is 0."; + return; + } + if (last_eager_free_count_ == eager_free_count_) { + LOG_OUT << "Exit defrag memory since last eager free count equals to eager free count : " << last_eager_free_count_ + << "."; + return; + } + + LOG_OUT << "Defrag memory start."; + WaitPipelineHelper(); + if (!SyncAllStreams()) { + LOG_ERROR << "Sync all streams failed."; + return; + } + const auto [eager_free_size, real_free_size] = FreeIdleMemsByEagerFree(); + LOG_OUT << "Defrag memory, eager_free_size : " << eager_free_size << ", real_free_size : " << real_free_size << "."; + last_eager_free_count_ = eager_free_count_; +} + +void AbstractDynamicMemPool::WaitPipelineHelper() { + if (pipeline_callback_) { + lock_.unlock(); + pipeline_callback_(); + lock_.lock(); + } +} + +std::string AbstractDynamicMemPool::DynamicMemPoolStateInfo() const { + std::stringstream ss; + // Classify mem buf and stat mem buf state info. 
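+  // For instance (hypothetical numbers), three live bufs tagged kWeight (4 MB),
+  // kWeight (2 MB) and kKernel (1 MB) would yield
+  //   mem_buf_used_stat[kWeight] == 6 MB and mem_buf_used_stat[kKernel] == 1 MB,
+  // while the per-allocator set sizes reported below count bufs, not bytes.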
+  size_t mem_buf_used_stat[static_cast<int>(memory::mem_pool::MemType::kOther) + 1] = {0};
+  struct AddrComparator {
+    bool operator()(MemBuf *const &left, MemBuf *const &right) const { return left->addr_ < right->addr_; }
+  };
+  std::map<MemBufAllocator *, std::set<MemBuf *, AddrComparator>> allocator_mem_bufs;
+  for (const auto &addr_mem_buf_allocator : addr_mem_buf_allocators_) {
+    const auto allocator = addr_mem_buf_allocator.second.second;
+    const auto mem_buf = addr_mem_buf_allocator.second.first;
+    mem_buf_used_stat[static_cast<int>(mem_buf->alloc_type_)] += mem_buf->size_;
+    auto &mem_bufs = allocator_mem_bufs[allocator];
+    (void)mem_bufs.insert(mem_buf);
+  }
+  for (const auto &[allocator, mem_bufs] : allocator_mem_bufs) {
+    ss << "\tIn used mem buf info for " << allocator->BriefInfo() << ", mem_bufs size : " << mem_bufs.size() << "\n";
+  }
+
+  size_t other_used_size = 0;
+  int start = static_cast<int>(memory::mem_pool::MemType::kGraphOutput);
+  int end = static_cast<int>(memory::mem_pool::MemType::kOther);
+  for (int i = start; i <= end; i++) {
+    other_used_size += mem_buf_used_stat[i];
+  }
+
+  ss << "The dynamic memory pool[" << GetMemoryPoolType() << "] stat info : " << mem_stat_.ToReadableString()
+     << ", actual peak used mem:" << ActualPeakStatistics() / kMBToByte
+     << "M. Weight used size:" << mem_buf_used_stat[static_cast<int>(memory::mem_pool::MemType::kWeight)] / kMBToByte
+     << "M, constant value used size:"
+     << mem_buf_used_stat[static_cast<int>(memory::mem_pool::MemType::kConstantValue)] / kMBToByte
+     << "M, kernel output used size:"
+     << mem_buf_used_stat[static_cast<int>(memory::mem_pool::MemType::kKernel)] / kMBToByte
+     << "M, other used size:" << other_used_size / kMBToByte << "M.\n";
+  return ss.str();
+}
+
+const std::pair<size_t, size_t> AbstractDynamicMemPool::FreeIdleMemsByEagerFree() {
+  if (!IsEnableVmm() && !IsEnableEagerFree()) {
+    LOG_OUT << "FreeIdleMemsByEagerFree is not allowed since vmm is not enabled.";
+    return std::make_pair(0L, 0L);
+  }
+
+  LOG_OUT << "Free idle mems by eager free start, allocator size : " << stream_id_allocators_.size() << ".";
+  eager_free_count_++;
+
+  size_t total_eager_free_size = 0;
+  size_t total_real_free_size = 0;
+  for (auto &stream_id_allocator : stream_id_allocators_) {
+    const auto [eager_free_size, real_free_size] = stream_id_allocator.second->FreeIdleMemsByEagerFree();
+    total_eager_free_size += eager_free_size;
+    total_real_free_size += real_free_size;
+  }
+
+  size_t not_free_size =
+    total_eager_free_size > total_real_free_size ? (total_eager_free_size - total_real_free_size) : 0;
+  LOG_OUT << "Eager free count : " << eager_free_count_ << ", free memory : " << total_eager_free_size
+          << ", real free : " << total_real_free_size << ", not free : " << not_free_size << ".";
+
+  mem_stat_.eager_free_size_ += total_eager_free_size;
+  return {total_eager_free_size, total_real_free_size};
+}
+
+size_t AbstractDynamicMemPool::ReleaseFreeBlocks() {
+  LOG_OUT << "Release free blocks start.";
+  size_t release_size = 0;
+  for (auto &stream_id_allocator : stream_id_allocators_) {
+    release_size += stream_id_allocator.second->ReleaseFreeBlocks();
+  }
+  LOG_OUT << "Release free blocks size : " << release_size << ".";
+  return release_size;
+}
+
+size_t AbstractDynamicMemPool::ReleaseCustomFreeBlocks() {
+  LOG_OUT << "Release custom free blocks start.";
+  size_t release_size = 0;
+  for (auto &customized_allocator : customized_allocators_) {
+    release_size += customized_allocator.second->ReleaseFreeBlocks();
+  }
+  LOG_OUT << "Release custom free blocks size : " << release_size << ".";
+  return release_size;
+}
+
+// The statistics information.
+size_t AbstractDynamicMemPool::TotalMemStatistics() const {
+  if (IsEnableVmm()) {
+    return GetVmmUsedMemSize() + mem_stat_.custom_alloc_size_;
+  }
+  return mem_stat_.alloc_size_ + mem_stat_.custom_alloc_size_;
+}
+
+size_t AbstractDynamicMemPool::TotalUsedMemStatistics() const { return mem_stat_.used_size_; }
+
+size_t AbstractDynamicMemPool::TotalUsedByEventMemStatistics() const { return mem_stat_.used_by_event_size_; }
+
+size_t AbstractDynamicMemPool::TotalIdleMemStatistics() const { return mem_stat_.IdleSize(); }
+
+size_t AbstractDynamicMemPool::TotalEagerFreeMemStatistics() const { return mem_stat_.eager_free_size_; }
+
+size_t AbstractDynamicMemPool::UsedMemPeakStatistics() const { return mem_stat_.peak_size_; }
+
+size_t AbstractDynamicMemPool::MaxMemAllocatedStatistics() const { return mem_stat_.iter_used_peak_size_; }
+
+size_t AbstractDynamicMemPool::MaxMemReservedStatistics() const { return mem_stat_.iter_alloc_peak_size_; }
+
+size_t AbstractDynamicMemPool::ActualPeakStatistics() const {
+  if (IsEnableVmm()) {
+    return GetVmmUsedMemSize() + mem_stat_.custom_alloc_size_;
+  }
+
+  size_t peak_size = 0;
+  for (auto &stream_id_allocator : stream_id_allocators_) {
+    peak_size += stream_id_allocator.second->ActualPeakSize();
+  }
+  for (auto &customized_allocator : customized_allocators_) {
+    peak_size += customized_allocator.second->ActualPeakSize();
+  }
+  return peak_size;
+}
+
+std::unordered_map<std::string, size_t> AbstractDynamicMemPool::BlockCountsStatistics() const {
+  LockGuard lock(lock_);
+  size_t persistent_block_count = 0;
+  size_t common_block_count = 0;
+  for (const auto &[allocator_info, allocator_ptr] : stream_id_allocators_) {
+    if (allocator_info.from_persistent_mem) {
+      persistent_block_count += allocator_ptr->mem_blocks_.size();
+    } else {
+      common_block_count += allocator_ptr->mem_blocks_.size();
+    }
+  }
+  std::unordered_map<std::string, size_t> block_counts;
+  block_counts[kPersistentMemPoolType] = persistent_block_count;
+  block_counts[kCommonMemPoolType] = common_block_count;
+  return block_counts;
+}
+
+std::unordered_map<std::string, size_t> AbstractDynamicMemPool::BlockUnitSizeStatistics() const {
+  LockGuard lock(lock_);
+  std::unordered_map<std::string, size_t> block_units;
+  block_units[kPersistentMemPoolType] = persist_unit_size_;
+  block_units[kCommonMemPoolType] = common_unit_size_;
+  return block_units;
+}
+
+std::unordered_map<DeviceMemPtr, std::unordered_map<std::string, size_t>>
+AbstractDynamicMemPool::CommonMemBlocksInfoStatistics() const {
+  LockGuard lock(lock_);
+  std::unordered_map<DeviceMemPtr, std::unordered_map<std::string, size_t>> block_infos;
+  for (const auto &[allocator_info, allocator_ptr] : stream_id_allocators_) {
+    if (!allocator_info.from_persistent_mem) {
+      const auto &mem_blocks = allocator_ptr->mem_blocks_;
+      for (const auto mem_block : mem_blocks) {
+        std::unordered_map<std::string, size_t> block_info;
+        block_info[kBlockMemorySize] = mem_block->size_;
+        block_info[kBlockStreamId] = mem_block->stream_id_;
+        block_infos[mem_block->addr_] = block_info;
+      }
+    }
+  }
+  return block_infos;
+}
+
+std::unordered_map<DeviceMemPtr, std::unordered_map<std::string, size_t>>
+AbstractDynamicMemPool::PersistentMemBlocksInfoStatistics() const {
+  LockGuard lock(lock_);
+  std::unordered_map<DeviceMemPtr, std::unordered_map<std::string, size_t>> block_infos;
+  for (const auto &[allocator_info, allocator_ptr] : stream_id_allocators_) {
+    if (allocator_info.from_persistent_mem) {
+      const auto &mem_blocks = allocator_ptr->mem_blocks_;
+      for (const auto mem_block : mem_blocks) {
+        std::unordered_map<std::string, size_t> block_info;
+        block_info[kBlockMemorySize] = mem_block->size_;
+        block_info[kBlockStreamId] = mem_block->stream_id_;
+        block_infos[mem_block->addr_] = block_info;
+      }
+    }
+  }
+  return block_infos;
+}
+
+void AbstractDynamicMemPool::ResetMaxMemReserved() {
+  LockGuard lock(lock_);
+  mem_stat_.iter_alloc_peak_size_ = IsEnableVmm() ? GetVmmUsedMemSize() + mem_stat_.custom_alloc_size_
+                                                  : mem_stat_.alloc_size_ + mem_stat_.custom_alloc_size_;
+}
+
+void AbstractDynamicMemPool::ResetMaxMemAllocated() {
+  LockGuard lock(lock_);
+  mem_stat_.iter_used_peak_size_ = mem_stat_.used_size_;
+}
+
+AbstractEnhancedDynamicMemPool::AbstractEnhancedDynamicMemPool() {}
+
+void AbstractEnhancedDynamicMemPool::ReportMemoryPoolInfo() {
+  // Report memory data to profiler.
+ if (memory_profiler_callback_) { + memory_profiler_callback_(); + } +} + +void AbstractEnhancedDynamicMemPool::ReportMemoryPoolMallocInfoToMstx(void *addr, size_t size) { + if (memory_malloc_mstx_callback_) { + memory_malloc_mstx_callback_(addr, size); + } +} + +void AbstractEnhancedDynamicMemPool::ReportMemoryPoolFreeInfoToMstx(void *addr) { + if (memory_free_mstx_callback_) { + memory_free_mstx_callback_(addr); + } +} + +MemoryTimeEventPtr AbstractEnhancedDynamicMemPool::GenAllocateMemoryTimeEvent(const void *addr, size_t size, + uint32_t stream_id, bool from_persistent, + bool is_persistent) { + auto time_event = std::make_shared(); + time_event->created_at_ = static_cast( + std::chrono::duration_cast(std::chrono::high_resolution_clock::now().time_since_epoch()) + .count()); + time_event->addr_ = const_cast(addr); + time_event->size_ = size; + time_event->from_persistent_ = static_cast(from_persistent); + time_event->is_persistent_ = static_cast(is_persistent); + time_event->stream_id_ = stream_id; + time_event->run_mode_ = DynamicMemAllocatorDebugInfo::GetDebugInfo().run_mode_; + time_event->used_size_ = mem_stat_.used_size_; + time_event->peak_size_ = mem_stat_.peak_size_; + time_event->alloc_size_ = TotalMemStatistics(); + time_event->used_by_event_size_ = mem_stat_.used_by_event_size_; + time_event->eager_free_size_ = mem_stat_.eager_free_size_; + time_event->owner_ = DynamicMemAllocatorDebugInfo::GetDebugInfo().name_; + time_event->alloc_type_ = static_cast(DynamicMemAllocatorDebugInfo::GetDebugInfo().type_); + return time_event; +} + +MemoryTimeEventPtr AbstractEnhancedDynamicMemPool::GenFreeMemoryTimeEvent(const void *addr) { + auto time_event = std::make_shared(); + time_event->created_at_ = static_cast( + std::chrono::duration_cast(std::chrono::high_resolution_clock::now().time_since_epoch()) + .count()); + time_event->addr_ = const_cast(addr); + const size_t time_event_free_size = -1; + time_event->size_ = time_event_free_size; + time_event->used_size_ = mem_stat_.used_size_; + time_event->peak_size_ = mem_stat_.peak_size_; + time_event->alloc_size_ = TotalMemStatistics(); + time_event->used_by_event_size_ = mem_stat_.used_by_event_size_; + time_event->eager_free_size_ = mem_stat_.eager_free_size_; + return time_event; +} +} // namespace device +} // namespace mindspore diff --git a/inferrt/src/hardware/hardware_abstract/memory/abstract_dynamic_mem_pool.h b/inferrt/src/hardware/hardware_abstract/memory/abstract_dynamic_mem_pool.h new file mode 100644 index 0000000000000000000000000000000000000000..6833d48a8a12ed23a04fdd89b8bad83dcf338ae4 --- /dev/null +++ b/inferrt/src/hardware/hardware_abstract/memory/abstract_dynamic_mem_pool.h @@ -0,0 +1,497 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_MEM_REUSE_ABSTRACT_DYNAMIC_MEM_POOL_H_ +#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_MEM_REUSE_ABSTRACT_DYNAMIC_MEM_POOL_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hardware/hardware_abstract/memory/dynamic_mem_pool.h" +#include "hardware/hardware_abstract/visible.h" +#include "hardware/hardware_abstract/stream_util.h" + +namespace mindspore { +namespace device { +constexpr size_t kDecimalPrecision = 3; +// largest allocation size for small pool is 1 MB +constexpr size_t kSmallSize = 1048576; + +struct HARDWARE_EXPORT MemBlock; + +using MemBufStatus = DynamicMemBufStatus; +struct HARDWARE_EXPORT MemBuf : EventBase { + explicit MemBuf(size_t size, void *addr, uint32_t stream_id, MemBlock *mem_block, MemBufStatus status); + + MemBuf() = delete; + MemBuf(const MemBuf &) = delete; + MemBuf &operator=(const MemBuf &) = delete; + + ~MemBuf(); + + inline void Link(MemBuf *prev, MemBuf *next) { + if (prev != nullptr) { + prev->next_ = this; + this->prev_ = prev; + } + if (next != nullptr) { + next->prev_ = this; + this->next_ = next; + } + } + + inline void Unlink() { + if (prev_ != nullptr) { + prev_->next_ = next_; + } + if (next_ != nullptr) { + next_->prev_ = prev_; + } + prev_ = nullptr; + next_ = nullptr; + } + + inline void SetDebugInfo() { + owner_name_ = DynamicMemAllocatorDebugInfo::GetDebugInfo().name_; + alloc_type_ = DynamicMemAllocatorDebugInfo::GetDebugInfo().type_; + } + + std::string ToJson() { + JsonBuilder builder; + builder.Append("addr_", addr_); + builder.Append("size_", size_); + builder.Append("stream_id_", stream_id_); + builder.Append("status_", DynamicMemBufStatusToString(status_)); + builder.Append("owner_name_", owner_name_); + return builder.ToString(); + } + + MemBuf *prev_; + MemBuf *next_; + + size_t size_; + void *addr_; + uint32_t stream_id_; + MemBlock *mem_block_; + MemBufStatus status_; + memory::mem_pool::MemType alloc_type_{memory::mem_pool::MemType::kOther}; + std::string owner_name_; +}; + +struct MemBufComparator { + bool operator()(MemBuf *const &left, MemBuf *const &right) const { + return (left->size_ != right->size_) ? 
left->size_ < right->size_ : left->addr_ < right->addr_; + } +}; + +struct HARDWARE_EXPORT MemBlock { + explicit MemBlock(size_t size, void *addr, uint32_t stream_id) : size_(size), addr_(addr), stream_id_(stream_id) { + min_addr_ = nullptr; + max_addr_ = nullptr; + } + + MemBlock() = delete; + MemBlock(const MemBlock &) = delete; + MemBlock &operator=(const MemBlock &) = delete; + + ~MemBlock() = default; + + inline void UpdateBorderAddr(MemBuf *mem_buf) { + if (min_addr_ == nullptr) { + min_addr_ = mem_buf->addr_; + } else { + min_addr_ = std::min(min_addr_, mem_buf->addr_); + } + void *right_addr = static_cast(mem_buf->addr_) + mem_buf->size_; + max_addr_ = std::max(max_addr_, right_addr); + } + + inline size_t ActualPeakSize() { + if (min_addr_ == nullptr || max_addr_ == nullptr) { + return 0; + } + return static_cast(max_addr_) - static_cast(min_addr_); + } + + std::string ToJson() { + JsonBuilder builder; + builder.Append("addr_", addr_); + builder.Append("size_", size_); + builder.Append("stream_id_", stream_id_); + builder.Append("min_addr_", min_addr_); + builder.Append("max_addr_", max_addr_); + return builder.ToString(); + } + + size_t size_; + void *addr_; + uint32_t stream_id_; + + void *min_addr_; + void *max_addr_; +}; + +struct HARDWARE_EXPORT MemStat { + MemStat() { Reset(); } + + MemStat(const MemStat &) = delete; + MemStat &operator=(const MemStat &) = delete; + + void Reset() { + used_size_ = 0; + peak_size_ = 0; + alloc_size_ = 0; + custom_alloc_size_ = 0; + + used_by_event_size_ = 0; + eager_free_size_ = 0; + + iter_used_peak_size_ = 0; + iter_alloc_peak_size_ = 0; + } + + inline size_t IdleSize() const { return alloc_size_ + custom_alloc_size_ - used_size_; } + + inline void UpdatePeakSize(const bool is_enable_vmm, size_t vmm_used_mem_size) { + peak_size_ = std::max(peak_size_, used_size_); + iter_used_peak_size_ = std::max(iter_used_peak_size_, used_size_); + if (is_enable_vmm) { + iter_alloc_peak_size_ = std::max(iter_alloc_peak_size_, vmm_used_mem_size + custom_alloc_size_); + } else { + iter_alloc_peak_size_ = std::max(iter_alloc_peak_size_, alloc_size_ + custom_alloc_size_); + } + } + + std::string ToJson() const { + JsonBuilder builder; + builder.Append("used_size_", used_size_); + builder.Append("peak_size_", peak_size_); + builder.Append("alloc_size_", alloc_size_); + builder.Append("idle_size_", IdleSize()); + builder.Append("used_by_event_size_", used_by_event_size_); + builder.Append("eager_free_size_", eager_free_size_); + return builder.ToString(); + } + + std::string ToReadableString() const { + JsonBuilder builder; + builder.Append("in used mem", Format(used_size_)); + builder.Append("peak used mem", Format(peak_size_)); + builder.Append("alloc mem", Format(alloc_size_)); + builder.Append("idle mem", Format(IdleSize())); + builder.Append("used by event mem", Format(used_by_event_size_)); + builder.Append("eager free mem", Format(eager_free_size_)); + return builder.ToString(); + } + + std::string Format(size_t size) const { + auto str = std::to_string(size * 1.0 / kMBToByte); + return str.substr(0, str.find(".") + kDecimalPrecision) + "MB"; + } + + size_t used_size_; + size_t peak_size_; + size_t alloc_size_; + size_t custom_alloc_size_; + + size_t used_by_event_size_; + size_t eager_free_size_; + + size_t iter_used_peak_size_; + size_t iter_alloc_peak_size_; +}; + +struct AllocatorInfo { + uint32_t stream_id = 0; + bool from_persistent_mem = false; + bool use_small_pool = false; + + bool operator<(const AllocatorInfo &other) const { + if (stream_id != 
other.stream_id) { + return stream_id < other.stream_id; + } + if (from_persistent_mem != other.from_persistent_mem) { + return other.from_persistent_mem; + } + if (use_small_pool != other.use_small_pool) { + return other.use_small_pool; + } + return false; + } + + std::string ToString() const { + std::ostringstream oss; + oss << "stream id: " << stream_id << ", is persistent: " << from_persistent_mem + << ", use small pool: " << use_small_pool; + return oss.str(); + } +}; + +class AbstractDynamicMemPool; + +class HARDWARE_EXPORT MemBufAllocator { + public: + explicit MemBufAllocator(std::function mem_block_expander, + std::function mem_block_cleaner, + std::function mem_mapper, + std::function mem_eager_freer, bool enable_eager_free, + bool is_persistent, uint32_t stream_id, bool is_small, bool is_customized = false) + : mem_block_expander_(mem_block_expander), + mem_block_cleaner_(mem_block_cleaner), + mem_mapper_(mem_mapper), + mem_eager_freer_(mem_eager_freer), + stream_id_(stream_id), + enable_eager_free_(enable_eager_free), + is_persistent_(is_persistent), + is_small_(is_small), + is_customized_(is_customized) { + search_key_ = new MemBuf(0, nullptr, 0, nullptr, MemBufStatus::kMemBufIdle); + } + + MemBufAllocator() = delete; + MemBufAllocator(const MemBufAllocator &) = delete; + MemBufAllocator &operator=(const MemBufAllocator &) = delete; + + ~MemBufAllocator(); + + void Initialize(size_t size); + void ReleaseDeviceRes(); + + MemBuf *Malloc(size_t size); + MemBuf *SearchAvailableMemBuf(size_t size); + bool Free(MemBuf *mem_buf, MemBufStatus target_status = MemBufStatus::kMemBufIdle); + MemBuf *MallocExpandBlock(size_t size); + const std::pair FreeIdleMemsByEagerFree(); + + size_t ReleaseFreeBlocks(); + + size_t ActualPeakSize() const { + size_t peak_size = 0; + for (auto mem_block : mem_blocks_) { + peak_size += mem_block->ActualPeakSize(); + } + return peak_size; + } + + std::string BriefInfo() const { + std::stringstream ss; + ss << "Mem buf allocator, enable vmm : " << enable_eager_free_ << ", is persistent : " << is_persistent_ + << ", stream id : " << stream_id_ << ", is small : " << is_small_ << ", is customized : " << is_customized_ + << "."; + return ss.str(); + } + + uint32_t stream_id() const { return stream_id_; } + bool is_persistent() const { return is_persistent_; } + bool is_small() const { return is_small_; } +#ifndef ENABLE_TEST + + protected: +#endif + MemBuf *MapAndSplitMemBuf(MemBuf *candidate, size_t size); + MemBlock *ExpandBlock(size_t size); + + std::function mem_block_expander_; + std::function mem_block_cleaner_; + std::function mem_mapper_; + std::function mem_eager_freer_; + + std::list mem_blocks_; + using MemAllocator = memory::mem_pool::PooledAllocator; + std::set free_mem_bufs_; + std::set eager_free_mem_bufs_; + + private: + MemBuf *search_key_; + + uint32_t stream_id_; + bool enable_eager_free_; + bool is_persistent_; + bool is_small_; + bool is_customized_; + + friend AbstractDynamicMemPool; +}; +using MemBufAllocatorPtr = std::shared_ptr; + +using Lock = memory::mem_pool::Lock; +using LockGuard = memory::mem_pool::LockGuard; +class HARDWARE_EXPORT AbstractDynamicMemPool : virtual public DynamicMemPool { + public: + AbstractDynamicMemPool(); + ~AbstractDynamicMemPool() override = default; + + void Initialize(size_t init_size, size_t increase_size, size_t max_size) override; + + void ReleaseDeviceRes() override; + + // The main program entry of memory alloc. 
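+  // A minimal usage sketch, assuming a concrete device pool subclass (hypothetical
+  // name DevicePool) that implements the device-side virtuals such as
+  // AllocDeviceMem / FreeDeviceMem:
+  //
+  //   DevicePool pool;
+  //   pool.Initialize(1024 << 20, 1024 << 20, 0);
+  //   void *dev = pool.AllocTensorMem(4096, /* from_persistent_mem */ false,
+  //                                   /* need_recycle */ false, kDefaultStreamIndex);
+  //   if (dev != nullptr) {
+  //     pool.FreeTensorMem(dev);
+  //   }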
+ DeviceMemPtr AllocTensorMem(size_t size, bool from_persistent_mem = false, bool need_recycle = false, + uint32_t stream_id = kDefaultStreamIndex) override; + + // Alloc mem buf from mem pool, return mem buf and its allocator + std::pair AllocMemBuf(size_t align_size, bool from_persistent_mem = false, + uint32_t stream_id = kDefaultStreamIndex); + + // The main program entry of continuous memory alloc. + std::vector AllocContinuousTensorMem(const std::vector &size_list, + uint32_t stream_id = kDefaultStreamIndex) override; + // The main program entry of memory free. + void FreeTensorMem(const DeviceMemPtr &device_addr) override; + bool DoFreeTensorMem(const DeviceMemPtr &device_addr) override; + // The main program entry of part memory free and part memory keep. + void FreePartTensorMems(const std::vector &free_addrs, const std::vector &keep_addrs, + const std::vector &keep_addr_sizes) override; + virtual std::vector DoFreePartTensorMems(const std::vector &free_addrs, + const std::vector &keep_addrs, + const std::vector &keep_addr_sizes); + + // Element in vector : memory_stream_id, address + bool RecordEvent(int64_t task_id_on_stream, uint32_t user_stream_id, + const std::vector> &memory_stream_addresses, + const DeviceEventPtr &event) override; + bool WaitEvent(int64_t task_id_on_stream, uint32_t user_stream_id, uint32_t memory_stream_id) override; + bool WaitEvent(int64_t task_id_on_stream, uint32_t memory_stream_id) override; + bool SyncAllEvents() override; + bool DoSyncAllEvents(); + + size_t CalMemBlockAllocSize(size_t size, bool from_persistent_mem, bool need_recycle = false) override; + void SetMemAllocUintSize(size_t common_size, size_t persist_size = kDynamicMemAllocUnitSize) override { + common_unit_size_ = common_size; + persist_unit_size_ = persist_size; + } + size_t MemAllocUnitSize(bool from_persistent_mem = false) const override { + return from_persistent_mem ? persist_unit_size_ : common_unit_size_; + } + + void DefragMemory() override; + + std::string DynamicMemPoolStateInfo() const; + + // The statistics information. + size_t TotalMemStatistics() const override; + size_t TotalUsedMemStatistics() const override; + size_t TotalUsedByEventMemStatistics() const override; + size_t TotalIdleMemStatistics() const override; + size_t TotalEagerFreeMemStatistics() const override; + size_t UsedMemPeakStatistics() const override; + size_t MaxMemAllocatedStatistics() const override; + size_t MaxMemReservedStatistics() const override; + size_t ActualPeakStatistics() const override; + std::unordered_map BlockCountsStatistics() const override; + std::unordered_map BlockUnitSizeStatistics() const override; + std::unordered_map> CommonMemBlocksInfoStatistics() + const override; + std::unordered_map> PersistentMemBlocksInfoStatistics() + const override; + void ResetMaxMemReserved() override; + void ResetMaxMemAllocated() override; + + const bool IsEnableVmm() const override { return enable_vmm_; } + + void SetEnableVmm(bool enable_vmm) override { enable_vmm_ = enable_vmm; } + + // Get method for proxy. 
+ std::unordered_map> &addr_mem_buf_allocators() { + return addr_mem_buf_allocators_; + } + + std::unordered_map, std::set, pair_hash> &stream_pair_mem_bufs() { + return stream_pair_mem_bufs_; + } + + const std::pair FreeIdleMemsByEagerFree() override; + + size_t ReleaseFreeBlocks() override; + size_t ReleaseCustomFreeBlocks(); + + MemStat &mem_stat() { return mem_stat_; } + + Lock &lock() { return lock_; } + + protected: + void WaitPipelineHelper(); + + MemBufAllocatorPtr GenerateAllocator(const AllocatorInfo &allocator_key); + MemBufAllocator *GetMemBufAllocator(size_t size, bool from_persistent_mem, uint32_t stream_id); +#ifndef ENABLE_TEST + + protected: +#else + + public: +#endif + std::map stream_id_allocators_; + std::unordered_map> addr_mem_buf_allocators_; + std::unordered_map, std::set, pair_hash> stream_pair_mem_bufs_; + std::map customized_allocators_; + MemStat mem_stat_; + + bool enable_vmm_{false}; + bool enable_custom_allocator_{false}; + std::function custom_alloc_fn_; + std::function custom_free_fn_; + size_t common_unit_size_{kDynamicMemAllocUnitSize}; + size_t persist_unit_size_{kDynamicMemAllocUnitSize}; + + size_t eager_free_count_{0}; + size_t last_eager_free_count_{0}; + Lock lock_; + + // init_size_ is for persistent and common. + size_t init_size_{kDynamicMemAllocUnitSize}; + size_t increase_size_{kDynamicMemAllocUnitSize}; + // Not enable currently. + size_t max_size_{0}; + + bool enable_dump_memory_{false}; +}; + +class HARDWARE_EXPORT AbstractEnhancedDynamicMemPool : public AbstractDynamicMemPool { + public: + AbstractEnhancedDynamicMemPool(); + AbstractEnhancedDynamicMemPool(const AbstractEnhancedDynamicMemPool &) = delete; + AbstractEnhancedDynamicMemPool &operator=(const AbstractEnhancedDynamicMemPool &) = delete; + ~AbstractEnhancedDynamicMemPool() override = default; + + // Report memory pool stat info for enhanced processing. + virtual void ReportMemoryPoolInfo(); + // Report memory pool stat info for mstx + virtual void ReportMemoryPoolMallocInfoToMstx(void *ptr, size_t size); + virtual void ReportMemoryPoolFreeInfoToMstx(void *ptr); + bool IsEnableTimeEvent() override { return enable_time_event_; } + + void SetEnableTimeEvent(bool enable_time_event) override { enable_time_event_ = enable_time_event; } + + virtual MemoryTimeEventPtr GenAllocateMemoryTimeEvent(const void *addr, size_t size, uint32_t stream_id, + bool from_persistent, bool is_persistent); + + virtual MemoryTimeEventPtr GenFreeMemoryTimeEvent(const void *addr); + + private: + std::atomic enable_time_event_{false}; +}; +} // namespace device +} // namespace mindspore +#endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_MEM_REUSE_ABSTRACT_DYNAMIC_MEM_POOL_H_ diff --git a/inferrt/src/hardware/hardware_abstract/memory/dynamic_mem_pool.cc b/inferrt/src/hardware/hardware_abstract/memory/dynamic_mem_pool.cc new file mode 100644 index 0000000000000000000000000000000000000000..b817c5db3d0e4e161c0f3b6d2f0a390c37ef38aa --- /dev/null +++ b/inferrt/src/hardware/hardware_abstract/memory/dynamic_mem_pool.cc @@ -0,0 +1,122 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hardware/hardware_abstract/memory/dynamic_mem_pool.h"
+
+#include <map>
+#include <string>
+#include "common/logger.h"
+
+namespace mindspore {
+namespace device {
+static thread_local AllocatorDebugInfo debug_info_;
+
+AllocatorDebugInfo &DynamicMemAllocatorDebugInfo::GetDebugInfo() noexcept { return debug_info_; }
+
+// Set the debug info when memory alloc.
+void DynamicMemAllocatorDebugInfo::SetDebugInfo(const std::string &name, memory::mem_pool::MemType type,
+                                                int input_index, int output_index, uint8_t run_mode) {
+  debug_info_.name_ = name;
+  debug_info_.type_ = type;
+  debug_info_.input_index_ = input_index;
+  debug_info_.output_index_ = output_index;
+  debug_info_.run_mode_ = run_mode;
+}
+
+static const std::map<DynamicMemBufStatus, std::string> kBufStatusString = {
+  {DynamicMemBufStatus::kMemBufIdle, "idle"},
+  {DynamicMemBufStatus::kMemBufUsed, "used"},
+  {DynamicMemBufStatus::kMemBufEagerFree, "eager_free"},
+  {DynamicMemBufStatus::kMemBufUsedByEvent, "used_by_event"}};
+
+const std::string &DynamicMemBufStatusToString(DynamicMemBufStatus status) { return kBufStatusString.at(status); }
+
+bool EventBase::RecordEvent(int64_t task_id_on_stream, uint32_t user_stream_id, const DeviceEventPtr &event) {
+  if (event == nullptr) {
+    LOG_ERROR << "Event is null.";
+  }
+  if (events_ == nullptr) {
+    events_ = std::make_shared<std::unordered_map<uint32_t, std::shared_ptr<std::list<TaskIdOnStreamEvent>>>>();
+  }
+  std::shared_ptr<std::list<TaskIdOnStreamEvent>> event_list = nullptr;
+  auto iter = events_->find(user_stream_id);
+  if (iter == events_->end()) {
+    event_list = std::make_shared<std::list<TaskIdOnStreamEvent>>();
+    (void)events_->emplace(user_stream_id, event_list);
+  } else {
+    event_list = iter->second;
+    if (event_list == nullptr) {
+      LOG_ERROR << "Event list is null.";
+    }
+  }
+  (void)event_list->emplace_back(task_id_on_stream, event);
+  return true;
+}
+
+bool EventBase::WaitEvent(uint32_t task_id_on_stream, uint32_t user_stream_id) {
+  if (events_ == nullptr) {
+    return false;
+  }
+  auto iter = events_->find(user_stream_id);
+  if (iter == events_->end()) {
+    return false;
+  }
+  auto &event_list = iter->second;
+  if (event_list == nullptr) {
+    LOG_ERROR << "Event list is null.";
+  }
+  // Pop all elements in the list whose task id is not bigger than task_id_on_stream.
+  while (!event_list->empty() && event_list->front().first <= task_id_on_stream) {
+    event_list->pop_front();
+  }
+  // Remove the list if it is empty.
+  if (event_list->empty()) {
+    events_->erase(iter);
+  }
+  return true;
+}
+
+bool EventBase::IsEventNotUsed() { return events_ == nullptr ? true : events_->empty(); }
+
+bool EventBase::SyncAllEvents() {
+  if (IsEventNotUsed()) {
+    return false;
+  }
+
+  for (auto iter = events_->begin(); iter != events_->end();) {
+    auto &event_list = iter->second;
+    if (event_list == nullptr) {
+      LOG_ERROR << "Event list is null.";
+    }
+    for (auto list_iter = event_list->begin(); list_iter != event_list->end();) {
+      auto &event = list_iter->second;
+      // Sync the event if it has not arrived yet.
+      if (!event->QueryEvent()) {
+        event->SyncEvent();
+      }
+      list_iter = event_list->erase(list_iter);
+    }
+    if (event_list->empty()) {
+      // The list is empty, erase it from the map.
+      iter = events_->erase(iter);
+    } else {
+      LOG_ERROR << "Event list is not empty.";
+      // Advance anyway to avoid looping on the same element forever.
+      ++iter;
+    }
+  }
+  return events_->empty();
+}
+}  // namespace device
+}  // namespace mindspore
diff --git a/inferrt/src/hardware/hardware_abstract/memory/dynamic_mem_pool.h b/inferrt/src/hardware/hardware_abstract/memory/dynamic_mem_pool.h
new file mode 100644
index 0000000000000000000000000000000000000000..cbabe2a9ba21042c777caaa6d70b026ea54d5991
--- /dev/null
+++ b/inferrt/src/hardware/hardware_abstract/memory/dynamic_mem_pool.h
@@ -0,0 +1,462 @@
+/**
+ * Copyright 2024 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_MEM_REUSE_DYNAMIC_MEM_POOL_H_
+#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_MEM_REUSE_DYNAMIC_MEM_POOL_H_
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "hardware/hardware_abstract/visible.h"
+#include "hardware/hardware_abstract/memory/mem_pool_util.h"
+#include "hardware/hardware_abstract/stream_util.h"
+#include "hardware/hardware_abstract/device_event.h"
+#include "common/logger.h"
+
+namespace mindspore {
+namespace device {
+constexpr int kShiftOffset = 2;
+// Alloc memory aligned according to 512 bytes.
+constexpr size_t kDynamicMemAlignSize = 512;
+// The minimum unit size (1G) of memory block used for dynamic extend.
+constexpr size_t kDynamicMemAllocUnitSize = 1024 << 20;
+
+const char kPersistentParamMem[] = "Persistent mem";
+const char kCommonMem[] = "Common mem";
+constexpr size_t kMBToByte = 1024 << 10;
+constexpr size_t kGBToByte = 1024 << 20;
+// The smallest memory request size; if a request is smaller than this size, the device memory request may fail.
+// Set by experience to 10M.
+const size_t kMinimumAllocMem = 10 << 20;
+
+const char kBlockMemorySize[] = "block_memory_size";
+const char kBlockStreamId[] = "block_stream_id";
+const char kCommonMemPoolType[] = "common_mem_pool";
+const char kPersistentMemPoolType[] = "persistent_mem_pool";
+using MallocFuncType = void *(size_t, int, void *);
+using FreeFuncType = void(void *, size_t, int, void *);
+
+// The status of memory buf.
+enum class HARDWARE_EXPORT DynamicMemBufStatus : int { kMemBufIdle, kMemBufUsed, kMemBufEagerFree, kMemBufUsedByEvent };
+HARDWARE_EXPORT const std::string &DynamicMemBufStatusToString(DynamicMemBufStatus status);
+
+using DeviceMemPtr = void(*);
+struct DeviceAddrCmp {
+  bool operator()(const DeviceMemPtr &addr1, const DeviceMemPtr &addr2) const { return addr1 < addr2; }
+};
+
+// The AllocatorDebugInfo wrapper, which is thread local, for the dynamic memory pool.
+class HARDWARE_EXPORT DynamicMemAllocatorDebugInfo;
+// Memory buf is the smallest operation object of dynamic memory pool.
+struct HARDWARE_EXPORT DynamicMemBuf;
+using DynamicMemBufPtr = std::shared_ptr<DynamicMemBuf>;
+// Multimap key is the tensor size, for finding the idle memory buf by tensor size.
+using SizeMapMemBuf = std::multimap; +// Map key is the device address, for finding the used memory buf in memory block by device address. +using DeviceAddrMapMemBuf = std::map; +// Memory block is composed of memory buf. +class HARDWARE_EXPORT DynamicMemBlock; +using DynamicMemBlockPtr = std::shared_ptr; + +struct HARDWARE_EXPORT MemStatusManager; +using MemStatusManagerPtr = std::shared_ptr; + +// Help class for unordered_map, pair has no hash method, need override it. +struct pair_hash { + template + std::size_t operator()(const std::pair ¶m) const { + size_t hash = std::hash{}(param.first); + hash <<= (sizeof(size_t) << kShiftOffset); + hash ^= std::hash{}(param.second); + return std::hash{}(hash); + } +}; + +struct HARDWARE_EXPORT MemBuf; + +// Interface of dynamic memory pool. +class HARDWARE_EXPORT DynamicMemPool { + public: + virtual ~DynamicMemPool() = default; + + // Initialize memory pool with init size, increase size and max size. + virtual void Initialize(size_t init_size, size_t increase_size, size_t max_size) {} + + // Release the real device memory. + virtual void ReleaseDeviceRes() { LOG_ERROR << "Not implemented"; } + + // The main program entry of memory alloc. + virtual DeviceMemPtr AllocTensorMem(size_t size, bool from_persistent_mem = false, bool need_recycle = false, + uint32_t stream_id = kDefaultStreamIndex) { + LOG_ERROR << "Not implemented"; + return nullptr; + } + + // The main program entry of continuous memory alloc. + virtual std::vector AllocContinuousTensorMem(const std::vector &size_list, + uint32_t stream_id = kDefaultStreamIndex) { + LOG_ERROR << "Not implemented"; + return {}; + } + // The main program entry of memory free. + virtual void FreeTensorMem(const DeviceMemPtr &device_addr) { LOG_ERROR << "Not implemented"; } + + virtual bool DoFreeTensorMem(const DeviceMemPtr &device_addr) { return false; } + + // The main program entry of part memorys free and part memorys keep. + virtual void FreePartTensorMems(const std::vector &free_addrs, + const std::vector &keep_addrs, + const std::vector &keep_addr_sizes) { + LOG_ERROR << "Not implemented"; + } + + // Help method for dynamic memory proxy. + virtual std::vector DoFreePartTensorMems(const std::vector &free_addrs, + const std::vector &keep_addrs, + const std::vector &keep_addr_sizes) { + return {}; + } + + virtual size_t EmptyCache() { return -1L; } + + virtual size_t ReleaseFreeBlocks() { return -1L; } + + // Element in vector : memory_stream_id, address + virtual bool RecordEvent(int64_t task_id_on_stream, uint32_t user_stream_id, + const std::vector> &memory_stream_addresses, + const DeviceEventPtr &event) { + return false; + } + + virtual bool WaitEvent(int64_t task_id_on_stream, uint32_t user_stream_id, uint32_t memory_stream_id) { + return false; + } + + virtual bool WaitEvent(int64_t task_id_on_stream, uint32_t memory_stream_id) { return false; } + + virtual bool SyncAllEvents() { return false; } + + // The real size by memory alloc aligned. + virtual size_t AlignMemorySize(size_t size) const { + if (size == 0) { + return kDynamicMemAlignSize; + } + return ((size + kDynamicMemAlignSize - 1) / kDynamicMemAlignSize) * kDynamicMemAlignSize; + } + + // Calculate memory block required alloc size when adding the memory block. 
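+  // Rounding sketch (values chosen for illustration): with the 512-byte alignment above,
+  //   AlignMemorySize(1) == 512, AlignMemorySize(512) == 512, AlignMemorySize(513) == 1024;
+  // and block requests are rounded up to a multiple of the unit size, e.g. with a 1 GB
+  // unit a 1.5 GB request becomes a 2 GB block in AbstractDynamicMemPool::CalMemBlockAllocSize
+  // (the base virtual below simply returns the unit size).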
+
+// Interface of the dynamic memory pool.
+class HARDWARE_EXPORT DynamicMemPool {
+ public:
+  virtual ~DynamicMemPool() = default;
+
+  // Initialize the memory pool with init size, increase size and max size.
+  virtual void Initialize(size_t init_size, size_t increase_size, size_t max_size) {}
+
+  // Release the real device memory.
+  virtual void ReleaseDeviceRes() { LOG_ERROR << "Not implemented"; }
+
+  // The main entry of memory alloc.
+  virtual DeviceMemPtr AllocTensorMem(size_t size, bool from_persistent_mem = false, bool need_recycle = false,
+                                      uint32_t stream_id = kDefaultStreamIndex) {
+    LOG_ERROR << "Not implemented";
+    return nullptr;
+  }
+
+  // The main entry of continuous memory alloc.
+  virtual std::vector<DeviceMemPtr> AllocContinuousTensorMem(const std::vector<size_t> &size_list,
+                                                             uint32_t stream_id = kDefaultStreamIndex) {
+    LOG_ERROR << "Not implemented";
+    return {};
+  }
+
+  // The main entry of memory free.
+  virtual void FreeTensorMem(const DeviceMemPtr &device_addr) { LOG_ERROR << "Not implemented"; }
+
+  virtual bool DoFreeTensorMem(const DeviceMemPtr &device_addr) { return false; }
+
+  // The main entry for freeing some memories while keeping others.
+  virtual void FreePartTensorMems(const std::vector<DeviceMemPtr> &free_addrs,
+                                  const std::vector<DeviceMemPtr> &keep_addrs,
+                                  const std::vector<size_t> &keep_addr_sizes) {
+    LOG_ERROR << "Not implemented";
+  }
+
+  // Helper method for the dynamic memory proxy.
+  virtual std::vector<DeviceMemPtr> DoFreePartTensorMems(const std::vector<DeviceMemPtr> &free_addrs,
+                                                         const std::vector<DeviceMemPtr> &keep_addrs,
+                                                         const std::vector<size_t> &keep_addr_sizes) {
+    return {};
+  }
+
+  virtual size_t EmptyCache() { return -1L; }
+
+  virtual size_t ReleaseFreeBlocks() { return -1L; }
+
+  // Element in vector : memory_stream_id, address.
+  virtual bool RecordEvent(int64_t task_id_on_stream, uint32_t user_stream_id,
+                           const std::vector<std::pair<uint32_t, DeviceMemPtr>> &memory_stream_addresses,
+                           const DeviceEventPtr &event) {
+    return false;
+  }
+
+  virtual bool WaitEvent(int64_t task_id_on_stream, uint32_t user_stream_id, uint32_t memory_stream_id) {
+    return false;
+  }
+
+  virtual bool WaitEvent(int64_t task_id_on_stream, uint32_t memory_stream_id) { return false; }
+
+  virtual bool SyncAllEvents() { return false; }
+
+  // The real size of an allocation after alignment.
+  virtual size_t AlignMemorySize(size_t size) const {
+    if (size == 0) {
+      return kDynamicMemAlignSize;
+    }
+    return ((size + kDynamicMemAlignSize - 1) / kDynamicMemAlignSize) * kDynamicMemAlignSize;
+  }
+
+  // Calculate the alloc size required when adding a memory block.
+  virtual size_t CalMemBlockAllocSize(size_t size, bool from_persistent_mem, bool need_recycle = false) {
+    return kDynamicMemAllocUnitSize;
+  }
+
+  // Set the mem pool block size.
+  virtual void SetMemPoolBlockSize(size_t available_device_mem_size) {}
+
+  // Get the minimum memory unit size used for dynamic extension.
+  virtual size_t MemAllocUnitSize(bool from_persistent_mem) const { return kDynamicMemAllocUnitSize; }
+
+  virtual void SetMemAllocUintSize(size_t common_size, size_t persist_size = kDynamicMemAllocUnitSize) {}
+
+  virtual void *GetMinUsingMemoryAddr() const { return nullptr; }
+
+  // The interfaces below operate on real device memory; they need to be overridden per device type.
+  virtual size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) {
+    LOG_ERROR << "Not implemented";
+    return 0;
+  }
+
+  virtual bool FreeDeviceMem(const DeviceMemPtr &addr) {
+    LOG_ERROR << "Not implemented";
+    return false;
+  }
+
+  virtual size_t free_mem_size() { return 0; }
+
+  virtual uint64_t total_mem_size() const { return 0; }
+
+  virtual size_t GetMaxUsedMemSize() const { return 0; }
+
+  virtual size_t GetVmmUsedMemSize() const { return 0; }
+
+  // The related interface of device memory eager free.
+  virtual void DefragMemory() {}
+
+  // Display brief state information of memory blocks and memory bufs.
+  virtual void DumpDynamicMemPoolStateInfo() {}
+
+  // Display detailed debug information of memory blocks and memory bufs.
+  virtual void DumpDynamicMemPoolDebugInfo() {}
+
+  // The statistics information.
+  virtual size_t TotalMemStatistics() const {
+    LOG_ERROR << "Not implemented";
+    return 0;
+  }
+
+  virtual size_t TotalUsedMemStatistics() const {
+    LOG_ERROR << "Not implemented";
+    return 0;
+  }
+
+  virtual size_t TotalUsedByEventMemStatistics() const {
+    LOG_ERROR << "Not implemented";
+    return 0;
+  }
+
+  virtual size_t TotalIdleMemStatistics() const {
+    LOG_ERROR << "Not implemented";
+    return 0;
+  }
+
+  virtual size_t TotalEagerFreeMemStatistics() const {
+    LOG_ERROR << "Not implemented";
+    return 0;
+  }
+
+  virtual size_t UsedMemPeakStatistics() const {
+    LOG_ERROR << "Not implemented";
+    return 0;
+  }
+
+  virtual size_t MaxMemAllocatedStatistics() const {
+    LOG_ERROR << "Not implemented";
+    return 0;
+  }
+
+  virtual size_t MaxMemReservedStatistics() const {
+    LOG_ERROR << "Not implemented";
+    return 0;
+  }
+
+  virtual size_t ActualPeakStatistics() const {
+    LOG_ERROR << "Not implemented";
+    return 0;
+  }
+
+  virtual std::unordered_map<std::string, std::size_t> BlockCountsStatistics() const {
+    LOG_ERROR << "Not implemented";
+    return {};
+  }
+
+  virtual std::unordered_map<std::string, std::size_t> BlockUnitSizeStatistics() const {
+    LOG_ERROR << "Not implemented";
+    return {};
+  }
+
+  virtual std::unordered_map<DeviceMemPtr, std::unordered_map<std::string, size_t>>
+  CommonMemBlocksInfoStatistics() const {
+    LOG_ERROR << "Not implemented";
+    return {};
+  }
+
+  virtual std::unordered_map<DeviceMemPtr, std::unordered_map<std::string, size_t>>
+  PersistentMemBlocksInfoStatistics() const {
+    LOG_ERROR << "Not implemented";
+    return {};
+  }
+
+  virtual void ResetMaxMemReserved() { LOG_ERROR << "Not implemented"; }
+
+  virtual void ResetMaxMemAllocated() { LOG_ERROR << "Not implemented"; }
+
+  virtual std::string GetMemoryPoolType() const { return "Other"; }
+
+  virtual const bool IsEnableEagerFree() const { return false; }
+
+  virtual const bool IsEnableVmm() const { return false; }
+
+  virtual void SetEnableVmm(bool enable_vmm) {}
+
+  virtual const bool SyncAllStreams() { return false; }
+
+  virtual size_t AllocDeviceMemByEagerFree(size_t size, DeviceMemPtr *addr) { return 0; }
+
+  virtual size_t FreeDeviceMemByEagerFree(const DeviceMemPtr addr, const size_t size) { return 0; }
+
+  virtual size_t MmapDeviceMem(size_t size, DeviceMemPtr addr) { return 0; }
+
+  virtual const std::pair<size_t, size_t> FreeIdleMemsByEagerFree() { return {0, 0}; }
+
+  virtual bool IsEnableTimeEvent() { return false; }
+
+  virtual void SetEnableTimeEvent(bool enable_time_event) {}
+
+  virtual void EnablePluggableAllocator(std::function<MallocFuncType> alloc_fn, std::function<FreeFuncType> free_fn) {}
+
+  virtual void DisablePluggableAllocator() {}
+
+  // Use set methods to avoid a performance decrease.
+  void SetMemoryProfilerCallback(const std::function<void()> &memory_profiler_callback) {
+    memory_profiler_callback_ = memory_profiler_callback;
+  }
+
+  void SetMemoryMstxCallback(const std::function<void(void *, size_t)> memory_malloc_mstx_callback,
+                             const std::function<void(void *)> memory_free_mstx_callback) {
+    memory_malloc_mstx_callback_ = memory_malloc_mstx_callback;
+    memory_free_mstx_callback_ = memory_free_mstx_callback;
+  }
+
+  // Set a rank id getter for the memory pool to generate the dump path.
+  virtual void SetRankIdGetter(const std::function<size_t()> &rank_id_getter) {
+    if (rank_id_getter != nullptr) {
+      rank_id_getter_ = rank_id_getter;
+    }
+  }
+
+  void SetPipelineCallback(const std::function<void()> &pipeline_callback) { pipeline_callback_ = pipeline_callback; }
+
+ protected:
+  std::function<void()> memory_profiler_callback_{nullptr};
+  std::function<size_t()> rank_id_getter_ = []() { return SIZE_MAX; };
+  std::function<void()> pipeline_callback_{nullptr};
+  std::function<void(void *, size_t)> memory_malloc_mstx_callback_{nullptr};
+  std::function<void(void *)> memory_free_mstx_callback_{nullptr};
+};
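+
+// Editorial sketch, not part of the original patch: a minimal DynamicMemPool
+// specialization only needs the real device alloc/free hooks; every other virtual
+// keeps its (logging) default. Host std::malloc/std::free stand in for a device
+// runtime here, assuming <cstdlib>; a real backend would call its driver API instead.
+class HostBackedMemPoolSketch : public DynamicMemPool {
+ public:
+  size_t AllocDeviceMem(size_t size, DeviceMemPtr *addr) override {
+    *addr = std::malloc(size);
+    return (*addr != nullptr) ? size : 0;  // Return the size actually obtained.
+  }
+
+  bool FreeDeviceMem(const DeviceMemPtr &addr) override {
+    std::free(addr);
+    return true;
+  }
+};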
+
+// Recorded information for debugging the memory allocator.
+struct HARDWARE_EXPORT AllocatorDebugInfo {
+  std::string name_{"Unknown"};
+  memory::mem_pool::MemType type_{memory::mem_pool::MemType::kOther};
+  int input_index_{-1};
+  int output_index_{-1};
+  uint8_t run_mode_{0};
+};
+
+class HARDWARE_EXPORT DynamicMemAllocatorDebugInfo {
+ public:
+  static AllocatorDebugInfo &GetDebugInfo() noexcept;
+
+  // Set the debug info when memory is allocated.
+  static void SetDebugInfo(const std::string &name, memory::mem_pool::MemType type, int input_index = -1,
+                           int output_index = -1, uint8_t run_mode = 0);
+
+ private:
+  DynamicMemAllocatorDebugInfo() = default;
+  virtual ~DynamicMemAllocatorDebugInfo() = default;
+  DynamicMemAllocatorDebugInfo(const DynamicMemAllocatorDebugInfo &) = delete;
+  DynamicMemAllocatorDebugInfo &operator=(const DynamicMemAllocatorDebugInfo &) = delete;
+};
+
+using TaskIdOnStreamEvent = std::pair<int64_t, DeviceEventPtr>;
+struct HARDWARE_EXPORT EventBase {
+  // Record an event on the mem buf.
+  bool RecordEvent(int64_t task_id_on_stream, uint32_t user_stream_id, const DeviceEventPtr &event);
+
+  // Release events on the mem buf.
+  bool WaitEvent(uint32_t task_id_on_stream, uint32_t user_stream_id);
+
+  // Indicates whether the mem buf is used by an event; returns true when no event is bound to the mem buf.
+  bool IsEventNotUsed();
+
+  // Sync all events that are bound to the mem buf.
+  bool SyncAllEvents();
+
+  // Parameter: user_stream_id, list of <task_id_on_stream, event>.
+  std::shared_ptr<std::list<std::pair<uint32_t, TaskIdOnStreamEvent>>> events_{nullptr};
+};
+
+struct HARDWARE_EXPORT JsonBuilder {
+  JsonBuilder() { buffer_ << "{"; }
+
+  template <typename T>
+  void Append(std::string key, T value) {
+    buffer_ << "\"" << key << "\":" << value << ",";
+  }
+
+  std::string ToString() {
+    // Overwrite the trailing comma with the closing brace.
+    buffer_.seekp(-1, buffer_.cur);
+    buffer_ << "}";
+    return buffer_.str();
+  }
+
+  std::stringstream buffer_;
+};
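+
+// Editorial sketch, not part of the original patch: JsonBuilder appends "key":value
+// pairs and ToString() overwrites the trailing comma with the closing brace. Note that
+// Append does not quote string values. The sketch below yields {"size_":512,"stream_id_":3}.
+inline std::string JsonBuilderUsageSketch() {
+  JsonBuilder builder;
+  builder.Append("size_", 512);
+  builder.Append("stream_id_", 3);
+  return builder.ToString();
+}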
+
+struct HARDWARE_EXPORT MemoryTimeEvent {
+  // Creation time of the address in ns.
+  uint64_t created_at_{0};
+
+  // Device address.
+  void *addr_{nullptr};
+
+  // Size of the memory allocation.
+  size_t size_{0};
+
+  // Used size of the memory pool.
+  size_t used_size_{0};
+
+  // Peak size of the memory pool.
+  size_t peak_size_{0};
+
+  // Allocated size of the memory pool.
+  size_t alloc_size_{0};
+
+  // Memory size that is referred to by events.
+  size_t used_by_event_size_{0};
+
+  // Eager free memory size.
+  size_t eager_free_size_{0};
+
+  // Whether the allocation is from persistent memory.
+  uint8_t from_persistent_{false};
+
+  // Whether the allocated memory is persistent.
+  uint8_t is_persistent_{false};
+
+  // pynative or graph or ge.
+  uint8_t run_mode_{0};
+
+  // Data type of this address.
+  uint8_t alloc_type_{0};
+
+  // Stream id of the address.
+  uint32_t stream_id_{0};
+
+  // Owner of this address.
+  std::string owner_;
+
+  std::string ToJson() {
+    JsonBuilder builder;
+    builder.Append("created_at_", created_at_);
+    builder.Append("addr_", addr_);
+    builder.Append("size_", size_);
+    builder.Append("from_persistent_", from_persistent_);
+    builder.Append("stream_id_", stream_id_);
+    builder.Append("run_mode_", run_mode_);
+    builder.Append("used_size_", used_size_);
+    builder.Append("peak_size_", peak_size_);
+    builder.Append("alloc_size_", alloc_size_);
+    builder.Append("used_by_event_size_", used_by_event_size_);
+    builder.Append("eager_free_size_", eager_free_size_);
+    builder.Append("owner_", owner_);
+    builder.Append("alloc_type_", alloc_type_);
+    return builder.ToString();
+  }
+};
+using MemoryTimeEventPtr = std::shared_ptr<MemoryTimeEvent>;
+}  // namespace device
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_MEM_REUSE_DYNAMIC_MEM_POOL_H_
diff --git a/inferrt/src/hardware/hardware_abstract/memory/mem_pool_util.cc b/inferrt/src/hardware/hardware_abstract/memory/mem_pool_util.cc
new file mode 100644
index 0000000000000000000000000000000000000000..23273509b04bdee8cb6a9760ad897aa6e28148fc
--- /dev/null
+++ b/inferrt/src/hardware/hardware_abstract/memory/mem_pool_util.cc
@@ -0,0 +1,40 @@
+/**
+ * Copyright 2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "hardware/hardware_abstract/memory/mem_pool_util.h"
+#include <map>
+
+namespace mindspore {
+namespace memory {
+namespace mem_pool {
+const std::map<MemType, std::string> kMemTypeStr = {{MemType::kWeight, "Weight"},
+                                                    {MemType::kConstantValue, "ConstantValue"},
+                                                    {MemType::kKernel, "Kernel"},
+                                                    {MemType::kGraphOutput, "GraphOutput"},
+                                                    {MemType::kSomas, "Somas"},
+                                                    {MemType::kSomasOutput, "SomasOutput"},
+                                                    {MemType::kGeConst, "GeConst"},
+                                                    {MemType::kGeFixed, "GeFixed"},
+                                                    {MemType::kBatchMemory, "BatchMemory"},
+                                                    {MemType::kContinuousMemory, "ContinuousMemory"},
+                                                    {MemType::kPyNativeInput, "PyNativeInput"},
+                                                    {MemType::kPyNativeOutput, "PyNativeOutput"},
+                                                    {MemType::kWorkSpace, "WorkSpace"},
+                                                    {MemType::kOther, "Other"}};
+
+std::string MemTypeToStr(MemType mem_type) { return kMemTypeStr.at(mem_type); }
+}  // namespace mem_pool
+}  // namespace memory
+}  // namespace mindspore
diff --git a/inferrt/src/hardware/hardware_abstract/memory/mem_pool_util.h b/inferrt/src/hardware/hardware_abstract/memory/mem_pool_util.h
new file mode 100644
index 0000000000000000000000000000000000000000..0530008be02020e3af4ec65bed63c22bc2f547a6
--- /dev/null
+++ b/inferrt/src/hardware/hardware_abstract/memory/mem_pool_util.h
@@ -0,0 +1,168 @@
+/**
+ * Copyright 2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_MEMORY_MEM_POOL_MEM_POOL_UTIL_H_
+#define MINDSPORE_CCSRC_MEMORY_MEM_POOL_MEM_POOL_UTIL_H_
+
+#include <atomic>
+#include <cstddef>
+#include <cstdint>
+#include <new>
+#include <string>
+
+#include "hardware/hardware_abstract/visible.h"
+
+namespace mindspore {
+namespace memory {
+namespace mem_pool {
+enum class MemType : int {
+  kWeight = 0,
+  kConstantValue,
+  kKernel,
+  kGraphOutput,
+  kSomas,
+  kSomasOutput,
+  kGeConst,
+  kGeFixed,
+  kBatchMemory,
+  kContinuousMemory,
+  kPyNativeInput = 10,
+  kPyNativeOutput,
+  kWorkSpace,
+  kOther
+};
+
+class HARDWARE_EXPORT Lock {
+ public:
+  inline void lock() {
+    while (locked.test_and_set(std::memory_order_acquire)) {
+    }
+  }
+  inline void unlock() { locked.clear(std::memory_order_release); }
+
+ protected:
+  std::atomic_flag locked = ATOMIC_FLAG_INIT;
+};
+
+class HARDWARE_EXPORT LockGuard {
+ public:
+  explicit LockGuard(const Lock &lock) : lock_(const_cast<Lock *>(&lock)) { lock_->lock(); }
+  ~LockGuard() { lock_->unlock(); }
+
+ private:
+  Lock *lock_;
+};
+
+HARDWARE_EXPORT std::string MemTypeToStr(MemType mem_type);
+
+constexpr size_t kPoolGrowSize = 1 << 20;
+
+template <typename T>
+class ObjectPool {
+  struct Buf {
+    Buf *next_;
+  };
+
+  class Buffer {
+    static const std::size_t bucket_size = sizeof(T) > sizeof(Buf) ? sizeof(T) : sizeof(Buf);
+    static const std::size_t kDataBucketSize = bucket_size * kPoolGrowSize;
+
+   public:
+    explicit Buffer(Buffer *next) : next_(next) {}
+
+    T *GetBlock(std::size_t index) {
+      if (index >= kPoolGrowSize) {
+        throw std::bad_alloc();
+      }
+      return reinterpret_cast<T *>(&data_[bucket_size * index]);
+    }
+
+    Buffer *const next_;
+
+   private:
+    uint8_t data_[kDataBucketSize];
+  };
+
+  Buf *free_list_ = nullptr;
+  Buffer *buffer_head_ = nullptr;
+  std::size_t buffer_index_ = kPoolGrowSize;
+
+ public:
+  ObjectPool() = default;
+  ObjectPool(ObjectPool &&object_pool) = delete;
+  ObjectPool(const ObjectPool &object_pool) = delete;
+  ObjectPool &operator=(const ObjectPool &object_pool) = delete;
+  ObjectPool &operator=(ObjectPool &&object_pool) = delete;
+
+  ~ObjectPool() {
+    while (buffer_head_ != nullptr) {
+      Buffer *buffer = buffer_head_;
+      buffer_head_ = buffer->next_;
+      delete buffer;
+    }
+  }
+
+  T *Borrow() {
+    if (free_list_ != nullptr) {
+      Buf *buf = free_list_;
+      free_list_ = buf->next_;
+      return reinterpret_cast<T *>(buf);
+    }
+
+    if (buffer_index_ >= kPoolGrowSize) {
+      buffer_head_ = new Buffer(buffer_head_);
+      buffer_index_ = 0;
+    }
+
+    return buffer_head_->GetBlock(buffer_index_++);
+  }
+
+  void Return(T *obj) {
+    Buf *buf = reinterpret_cast<Buf *>(obj);
+    buf->next_ = free_list_;
+    free_list_ = buf;
+  }
+};
+
+// Not supported on older Windows versions.
+template <typename T>
+class PooledAllocator : private ObjectPool<T> {
+ public:
+  typedef std::size_t size_type;
+  typedef std::ptrdiff_t difference_type;
+  typedef T *pointer;
+  typedef const T *const_pointer;
+  typedef T &reference;
+  typedef const T &const_reference;
+  typedef T value_type;
+
+  template <typename U>
+  struct rebind {
+    typedef PooledAllocator<U> other;
+  };
+
+  pointer allocate(size_type n, const void *hint = nullptr) {
+    if (n != 1 || hint) throw std::bad_alloc();
+    return ObjectPool<T>::Borrow();
+  }
+
+  void deallocate(pointer p, size_type n) { ObjectPool<T>::Return(p); }
+
+  void construct(pointer p, const_reference val) { new (p) T(val); }
+
+  void destroy(pointer p) { p->~T(); }
+};
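+
+// Editorial sketch, not part of the original patch: ObjectPool::Borrow() hands out raw,
+// uninitialized storage, so callers construct with placement-new and destroy manually
+// before Return(). NodeSketch is a hypothetical payload type.
+struct NodeSketch {
+  int value;
+};
+
+inline void ObjectPoolUsageSketch() {
+  ObjectPool<NodeSketch> pool;
+  NodeSketch *node = new (pool.Borrow()) NodeSketch{7};  // Construct in pooled storage.
+  node->~NodeSketch();                                   // Destroy before returning storage.
+  pool.Return(node);
+}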
+}  // namespace mem_pool
+}  // namespace memory
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_MEMORY_MEM_POOL_MEM_POOL_UTIL_H_
diff --git a/inferrt/src/hardware/hardware_abstract/memory_manager.cc b/inferrt/src/hardware/hardware_abstract/memory_manager.cc
new file mode 100644
index 0000000000000000000000000000000000000000..0952c8964168ecd9e367e46393ecd6476feab03a
--- /dev/null
+++ b/inferrt/src/hardware/hardware_abstract/memory_manager.cc
@@ -0,0 +1,65 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hardware/hardware_abstract/memory_manager.h"
+#include <vector>
+#include "common/common.h"
+
+namespace mindspore {
+namespace device {
+constexpr size_t kAlignBytes = 32;
+
+size_t MemoryManager::GetCommonAlignSize(size_t input_size) {
+  return ((input_size + kMemAlignSize + kAlignBytes - 1) / kMemAlignSize) * kMemAlignSize;
+}
+
+size_t MemoryManager::GetCommunicationAlignSize(size_t input_size) {
+  return ((input_size + kMemAlignSize - 1) / kMemAlignSize) * kMemAlignSize + kTwiceMemAlignSize;
+}
+
+void MemoryManager::FreeMemFromMemPool(void *device_ptr) {
+  if (device_ptr == nullptr) {
+    LOG_ERROR << "FreeMemFromMemPool device_ptr is null.";
+  }
+}
+
+uint8_t *MemoryManager::MallocWorkSpaceMem(size_t size) { return MallocDynamicMem(size, false); }
+
+uint8_t *MemoryManager::MallocDynamicMem(size_t size, bool communication_mem) {
+  LOG_OUT << "Call default dynamic malloc, size: " << size << ", communication_mem: " << communication_mem;
+  return nullptr;
+}
+
+void *MemoryManager::MallocMemFromMemPool(size_t size, bool from_persistent_mem, bool, uint32_t stream_id) {
+  if (size == 0) {
+    LOG_ERROR << "MallocMemFromMemPool size is 0.";
+  }
+  return nullptr;
+}
+
+std::vector<void *> MemoryManager::MallocContinuousMemFromMemPool(const std::vector<size_t> &size_list,
+                                                                  uint32_t stream_id) {
+  if (size_list.empty()) {
+    LOG_ERROR << "MallocContinuousMemFromMemPool size list's size is 0.";
+  }
+  std::vector<void *> device_ptr_list;
+  for (size_t i = 0; i < size_list.size(); ++i) {
+    (void)device_ptr_list.emplace_back(nullptr);
+  }
+  return device_ptr_list;
+}
+}  // namespace device
+}  // namespace mindspore
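+
+// Editorial note, not part of the original patch: worked examples for the two align
+// helpers above, with kMemAlignSize = 512, kAlignBytes = 32 and kTwiceMemAlignSize = 1024:
+//   GetCommonAlignSize(100)        = ((100 + 512 + 31) / 512) * 512   = 512
+//   GetCommonAlignSize(482)        = ((482 + 512 + 31) / 512) * 512   = 1024
+//   GetCommunicationAlignSize(100) = ((100 + 511) / 512) * 512 + 1024 = 1536
+// Common alignment rounds up with kAlignBytes of headroom; communication alignment
+// reserves a full kTwiceMemAlignSize of headroom on top of the rounded size.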
diff --git a/inferrt/src/hardware/hardware_abstract/memory_manager.h b/inferrt/src/hardware/hardware_abstract/memory_manager.h
new file mode 100644
index 0000000000000000000000000000000000000000..49011b321b9d82b7289033b158717b77924c8096
--- /dev/null
+++ b/inferrt/src/hardware/hardware_abstract/memory_manager.h
@@ -0,0 +1,129 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_MEMORY_MANAGER_H_
+#define MINDSPORE_CCSRC_RUNTIME_DEVICE_MEMORY_MANAGER_H_
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+#include "common/logger.h"
+#include "hardware/hardware_abstract/memory/dynamic_mem_pool.h"
+#include "hardware/hardware_abstract/visible.h"
+
+namespace mindspore {
+namespace device {
+enum class MemType { kStaticMem, kDynamicMem, kSomasReuseDynamicMem };
+const uint32_t kInvalidGraphId = UINT32_MAX;
+constexpr int kGetAllOuts = -1;
+constexpr uint64_t kMemAlignSize = 512;
+constexpr uint64_t kTwiceMemAlignSize = kMemAlignSize << 1;
+class HARDWARE_EXPORT MemoryManager {
+ public:
+  MemoryManager() = default;
+  virtual ~MemoryManager() = default;
+
+  virtual void Initialize() = 0;
+  virtual void Finalize() = 0;
+  virtual void ResetDynamicMemory() {}
+  virtual void ClearGlobalIdleMem() {}
+
+  uint8_t *MallocWorkSpaceMem(size_t size);
+  virtual void *MallocMemFromMemPool(size_t size, bool from_persistent_mem, bool need_recycle = false,
+                                     uint32_t stream_id = kDefaultStreamIndex);
+  virtual size_t GetMaxUsedMemorySize() const { return 0; }
+  virtual void FreeMemFromMemPool(void *device_ptr);
+  virtual std::vector<void *> MallocContinuousMemFromMemPool(const std::vector<size_t> &size_list,
+                                                             uint32_t stream_id = kDefaultStreamIndex);
+
+  static size_t GetCommonAlignSize(size_t input_size);
+  static size_t GetCommunicationAlignSize(size_t input_size);
+
+  virtual size_t GetAvailableMemSize() {
+    LOG_ERROR << "Return default 0 mem size!";
+    return 0;
+  }
+
+  bool RecordEvent(int64_t task_id_on_stream, uint32_t user_stream_id,
+                   const std::vector<std::pair<uint32_t, DeviceMemPtr>> &memory_stream_addresses,
+                   const DeviceEventPtr &event) {
+    if (GetMemoryPool() == nullptr) {
+      LOG_OUT << "Memory pool is nullptr.";
+      return false;
+    }
+    return GetMemoryPool()->RecordEvent(task_id_on_stream, user_stream_id, memory_stream_addresses, event);
+  }
+  bool WaitEvent(int64_t task_id_on_stream, uint32_t user_stream_id, uint32_t memory_stream_id) {
+    if (GetMemoryPool() == nullptr) {
+      LOG_OUT << "Memory pool is nullptr.";
+      return false;
+    }
+    return GetMemoryPool()->WaitEvent(task_id_on_stream, user_stream_id, memory_stream_id);
+  }
+  bool WaitEvent(int64_t task_id_on_stream, uint32_t memory_stream_id) {
+    if (GetMemoryPool() == nullptr) {
+      LOG_OUT << "Memory pool is nullptr.";
+      return false;
+    }
+    return GetMemoryPool()->WaitEvent(task_id_on_stream, memory_stream_id);
+  }
+  bool SyncAllEvents() {
+    if (GetMemoryPool() == nullptr) {
+      LOG_OUT << "Memory pool is nullptr.";
+      return false;
+    }
+    return GetMemoryPool()->SyncAllEvents();
+  }
+
+  virtual DynamicMemPool *GetMemoryPool() = 0;
+
+  // Relevant functions to manage memory statistics.
+  virtual size_t GetTotalMemStatistics() const { return 0; }
+  virtual size_t GetTotalUsedMemStatistics() const { return 0; }
+  virtual size_t GetTotalIdleMemStatistics() const { return 0; }
+  virtual size_t GetTotalEagerFreeMemStatistics() const { return 0; }
+  virtual size_t GetUsedMemPeakStatistics() const { return 0; }
+  virtual size_t GetReservedMemPeakStatistics() const { return 0; }
+  virtual std::unordered_map<std::string, std::size_t> GetBlockCountsStatistics() const { return {}; }
+  virtual std::unordered_map<std::string, std::size_t> GetBlockUnitSizeStatistics() const { return {}; }
+  virtual std::unordered_map<DeviceMemPtr, std::unordered_map<std::string, size_t>>
+  GetCommonMemBlocksInfoStatistics() const {
+    return {};
+  }
+  virtual std::unordered_map<DeviceMemPtr, std::unordered_map<std::string, size_t>>
+  GetPersistentMemBlocksInfoStatistics() const {
+    return {};
+  }
+  virtual void ResetMaxMemoryReserved() {}
+  virtual void ResetMaxMemoryAllocated() {}
+  virtual size_t EmptyCache() { return -1L; }
+
+ protected:
+  virtual uint8_t *MallocStaticMem(size_t size, bool communication_mem, uint32_t graph_id) = 0;
+  virtual uint8_t *MallocStaticMem(size_t size, bool communication_mem) {
+    return MallocStaticMem(size, communication_mem, kInvalidGraphId);
+  }
+  virtual uint8_t *MallocDynamicMem(size_t size, bool communication_mem);
+
+  // Holds the memory pool for common operations on memory.
+  DynamicMemPool *memory_pool_{nullptr};
+};
+}  // namespace device
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_RUNTIME_DEVICE_MEMORY_MANAGER_H_
diff --git a/inferrt/src/hardware/hardware_abstract/multi_stream_controller.cc b/inferrt/src/hardware/hardware_abstract/multi_stream_controller.cc
new file mode 100644
index 0000000000000000000000000000000000000000..a849a9655430352e4764e671920137ccd57b3664
--- /dev/null
+++ b/inferrt/src/hardware/hardware_abstract/multi_stream_controller.cc
@@ -0,0 +1,339 @@
+/**
+ * Copyright 2024-2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "hardware/hardware_abstract/multi_stream_controller.h"
+
+#include <algorithm>
+#include <utility>
+#include "common/logger.h"
+
+namespace mindspore {
+namespace device {
+constexpr size_t kDefaultStreamRefreshSize = 2;
+
+namespace {
+template <typename T>
+struct AtomicWrapper {
+  AtomicWrapper() : value_(0L) {}
+  explicit AtomicWrapper(const std::atomic<T> &value) : value_(value.load()) {}
+  AtomicWrapper(const AtomicWrapper &other) : value_(other.value_.load()) {}
+  AtomicWrapper &operator=(const AtomicWrapper &other) {
+    if (this == &other) {
+      return *this;
+    }
+    value_.store(other.value_.load());
+    return *this;
+  }
+
+  std::atomic<T> value_;
+};
+
+class LockGuard {
+ public:
+  explicit LockGuard(SpinLock &lock) : spin_lock_(lock) { spin_lock_.lock(); }
+  ~LockGuard() { spin_lock_.unlock(); }
+
+ private:
+  SpinLock &spin_lock_;
+};
+}  // namespace
+
+class TaskIdOnStreamManager {
+ public:
+  TaskIdOnStreamManager() = default;
+
+  void Resize(uint32_t stream_size) {
+    if (initialized_ && stream_size <= initialize_size_) {
+      LOG_OUT << "Task id on stream manager has already been initialized, current size : " << initialize_size_ << ".";
+      return;
+    }
+    LOG_OUT << "Task id on stream manager initialized : " << initialized_ << ", stream_size : " << stream_size << ".";
+    uint32_t min_stream_size = 2;
+    initialize_size_ = std::max(stream_size, min_stream_size);
+    generator_.resize(initialize_size_);
+    status_.resize(initialize_size_);
+    for (auto &vec : status_) {
+      vec.resize(initialize_size_);
+    }
+    initialized_ = true;
+  }
+
+  inline int64_t Query(uint32_t user_stream_id, uint32_t memory_stream_id) {
+    return status_[user_stream_id][memory_stream_id];
+  }
+
+  inline bool Update(int64_t task_id_on_stream, uint32_t user_stream_id, uint32_t memory_stream_id) {
+    if (status_[user_stream_id][memory_stream_id] >= task_id_on_stream) {
+      return false;
+    }
+    status_[user_stream_id][memory_stream_id] = task_id_on_stream;
+    return true;
+  }
+
+  inline int64_t Launch(uint32_t stream_id) {
+    if (stream_id >= generator_.size()) {
+      LOG_OUT << "Launch stream id : " << stream_id << " exceeds generator_ size : " << generator_.size()
+              << ", resizing.";
+      generator_.resize(stream_id + 1);
+      status_.resize(stream_id + 1);
+      for (auto &vec : status_) {
+        vec.resize(stream_id + 1);
+      }
+    }
+    return ++generator_[stream_id].value_;
+  }
+
+  inline int64_t Get(uint32_t stream_id) { return generator_[stream_id].value_; }
+
+ private:
+  bool initialized_{false};
+  uint32_t initialize_size_{0};
+  std::vector<AtomicWrapper<int64_t>> generator_;
+  std::vector<std::vector<int64_t>> status_;
+};
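+
+// Editorial sketch, not part of the original patch: how Launch/Update/Query are meant
+// to compose. Stream ids are arbitrary example values.
+inline void TaskIdOnStreamSketch(TaskIdOnStreamManager *manager) {
+  manager->Resize(4);
+  int64_t task_id = manager->Launch(2);  // A task is issued on (producer) stream 2.
+  // User stream 0 synchronizes with stream 2 up to that task.
+  manager->Update(task_id, 0, 2);
+  // Query now returns the newest producer task that user stream 0 has seen, i.e. task_id.
+  (void)manager->Query(0, 2);
+}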
+
+// Event pool recycled with ref counting; the pool reuses events when it cannot create more.
+class EventPool {
+ public:
+  explicit EventPool(std::function<DeviceEventPtr(void)> event_creator) : event_creator_(std::move(event_creator)) {}
+  ~EventPool() {
+    LockGuard lock(lock_);
+    expired_ = true;
+    events_.clear();
+    cached_events_.clear();
+  }
+
+  EventPool() = delete;
+  EventPool(const EventPool &) = delete;
+  EventPool &operator=(const EventPool &) = delete;
+
+  // Get an event from the pool; the event is wrapped in a shared_ptr.
+  DeviceEventPtr Get() {
+    LOG_OUT << "Event pool get start.";
+    LockGuard lock(lock_);
+    DeviceEvent *event = nullptr;
+    // Try to create an event first, before the core size is reached.
+    if (size_ < core_size_) {
+      auto created_event = event_creator_();
+      if (created_event != nullptr && created_event->IsReady()) {
+        cached_events_.push_back(created_event);
+        size_++;
+        event = created_event.get();
+      } else {
+        core_size_ = size_;
+      }
+    }
+    // Try to reuse an event.
+    if (event == nullptr) {
+      auto iter = events_.begin();
+      while (iter != events_.end()) {
+        auto event_in_list = *iter;
+        if (event_in_list == nullptr) {
+          LOG_ERROR << "Exception : event in list is nullptr, events_ size : " << events_.size() << ".";
+        }
+        if (event_in_list->QueryEvent()) {
+          event = event_in_list;
+          events_.erase(iter);
+          break;
+        }
+        iter++;
+      }
+    }
+    // Reuse failed, try to create one more event.
+    if (event == nullptr) {
+      auto created_event = event_creator_();
+      if (created_event != nullptr && created_event->IsReady()) {
+        cached_events_.push_back(created_event);
+        event = created_event.get();
+        size_++;
+      } else {
+        LOG_ERROR << "Get event failed.";
+      }
+    }
+    LOG_OUT << "Get event, events_ size : " << events_.size() << ", event : " << event << ".";
+
+    auto event_ptr = std::shared_ptr<DeviceEvent>(event, [&](DeviceEvent *e) {
+      LockGuard lock(lock_);
+      if (!expired_) {
+        LOG_OUT << "Return event : " << e << ".";
+        events_.push_back(e);
+      } else {
+        LOG_OUT << "Return event : " << e << " failed.";
+      }
+    });
+    return event_ptr;
+  }
+
+ private:
+  SpinLock lock_;
+  bool expired_{false};
+  // The pool only creates events until it reaches the core size, i.e. half of the size limit.
+  size_t core_size_{32768};
+  size_t size_{0};
+  std::function<DeviceEventPtr(void)> event_creator_;
+  std::list<DeviceEvent *> events_;
+  // cached_events_ holds shared_ptrs of events, since the device res manager returns smart pointers.
+  std::list<DeviceEventPtr> cached_events_;
+};
+using EventPoolPtr = std::shared_ptr<EventPool>;
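+
+// Editorial sketch, not part of the original patch: events are handed out as
+// shared_ptrs whose deleter returns them to the pool, so dropping the last reference
+// recycles the event instead of destroying it.
+inline void EventPoolUsageSketch(DeviceResManager *res_manager) {
+  EventPool pool([res_manager]() { return res_manager->CreateRuntimeEvent(true, false); });
+  {
+    DeviceEventPtr event = pool.Get();
+    // ... record / wait with the event here ...
+  }  // The shared_ptr deleter pushes the event back into the pool for reuse.
+}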
+ LOG_OUT << "Stream size is 0, will initialize with 2 streams."; + stream_size = kDefaultStreamRefreshSize; + } + task_id_on_stream_manager_->Resize(stream_size); + if (event_pool_ == nullptr) { + event_pool_ = std::make_shared([&]() { + // Event in pool need to do synchronization between streams, need to enable blocking. + return device_res_base_->CreateRuntimeEvent(true, false); + }); + } +} + +bool MultiStreamController::UpdateTaskIdOnStream(int64_t task_id_on_stream, uint32_t user_stream_id, + uint32_t memory_stream_id) { + LockGuard lock(lock_); + return task_id_on_stream_manager_->Update(task_id_on_stream, user_stream_id, memory_stream_id); +} + +int64_t MultiStreamController::QueryTaskIdOnStream(uint32_t user_stream_id, uint32_t memory_stream_id) { + LockGuard lock(lock_); + return task_id_on_stream_manager_->Query(user_stream_id, memory_stream_id); +} + +int64_t MultiStreamController::LaunchTaskIdOnStream(uint32_t stream_id) { + LockGuard lock(lock_); + return task_id_on_stream_manager_->Launch(stream_id); +} + +int64_t MultiStreamController::GetTaskIdOnStream(uint32_t stream_id) { + LockGuard lock(lock_); + return task_id_on_stream_manager_->Get(stream_id); +} + +std::mutex &MultiStreamController::GetStreamMutex(size_t stream_id) { + LockGuard lock(lock_); + return stream_mutexes_[stream_id]; +} + +bool MultiStreamController::RecordEvent(int64_t task_id_on_stream, uint32_t user_stream_id, + const std::vector> &memory_stream_addresses, + const DeviceEventPtr &input_event) { + LockGuard lock(lock_); + DeviceEventPtr event = nullptr; + if (input_event != nullptr) { + event = input_event; + } else { + event = device_res_base_->CreateRuntimeEvent(false, true); + if (event == nullptr) { + return true; + } + event->RecordEvent(user_stream_id); + } + + return device_res_base_->RecordEvent(task_id_on_stream, user_stream_id, memory_stream_addresses, event); +} + +bool MultiStreamController::WaitEvent(int64_t task_id_on_stream, uint32_t user_stream_id, uint32_t memory_stream_id) { + LockGuard lock(lock_); + // If update task id on stream failed, means task id on stream is elder one, no need to wait event on mem manager. + if (!task_id_on_stream_manager_->Update(task_id_on_stream, user_stream_id, memory_stream_id)) { + LOG_OUT << "Skip Wait Event."; + return false; + } + return device_res_base_->WaitEvent(task_id_on_stream, user_stream_id, memory_stream_id); +} + +bool MultiStreamController::WaitEvent(int64_t task_id_on_stream, uint32_t user_stream_id) { + LockGuard lock(lock_); + return device_res_base_->WaitEvent(task_id_on_stream, user_stream_id); +} + +bool MultiStreamController::DispatchRecordWaitEvent(uint32_t user_stream_id, uint32_t memory_stream_id) { + LockGuard lock(lock_); + if (event_pool_ == nullptr) { + LOG_OUT << "Event pool is not initialized."; + event_pool_ = std::make_shared([&]() { + // Event in pool need to do synchronization between streams, need to enable blocking. + return device_res_base_->CreateRuntimeEvent(true, false); + }); + } + auto event = event_pool_->Get(); + // Note : record event on memory stream id and wait event on user stream id to make sure memory is safe. 
+ event->RecordEvent(memory_stream_id); + event->WaitEvent(user_stream_id); + return true; +} + +bool MultiStreamController::SyncStream(size_t stream_id) { + LockGuard lock(lock_); + bool ret = device_res_base_->SyncStream(stream_id); + auto task_id_on_stream = task_id_on_stream_manager_->Get(stream_id); + device_res_base_->WaitEvent(task_id_on_stream, stream_id); + return ret; +} + +bool MultiStreamController::SyncAllStreams() { + LockGuard lock(lock_); + bool ret = device_res_base_->SyncAllStreams(); + device_res_base_->SyncAllEvents(); + return ret; +} + +bool MultiStreamController::SyncNotDefaultStreams() { + LockGuard lock(lock_); + bool ret = device_res_base_->SyncNotDefaultStreams(); + const auto &stream_ids = device_res_base_->GetStreamIds(); + for (auto stream_id : stream_ids) { + auto task_id_on_stream = task_id_on_stream_manager_->Get(stream_id); + device_res_base_->WaitEvent(task_id_on_stream, stream_id); + } + return ret; +} + +bool MultiStreamController::WaitMultiStream(size_t wait_stream_id) { + LockGuard lock(lock_); + LOG_OUT << "Wait multi stream on wait stream id : " << wait_stream_id << "."; + const auto &stream_ids = device_res_base_->GetStreamIds(); + if (event_pool_ == nullptr) { + LOG_OUT << "Event pool is not initialized."; + event_pool_ = std::make_shared([&]() { + // Event in pool need to do synchronization between streams, need to enable blocking. + return device_res_base_->CreateRuntimeEvent(true, false); + }); + } + device_res_base_->BindDeviceToCurrentThread(true); + auto event = event_pool_->Get(); + for (auto stream_id : stream_ids) { + if (stream_id != wait_stream_id) { + event->RecordEvent(stream_id); + event->WaitEvent(wait_stream_id); + } + } + return true; +} +} // namespace device +} // namespace mindspore diff --git a/inferrt/src/hardware/hardware_abstract/multi_stream_controller.h b/inferrt/src/hardware/hardware_abstract/multi_stream_controller.h new file mode 100644 index 0000000000000000000000000000000000000000..443020e28edb362dd2e271e742116cea989668f9 --- /dev/null +++ b/inferrt/src/hardware/hardware_abstract/multi_stream_controller.h @@ -0,0 +1,100 @@ +/** + * Copyright 2024-2025 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
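+
+// Editorial sketch, not part of the original patch: the record-then-wait pattern that
+// DispatchRecordWaitEvent implements, spelled out. Recording on the memory (producer)
+// stream and waiting on the user (consumer) stream orders the consumer after all work
+// the producer had queued when the event was recorded, which is what keeps the memory safe.
+inline void RecordWaitUsageSketch(MultiStreamController *controller) {
+  constexpr uint32_t kUserStreamId = 0;    // Consumer stream.
+  constexpr uint32_t kMemoryStreamId = 1;  // Producer stream that owns the memory.
+  (void)controller->DispatchRecordWaitEvent(kUserStreamId, kMemoryStreamId);
+}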
diff --git a/inferrt/src/hardware/hardware_abstract/multi_stream_controller.h b/inferrt/src/hardware/hardware_abstract/multi_stream_controller.h
new file mode 100644
index 0000000000000000000000000000000000000000..443020e28edb362dd2e271e742116cea989668f9
--- /dev/null
+++ b/inferrt/src/hardware/hardware_abstract/multi_stream_controller.h
@@ -0,0 +1,100 @@
+/**
+ * Copyright 2024-2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_MULTI_STREAM_CONTROLLER_HEADER_H
+#define MINDSPORE_CCSRC_RUNTIME_DEVICE_MULTI_STREAM_CONTROLLER_HEADER_H
+
+#include <atomic>
+#include <functional>
+#include <list>
+#include <map>
+#include <memory>
+#include <mutex>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "hardware/hardware_abstract/device_event.h"
+#include "hardware/hardware_abstract/device_context.h"
+#include "hardware/hardware_abstract/visible.h"
+
+namespace mindspore {
+namespace device {
+class SpinLock {
+ public:
+  void lock() {
+    while (locked.test_and_set(std::memory_order_acquire)) {
+    }
+  }
+
+  void unlock() { locked.clear(std::memory_order_release); }
+
+ private:
+  std::atomic_flag locked = ATOMIC_FLAG_INIT;
+};
+
+class TaskIdOnStreamManager;
+using TaskIdOnStreamManagerPtr = std::shared_ptr<TaskIdOnStreamManager>;
+
+class EventPool;
+using EventPoolPtr = std::shared_ptr<EventPool>;
+
+class HARDWARE_EXPORT MultiStreamController {
+ public:
+  explicit MultiStreamController(DeviceResManager *device_res_base);
+
+  MultiStreamController(const MultiStreamController &) = delete;
+  MultiStreamController &operator=(const MultiStreamController &) = delete;
+  MultiStreamController(MultiStreamController &&) = delete;
+
+  ~MultiStreamController() = default;
+
+  void Refresh();
+
+  bool UpdateTaskIdOnStream(int64_t task_id_on_stream, uint32_t user_stream_id, uint32_t memory_stream_id);
+
+  int64_t QueryTaskIdOnStream(uint32_t user_stream_id, uint32_t memory_stream_id);
+
+  int64_t LaunchTaskIdOnStream(uint32_t stream_id);
+  int64_t GetTaskIdOnStream(uint32_t stream_id);
+
+  std::mutex &GetStreamMutex(size_t stream_id);
+
+  // memory_stream_addresses pair : memory_stream_id, address.
+  bool RecordEvent(int64_t task_id_on_stream, uint32_t user_stream_id,
+                   const std::vector<std::pair<uint32_t, DeviceMemPtr>> &memory_stream_addresses,
+                   const DeviceEventPtr &input_event = nullptr);
+  bool WaitEvent(int64_t task_id_on_stream, uint32_t user_stream_id, uint32_t memory_stream_id);
+  bool WaitEvent(int64_t task_id_on_stream, uint32_t user_stream_id);
+  bool DispatchRecordWaitEvent(uint32_t user_stream_id, uint32_t memory_stream_id);
+
+  bool SyncStream(size_t stream_id);
+  bool SyncAllStreams();
+  bool SyncNotDefaultStreams();
+
+  bool WaitMultiStream(size_t wait_stream_id);
+
+ protected:
+  TaskIdOnStreamManagerPtr task_id_on_stream_manager_;
+  std::unordered_map<size_t, std::mutex> stream_mutexes_;
+  EventPoolPtr event_pool_;
+
+  DeviceResManager *device_res_base_;
+  SpinLock lock_;
+};
+using MultiStreamControllerPtr = std::shared_ptr<MultiStreamController>;
+}  // namespace device
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_RUNTIME_DEVICE_MULTI_STREAM_CONTROLLER_HEADER_H
diff --git a/inferrt/src/hardware/hardware_abstract/stream_util.h b/inferrt/src/hardware/hardware_abstract/stream_util.h
new file mode 100644
index 0000000000000000000000000000000000000000..59a21bc129b235d440b12622d9b8027aba4acabf
--- /dev/null
+++ b/inferrt/src/hardware/hardware_abstract/stream_util.h
@@ -0,0 +1,23 @@
+/**
+ * Copyright 2024 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_INCLUDE_COMMON_UTILS_STREAM_UTIL_H_
+#define MINDSPORE_CCSRC_INCLUDE_COMMON_UTILS_STREAM_UTIL_H_
+#include <cstdint>
+namespace mindspore {
+constexpr uint32_t kDefaultStreamIndex = 0U;
+constexpr uint32_t kWorldGroupStreamIndex = 1U;
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_INCLUDE_COMMON_UTILS_STREAM_UTIL_H_
diff --git a/inferrt/src/hardware/hardware_abstract/visible.h b/inferrt/src/hardware/hardware_abstract/visible.h
new file mode 100644
index 0000000000000000000000000000000000000000..4a3fc96c5a2cea87921ba246db95ff986ffcdbba
--- /dev/null
+++ b/inferrt/src/hardware/hardware_abstract/visible.h
@@ -0,0 +1,32 @@
+/**
+ * Copyright 2025 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INFERRT_SRC_HARDWARE_VISIBLE_H_
+#define INFERRT_SRC_HARDWARE_VISIBLE_H_
+
+#if (defined(_WIN32) || defined(__WIN32__) || defined(WIN32) || defined(__CYGWIN__))
+#ifdef HARDWARE_DLL
+#define HARDWARE_EXPORT __declspec(dllexport)
+#else
+#define HARDWARE_EXPORT __declspec(dllimport)
+#endif
+#define HARDWARE_LOCAL
+#else
+#define HARDWARE_EXPORT __attribute__((visibility("default")))
+#define HARDWARE_LOCAL __attribute__((visibility("hidden")))
+#endif
+
+#endif  // INFERRT_SRC_HARDWARE_VISIBLE_H_
diff --git a/inferrt/src/hardware/tests/CMakeLists.txt b/inferrt/src/hardware/tests/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6e4abec03f43ff40c71cec65c3555a42a4a9d0d1
--- /dev/null
+++ b/inferrt/src/hardware/tests/CMakeLists.txt
@@ -0,0 +1,32 @@
+check_debug_log_out()
+
+if(DEFINED ENV{ASCEND_CUSTOM_PATH})
+    set(ASCEND_PATH $ENV{ASCEND_CUSTOM_PATH})
+else()
+    set(ASCEND_PATH /usr/local/Ascend)
+endif()
+
+message("Note compile ascend path: ${ASCEND_PATH}")
+include_directories(${ASCEND_PATH}/latest/include/)
+link_directories(${ASCEND_PATH}/latest/lib64/)
+
+find_package(Python3 COMPONENTS Interpreter Development REQUIRED)
+include_directories(${Python3_INCLUDE_DIRS})
+
+set(depname "pybind11")
+set(PYBIND11_PATH "${PROJECT_SOURCE_DIR}/${depname}-src")
+include_directories(${PYBIND11_PATH}/include)
+
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -ldl")
+
+add_executable(test_obj test_func.cc)
+target_link_libraries(test_obj
+    -Wl,--whole-archive
+    hardware_ascend_obj
+    -Wl,--no-whole-archive
+    hardware_abstract_obj
+    Python3::Python
+    ascendcl
+    runtime
+    )
\ No newline at end of file
<< "Get device context failed."; + return 0; + } + if (device_context->device_res_manager_ == nullptr) { + LOG_ERROR << "Get device res manager failed."; + return 0; + } + device_context->Initialize(); + + // Test allocate memory. + auto ptr = device_context->device_res_manager_->AllocateMemory(8); + LOG_ERROR << "ptr:" << ptr; + device_context->device_res_manager_->FreeMemory(ptr); + + return 0; +}