From ac3578676b87cdcd447af50010f596304fd19529 Mon Sep 17 00:00:00 2001 From: huanruizhi Date: Sat, 7 Aug 2021 14:57:51 +0800 Subject: [PATCH] DC&&MDC --- CMakeLists.txt | 5 +- configure.py | 314 ++++---- tf_adapter/BUILD | 6 +- tf_adapter/common/common.h | 9 +- tf_adapter/kernels/geop_npu.cc | 104 ++- tf_adapter/kernels/geop_npu.h | 6 + tf_adapter/kernels/host_queue_dataset_op.cc | 565 +++++++++++--- tf_adapter/kernels/infeed_outfeed_ops.cc | 174 ++--- tf_adapter/ops/npu_dataset_ops.cc | 19 +- tf_adapter/ops/npu_ops.cc | 2 - .../optimizers/dp_tf_ge_conversion_pass.cc | 641 +++++++++------- .../optimizers/om_partition_subgraphs_pass.cc | 6 +- .../npu_bridge/estimator/npu/npu_hook.py | 1 - tf_adapter/tests/CMakeLists.txt | 1 + .../tests/depends/ascendcl/CMakeLists.txt | 29 + .../depends/ascendcl/src/ascendcl_stub.cc | 268 +++++++ .../depends/ascendcl/src/ascendcl_stub.h | 82 ++ tf_adapter/tests/st/CMakeLists.txt | 10 +- .../tests/st/kernels/pbtxt/geop_dpop.pbtxt | 697 +++++++++++++++++ .../testcase/dataset/dataset_test_base.cc | 704 ++++++++++++++++++ .../testcase/dataset/function_testlib.cc | 649 ++++++++++++++++ .../dataset/host_queue_dats_set_st.cc | 203 +++++ .../st/kernels/testcase/geop_npu_test.cc | 13 +- .../kernels/testcase/infeed_outfeed_test.cc | 76 ++ .../testcase/dp_tf_ge_conversion_pass_test.cc | 12 + .../tests/st/util/testcase/ge_plugin_test.cc | 1 - tf_adapter/tests/ut/CMakeLists.txt | 5 +- .../tests/ut/kernels/pbtxt/geop_dpop.pbtxt | 697 +++++++++++++++++ .../testcase/dataset/dataset_test_base.cc | 704 ++++++++++++++++++ .../testcase/dataset/function_testlib.cc | 649 ++++++++++++++++ .../dataset/host_queue_dats_set_ut.cc | 203 +++++ .../ut/kernels/testcase/geop_npu_test.cc | 18 +- .../kernels/testcase/infeed_outfeed_test.cc | 76 ++ .../testcase/dp_tf_ge_conversion_pass_test.cc | 12 + .../tests/ut/util/testcase/ge_plugin_test.cc | 1 - tf_adapter/util/acl_channel.cc | 228 ++++++ tf_adapter/util/acl_channel.h | 46 ++ tf_adapter/util/ge_plugin.cc | 46 -- tf_adapter/util/npu_attrs.cc | 24 +- tf_adapter/util/npu_attrs.h | 4 +- 40 files changed, 6607 insertions(+), 703 deletions(-) create mode 100644 tf_adapter/tests/depends/ascendcl/CMakeLists.txt create mode 100644 tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.cc create mode 100644 tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.h create mode 100644 tf_adapter/tests/st/kernels/pbtxt/geop_dpop.pbtxt create mode 100644 tf_adapter/tests/st/kernels/testcase/dataset/dataset_test_base.cc create mode 100644 tf_adapter/tests/st/kernels/testcase/dataset/function_testlib.cc create mode 100644 tf_adapter/tests/st/kernels/testcase/dataset/host_queue_dats_set_st.cc create mode 100644 tf_adapter/tests/st/kernels/testcase/infeed_outfeed_test.cc create mode 100644 tf_adapter/tests/ut/kernels/pbtxt/geop_dpop.pbtxt create mode 100644 tf_adapter/tests/ut/kernels/testcase/dataset/dataset_test_base.cc create mode 100644 tf_adapter/tests/ut/kernels/testcase/dataset/function_testlib.cc create mode 100644 tf_adapter/tests/ut/kernels/testcase/dataset/host_queue_dats_set_ut.cc create mode 100644 tf_adapter/tests/ut/kernels/testcase/infeed_outfeed_test.cc create mode 100644 tf_adapter/util/acl_channel.cc create mode 100644 tf_adapter/util/acl_channel.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 9a71e499c..932bf5777 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -190,6 +190,8 @@ else() -Wl,--no-as-needed c_sec ge_runner + ascendcl + acl_tdt_channel datatransfer fmk_parser fmk_onnx_parser @@ -197,7 +199,6 @@ else() tensorflow_framework 
pywrap_tensorflow_internal -Wl,--as-needed - -s ) # rename libtf_adapter.so to _tf_adapter.so @@ -231,7 +232,7 @@ else() && cp -r ${CMAKE_CURRENT_BINARY_DIR}/_tf_adapter.so ${CMAKE_CURRENT_BINARY_DIR}/wheel/npu_bridge && cp -r $ ${CMAKE_CURRENT_BINARY_DIR}/wheel/npu_bridge # && cp -r ${CMAKE_CURRENT_BINARY_DIR}/../../../../../soft_dp/libSoftDp.so ${CMAKE_CURRENT_BINARY_DIR}/wheel/npu_bridge - && cd ${CMAKE_CURRENT_BINARY_DIR}/wheel + && cd ${CMAKE_CURRENT_BINARY_DIR}/wheel && ${HI_PYTHON} setup.py bdist_wheel >/dev/null && cp -f dist/npu_bridge-1.15.0-py3-none-any.whl ${CMAKE_CURRENT_BINARY_DIR}/ && echo "package whl end" diff --git a/configure.py b/configure.py index 62f8b8be9..65b95c495 100755 --- a/configure.py +++ b/configure.py @@ -22,10 +22,11 @@ from __future__ import print_function import os import subprocess import sys + try: - from shutil import which + from shutil import which except ImportError: - from distutils.spawn import find_executable as which + from distutils.spawn import find_executable as which _COMPAT_TENSORFLOW_VERSION = "1.15.0" _COMPAT_PYTHON_VERSION = "Python 3.7" @@ -34,176 +35,183 @@ _ASCEND_INSTALL_PATH_ENV = "ASCEND_CUSTOM_PATH" _OPEN_UT = "OPEN_UT" - def run_command(cmd): - output = subprocess.check_output(cmd) - return output.decode('UTF-8').strip() + output = subprocess.check_output(cmd) + return output.decode('UTF-8').strip() def get_input(question): - try: try: - answer = raw_input(question) - except NameError: - answer = input(question) - except EOFError: - answer = '' - return answer + try: + answer = raw_input(question) + except NameError: + answer = input(question) + except EOFError: + answer = '' + return answer + def real_config_path(file): - return os.path.join("tools", file) + return os.path.join("tools", file) + def setup_python(): - """Get python install path.""" - default_python_bin_path = which('python3') - custom_python_bin_path = '' - ask_python_bin_path = '' - if default_python_bin_path: - custom_python_bin_path = default_python_bin_path - compile_args = run_command([ - custom_python_bin_path, '--version']) - if not _COMPAT_PYTHON_VERSION in compile_args: - print('Invalid default python version: %s, only support Python 3.7.' % compile_args) - ask_python_bin_path = ('Please specify the location of python with valid ' - 'tensorflow 1.15.0 site-packages installed. [Default ' - 'is %s]\n(Please enter the correct python path: ') % default_python_bin_path - custom_python_bin_path = '' - else: - ask_python_bin_path = ('Please specify the location of python with valid ' - 'tensorflow 1.15.0 site-packages installed. [Default ' - 'is %s]\n(Please enter the correct python path: ') % default_python_bin_path - - while True: - if not custom_python_bin_path: - python_bin_path = get_input(ask_python_bin_path) - else: - python_bin_path = custom_python_bin_path - custom_python_bin_path = None - if not python_bin_path: - python_bin_path = default_python_bin_path - pass - # Check if the path is valid - if os.path.isfile(python_bin_path) and os.access(python_bin_path, os.X_OK): - pass - elif not os.path.exists(python_bin_path): - print('Invalid python path: %s cannot be found.' 
% python_bin_path) - continue + """Get python install path.""" + default_python_bin_path = which('python3') + custom_python_bin_path = '' + ask_python_bin_path = '' + if default_python_bin_path: + custom_python_bin_path = default_python_bin_path + compile_args = run_command([ + custom_python_bin_path, '--version']) + if not _COMPAT_PYTHON_VERSION in compile_args: + print('Invalid default python version: %s, only support Python 3.7.' % compile_args) + ask_python_bin_path = ('Please specify the location of python with valid ' + 'tensorflow 1.15.0 site-packages installed. [Default ' + 'is %s]\n(Please enter the correct python path: ') % default_python_bin_path + custom_python_bin_path = '' else: - print('%s is not executable. Is it the python binary?' % python_bin_path) - continue - - try: - compile_args = run_command([ - python_bin_path, '-c', - 'import distutils.sysconfig; import tensorflow as tf; print(tf.__version__ + "|" + tf.sysconfig.get_lib(' - ') + "|" + "|".join(tf.sysconfig.get_compile_flags()) + "|" + distutils.sysconfig.get_python_inc())' - ]).split("|") - if not compile_args[0].startswith(_COMPAT_TENSORFLOW_VERSION): - print('Invalid python path: %s compat tensorflow version is %s' - ' got %s.' % (python_bin_path, _COMPAT_TENSORFLOW_VERSION, - compile_args[0])) - continue - except subprocess.CalledProcessError: - print('Invalid python path: %s tensorflow not installed.' % - python_bin_path) - continue - # Write tools/python_bin_path.sh - with open(real_config_path('PYTHON_BIN_PATH'), 'w') as f: - f.write(python_bin_path) - with open(real_config_path('COMPILE_FLAGS'), 'w') as f: - for flag in compile_args[2:-1]: - f.write("".join([flag , '\n'])) - f.write("".join(["-I" , compile_args[-1] , '\n'])) - print('tensorflow path: %s.' % compile_args[1]) - with open(real_config_path('LINK_FLAGS'), 'w') as f: - f.write(os.path.join(compile_args[1], 'libtensorflow_framework.so.1\n')) - f.write(os.path.join(compile_args[1], 'python', '_pywrap_tensorflow_internal.so\n')) - with open(real_config_path('UT_LINK_FLAGS'), 'w') as f: - f.write(os.path.join(compile_args[1], 'libtensorflow_framework.so.1\n')) - f.write(os.path.join(compile_args[1], 'python', '_pywrap_tensorflow_internal.so\n')) - with open(real_config_path('ST_LINK_FLAGS'), 'w') as f: - f.write(os.path.join(compile_args[1], 'libtensorflow_framework.so.1\n')) - f.write(os.path.join(compile_args[1], 'python', '_pywrap_tensorflow_internal.so\n')) - break + ask_python_bin_path = ('Please specify the location of python with valid ' + 'tensorflow 1.15.0 site-packages installed. [Default ' + 'is %s]\n(Please enter the correct python path: ') % default_python_bin_path + + while True: + if not custom_python_bin_path: + python_bin_path = get_input(ask_python_bin_path) + else: + python_bin_path = custom_python_bin_path + custom_python_bin_path = None + if not python_bin_path: + python_bin_path = default_python_bin_path + pass + # Check if the path is valid + if os.path.isfile(python_bin_path) and os.access(python_bin_path, os.X_OK): + pass + elif not os.path.exists(python_bin_path): + print('Invalid python path: %s cannot be found.' % python_bin_path) + continue + else: + print('%s is not executable. Is it the python binary?' 
% python_bin_path) + continue + + try: + compile_args = run_command([ + python_bin_path, '-c', + 'import distutils.sysconfig; import tensorflow as tf; print(tf.__version__ + "|" + tf.sysconfig.get_lib(' + ') + "|" + "|".join(tf.sysconfig.get_compile_flags()) + "|" + distutils.sysconfig.get_python_inc())' + ]).split("|") + if not compile_args[0].startswith(_COMPAT_TENSORFLOW_VERSION): + print('Invalid python path: %s compat tensorflow version is %s' + ' got %s.' % (python_bin_path, _COMPAT_TENSORFLOW_VERSION, + compile_args[0])) + continue + except subprocess.CalledProcessError: + print('Invalid python path: %s tensorflow not installed.' % + python_bin_path) + continue + # Write tools/python_bin_path.sh + with open(real_config_path('PYTHON_BIN_PATH'), 'w') as f: + f.write(python_bin_path) + with open(real_config_path('COMPILE_FLAGS'), 'w') as f: + for flag in compile_args[2:-1]: + f.write("".join([flag, '\n'])) + f.write("".join(["-I", compile_args[-1], '\n'])) + print('tensorflow path: %s.' % compile_args[1]) + with open(real_config_path('LINK_FLAGS'), 'w') as f: + f.write(os.path.join(compile_args[1], 'libtensorflow_framework.so.1\n')) + f.write(os.path.join(compile_args[1], 'python', '_pywrap_tensorflow_internal.so\n')) + with open(real_config_path('UT_LINK_FLAGS'), 'w') as f: + f.write(os.path.join(compile_args[1], 'libtensorflow_framework.so.1\n')) + f.write(os.path.join(compile_args[1], 'python', '_pywrap_tensorflow_internal.so\n')) + with open(real_config_path('ST_LINK_FLAGS'), 'w') as f: + f.write(os.path.join(compile_args[1], 'libtensorflow_framework.so.1\n')) + f.write(os.path.join(compile_args[1], 'python', '_pywrap_tensorflow_internal.so\n')) + break def setup_ascend(env_path): - """Get ascend install path.""" - default_ascend_path = os.path.realpath("/usr/local/Ascend") - custom_ascend_path = env_path - while True: - if not custom_ascend_path: - ascend_path = default_ascend_path - else: - ascend_path = custom_ascend_path - # Check if the path is valid - if os.path.isdir(ascend_path) and os.access(ascend_path, os.X_OK): - break - elif not os.path.exists(ascend_path): - print('Invalid ascend path: %s cannot be found.' % ascend_path) - print('ascend path: %s.' 
% ascend_path) - with open(real_config_path('LINK_FLAGS'), 'a') as f: - if 'ALL_IN_ONE_ENABLE' in os.environ: - f.write(os.path.join(ascend_path, "compiler", "lib64", "libge_runner.so\n")) - f.write(os.path.join(ascend_path, "compiler", "lib64", "libfmk_parser.so\n")) - f.write(os.path.join(ascend_path, "compiler", "lib64", "libfmk_onnx_parser.so\n")) - f.write(os.path.join(ascend_path, "compiler", "lib64", "libdatatransfer.so\n")) - f.write(os.path.join(ascend_path, "compiler", "lib64", "libindextransform.so\n")) - f.write(os.path.join(ascend_path, "compiler", "lib64", "libalog.so\n")) - else: - f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libge_runner.so\n")) - f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libfmk_parser.so\n")) - f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libfmk_onnx_parser.so\n")) - f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libdatatransfer.so\n")) - f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libindextransform.so\n")) - f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libalog.so\n")) + """Get ascend install path.""" + default_ascend_path = os.path.realpath("/usr/local/Ascend") + custom_ascend_path = env_path + while True: + if not custom_ascend_path: + ascend_path = default_ascend_path + else: + ascend_path = custom_ascend_path + # Check if the path is valid + if os.path.isdir(ascend_path) and os.access(ascend_path, os.X_OK): + break + elif not os.path.exists(ascend_path): + print('Invalid ascend path: %s cannot be found.' % ascend_path) + print('ascend path: %s.' % ascend_path) + with open(real_config_path('LINK_FLAGS'), 'a') as f: + if 'ALL_IN_ONE_ENABLE' in os.environ: + f.write(os.path.join(ascend_path, "compiler", "lib64", "libge_runner.so\n")) + f.write(os.path.join(ascend_path, "compiler", "lib64", "libfmk_parser.so\n")) + f.write(os.path.join(ascend_path, "compiler", "lib64", "libfmk_onnx_parser.so\n")) + f.write(os.path.join(ascend_path, "compiler", "lib64", "libdatatransfer.so\n")) + f.write(os.path.join(ascend_path, "compiler", "lib64", "libindextransform.so\n")) + f.write(os.path.join(ascend_path, "runtime", "lib64", "libascendcl.so\n")) + f.write(os.path.join(ascend_path, "compiler", "lib64", "libalog.so\n")) + f.write(os.path.join(ascend_path, "compiler", "lib64", "libacl_tdt_channel.so\n")) + else: + f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libge_runner.so\n")) + f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libfmk_parser.so\n")) + f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libfmk_onnx_parser.so\n")) + f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libdatatransfer.so\n")) + f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libindextransform.so\n")) + f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libalog.so\n")) + f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libascendcl.so\n")) + f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libacl_tdt_channel.so\n")) + def setup_swig(): - """Get swig install path.""" - default_swig_path = which('swig') - custom_swig_path = '' - ask_swig_path = '' - if default_swig_path: - custom_swig_path = default_swig_path - compile_args = run_command([ - custom_swig_path, '-version']) - if not _COMPAT_SWIG_VERSION in compile_args: - print('Invalid default python version: %s.' % compile_args) - ask_swig_path = ('Please specify the location of swig. 
[Default is ' - '%s]\n(Please enter the correct swig path: ') % default_swig_path - custom_swig_path = '' - else: - ask_swig_path = ('Please specify the location of swig. [Default is ' - '%s]\n(Please enter the correct swig path: ') % default_swig_path - - while True: - if not custom_swig_path: - swig_path = get_input(ask_swig_path) - else: - swig_path = custom_swig_path - custom_swig_path = None - if not swig_path: - swig_path = default_swig_path - # Check if the path is valid - if os.path.isfile(swig_path) and os.access(swig_path, os.X_OK): - break - elif not os.path.exists(swig_path): - print('Invalid swig path: %s cannot be found.' % swig_path) - continue + """Get swig install path.""" + default_swig_path = which('swig') + custom_swig_path = '' + ask_swig_path = '' + if default_swig_path: + custom_swig_path = default_swig_path + compile_args = run_command([ + custom_swig_path, '-version']) + if not _COMPAT_SWIG_VERSION in compile_args: + print('Invalid default python version: %s.' % compile_args) + ask_swig_path = ('Please specify the location of swig. [Default is ' + '%s]\n(Please enter the correct swig path: ') % default_swig_path + custom_swig_path = '' else: - print('%s is not executable. Is it the swig binary?' % swig_path) - continue + ask_swig_path = ('Please specify the location of swig. [Default is ' + '%s]\n(Please enter the correct swig path: ') % default_swig_path + + while True: + if not custom_swig_path: + swig_path = get_input(ask_swig_path) + else: + swig_path = custom_swig_path + custom_swig_path = None + if not swig_path: + swig_path = default_swig_path + # Check if the path is valid + if os.path.isfile(swig_path) and os.access(swig_path, os.X_OK): + break + elif not os.path.exists(swig_path): + print('Invalid swig path: %s cannot be found.' % swig_path) + continue + else: + print('%s is not executable. Is it the swig binary?' % swig_path) + continue + + with open(real_config_path('SWIG_BIN_PATH'), 'w') as f: + f.write(swig_path) - with open(real_config_path('SWIG_BIN_PATH'), 'w') as f: - f.write(swig_path) def main(): - env_snapshot = dict(os.environ) - setup_python() - if not env_snapshot.get(_OPEN_UT): - setup_ascend(env_snapshot.get(_ASCEND_INSTALL_PATH_ENV)) - setup_swig() + env_snapshot = dict(os.environ) + setup_python() + if not env_snapshot.get(_OPEN_UT): + setup_ascend(env_snapshot.get(_ASCEND_INSTALL_PATH_ENV)) + setup_swig() if __name__ == '__main__': - main() + main() diff --git a/tf_adapter/BUILD b/tf_adapter/BUILD index 05a146b5e..6d817a4f1 100644 --- a/tf_adapter/BUILD +++ b/tf_adapter/BUILD @@ -33,9 +33,9 @@ cc_binary( linkopts = [] + select({ # Public introduction of external dependencies on project. 
# External linked libraries, typically, located in out/${product}/host/obj/lib - ":cloud_build": ["-Lexternal/tf_adapter_cloud_host_libs/ -lc_sec -lge_runner -ltsdclient -ldatatransfer -lfmk_parser -lfmk_onnx_parser -lindextransform"], - ":mini_build": ["-Lexternal/tf_adapter_mini_host_libs/ -lc_sec -lge_runner -ltsdclient -ldatatransfer -lfmk_parser -lfmk_onnx_parser -lindextransform",], - ":onetrack_build": ["-Lexternal/tf_adapter_onetrack_host_libs/ -lc_sec -lge_runner -ltsdclient -ldatatransfer -lfmk_parser -lfmk_onnx_parser -lindextransform",], + ":cloud_build": ["-Lexternal/tf_adapter_cloud_host_libs/ -lc_sec -lge_runner -lascendcl -lfmk_parser -lfmk_onnx_parser -lindextransform"], + ":mini_build": ["-Lexternal/tf_adapter_mini_host_libs/ -lc_sec -lge_runner -lascendcl -lfmk_parser -lfmk_onnx_parser -lindextransform",], + ":onetrack_build": ["-Lexternal/tf_adapter_onetrack_host_libs/ -lc_sec -lge_runner -lascendcl -lfmk_parser -lfmk_onnx_parser -lindextransform",], "//conditions:default": [], }) + [ # "-z defs", diff --git a/tf_adapter/common/common.h b/tf_adapter/common/common.h index 35b0c6ed6..5cb93b007 100644 --- a/tf_adapter/common/common.h +++ b/tf_adapter/common/common.h @@ -31,11 +31,18 @@ if ((v) == nullptr) { \ ADP_LOG(ERROR) << #v " is nullptr."; \ LOG(ERROR) << #v " is nullptr."; \ - return errors::InvalidArgument(#v " is nullptr."); \ + return errors::Internal(#v " is nullptr."); \ } #define REQUIRES_STATUS_OK(s) \ if (!s.ok()) { return s; } #define ADAPTER_ENV_MAX_LENTH 1024 * 1024 + +#define ADAPTER_LOG_IF_ERROR(...) \ + do { \ + const ::tensorflow::Status _status = (__VA_ARGS__); \ + if (TF_PREDICT_FALSE(!_status.ok())) LOG(INFO) << _status.ToString(); \ + } while (0) + #endif // TENSORFLOW_COMMON_COMMON_H_ diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc index 687a1f5b5..fad63acdf 100644 --- a/tf_adapter/kernels/geop_npu.cc +++ b/tf_adapter/kernels/geop_npu.cc @@ -809,7 +809,7 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { return; } int64 run_end_time = InferShapeUtil::GetCurrentTimestap(); - ADP_LOG(INFO) << "[GEOP] RunGraphAsync callback, status:" << ge_status << ", kernel_name:" + ADP_LOG(EVENT) << "[GEOP] RunGraphAsync callback, status:" << ge_status << ", kernel_name:" << ctx->op_kernel().name() << "[ " << (run_end_time - run_start_time) << "us]"; done(); }; @@ -839,6 +839,41 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { return; } +void GeOp::ChangeChannelNameAttr(NodeDef &node_def) { + std::hash hash_channel_name; + const std::string pre_channel_name = node_def.attr().at("channel_name").s(); + uint32_t device_id = 0; + (void)GetEnvDeviceID(device_id); + AttrValue channel_name = AttrValue(); + channel_name.set_s(std::to_string(hash_channel_name(tf_session_ + pre_channel_name + + "_device_" + std::to_string(device_id)))); + (*node_def.mutable_attr())["channel_name"] = channel_name; + ADP_LOG(INFO) << "[GEOP] changed the value of channel_name attr of node:" << node_def.name() << " to " << channel_name.s(); +} + +void GeOp::ProcessDpOpFuncDef(Node *node) { + const std::string func_name = node->def().attr().at("function").func().name(); + const std::string org_func_def_lib = node->def().attr().at("func_def").s(); + FunctionDefLibrary func_def_lib; + func_def_lib.ParseFromString(org_func_def_lib); + for (auto &func_def : *func_def_lib.mutable_function()) { + if (func_def.signature().name() == func_name) { + for (auto &node_def : *func_def.mutable_node_def()) { + if (node_def.op() == 
"IteratorV2" || node_def.op() == "Iterator") { + NpuAttrs::SetDatasetExecuteInDeviceStatus(tf_session_ + node_def.name(), true); + } + if (node_def.op() == "DeviceQueueDataset") { ChangeChannelNameAttr(node_def); } + } + } + } + std::string new_func_def_lib; + func_def_lib.SerializeToString(&new_func_def_lib); + AttrValue func_def_value = AttrValue(); + func_def_value.set_s(new_func_def_lib); + NodeDef &node_def = const_cast(node->def()); + (*node_def.mutable_attr())["func_def"] = func_def_value; +} + void GeOp::AddNodeAttrs(Node *node, bool &is_initialize) { // Add dp custom kernel label if (node->type_string() == "IteratorGetNext") { @@ -876,6 +911,68 @@ void GeOp::AddNodeAttrs(Node *node, bool &is_initialize) { } } +void GeOp::HandleDpOpAndGetNextNodes(Graph &graph) { + std::vector remove_nodes; + for (Node *node : graph.nodes()) { + CHECK_NOT_NULL(node); + if (node->type_string() == "DPOP") { + ProcessDpOpFuncDef(node); + } else if (node->type_string() == "IteratorGetNext") { + Node *iterator_node = nullptr; + std::string iterator_name; + NodeDef &node_def = const_cast(node->def()); + for (auto in_edge : node->in_edges()) { + CHECK_NOT_NULL(in_edge); + CHECK_NOT_NULL(in_edge->src()); + if (in_edge->src()->type_string() == "IteratorV2" || in_edge->src()->type_string() == "Iterator") { + iterator_name = in_edge->src()->name(); + iterator_node = in_edge->src(); + } + } + if (dynamic_input_ == "1" && NpuAttrs::IsDatasetExecuteInDevice(tf_session_ + iterator_name)) { + node_def.set_op("DynamicGetNext"); + } + if (dynamic_input_ == "1" && dynamic_graph_execute_mode_ == "lazy_recompile") { + graph_options_["ge.exec.enableCopyOutputAddr"] = "1"; + } + if (!NpuAttrs::IsDatasetExecuteInDevice(tf_session_ + iterator_name)) { + uint32_t device_id = 0; + (void)GetEnvDeviceID(device_id); + Node *aicpu_getnext = nullptr; + std::string aicpu_getnext_name = "aicpu_getnext_" + node->name(); + auto getnext_attrs = node->def().attr(); + std::hash hash_channel_name; + std::string channel_name = std::to_string(hash_channel_name(tf_session_ + iterator_name + + "_device_" + std::to_string(device_id))); + std::string aicpu_getnext_type = dynamic_input_ == "1" ? "DynamicGetNext" : "GetNext"; + TF_CHECK_OK(NodeBuilder(aicpu_getnext_name, aicpu_getnext_type) + .Device(node->def().device()) + .Attr("channel_name", channel_name) + .Attr("output_types", getnext_attrs["output_types"]) + .Attr("output_shapes", getnext_attrs["output_shapes"]) + .Finalize(&graph, &aicpu_getnext)); + for (auto out_edge : node->out_edges()) { + CHECK_NOT_NULL(out_edge); + graph.AddEdge(aicpu_getnext, out_edge->src_output(), out_edge->dst(), out_edge->dst_input()); + } + const OpDef &getnext_op_def = aicpu_getnext->op_def(); + NodeDef &node_def = const_cast(aicpu_getnext->def()); + std::string op_def_s; + getnext_op_def.SerializeToString(&op_def_s); + tensorflow::AttrValue value; + value.set_s(op_def_s); + node_def.mutable_attr()->insert({"op_def", value}); + remove_nodes.push_back(node); + remove_nodes.push_back(iterator_node); + } + } + } + for (Node *node : remove_nodes) { + ADP_LOG(INFO) << "[GEOP] Remove node:" << node->name(); + graph.RemoveNode(node); + } +} + // Build GraphDef from FunctionDef. 
 Status GeOp::BuildGraphDef(FunctionLibraryDefinition &flib_def,
                            const std::vector<Tensor> &input_vec, GraphDef &graph_def,
                            bool &is_initialize) {
@@ -947,6 +1044,7 @@ Status GeOp::BuildGraphDef(FunctionLibraryDefinition &flib_def,
       return ret;
     }
   }
+  HandleDpOpAndGetNextNodes(graph);
   graph.ToGraphDef(&graph_def);
   char *enable_force_v2_control = getenv("ENABLE_FORCE_V2_CONTROL");
   if (enable_force_v2_control != nullptr && strcmp("1", enable_force_v2_control) == 0) {
@@ -1216,10 +1314,6 @@ Status GeOp::GenerateDesc(Node *&node) {
   REQUIRES_NOT_NULL(node);
   NodeDef &node_def = const_cast<NodeDef &>(node->def());
   const OpDef &op_def = node->op_def();
-  if (dynamic_input_ == "1" && node->type_string() == "IteratorGetNext") {
-    node_def.set_op("DynamicGetNext");
-    if (dynamic_graph_execute_mode_ == "lazy_recompile") { graph_options_["ge.exec.enableCopyOutputAddr"] = "1"; }
-  }
   std::string format = this->data_format_;  // format
   int32_t domi_format = domi::domiTensorFormat_t::DOMI_TENSOR_RESERVED;
diff --git a/tf_adapter/kernels/geop_npu.h b/tf_adapter/kernels/geop_npu.h
index f6c1d5d75..bbddc40c9 100644
--- a/tf_adapter/kernels/geop_npu.h
+++ b/tf_adapter/kernels/geop_npu.h
@@ -97,6 +97,12 @@ class GeOp : public AsyncOpKernel {
   void AnalyzeInputDesc(void *tensor_ptr, ge::Tensor &input, ge::DataType type,
                         std::vector &input_shapes);
 
+  void ProcessDpOpFuncDef(Node *node);
+
+  void HandleDpOpAndGetNextNodes(Graph &graph);
+
+  void ChangeChannelNameAttr(NodeDef &node_def);
+
  private:
   static const std::string INPUT_DESC;
   static const std::string OUTPUT_DESC;
diff --git a/tf_adapter/kernels/host_queue_dataset_op.cc b/tf_adapter/kernels/host_queue_dataset_op.cc
index 94c90bfd9..4dd84e1c3 100644
--- a/tf_adapter/kernels/host_queue_dataset_op.cc
+++ b/tf_adapter/kernels/host_queue_dataset_op.cc
@@ -1,19 +1,32 @@
-/*
- * Copyright (c) Huawei Technologies Co., Ltd. 2019-2020. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "acl/acl_tdt.h"
+#include "acl/acl.h"
+#include "acl/error_codes/rt_error_codes.h"
 #include "tdt/tdt_host_interface.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/stats_aggregator.h"
@@ -28,20 +41,31 @@
 #include "tf_adapter/common/adp_logger.h"
 #include "tf_adapter/common/common.h"
 #include "tf_adapter/kernels/data_item_deliver.h"
+#include "tf_adapter/kernels/threads_pool.h"
+#include "tf_adapter/util/acl_channel.h"
 #include "tf_adapter/util/npu_attrs.h"
 #include
 #include
+#include
 #include
-
 #include "unistd.h"
-
 namespace tensorflow {
 namespace data {
 namespace {
 using namespace std;
 using namespace tdt;
+inline bool VersionFlag() {
+  char *is_new = getenv("IS_NEW");
+  if (is_new != nullptr && strcmp("1", is_new) == 0) {
+    return true;
+  }
+  // For test: always treat as the new version until the driver code is ready.
+  return true;
+}
 
-const static uint32_t kMaxValue = 128;
+const static uint32_t kMaxValue = 128U;
+const static uint32_t kMaxShape = 2048U;
+const static uint32_t kUnknownShapeDepth = 3U;
 // total memory usage controlled below 2G
 const uint64_t kTotalBytes = 2147483648;
 std::atomic<bool> tdt_release(false);
@@ -58,96 +82,132 @@ class HostQueueDatasetOp : public DatasetOpKernel {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("_local_rank_id", &tmp_rank_id));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("_local_device_list", &tmp_device_list));
-    ADP_LOG(INFO) << "Get local rank id:" << tmp_rank_id << ", local device list:" << tmp_device_list;
+    ADP_LOG(INFO) << "Get local rank id:" << tmp_rank_id
+                  << ", local device list:" << tmp_device_list;
     // local rank id range 0-7
     local_rank_id_ = std::atoi(tmp_rank_id.c_str());
     for (size_t i = 0; i < tmp_device_list.size(); i += 2) {
       int device_id = std::atoi(&tmp_device_list[i]);
-      OP_REQUIRES(ctx, device_id >= 0, errors::InvalidArgument("device id should be >= 0."));
+      OP_REQUIRES(ctx, device_id >= 0,
+                  errors::InvalidArgument("device id should be >= 0."));
       local_device_list_.push_back(device_id);
     }
-
-    ADP_LOG(INFO) << "Start to init tdt.";
+    ADP_LOG(INFO) << "Start to init channel.";
     uint32_t device_id = 0;
     OP_REQUIRES_OK(ctx, GetEnvDeviceID(device_id));
     device_id_ = device_id;
-    int32_t tdt_status = TdtInFeedInit(device_id_);
-    OP_REQUIRES(ctx, tdt_status == 0,
-                errors::InvalidArgument("Tdt client init failed."));
-    ADP_LOG(INFO) << "Init tdt host success.";
+    if (!VersionFlag()) {
+      int32_t tdt_status = TdtInFeedInit(device_id_);
+      OP_REQUIRES(ctx, tdt_status == 0,
+                  errors::InvalidArgument("Tdt client init failed."));
+      ADP_LOG(INFO) << "Init tdt host success.";
+    }
    tdt_release = false;
  }
 
  ~HostQueueDatasetOp() {
    ADP_LOG(INFO) << "Start to destroy tdt.";
+    if (!VersionFlag()) {
-    if (!tdt_release) {
-      int32_t tdt_status = TdtInFeedDestroy(device_id_);
-      if (tdt_status != 0) {
-        ADP_LOG(ERROR) << "Tdt client close failed.";
-        LOG(ERROR) << "Tdt client close failed.";
-      } else {
-        ADP_LOG(INFO) << "Tdt client close success.";
-        tdt_release = true;
-        NpuAttrs::SetUseTdtStatus(device_id_, false);
-      }
+      if (!tdt_release) {
+        int32_t tdt_status = TdtInFeedDestroy(device_id_);
+        if (tdt_status != 0) {
+          ADP_LOG(ERROR) << "Tdt client close failed.";
+          LOG(ERROR) << "Tdt client close failed.";
+        } else {
+          ADP_LOG(INFO) << "Tdt client close success.";
+          tdt_release = true;
+          NpuAttrs::SetUseTdtStatus(device_id_, false);
+        }
+      }
     }
   }
 
   void MakeDataset(OpKernelContext *ctx, DatasetBase **output) override {
     std::vector<DatasetBase *> inputs;
+    tf_session_ = ctx->session_handle();
     CHECK_NOT_NULL(output);
     for (int i = 0; i < ctx->num_inputs(); ++i) {
       DatasetBase *input = nullptr;
       OP_REQUIRES_OK(ctx, GetDatasetFromVariantTensor(ctx->input(i), &input));
       inputs.push_back(input);
     }
-    *output = new (nothrow) Dataset(ctx, inputs, channel_name_, output_types_, output_shapes_,
-                                    local_rank_id_, local_device_list_, device_id_);
+    *output = new (nothrow) Dataset(
+        ctx, inputs, channel_name_, output_types_, output_shapes_,
+        local_rank_id_, local_device_list_, device_id_, tf_session_);
     OP_REQUIRES(ctx, *output != nullptr,
-                errors::InvalidArgument("Data process host queue dataset op: new dataset failed."));
+                errors::InvalidArgument(
+                    "Data process host queue dataset op: new dataset failed."));
   }
 
  private:
   class Dataset : public DatasetBase {
-   public:
-    Dataset(OpKernelContext *ctx, const std::vector<DatasetBase *> &inputs, const string &channelName,
-            const DataTypeVector &outputTypes, const vector<PartialTensorShape> &outputShapes,
-            const int &local_rank_id, const std::vector<uint32_t> &local_device_list,
-            const uint32_t &device_id)
-        : DatasetBase(DatasetContext(ctx)), inputs_(inputs), channel_name_(channelName), output_types_(outputTypes),
-          output_shapes_(outputShapes), local_rank_id_(local_rank_id), local_device_list_(local_device_list),
-          device_id_(device_id) {
-      for (const auto &input : inputs_) { input->Ref(); }
+   public:
+    Dataset(OpKernelContext *ctx, const std::vector<DatasetBase *> &inputs,
+            const string &channelName, const DataTypeVector &outputTypes,
+            const vector<PartialTensorShape> &outputShapes,
+            const int &local_rank_id,
+            const std::vector<uint32_t> &local_device_list,
+            const uint32_t &device_id,
+            const string &tf_session)
+        : DatasetBase(DatasetContext(ctx)),
+          inputs_(inputs),
+          channel_name_(channelName),
+          output_types_(outputTypes),
+          output_shapes_(outputShapes),
+          local_rank_id_(local_rank_id),
+          local_device_list_(local_device_list),
+          device_id_(device_id),
+          tf_session_(tf_session) {
+      for (const auto &input : inputs_) {
+        input->Ref();
+      }
    }
 
    ~Dataset() override {
-      for (const auto &input : inputs_) { input->Unref(); }
+      for (const auto &input : inputs_) {
+        input->Unref();
+      }
    }
 
-    unique_ptr<IteratorBase> MakeIteratorInternal(const string &prefix) const override {
-      return unique_ptr<IteratorBase>(new (nothrow) Iterator({this, strings::StrCat(prefix, "::HostQueue")}));
+    unique_ptr<IteratorBase> MakeIteratorInternal(
+        const string &prefix) const override {
+      return unique_ptr<IteratorBase>(new (nothrow) Iterator(
+          {this, strings::StrCat(prefix, "::HostQueue")}));
    }
 
-    const DataTypeVector &output_dtypes() const override { return output_types_; }
-    const vector<PartialTensorShape> &output_shapes() const override { return output_shapes_; }
+    const DataTypeVector &output_dtypes() const override {
+      return output_types_;
+    }
+    const vector<PartialTensorShape> &output_shapes() const override {
+      return output_shapes_;
+    }
 
-    string DebugString() const override { return "HostQueueDatasetOp::Dataset"; }
+    string DebugString() const override {
+      return "HostQueueDatasetOp::Dataset";
+    }
 
   protected:
-    Status AsGraphDefInternal(SerializationContext *ctx, DatasetGraphDefBuilder *b, Node **output) const override {
+    Status AsGraphDefInternal(SerializationContext *ctx,
+                              DatasetGraphDefBuilder *b,
+                              Node **output) const override {
      return Status::OK();
    }
 
   private:
    class Iterator : public DatasetIterator<Dataset> {
     public:
-     explicit Iterator(const Params &params) : DatasetIterator<Dataset>(params) {
-       data_deliver_ = new DataItemDeliver(
-           dataset()->local_rank_id_, dataset()->device_id_,
-           dataset()->local_device_list_, dataset()->channel_name_);
+     explicit Iterator(const Params &params)
+         : DatasetIterator<Dataset>(params) {
+       if (!VersionFlag()) {
+         data_deliver_ = new DataItemDeliver(
+             dataset()->local_rank_id_, dataset()->device_id_,
+             dataset()->local_device_list_, dataset()->channel_name_);
+       }
      }
 
      ~Iterator() override {
-       std::vector<tdt::DataItem> stop_message;
-       data_deliver_->ParallelSendDataVec(stop_message);
+       if (!VersionFlag()) {
+         std::vector<tdt::DataItem> stop_message;
+         data_deliver_->ParallelSendDataVec(stop_message);
+       }
        {
          mutex_lock lck(mu_);
          finish_send_ = true;
@@ -159,7 +219,39 @@ class HostQueueDatasetOp : public DatasetOpKernel {
          cancelled_ = true;
          cond_var_.notify_all();
        }
-       delete data_deliver_;
+       if (!VersionFlag()) {
+         delete data_deliver_;
+       }
+       if (VersionFlag()) {
+         if (!tdt_release && dataset()->local_rank_id_ == 0) {
+           std::vector<std::future<aclError>> acl_status;
+           int index_handle = 0;
+           for (auto device_id : dataset()->local_device_list_) {
+             acl_status.emplace_back(dataset()->pools_->Enqueue(
+                 acltdtDestroyChannel, acl_handles_[index_handle]));
+             index_handle++;
+           }
+           for (auto &result : acl_status) {
+             if (result.get() != ACL_ERROR_NONE) {
+               ADP_LOG(ERROR) << "Queue destroy failed.";
+             }
+           }
+           ADP_LOG(INFO) << "Queue destroy on all hosts success.";
+           tdt_release = true;
+         } else if (!tdt_release && dataset()->local_rank_id_ == -1) {
+           ADP_LOG(INFO) << "Start to destroy channel.";
+           aclError acl_status = acltdtDestroyChannel(acl_handle_);
+           if (acl_status != ACL_ERROR_NONE) {
+             ADP_LOG(ERROR) << "Queue destroy failed.";
+           } else {
+             ADP_LOG(INFO) << "Queue destroy on all hosts success.";
+             tdt_release = true;
+           }
+         } else {
+           ADP_LOG(INFO) << "Queue do not destroy in slave.";
+           tdt_release = true;
+         }
+       }
        ADP_LOG(INFO) << "HostQueueDatasetOp's iterator is released.";
      }
 
@@ -169,13 +261,16 @@ class HostQueueDatasetOp : public DatasetOpKernel {
        while (true) {
          {
            mutex_lock lck(mu_);
-           while (!cancelled_ && (buffer_.size() >= kMaxValue || total_bytes_ > kTotalBytes)) {
+           while (!cancelled_ && (buffer_.size() >= kMaxValue ||
+                                  total_bytes_ > kTotalBytes)) {
              RecordStop(ctx.get());
              cond_var_.wait(lck);
              RecordStart(ctx.get());
            }
-           if (cancelled_) { return; }
+           if (cancelled_) {
+             return;
+           }
          }
 
          mutex_lock parent_l(parent_mu_);
@@ -186,14 +281,17 @@ class HostQueueDatasetOp : public DatasetOpKernel {
            ADP_LOG(INFO) << "Do not need to GetNext.";
            return;
          } else {
-           buffer_element.status = input_impls_[1]->GetNext(ctx.get(), &args, &end_of_sequence);
+           buffer_element.status =
+               input_impls_[1]->GetNext(ctx.get(), &args, &end_of_sequence);
          }
-         if (!buffer_element.status.ok() || (buffer_element.status.ok() && end_of_sequence)) {
+         if (!buffer_element.status.ok() ||
+             (buffer_element.status.ok() && end_of_sequence)) {
            if (!buffer_element.status.ok()) {
-             ADP_LOG(ERROR) << "Failed to get tensor data, Status:" << buffer_element.status.ToString();
-             LOG(ERROR) << "Failed to get tensor data, Status:" << buffer_element.status.ToString();
+             ADP_LOG(ERROR) << "Failed to get tensor data, Status:"
+                            << buffer_element.status.ToString();
            } else {
-             ADP_LOG(INFO) << "Finish to get tensor data, Status:" << buffer_element.status.ToString()
+             ADP_LOG(INFO) << "Finish to get tensor data, Status:"
+                           << buffer_element.status.ToString()
end_of_sequence:" << end_of_sequence; } mutex_lock lck(mu_); @@ -240,6 +338,169 @@ class HostQueueDatasetOp : public DatasetOpKernel { } ADP_LOG(INFO) << "Slave SendDataThread exit."; } + + void PushDataFront(const vector &args) { + mutex_lock lck(mu_); + BufferElement buffer_element; + buffer_element.status = Status::OK(); + buffer_element.host_thread_finished = false; + buffer_element.value = args; + buffer_.push_front(buffer_element); + cond_var_.notify_all(); + } + + void QueryThreadStatus(const std::shared_ptr &ctx) { + mutex_lock lck(mu_); + while (!cancelled_ && !finish_send_ && buffer_.empty()) { + RecordStop(ctx.get()); + cond_var_.wait(lck); + RecordStart(ctx.get()); + } + if (cancelled_ || finish_send_) { + ADP_LOG(INFO) << "Host queue " << dataset()->channel_name_ + << " push data thread exit with cancelled: " + << cancelled_ << ", finished:" << finish_send_ + << " when wait data."; + return; + } + } + + void SendMultiEndData() { + std::vector> status; + for (auto handle : acl_handles_) { + for (auto &tensor : buffer_.front().value) { + } + status.emplace_back(dataset()->pools_->Enqueue( + SendTensorsByAcl, handle, + (buffer_.front().status.ok() ? ACL_TENSOR_DATA_END_OF_SEQUENCE + : ACL_TENSOR_DATA_ABNORMAL), + buffer_.front().value)); + } + for (auto &result : status) { + if (result.get() != ACL_RT_SUCCESS) { + ADP_LOG(INFO) << "End training as host push end data failed."; + } + } + } + // When calling SendTensorsByAcl and its'return is the queue is full or + // empty (actually no event, drv wants us to treat it as a no event, + // because they cannot return no evnet code , only empty). The above 2 + // cases , we need to push data into dequeue to sent again. (include multi + // and single ) + void SendMultiData(const vector &args) { + uint64_t total_bytes = 0; + for (auto &tensor : args) { + total_bytes += tensor.TotalBytes(); + } + std::map> status_map; + for (auto acl_handle : acl_handles_) { + status_map.insert({acl_handle, dataset()->pools_->Enqueue( + SendTensorsByAcl, acl_handle, ACL_TENSOR_DATA_TENSOR, args)}); + } + bool is_send_success = false; + while (!is_send_success) { + is_send_success = true; + for (auto iter = status_map.begin(); iter != status_map.end();) { + aclError tmp_code = iter->second.get(); + if (tmp_code == ACL_RT_SUCCESS) { + status_map.erase(iter++); + } else if (tmp_code == ACL_ERROR_RT_QUEUE_EMPTY || + tmp_code == ACL_ERROR_RT_QUEUE_FULL) { + sleep(1); + iter->second = dataset()->pools_->Enqueue( + SendTensorsByAcl, iter->first, ACL_TENSOR_DATA_TENSOR, args); + is_send_success = false; + iter++; + } else { + ADP_LOG(INFO) << "End training and host push data finished."; + mutex_lock lck(mu_); + cancelled_ = true; + cond_var_.notify_all(); + return; + } + } + } + { + mutex_lock lck(mu_); + total_bytes_ -= total_bytes; + cond_var_.notify_all(); + } + } + + void SendSingleData(const vector &args) { + uint64_t total_bytes = 0; + for (auto &tensor : args) { + total_bytes += tensor.TotalBytes(); + } + aclError status = + SendTensorsByAcl(acl_handle_, ACL_TENSOR_DATA_TENSOR, args); + if (status == ACL_ERROR_RT_QUEUE_EMPTY || + status == ACL_ERROR_RT_QUEUE_FULL) { + sleep(1); + PushDataFront(args); + return; + } + if (status != ACL_RT_SUCCESS) { + mutex_lock lck(mu_); + cancelled_ = true; + cond_var_.notify_all(); + return; + } + { + mutex_lock lck(mu_); + total_bytes_ -= total_bytes; + cond_var_.notify_all(); + } + } + void SendDataThreadForMbuf(const std::shared_ptr &ctx) { + ADP_LOG(INFO) << "Begin to send data."; + vector args; + while (true) { + { + 
+           mutex_lock lck(mu_);
+           while (!cancelled_ && !finish_send_ && buffer_.empty()) {
+             RecordStop(ctx.get());
+             cond_var_.wait(lck);
+             RecordStart(ctx.get());
+           }
+           if (cancelled_ || finish_send_) {
+             ADP_LOG(INFO)
+                 << "Host queue " << dataset()->channel_name_
+                 << " push data thread exit with cancelled: " << cancelled_
+                 << ", finished:" << finish_send_ << " when wait data.";
+             return;
+           }
+           if (buffer_.front().host_thread_finished) {
+             if (dataset()->local_rank_id_ == 0) {
+               SendMultiEndData();
+             } else {
+               aclError status =
+                   SendTensorsByAcl(acl_handle_,
+                                    (buffer_.front().status.ok()
+                                         ? ACL_TENSOR_DATA_END_OF_SEQUENCE
+                                         : ACL_TENSOR_DATA_ABNORMAL),
+                                    {});
+               if (status != ACL_RT_SUCCESS) {
+                 ADP_LOG(INFO)
+                     << "End training as host push end data failed." << status;
+               }
+             }
+             cancelled_ = true;
+             cond_var_.notify_all();
+             return;
+           } else {
+             args = buffer_.front().value;
+             buffer_.pop_front();
+           }
+         }
+         if (dataset()->local_rank_id_ == 0) {
+           SendMultiData(args);
+         } else {
+           SendSingleData(args);
+         }
+       }
+     }
+
      void SendDataThread(const std::shared_ptr<IteratorContext> &ctx) {
        vector<Tensor> args;
        while (true) {
@@ -344,62 +605,169 @@ class HostQueueDatasetOp : public DatasetOpKernel {
        }
      }
 
-     Status EnsureReceiveThreadStarted(IteratorContext *ctx) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+     Status EnsureReceiveThreadStarted(IteratorContext *ctx)
+         EXCLUSIVE_LOCKS_REQUIRED(mu_) {
        // ctx is not nullptr
        if (!receive_thread_) {
-         std::shared_ptr<IteratorContext> new_ctx(new (std::nothrow) IteratorContext(*ctx));
+         std::shared_ptr<IteratorContext> new_ctx(new (std::nothrow)
+                                                      IteratorContext(*ctx));
          REQUIRES_NOT_NULL(new_ctx);
          REQUIRES_NOT_NULL(ctx->env());
-         receive_thread_.reset(
-             ctx->env()->StartThread({}, "receive_thread", [this, new_ctx]() { GetDataThread(new_ctx); }));
+
+         receive_thread_.reset(ctx->env()->StartThread(
+             {}, "receive_thread",
+             [this, new_ctx]() { GetDataThread(new_ctx); }));
        }
        return Status::OK();
      }
 
-     Status EnsureSendThreadStarted(IteratorContext *ctx) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+     Status EnsureSendThreadStarted(IteratorContext *ctx)
+         EXCLUSIVE_LOCKS_REQUIRED(mu_) {
        if (!send_thread_) {
-         std::shared_ptr<IteratorContext> new_ctx(new (std::nothrow) IteratorContext(*ctx));
+         std::shared_ptr<IteratorContext> new_ctx(new (std::nothrow)
+                                                      IteratorContext(*ctx));
          REQUIRES_NOT_NULL(new_ctx);
          REQUIRES_NOT_NULL(ctx->env());
-         if (dataset()->local_rank_id_ <= 0) {
-           send_thread_.reset(ctx->env()->StartThread(
-               {}, "send_thread",
-               [this, new_ctx]() { SendDataThread(new_ctx); }));
+         if (!VersionFlag()) {
+           if (dataset()->local_rank_id_ <= 0) {
+             send_thread_.reset(ctx->env()->StartThread(
+                 {}, "send_thread",
+                 [this, new_ctx]() { SendDataThread(new_ctx); }));
+           } else {
+             send_thread_.reset(ctx->env()->StartThread(
+                 {}, "send_thread", [this]() { SendDataThread(); }));
+           }
          } else {
            send_thread_.reset(ctx->env()->StartThread(
-               {}, "send_thread", [this]() { SendDataThread(); }));
+               {}, "send_thread",
+               [this, new_ctx]() { SendDataThreadForMbuf(new_ctx); }));
+         }
+       }
+       return Status::OK();
+     }
+     bool IsUnknownShape() {
+       for (auto &out_shape : dataset()->output_shapes_) {
+         auto tmp_shape = const_cast<PartialTensorShape &>(out_shape);
+         if (tmp_shape.unknown_rank()) {
+           return true;
+         }
+       }
+       return false;
+     }
+
+     Status CreateMultiChannel(std::hash<std::string> &hash_channel_name,
+                               std::string &channel_name, uint32_t shape_depth) {
+       ADP_LOG(INFO) << "Start to init all host thread.";
+       dataset()->pools_ = std::make_shared<ThreadPool>();
+       dataset()->pools_->InitThreadPool(dataset()->local_device_list_.size());
+       vector<std::future<acltdtChannelHandle *>> acl_handles;
+       for (auto device_id : dataset()->local_device_list_) {
+         aclError status =
+             aclrtSetDevice(static_cast<int32_t>(device_id));
+         if (status != ACL_SUCCESS) {
+           return errors::InvalidArgument("SetDevice fail");
+         }
+         channel_name = std::to_string(hash_channel_name(
+             dataset()->tf_session_ + dataset()->channel_name_ + "_device_" +
+             std::to_string(device_id)));
+         acl_handles.emplace_back(dataset()->pools_->Enqueue(
+             acltdtCreateChannelWithCapacity, device_id, channel_name.c_str(),
+             shape_depth));
+       }
+       for (auto &&handle : acl_handles) {
+         handle.wait();
+         auto tmp_handle = handle.get();
+         if (tmp_handle == nullptr) {
+           ADP_LOG(ERROR) << "Call acltdtCreateChannelWithCapacity failed";
+           return errors::InvalidArgument(
+               "Call acltdtCreateChannelWithCapacity failed");
+         }
+         acl_handles_.push_back(tmp_handle);
+       }
+       ADP_LOG(INFO) << "Init all host thread success.";
+       return Status::OK();
+     }
+
+     Status CreateChannel() {
+       int32_t out_shape_size = dataset()->output_shapes_.size();
+       if (out_shape_size == 0) {
+         ADP_LOG(ERROR)
+             << "out_shape_size is equal to zero, can not set channel depth";
+         return errors::InvalidArgument(
+             "out_shape_size is equal to zero, can not set channel depth");
+       }
+       uint32_t shape_depth =
+           IsUnknownShape() ? kUnknownShapeDepth : kMaxShape / out_shape_size;
+       shape_depth = std::min(shape_depth, 128U);
+       std::hash<std::string> hash_channel_name;
+       std::string channel_name;
+       if (dataset()->local_rank_id_ == 0) {
+         return CreateMultiChannel(hash_channel_name, channel_name, shape_depth);
+       } else if (dataset()->local_rank_id_ == -1) {
+         ADP_LOG(INFO) << "Start to init one host thread.";
+         aclError status_single =
+             aclrtSetDevice(static_cast<int32_t>(dataset()->device_id_));
+         if (status_single != ACL_SUCCESS) {
+           return errors::InvalidArgument("SetDevice fail");
+         }
+         channel_name = std::to_string(hash_channel_name(
+             dataset()->tf_session_ + dataset()->channel_name_ + "_device_" +
+             std::to_string(dataset()->device_id_)));
+         acl_handle_ = acltdtCreateChannelWithCapacity(
+             dataset()->device_id_, channel_name.c_str(), shape_depth);
+         if (acl_handle_ == nullptr) {
+           ADP_LOG(ERROR) << "Call acltdtCreateChannelWithCapacity failed";
+           return errors::InvalidArgument(
+               "Call acltdtCreateChannelWithCapacity failed");
+         }
+         ADP_LOG(INFO) << "Init tdt one thread success.";
+         return Status::OK();
+       } else {
+         ADP_LOG(INFO) << "Queue do not init in slave.";
+         return Status::OK();
+       }
+     }
+
      Status Initialize(IteratorContext *ctx) override {
-       ADP_LOG(INFO) << "Start to check channel name. channelName: " << dataset()->channel_name_;
        if (dataset()->channel_name_.empty()) {
          return errors::InvalidArgument("HostQueueDataset channel_name is null.");
        }
-
+       Status temp_status = Status::OK();
+       if (VersionFlag()) {
+         temp_status = CreateChannel();
+         if (temp_status != Status::OK()) {
+           return errors::InvalidArgument("Call CreateChannel failed");
+         }
+       }
        ADP_LOG(INFO) << "Start to check receive and send thread.";
        try {
          input_impls_.resize(dataset()->inputs_.size());
        } catch (...) {
          return errors::InvalidArgument("HostQueueDataset resize failed.");
        }
        for (size_t i = 0; i < input_impls_.size(); ++i) {
-         TF_RETURN_IF_ERROR(
-             dataset()->inputs_[i]->MakeIterator(ctx, strings::StrCat(prefix(), "[", i, "]"), &input_impls_[i]));
-       }
-       if (dataset()->local_rank_id_ == 0) {
-         TF_RETURN_IF_ERROR(data_deliver_->ParallelInitSocketClient());
-       } else if (dataset()->local_rank_id_ > 0) {
-         TF_RETURN_IF_ERROR(data_deliver_->InitSocketServer());
+         TF_RETURN_IF_ERROR(dataset()->inputs_[i]->MakeIterator(
+             ctx, strings::StrCat(prefix(), "[", i, "]"), &input_impls_[i]));
        }
+       if (!VersionFlag()) {
+         if (dataset()->local_rank_id_ == 0) {
+           TF_RETURN_IF_ERROR(data_deliver_->ParallelInitSocketClient());
+         } else if (dataset()->local_rank_id_ > 0) {
+           TF_RETURN_IF_ERROR(data_deliver_->InitSocketServer());
+         }
+       }
        {
          mutex_lock lck(mu_);
-         TF_RETURN_IF_ERROR(EnsureReceiveThreadStarted(ctx));
-         TF_RETURN_IF_ERROR(EnsureSendThreadStarted(ctx));
+         if (VersionFlag()) {
+           if (dataset()->local_rank_id_ <= 0) {
+             TF_RETURN_IF_ERROR(EnsureReceiveThreadStarted(ctx));
+             TF_RETURN_IF_ERROR(EnsureSendThreadStarted(ctx));
+           } else {
+             ADP_LOG(INFO) << "HostQueue is not chief, not send data.";
+             return Status::OK();
+           }
+         } else {
+           TF_RETURN_IF_ERROR(EnsureReceiveThreadStarted(ctx));
+           TF_RETURN_IF_ERROR(EnsureSendThreadStarted(ctx));
+         }
        }
-       ADP_LOG(INFO) << "HostQueue success to Initialize. channelName: " << dataset()->channel_name_;
+       ADP_LOG(INFO) << "HostQueue success to Initialize. channelName: "
+                     << dataset()->channel_name_;
        return Status::OK();
      }
 
@@ -435,24 +803,31 @@ class HostQueueDatasetOp : public DatasetOpKernel {
       bool finish_send_ GUARDED_BY(mu_) = false;
       bool host_thread_finished_ GUARDED_BY(mu_) = false;
       uint64_t total_bytes_ GUARDED_BY(mu_) = 0;
-      // The following two thread must be the first member to be destructed, because tensorflow::Thread does not provide
-      // an explicit join function. If the thread is destructed after other members, such as buffer_, when the thread
-      // joins, it will access the already destructed buffer_ , Resulting in an unknown error.
+      // The following two threads must be the first members to be destructed,
+      // because tensorflow::Thread does not provide an explicit join
+      // function. If a thread is destructed after other members, such as
+      // buffer_, it will access the already destructed buffer_ when it
+      // joins, resulting in an unknown error.
       std::unique_ptr<Thread> receive_thread_ GUARDED_BY(mu_);
       std::unique_ptr<Thread> send_thread_ GUARDED_BY(mu_);
       DataItemDeliver *data_deliver_;
+      acltdtChannelHandle *acl_handle_;
+      std::vector<acltdtChannelHandle *> acl_handles_;
    };
    const std::vector<DatasetBase *> inputs_;
    std::string channel_name_;
+    std::string tf_session_;
    const DataTypeVector output_types_;
    const vector<PartialTensorShape> output_shapes_;
    int local_rank_id_;
+    mutable std::shared_ptr<ThreadPool> pools_;
    std::vector<uint32_t> local_device_list_;
    uint32_t device_id_;
  };
  std::string channel_name_;
  DataTypeVector output_types_;
  vector<PartialTensorShape> output_shapes_;
+  std::string tf_session_;
  int local_rank_id_;
  std::vector<uint32_t> local_device_list_;
  uint32_t device_id_;
diff --git a/tf_adapter/kernels/infeed_outfeed_ops.cc b/tf_adapter/kernels/infeed_outfeed_ops.cc
index d355d66e1..97fe25dec 100644
--- a/tf_adapter/kernels/infeed_outfeed_ops.cc
+++ b/tf_adapter/kernels/infeed_outfeed_ops.cc
@@ -14,74 +14,16 @@
  * limitations under the License.
  */
 
-#include "securec.h"
-#include "tdt/tdt_host_interface.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tf_adapter/common/adp_logger.h"
 #include "tf_adapter/common/common.h"
+#include "tf_adapter/util/acl_channel.h"
+#include "tf_adapter/util/npu_attrs.h"
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include
 
 namespace tensorflow {
 namespace {
-Status GetTensorShape(const string &tensor_shape, TensorShape &shape) {
-  // change "[32,224,224,3]" => "32,224,224,3"
-  // tensor_shape.size() - 2 is the second to last
-  string str = tensor_shape.substr(1, tensor_shape.size() - 2);
-  string::size_type index = 0;
-  if (!str.empty()) {
-    while ((index = str.find(' ', index)) != string::npos) { str.erase(index, 1); }
-  }
-  string split = ",";
-  string::size_type pos2 = str.find(split);
-  string::size_type pos1 = 0;
-  while (pos2 != string::npos) {
-    try {
-      shape.AddDim(std::stoi(str.substr(pos1, pos2 - pos1)));
-    } catch (...) { return errors::InvalidArgument("Invalid shape string: ", tensor_shape); }
-    // string::size_type can store the length of any string object
-    pos1 = pos2 + split.size();
-    pos2 = str.find(split, pos1);
-  }
-  if (pos1 != str.length()) {
-    try {
-      shape.AddDim(std::stoi(str.substr(pos1)));
-    } catch (...) { return errors::InvalidArgument("Invalid shape string: ", tensor_shape); }
-  }
-  return Status::OK();
-}
-
-Status ConvertDataItem2Tensor(const std::vector<tdt::DataItem> &items, std::vector<Tensor> &tensors) {
-  for (auto &item : items) {
-    if (item.dataType_ == tdt::TDT_END_OF_SEQUENCE) {
-      ADP_LOG(INFO) << "End of processing.";
-      return Status::OK();
-    }
-    DataType type = DT_FLOAT;
-    DataTypeFromString(item.tensorType_, &type);
-    if (type == DT_STRING) {
-      Tensor result_tensor(tensorflow::DT_STRING, TensorShape({}));
-      std::shared_ptr<std::string> data_str_ptr = std::static_pointer_cast<std::string>(item.dataPtr_);
-      result_tensor.scalar<string>()() =
-          std::move(string(reinterpret_cast<const char *>(data_str_ptr->c_str()), item.dataLen_));
-      tensors.emplace_back(std::move(result_tensor));
-    } else if (DataTypeCanUseMemcpy(type)) {
-      TensorShape tensorShape;
-      Status s = GetTensorShape(item.tensorShape_, tensorShape);
-      if (!s.ok()) { return s; }
-      Tensor result_tensor = Tensor(type, tensorShape);
-      std::shared_ptr<std::string> data_str_ptr = std::static_pointer_cast<std::string>(item.dataPtr_);
-      errno_t ret = memcpy_s(const_cast<char *>(result_tensor.tensor_data().data()), result_tensor.tensor_data().size(),
-                             data_str_ptr->c_str(), item.dataLen_);
-      if (ret != EOK) { return errors::Unknown("memcpy failed"); }
-      tensors.emplace_back(std::move(result_tensor));
-    } else {
-      return errors::InvalidArgument("Not support this type: ", type);
-    }
-  }
-  return Status::OK();
-}
-
 class OutfeedEnqueueOp : public OpKernel {
  public:
   explicit OutfeedEnqueueOp(OpKernelConstruction *ctx) : OpKernel(ctx) {
@@ -99,56 +41,96 @@ class OutfeedEnqueueOp : public OpKernel {
 class OutfeedDequeueOp : public OpKernel {
  public:
   explicit OutfeedDequeueOp(OpKernelConstruction *ctx) : OpKernel(ctx) {
-    // ctx is not nullptr
     OP_REQUIRES_OK(ctx, ctx->GetAttr("channel_name", &channel_name_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_));
-    OP_REQUIRES(ctx, tdt::TdtHostPreparePopData() == 0, errors::Internal("Prepare Pop Data failed"));
-    ADP_LOG(INFO) << "OutfeedDequeueOp built";
+    // Create log summary acl channel
+    ADP_LOG(INFO) << "Start create acl channel for out-feed dequeue op " << channel_name_;
+    uint32_t device_id = 0;
+    OP_REQUIRES_OK(ctx, GetEnvDeviceID(device_id));
+    const static std::string kReceivePrefix = "TF_RECEIVE_";
+    char *is_new = getenv("IS_NEW");
+    if (is_new != nullptr && strcmp("1", is_new) == 0) {
+      const size_t kDefaultCapacity = 3;
+      acl_handle_ = acltdtCreateChannelWithCapacity(device_id, (kReceivePrefix + channel_name_).c_str(),
+                                                    kDefaultCapacity);
+    } else {
+      acl_handle_ = acltdtCreateChannel(device_id, (kReceivePrefix + channel_name_).c_str());
+    }
+    OP_REQUIRES(ctx, acl_handle_ != nullptr, errors::Internal("Acl create receive channel failed."));
+    ADP_LOG(INFO) << "Succeed create acl channel for out-feed dequeue op " << channel_name_;
   }
-  ~OutfeedDequeueOp() override { ADP_LOG(INFO) << "OutfeedDequeueOp has been destructed"; }
 
-  void Compute(OpKernelContext *ctx) override {
-    CHECK_NOT_NULL(ctx);
-    std::vector<tdt::DataItem> bundle;
-    OP_REQUIRES(ctx, tdt::TdtHostPopData(channel_name_, bundle) == 0,
-                errors::Internal("TdtHostPopData get data failed"));
-    std::vector<Tensor> out_tensors;
-    OP_REQUIRES_OK(ctx, ConvertDataItem2Tensor(bundle, out_tensors));
-    OP_REQUIRES(ctx, !out_tensors.empty(), errors::OutOfRange("Outfeed tensors reach the end"));
-    OP_REQUIRES(
-        ctx, out_tensors.size() == output_shapes_.size(),
-        errors::Internal("Outfeed tensors num mismatch", out_tensors.size(), "vs. expect", output_shapes_.size()));
-    for (int i = 0; i < ctx->num_outputs(); ++i) { ctx->set_output(i, out_tensors[i]); }
+  ~OutfeedDequeueOp() override {
+    ADP_LOG(INFO) << "Start destroy acl channel for out-feed dequeue op " << channel_name_;
+    if (acl_handle_ != nullptr) {
+      if (acltdtDestroyChannel(acl_handle_) != ACL_ERROR_NONE) {
+        ADP_LOG(ERROR) << "Failed destroy acl channel for out-feed dequeue op " << channel_name_;
+      } else {
+        ADP_LOG(INFO) << "Succeed destroy acl channel for out-feed dequeue op " << channel_name_;
+      }
+    }
   }
 
-  bool IsExpensive() override { return false; }
+  void Compute(OpKernelContext *ctx) override {
+    ADP_LOG(INFO) << "Start compute out-feed dequeue op " << channel_name_;
+    CancellationManager *cm = ctx->cancellation_manager();
+    CancellationToken token = cm->get_cancellation_token();
+    bool already_cancelled = !cm->RegisterCallback(token, [this]() {
+      ADP_LOG(INFO) << "Start run cancellation callback of out-feed dequeue op " << channel_name_;
+      char *is_new = getenv("IS_NEW");
+      if (is_new != nullptr && strcmp("1", is_new) == 0) {
+        if (acltdtDestroyChannel(acl_handle_) != ACL_ERROR_NONE) {
+          ADP_LOG(ERROR) << "Failed destroy acl data channel for host queue " << channel_name_;
+        } else {
+          ADP_LOG(INFO) << "Succeed destroy acl data channel for host queue " << channel_name_;
+          acl_handle_ = nullptr;
+        }
+      } else {
+        if (acltdtStopChannel(acl_handle_) != ACL_ERROR_NONE) {
+          ADP_LOG(ERROR) << "Failed stop acl data channel for host queue " << channel_name_;
+        } else {
+          ADP_LOG(INFO) << "Succeed stop acl data channel for host queue " << channel_name_;
+        }
+      }
+    });
 
- private:
-  DataTypeVector output_types_;
-  std::vector<PartialTensorShape> output_shapes_;
-  std::string channel_name_;
-};
+    if (TF_PREDICT_FALSE(already_cancelled)) {
+      ctx->SetStatus(errors::Internal("out-feed op ", channel_name_, " called after cancelled."));
+      return;
+    }
 
-class StopOutfeedDequeueOp : public OpKernel {
- public:
-  explicit StopOutfeedDequeueOp(OpKernelConstruction *ctx) : OpKernel(ctx) {
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("channel_name", &channel_name_));
-    ADP_LOG(INFO) << "StopOutfeedDequeueOp built";
-  }
-  ~StopOutfeedDequeueOp() override { ADP_LOG(INFO) << "StopOutfeedDequeueOp has been destructed"; }
-  void Compute(OpKernelContext *ctx) override {
-    ADP_LOG(INFO) << "StopOutfeedDequeueOp running";
"StopOutfeedDequeueOp running"; - OP_REQUIRES(ctx, tdt::TdtHostStop(channel_name_) == 0, errors::Internal("TdtHostStop failed")); + std::vector tensors; + ADP_LOG(INFO) << "Start recv tensors by acl out-feed dequeue op " << channel_name_; + auto status = RecvTensorByAcl(acl_handle_, tensors); + ADP_LOG(INFO) << "Start de-register callback out-feed dequeue op " << channel_name_; + (void) cm->DeregisterCallback(token); + OP_REQUIRES_OK(ctx, status); + OP_REQUIRES(ctx, !tensors.empty(), errors::OutOfRange("out-feed op ", channel_name_, " received end-of-sequence")); + OP_REQUIRES(ctx, tensors.size() == output_shapes_.size(), + errors::Internal("out-feed op ", channel_name_, " received ", tensors.size(), " tensors but expect ", + output_shapes_.size(), " tensors")); + ADP_LOG(INFO) << "out-feed op output num:" << ctx->num_outputs(); + for (int i = 0; i < ctx->num_outputs(); ++i) { + ADP_LOG(INFO) << "output tensor " << i << ", " << tensors[i].DebugString(); + ctx->set_output(i, tensors[i]); + } } bool IsExpensive() override { return false; } private: + DataTypeVector output_types_; + std::vector output_shapes_; std::string channel_name_; + acltdtChannelHandle *acl_handle_ = nullptr; }; -REGISTER_KERNEL_BUILDER(Name("OutfeedDequeueOp").Device(DEVICE_CPU), OutfeedDequeueOp); - -REGISTER_KERNEL_BUILDER(Name("OutfeedEnqueueOp").Device(DEVICE_CPU), OutfeedEnqueueOp); +REGISTER_KERNEL_BUILDER(Name("OutfeedDequeueOp") +. +Device(DEVICE_CPU), OutfeedDequeueOp +); -REGISTER_KERNEL_BUILDER(Name("StopOutfeedDequeueOp").Device(DEVICE_CPU), StopOutfeedDequeueOp); +REGISTER_KERNEL_BUILDER(Name("OutfeedEnqueueOp") +. +Device(DEVICE_CPU), OutfeedEnqueueOp +); } // namespace } // namespace tensorflow diff --git a/tf_adapter/ops/npu_dataset_ops.cc b/tf_adapter/ops/npu_dataset_ops.cc index 50177a8b8..082445d4b 100644 --- a/tf_adapter/ops/npu_dataset_ops.cc +++ b/tf_adapter/ops/npu_dataset_ops.cc @@ -65,5 +65,22 @@ REGISTER_OP("AdpGetNext") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") .Attr("queue_name: string") - .SetShapeFn(shape_inference::ScalarShape); + .SetIsStateful() + .SetShapeFn(tensorflow::shape_inference::ScalarShape); + +REGISTER_OP("GetNext") + .Output("components: output_types") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .Attr("channel_name: string") + .SetIsStateful() + .SetShapeFn(tensorflow::shape_inference::ScalarShape); + +REGISTER_OP("DynamicGetNext") + .Output("components: output_types") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .Attr("channel_name: string") + .SetIsStateful() + .SetShapeFn(tensorflow::shape_inference::ScalarShape); } // namespace tensorflow diff --git a/tf_adapter/ops/npu_ops.cc b/tf_adapter/ops/npu_ops.cc index 1da33c991..8a7100055 100644 --- a/tf_adapter/ops/npu_ops.cc +++ b/tf_adapter/ops/npu_ops.cc @@ -131,8 +131,6 @@ REGISTER_OP("OutfeedDequeueOp") .SetIsStateful() .SetShapeFn(OutfeedDequeueShapeFn); -REGISTER_OP("StopOutfeedDequeueOp").Attr("channel_name: string").SetIsStateful().SetShapeFn(shape_inference::NoOutputs); - REGISTER_OP("DropOutDoMask") .Input("x: T") .Input("mask: uint8") diff --git a/tf_adapter/optimizers/dp_tf_ge_conversion_pass.cc b/tf_adapter/optimizers/dp_tf_ge_conversion_pass.cc index aff5e2495..b160e02bb 100644 --- a/tf_adapter/optimizers/dp_tf_ge_conversion_pass.cc +++ b/tf_adapter/optimizers/dp_tf_ge_conversion_pass.cc @@ -146,6 +146,7 @@ class DpTfToGEConversionPassImpl { inline bool IsDeviceQueueDatasetNode() const; inline 
bool IsIteratorNode(const Node *n) const; inline bool IsSkipDataset(const Node *n) const; + inline bool IsGeSupportDataset(const Node *n) const; inline std::string GetEdgeName(const Edge *e) const; inline std::string GetRandomName(const std::string &prefix) const; std::string GetRandomName() const; @@ -161,6 +162,20 @@ class DpTfToGEConversionPassImpl { bool RemoveIsolatedNode(Graph *g, std::unordered_set visited); Status RemoveNotSupportDataset(Graph *g, const std::string &device_queue_dataset, const std::string &make_iterator) const; + Status AddDataTransDatasets(Node *topo_end, std::string &host_channel_name, std::string &device_channel_name, + std::map &all_options); + void GetTopoEndsNodes(std::vector &topo_ends); + Status BuildDeviceDpGraph(Node *topo_end, Graph *device_graph, const std::string device_channel_name); + Status AddAttr2DeviceNodes(Node *topo_end, Graph *device_graph); + Status AddGeopNodeFunctionDef(FunctionDefLibrary &fdeflib, const std::string &fn_geop, const std::string &fn_dpop, + const string &default_device); + Status AddGeopDatasetFunctionDef(FunctionDefLibrary &fdeflib, const std::string &fn_geop, + const std::string &fn_geop_dataset, const string &default_device, std::map all_options); + Status BuildGeOpDatasetFunction(FunctionDefLibrary &fdeflib, Graph *device_graph, + const std::string &fn_geop_dataset, const string &default_device, std::map all_options); + Status AddGeOpDatasetFunctionLibrary(FunctionLibraryDefinition *flib, Node *topo_end, const std::string &device_channel_name, + const std::string &fn_geop_dataset, std::map &all_options); + Status AddGeOpDatasetAndDpGroupDataset(Node *topo_end, const std::string &fn_geop_dataset, const std::string &host_channel_name, const std::string &device_channel_name); // graph num int graph_run_num_; @@ -197,6 +212,10 @@ inline bool DpTfToGEConversionPassImpl::IsSkipDataset(const Node *n) const { return std::find(SKIP_DATASET_LIST.begin(), SKIP_DATASET_LIST.end(), n->type_string()) != SKIP_DATASET_LIST.end(); } +inline bool DpTfToGEConversionPassImpl::IsGeSupportDataset(const Node *n) const { + return std::find(GE_OPS_WHITELIST.begin(), GE_OPS_WHITELIST.end(), n->type_string()) != GE_OPS_WHITELIST.end(); +} + inline std::string DpTfToGEConversionPassImpl::GetEdgeName(const Edge *e) const { if (e == nullptr || e->src() == nullptr || e->dst() == nullptr) { return "invalid_edge"; } return strings::StrCat("Edge_from_", e->src()->name(), "_out", e->src_output(), "_To_", e->dst()->name(), "_in", @@ -350,21 +369,37 @@ Status DpTfToGEConversionPassImpl::InsertChannelQueue(Node *topo_end, std::strin std::string &device_queue_name, std::map &all_options) const { ADP_LOG(INFO) << "Start to insert HostQueueDataset and DeviceQueueDataset."; + REQUIRES_NOT_NULL(topo_end); + char *is_new = std::getenv("IS_NEW"); + const Node *iterator_node = nullptr; + if (IsMakeIteratorNode(topo_end)) { + topo_end->input_node(1, &iterator_node); + } + + uint32_t device_id = 0; + GetEnvDeviceID(device_id); for (const Edge *e : split_edges_.at(topo_end)) { REQUIRES_NOT_NULL(e); REQUIRES_NOT_NULL(e->src()); REQUIRES_NOT_NULL(e->dst()); + bool need_add_device_dataset = false; + if (is_new != nullptr && strcmp("1", is_new) == 0 && IsGeSupportDataset(e->dst())) { need_add_device_dataset = true; } + std::string local_rank_id = all_options["local_rank_id"]; std::string local_device_list = all_options["local_device_list"]; - std::string queue_name; + std::string channel_name; if (local_rank_id == "-1") { - queue_name = strings::StrCat("Queue_", 
GetEdgeName(e), "_", GetRandomName()); + REQUIRES_NOT_NULL(iterator_node); + if (is_new != nullptr && strcmp("1", is_new) == 0 && !need_add_device_dataset) { + channel_name = iterator_node->name(); + } else { + channel_name = "Queue_" + GetEdgeName(e) + "_" + GetRandomName(); + } } else { - queue_name = strings::StrCat(e->src()->name(), "_index_", std::to_string(g_channel_index)); + channel_name = strings::StrCat(e->src()->name(), "_index_", std::to_string(g_channel_index)); g_channel_index += 1; } - host_queue_name = strings::StrCat("Host", queue_name); - device_queue_name = strings::StrCat("Device", queue_name); + host_queue_name = "HostQueue_" + channel_name; ADP_LOG(INFO) << "Add_" << host_queue_name; // Host and Device queue should save type and shape auto m_src = e->src()->def().attr(); @@ -372,27 +407,31 @@ Status DpTfToGEConversionPassImpl::InsertChannelQueue(Node *topo_end, std::strin string::size_type idx = SummarizeAttrValue(m_src["output_types"]).find("Unknown AttrValue"); if (idx == string::npos) { type_status = true; } Node *queue_node_host = nullptr; - // Make sure that 'queue_name' of host and device queue be same + // Make sure that 'channel_name' of host and device queue be same TF_CHECK_OK(NodeBuilder(host_queue_name, "HostQueueDataset") .Input(e->src(), e->src_output()) // Will be replaced by GEOPDataset later .Input(e->src(), e->src_output()) .Device(e->src()->def().device()) - .Attr("channel_name", queue_name) + .Attr("channel_name", channel_name) .Attr("output_types", type_status ? m_src["output_types"] : m_src["Toutput_types"]) .Attr("output_shapes", m_src["output_shapes"]) .Attr("_local_rank_id", local_rank_id) .Attr("_local_device_list", local_device_list) - .Finalize(&*graph_, &queue_node_host)); + .Finalize(graph_, &queue_node_host)); REQUIRES_NOT_NULL(queue_node_host); + + if (is_new != nullptr && strcmp("1", is_new) == 0 && !need_add_device_dataset) { return Status::OK(); } + + device_queue_name = "DeviceQueue_" + channel_name; ADP_LOG(INFO) << "Add_" << device_queue_name; Node *queue_node_device = nullptr; - // Make sure that 'queue_name' of host and device queue be same + // Make sure that 'channel_name' of host and device queue be same TF_CHECK_OK(NodeBuilder(device_queue_name, "DeviceQueueDataset") .Device(e->dst()->def().device()) - .Attr("channel_name", queue_name) + .Attr("channel_name", channel_name) .Attr("output_types", type_status ? 
m_src["output_types"] : m_src["Toutput_types"]) .Attr("output_shapes", m_src["output_shapes"]) - .Finalize(&*graph_, &queue_node_device)); + .Finalize(graph_, &queue_node_device)); REQUIRES_NOT_NULL(queue_node_device); // 0 means the the 0th output of queue_node_device REQUIRES_NOT_NULL(graph_->AddEdge(queue_node_device, 0, e->dst(), e->dst_input())); @@ -451,6 +490,7 @@ Status DpTfToGEConversionPassImpl::RemoveNotSupportDataset(Graph *g, const std:: } void DpTfToGEConversionPassImpl::RemoveSplitEdges(Node *topo_end) { + ADP_LOG(INFO) << "Start to remove split edges"; for (const Edge *e : split_edges_.at(topo_end)) { ADP_LOG(INFO) << "Remove_" << GetEdgeName(e); graph_->RemoveEdge(e); @@ -488,16 +528,7 @@ bool DpTfToGEConversionPassImpl::GetNodeFuncs(const FunctionLibraryDefinition *f return !node_funcs.empty(); } -bool DpTfToGEConversionPassImpl::RunPass(std::unique_ptr *g, FunctionLibraryDefinition *flib, - std::map all_options) { - ADP_LOG(INFO) << ">>>> DpTfToGEConversionPassImpl::RunPass <<<<"; - // Convert just for convenient access - split_edges_.clear(); - graph_ = &**g; - flib_def_ = &(*g)->flib_def(); - - // Find split edges from subgraphs, which MakeIterator connect to Itearator op - std::vector topo_ends; +void DpTfToGEConversionPassImpl::GetTopoEndsNodes(std::vector &topo_ends) { for (Node *node : graph_->op_nodes()) { if (IsMakeIteratorNode(node)) { for (Node *in_node : node->in_nodes()) { @@ -509,279 +540,351 @@ bool DpTfToGEConversionPassImpl::RunPass(std::unique_ptr *g, FunctionLibr } } } - // After traversal, topo_ends should store MakeIterator Nodes. - if (topo_ends.empty()) { - ADP_LOG(INFO) << "Do not find MakeIterator <- IteratorV2 connects in the graph," - << " pass datapreprocess pass."; - return true; - } - ADP_LOG(INFO) << "Start to write graph's pbtxt before optimization."; +} - const char *need_print = getenv("PRINT_MODEL"); - if (need_print != nullptr && strcmp("1", need_print) == 0) { - GraphDef before_graphdef; - (*g)->ToGraphDef(&before_graphdef); - string pre_model_path = GetDumpPath() + "BeforeSubGraph_dp_"; - string pmodel_path = pre_model_path + std::to_string(graph_run_num_) + ".pbtxt"; - TF_DO_CHECK_OK(WriteTextProto(Env::Default(), pmodel_path, before_graphdef), ERROR); - } +Status DpTfToGEConversionPassImpl::AddDataTransDatasets(Node *topo_end, std::string &host_channel_name, + std::string &device_channel_name, std::map &all_options) { + const Edge *tmp_edge = nullptr; + Status ret = GetSplitEdges(topo_end, split_edges_[topo_end], tmp_edge); + if (!ret.ok()) { return ret; } - ADP_LOG(INFO) << "Start to optimize dp_init topological graph"; - for (Node *topo_end : topo_ends) { - // Get all edges that should be replace with HostQueue->DeviceQueue - ADP_LOG(INFO) << "Start to find split edges, topo_end node is : " << topo_end->name() << ", op is " - << topo_end->type_string(); - const Edge *tmp_edge = nullptr; - TF_DO_CHECK_OK(GetSplitEdges(topo_end, split_edges_[topo_end], tmp_edge), ERROR); - - const string DEFAULT_DEVICE = topo_end->def().device(); - // Start optimize graph - // Insert Host and Device queue - ADP_LOG(INFO) << "Start to add host and device queue on split edges"; - std::string host_queue_name; - std::string device_queue_name; - TF_DO_CHECK_OK(InsertChannelQueue(topo_end, host_queue_name, device_queue_name, all_options), ERROR); - ADP_LOG(INFO) << "host queue name is " << host_queue_name; - ADP_LOG(INFO) << "device queue name is " << device_queue_name; - // Remove all split edges - ADP_LOG(INFO) << "Start to remove split edges"; - 
RemoveSplitEdges(topo_end); - - // Make a copy of graph for pruned GE - ADP_LOG(INFO) << "Start to prune GE graph"; - std::unique_ptr graph_ge(new (std::nothrow) Graph(OpRegistry::Global())); - if (graph_ge == nullptr) { - ADP_LOG(ERROR) << "new graph ge failed"; - LOG(ERROR) << "new graph ge failed"; - return false; - } - CopyGraph(*graph_, &*graph_ge); - // Prune visiable GE graph - std::unordered_set visiable_ge; - for (const Node *n : graph_ge->op_nodes()) { - if (IsMakeIteratorNode(n) && n->name() == topo_end->name()) { - visiable_ge.emplace(n); - break; - } - } - TF_DO_CHECK_OK(RemoveNotSupportDataset(&*graph_ge, device_queue_name, topo_end->name()), ERROR); + // Start optimize graph + // Insert Host and Device queue + ADP_LOG(INFO) << "Start to add host and device queue on split edges"; + ret = InsertChannelQueue(topo_end, host_channel_name, device_channel_name, all_options); + if (!ret.ok()) { return ret; } + ADP_LOG(INFO) << "host queue name is " << host_channel_name << ", device queue name is " << device_channel_name; - ADP_LOG(INFO) << "Start to to PruneForReverseReachability."; - PruneForReverseReachability(&*graph_ge, visiable_ge); - // add function_def begin - ADP_LOG(INFO) << "Start to add function_def for GEOP's func"; - FunctionDefLibrary fdeflib; - for (auto node : graph_ge->nodes()) { - std::vector node_funcs; - if (GetNodeFuncs(flib, node, node_funcs)) { - ADP_LOG(INFO) << "Node [" << node->name() << "] has func:"; - for (const auto &func : node_funcs) { - FunctionDef *fdef = fdeflib.add_function(); - if (flib->Find(func) == nullptr) { - ADP_LOG(ERROR) << "function def is nullptr"; - LOG(ERROR) << "function def is nullptr"; - return false; - } - *fdef = *(flib->Find(func)); - } - } - } + RemoveSplitEdges(topo_end); + return ret; +} - // Add required function for GEOPDataset->func(GEOP->func2) topo graph - std::string fn_dpop = GetRandomName("dpop_function"); - std::string fn_geop = GetRandomName("geop_function"); - std::string fn_geop_dataset = GetRandomName("geopdataset_function"); - std::string iterator_name = ""; - for (auto in_node : topo_end->in_nodes()) { - if (in_node == nullptr) { - ADP_LOG(ERROR) << "topo end node is nullptr"; - LOG(ERROR) << "topo end node is nullptr"; - return false; - } - ADP_LOG(INFO) << "in_node name is " << in_node->name(); - if (IsIteratorNode(in_node)) { - iterator_name = in_node->name(); - ADP_LOG(INFO) << "iterator name is " << iterator_name; - break; - } +Status DpTfToGEConversionPassImpl::BuildDeviceDpGraph(Node *topo_end, Graph *device_graph, + const std::string device_channel_name) { + // Make a copy of graph for pruned GE + ADP_LOG(INFO) << "Start to prune GE graph"; + CopyGraph(*graph_, device_graph); + // Prune visiable GE graph + std::unordered_set visiable_ge; + for (const Node *n : device_graph->op_nodes()) { + if (IsMakeIteratorNode(n) && n->name() == topo_end->name()) { + visiable_ge.emplace(n); + break; } - if (iterator_name.empty()) { - ADP_LOG(ERROR) << "There is no connection between MakeIteraotr and IteratorV2"; - LOG(ERROR) << "There is no connection between MakeIteraotr and IteratorV2"; - return false; + } + Status ret = RemoveNotSupportDataset(device_graph, device_channel_name, topo_end->name()); + if (!ret.ok()) { return ret; } + + ADP_LOG(INFO) << "Start to to PruneForReverseReachability."; + PruneForReverseReachability(device_graph, visiable_ge); + return ret; +} + +Status DpTfToGEConversionPassImpl::AddAttr2DeviceNodes(Node *topo_end, Graph *device_graph) { + std::string iterator_name; + for (auto in_node : 
topo_end->in_nodes()) {
+    REQUIRES_NOT_NULL(in_node);
+    ADP_LOG(INFO) << "in_node name is " << in_node->name();
+    if (IsIteratorNode(in_node)) {
+      iterator_name = in_node->name();
+      ADP_LOG(INFO) << "iterator name is " << iterator_name;
+      break;
     }
-    // Add dp custom kernel label
-    for (auto node : graph_ge->nodes()) {
-      if (node->type_string() == "DeviceQueueDataset") { node->AddAttr(DP_ITERATOR_MARK, iterator_name); }
-      if (std::find(CUSTOMIZE_DATASET_LIST.begin(), CUSTOMIZE_DATASET_LIST.end(), node->type_string())
-          != CUSTOMIZE_DATASET_LIST.end()) {
-        ADP_LOG(INFO) << node->name() << " is " << node->type_string() << ", need to add label.";
-        node->AddAttr("_kernel", "dp");
-        node->AddAttr(DP_ITERATOR_MARK, iterator_name);
-      }
+  }
+  if (iterator_name.empty()) {
+    ADP_LOG(ERROR) << "There is no connection between MakeIterator and IteratorV2";
+    return errors::Internal("There is no connection between MakeIterator and IteratorV2");
+  }
+  // Add dp custom kernel label
+  for (auto node : device_graph->nodes()) {
+    REQUIRES_NOT_NULL(node);
+    if (node->type_string() == "DeviceQueueDataset") { node->AddAttr(DP_ITERATOR_MARK, iterator_name); }
+    if (std::find(CUSTOMIZE_DATASET_LIST.begin(), CUSTOMIZE_DATASET_LIST.end(), node->type_string())
+        != CUSTOMIZE_DATASET_LIST.end()) {
+      ADP_LOG(INFO) << node->name() << " is " << node->type_string() << ", need to add label.";
+      node->AddAttr("_kernel", "dp");
+      node->AddAttr(DP_ITERATOR_MARK, iterator_name);
     }
+  }
+  return Status::OK();
+}
+
+Status DpTfToGEConversionPassImpl::AddGeopNodeFunctionDef(FunctionDefLibrary &fdeflib,
+                                                          const std::string &fn_geop,
+                                                          const std::string &fn_dpop,
+                                                          const string &default_device) {
+  // Add the DPOP node (visible only inside the geop function)
+  string func_def_str;
+  fdeflib.SerializeToString(&func_def_str);
+
+  // The DPOP node should be created by the geop function
+  ADP_LOG(INFO) << "Start to convert dpop node to geop function";
+  FunctionDef *fd = fdeflib.add_function();
+  REQUIRES_NOT_NULL(fd);
+  REQUIRES_NOT_NULL(fd->mutable_signature());
+  fd->mutable_signature()->set_name(fn_geop);
+  NodeDef *n = fd->add_node_def();
+  REQUIRES_NOT_NULL(n);
+  NameAttrList f_attr;
+  f_attr.set_name(fn_dpop);
+  *f_attr.mutable_attr() = n->attr();
+  TF_CHECK_OK(NodeDefBuilder(fn_dpop, "DPOP")
+                  .Input(EMPTY_DEF_INPUT)  // No partition dp_init graph on GE
+                  .Device(default_device)
+                  .Attr("function", f_attr)  // dpop function
+                  .Attr("func_def", func_def_str)
+                  .Attr("Tin", EMPTY_TYPE)
+                  .Attr("Tout", EMPTY_TYPE)
+                  .Finalize(n));  // n is created inside the geop function
+  return Status::OK();
+}
+
+Status DpTfToGEConversionPassImpl::AddGeopDatasetFunctionDef(FunctionDefLibrary &fdeflib,
+                                                             const std::string &fn_geop,
+                                                             const std::string &fn_geop_dataset,
+                                                             const string &default_device,
+                                                             std::map<std::string, std::string> all_options) {
+  // The GEOP node should be created by the geopDataset function
+  ADP_LOG(INFO) << "Start to convert geop node to geopdataset function";
+  FunctionDef *fd = fdeflib.add_function();
+  REQUIRES_NOT_NULL(fd);
+  REQUIRES_NOT_NULL(fd->mutable_signature());
+  fd->mutable_signature()->set_name(fn_geop_dataset);
+
+  NodeDef *n = fd->add_node_def();
+  REQUIRES_NOT_NULL(n);
+  NameAttrList f_attr;
+  f_attr.set_name(fn_geop);
+  *f_attr.mutable_attr() = n->attr();
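+  // A sketch of the function nesting assembled here, for orientation only
+  // (the function names are the random ones generated by GetRandomName):
+  //   GEOPDataset --attr "f"--> fn_geop_dataset { GeOp }
+  //     GeOp --attr "function"--> fn_geop { DPOP }
+  //       DPOP --attr "func_def"--> serialized device-side dataset graph
+  // so GE unwraps three nested functions to reach the device dataset graph.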
TF_CHECK_OK(NodeDefBuilder(GetRandomName("GeOp"), "GeOp") + .Input(EMPTY_DEF_INPUT) // No partition dp_init graph on GE + .Device(default_device) + .Attr("function", f_attr) // geop funcion + .Attr("Tin", EMPTY_TYPE) + .Attr("Tout", EMPTY_TYPE) + .Attr("Tout", EMPTY_TYPE) + .Attr("_enableDP", true) + .Finalize(n)); // n is created by function of geopDataset function + std::string attr_name; + for (auto option : all_options) { + attr_name = std::string("_") + option.first; + AddNodeAttr(attr_name, option.second, n); + } + AddNodeAttr("_NpuOptimizer", "NpuOptimizer", n); + return Status::OK(); +} + +Status DpTfToGEConversionPassImpl::BuildGeOpDatasetFunction(FunctionDefLibrary &fdeflib, + Graph *device_graph, + const std::string &fn_geop_dataset, + const string &default_device, + std::map all_options) { + // Convert GE graph to GEOP function body + Status ret = Status::OK(); + std::string fn_dpop = GetRandomName("dpop_function"); + { + ADP_LOG(INFO) << "Start to convert GE graph to geop function"; + FunctionDef *fd = fdeflib.add_function(); + ret = GraphToFunctionDef(*device_graph, fn_dpop, fd); + if (!ret.ok()) { + ADP_LOG(ERROR) << "GraphToFunctionDef failed:" << ret.error_message(); + return ret; } - // Add DPOP node(visable only by function of geop) - { - string func_def_str; - fdeflib.SerializeToString(&func_def_str); + } + std::string fn_geop = GetRandomName("geop_function"); + ret = AddGeopNodeFunctionDef(fdeflib, fn_geop, fn_dpop, default_device); + if (!ret.ok()) { return ret; } + ret = AddGeopDatasetFunctionDef(fdeflib, fn_geop, fn_geop_dataset, default_device, all_options); + if (!ret.ok()) { return ret; } + return ret; +} - // DPOP node should created by function of geop - ADP_LOG(INFO) << "Start to convert dpop node to geop function"; - FunctionDef *fd = fdeflib.add_function(); - if (fd == nullptr || fd->mutable_signature() == nullptr) { - ADP_LOG(ERROR) << "fd is nullptr"; - LOG(ERROR) << "fd is nullptr"; - return false; - } - fd->mutable_signature()->set_name(fn_geop); - NodeDef *n = fd->add_node_def(); - if (n == nullptr) { - ADP_LOG(ERROR) << "fd node def is nullptr"; - LOG(ERROR) << "fd node def is nullptr"; - return false; - } - NameAttrList f_attr; - f_attr.set_name(fn_dpop); - *f_attr.mutable_attr() = n->attr(); - TF_CHECK_OK(NodeDefBuilder(fn_dpop, "DPOP") - .Input(EMPTY_DEF_INPUT) // No partition dp_init graph on GE - .Device(DEFAULT_DEVICE) - .Attr("function", f_attr) // dpop funcion - .Attr("func_def", func_def_str) - .Attr("Tin", EMPTY_TYPE) - .Attr("Tout", EMPTY_TYPE) - .Attr("Tout", EMPTY_TYPE) - .Finalize(n)); // n is created by function of geop function - } - { +Status DpTfToGEConversionPassImpl::AddGeOpDatasetFunctionLibrary(FunctionLibraryDefinition *flib, + Node *topo_end, + const std::string &device_channel_name, + const std::string &fn_geop_dataset, + std::map &all_options) { + FunctionDefLibrary fdeflib; + char *is_new = std::getenv("IS_NEW"); + if (is_new != nullptr && strcmp("1", is_new) == 0 && device_channel_name.empty()) { // GEOP node should created by function of geopDataset - ADP_LOG(INFO) << "Start to convert geop node to geopdataset function"; + ADP_LOG(INFO) << "No Dataset node can be computed in device, GeOpDataset func is null."; FunctionDef *fd = fdeflib.add_function(); - if (fd == nullptr || fd->mutable_signature() == nullptr) { - ADP_LOG(ERROR) << "fd is nullptr"; - LOG(ERROR) << "fd is nullptr"; - return false; - } + REQUIRES_NOT_NULL(fd); + REQUIRES_NOT_NULL(fd->mutable_signature()); fd->mutable_signature()->set_name(fn_geop_dataset); - 
NodeDef *n = fd->add_node_def(); - if (n == nullptr) { - ADP_LOG(ERROR) << "fd node def is nullptr"; - LOG(ERROR) << "fd node def is nullptr"; - return false; - } - NameAttrList f_attr; - f_attr.set_name(fn_geop); - *f_attr.mutable_attr() = n->attr(); - TF_CHECK_OK(NodeDefBuilder(GetRandomName("GeOp"), "GeOp") - .Input(EMPTY_DEF_INPUT) // No partition dp_init graph on GE - .Device(DEFAULT_DEVICE) - .Attr("function", f_attr) // geop funcion - .Attr("Tin", EMPTY_TYPE) - .Attr("Tout", EMPTY_TYPE) - .Attr("Tout", EMPTY_TYPE) - .Attr("_enableDP", true) - .Finalize(n)); // n is created by function of geopDataset function - std::string attr_name = ""; - for (auto option : all_options) { - attr_name = std::string("_") + option.first; - AddNodeAttr(attr_name, option.second, n); + } else { + // Make a copy of graph for pruned GE + ADP_LOG(INFO) << "Start to prune GE graph"; + std::unique_ptr device_graph(new (std::nothrow) Graph(OpRegistry::Global())); + REQUIRES_NOT_NULL(device_graph); + Status ret = BuildDeviceDpGraph(topo_end, device_graph.get(), device_channel_name); + if (!ret.ok()) { return ret; } + + // add function_def begin + ADP_LOG(INFO) << "Start to add function_def for GEOP's func"; + for (auto node : device_graph->nodes()) { + std::vector node_funcs; + if (GetNodeFuncs(flib, node, node_funcs)) { + REQUIRES_NOT_NULL(flib); + ADP_LOG(INFO) << "Node [" << node->name() << "] has func:"; + for (const auto &func : node_funcs) { + FunctionDef *fdef = fdeflib.add_function(); + REQUIRES_NOT_NULL(flib->Find(func)); + *fdef = *(flib->Find(func)); + } + } } - AddNodeAttr("_NpuOptimizer", "NpuOptimizer", n); + ret = AddAttr2DeviceNodes(topo_end, device_graph.get()); + if (!ret.ok()) { return ret; } + + const string kDefaultDevice = topo_end->def().device(); + ret = BuildGeOpDatasetFunction(fdeflib, device_graph.get(), fn_geop_dataset, kDefaultDevice, all_options); + if (!ret.ok()) { return ret; } } + // Update graph function libray ADP_LOG(INFO) << "Start to add geop and geopdataset function in graph library"; // Not a must, just for Tensorbord viewing convenience graph_->AddFunctionLibrary(fdeflib); flib->AddLibrary(fdeflib); - // Add GEOPDataset node to graph_ - std::vector topo_end_input_edges(topo_end->in_edges().begin(), topo_end->in_edges().end()); - Node *dpgroup_dataset_node = nullptr; - Node *geop_dataset_node = nullptr; - std::unordered_set isolated_nodes; - { - ADP_LOG(INFO) << "Start to add geopdataset node in graph"; - const Node *n = nullptr; - for (const Edge *e : topo_end_input_edges) { - if (IsIteratorNode(e->src())) { n = e->src(); } - } - if (n == nullptr) { - ADP_LOG(ERROR) << "edge src is nullptr"; - LOG(ERROR) << "edge src is nullptr"; - return false; - } - auto m_src = n->def().attr(); - - NameAttrList f_attr; - f_attr.set_name(fn_geop_dataset); - // Combine all host queue dataset with GEOPDataset - std::vector inputs; - for (Node *n : graph_->op_nodes()) { - // host tf makeiterator add dp label - if (IsMakeIteratorNode(n)) { n->AddAttr("_kernel", "dp"); } - if (n->type_string() == "HostQueueDataset" && n->name() == host_queue_name) { - // 0: Host queue always generate one dataset - ADP_LOG(INFO) << "inputs add node : name is " << n->name() << ", op is " << n->type_string(); - inputs.push_back(NodeBuilder::NodeOut(n, 0)); - } - if (n->type_string().compare("DeviceQueueDataset") == 0 && n->name() == device_queue_name) { - isolated_nodes.insert(n); - } - } + return Status::OK(); +} - TF_CHECK_OK(NodeBuilder(GetRandomName("DPGroupDataset"), "DPGroupDataset") - .Input(inputs) // 
All host queue flow into geopDataset for driver - .Device(DEFAULT_DEVICE) - .Attr("output_types", m_src["output_types"]) - .Attr("output_shapes", m_src["output_shapes"]) - .Finalize(&*graph_, - &dpgroup_dataset_node)); // Finalize geopDataset in graph_ - TF_CHECK_OK(NodeBuilder(GetRandomName("GeopDataset"), "GEOPDataset") - .Device(DEFAULT_DEVICE) - .Attr("f", f_attr) // geopDataset function - .Finalize(&*graph_, - &geop_dataset_node)); // Finalize geopDataset in graph_ - - for (Node *n : graph_->op_nodes()) { - if (n->type_string() == "HostQueueDataset" && n->name() == host_queue_name) { - graph_->RemoveEdge(*(n->in_edges().begin())); - graph_->AddEdge(geop_dataset_node, 0, n, 0); - } - } +Status DpTfToGEConversionPassImpl::AddGeOpDatasetAndDpGroupDataset(Node *topo_end, + const std::string &fn_geop_dataset, + const std::string &host_channel_name, + const std::string &device_channel_name) { + // Add GEOPDataset node to graph_ + std::vector topo_end_input_edges(topo_end->in_edges().begin(), topo_end->in_edges().end()); + + ADP_LOG(INFO) << "Start to add geopdataset node in graph"; + const Node *iterator_node = nullptr; + for (const Edge *e : topo_end_input_edges) { + REQUIRES_NOT_NULL(e); + if (IsIteratorNode(e->src())) { iterator_node = e->src(); } + } + + // Combine all host queue dataset with GEOPDataset + std::vector inputs; + std::unordered_set isolated_nodes; + for (Node *n : graph_->op_nodes()) { + REQUIRES_NOT_NULL(n); + // host tf makeiterator add dp label + if (IsMakeIteratorNode(n)) { n->AddAttr("_kernel", "dp"); } + if (n->type_string() == "HostQueueDataset" && n->name() == host_channel_name) { + // 0: Host queue always generate one dataset + ADP_LOG(INFO) << "inputs add node : name is " << n->name() << ", op is " << n->type_string(); + inputs.push_back(NodeBuilder::NodeOut(n, 0)); + } + if (n->type_string() == "DeviceQueueDataset" && n->name() == device_channel_name) { + isolated_nodes.insert(n); } - // Remove all edges flow to MakeIterator except the one from IteratorV2 - ADP_LOG(INFO) << "Start to combine geopdataset with iterator node and remove " - "orignal edges"; + } + + Node *dpgroup_dataset_node = nullptr; + REQUIRES_NOT_NULL(iterator_node); + auto m_src = iterator_node->def().attr(); + TF_CHECK_OK(NodeBuilder(GetRandomName("DPGroupDataset"), "DPGroupDataset") + .Input(inputs) // All host queue flow into geopDataset for driver + .Device(iterator_node->def().device()) + .Attr("output_types", m_src["output_types"]) + .Attr("output_shapes", m_src["output_shapes"]) + .Finalize(graph_, + &dpgroup_dataset_node)); // Finalize geopDataset in graph_ + + NameAttrList f_attr; + f_attr.set_name(fn_geop_dataset); + Node *geop_dataset_node = nullptr; + TF_CHECK_OK(NodeBuilder(GetRandomName("GeopDataset"), "GEOPDataset") + .Device(iterator_node->def().device()) + .Attr("f", f_attr) // geopDataset function + .Finalize(graph_, + &geop_dataset_node)); // Finalize geopDataset in graph_ - // We must copy all topoend input edges as we can't modify it when combine - // geopdataset an topoend + for (Node *n : graph_->op_nodes()) { + if (n->type_string() == "HostQueueDataset" && n->name() == host_channel_name) { + graph_->RemoveEdge(*(n->in_edges().begin())); + graph_->AddEdge(geop_dataset_node, 0, n, 0); + } + } + // Remove all edges flow to MakeIterator except the one from IteratorV2 + ADP_LOG(INFO) << "Start to combine geopdataset with iterator node and remove " + "orignal edges"; + + // We must copy all topoend input edges as we can't modify it when combine + // geopdataset an topoend + 
char *is_new = std::getenv("IS_NEW");
+  if (is_new != nullptr && strcmp("1", is_new) == 0 && device_channel_name.empty()) {
+    CHECK_NOTNULL(graph_->AddEdge(dpgroup_dataset_node, 0, topo_end, 0));
+  } else {
     for (const Edge *e : topo_end_input_edges) {
+      ADP_LOG(INFO) << "node:" << topo_end->name() << ", input node is:" << e->src()->name();
       if (!IsIteratorNode(e->src())) {
         CHECK_NOTNULL(graph_->AddEdge(dpgroup_dataset_node, 0, e->dst(), e->dst_input()));
         ADP_LOG(INFO) << "Remove_" << GetEdgeName(e);
         graph_->RemoveEdge(e);
       }
     }
-    // Prune for the final optimized graph
-    ADP_LOG(INFO) << "Start to prune final optimized graph";
-
-    RemoveIsolatedNode(&*graph_, isolated_nodes);
-    ADP_LOG(INFO) << "Start to assign unassigned node on default device";
-    // We do pass after assign, so we must assign all new added nodes
-    for (Node *n : (*g)->op_nodes()) {
-      if (n->assigned_device_name().empty()) {
-        // Use device of MakeIterator node as default
-        n->set_assigned_device_name(DEFAULT_DEVICE);
-        ADP_LOG(INFO) << "Assigned node [" << n->name() << "] on device [" << n->assigned_device_name() << "]";
-      }
+  }
+
+  // Prune for the final optimized graph
+  ADP_LOG(INFO) << "Start to prune final optimized graph";
+
+  RemoveIsolatedNode(graph_, isolated_nodes);
+  ADP_LOG(INFO) << "Start to assign unassigned node on default device";
+  // We do this pass after assignment, so we must assign all newly added nodes
+  for (Node *n : graph_->op_nodes()) {
+    if (n->assigned_device_name().empty()) {
+      // Use the device of the MakeIterator node as the default
+      n->set_assigned_device_name(iterator_node->def().device());
+      ADP_LOG(INFO) << "Assigned node [" << n->name() << "] on device [" << n->assigned_device_name() << "]";
     }
   }
+  return Status::OK();
+}
+
+bool DpTfToGEConversionPassImpl::RunPass(std::unique_ptr<Graph> *g, FunctionLibraryDefinition *flib,
+                                         std::map<std::string, std::string> all_options) {
+  ADP_LOG(INFO) << ">>>> DpTfToGEConversionPassImpl::RunPass <<<<";
+  // Convert just for convenient access
+  split_edges_.clear();
+  graph_ = &**g;
+  flib_def_ = &(*g)->flib_def();
+
+  // Find split edges from subgraphs where MakeIterator connects to the Iterator op
+  std::vector<Node *> topo_ends;
+  GetTopoEndsNodes(topo_ends);
+  // After traversal, topo_ends should store MakeIterator Nodes.
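+  // For orientation, a sketch of the typical input this pass matches (op names
+  // taken from the geop_dpop.pbtxt test data added later in this patch, shown
+  // only as an assumed example):
+  //   TensorSliceDataset -> BatchDatasetV2 -> MapDataset -> MakeIterator <- IteratorV2
+  // Each MakeIterator fed by an Iterator/IteratorV2 becomes one topo_end; the
+  // dataset edges above it are later split into a HostQueueDataset /
+  // DeviceQueueDataset pair that shares a channel_name.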
+ if (topo_ends.empty()) { + ADP_LOG(INFO) << "Do not find MakeIterator <- IteratorV2 connects in the graph," + << " pass datapreprocess pass."; + return true; + } + + const char *need_print = getenv("PRINT_MODEL"); + if (nullptr != need_print && strcmp("1", need_print) == 0) { + GraphDef before_graphdef; + (*g)->ToGraphDef(&before_graphdef); + string pre_model_path = GetDumpPath() + "BeforeSubGraph_dp_"; + string pmodel_path = pre_model_path + std::to_string(graph_run_num_) + ".pbtxt"; + TF_DO_CHECK_OK(WriteTextProto(Env::Default(), pmodel_path, before_graphdef), ERROR); + } + + ADP_LOG(INFO) << "Start to optimize dp_init topological graph"; + for (Node *topo_end : topo_ends) { + // Get all edges that should be replace with HostQueue->DeviceQueue + ADP_LOG(INFO) << "Start to find split edges, topo_end node is : " << topo_end->name() << ", op is " + << topo_end->type_string(); + std::string host_channel_name; + std::string device_channel_name; + TF_DO_CHECK_OK(AddDataTransDatasets(topo_end, host_channel_name, device_channel_name, all_options), ERROR); + std::string fn_geop_dataset = GetRandomName("geopdataset_function"); + TF_DO_CHECK_OK(AddGeOpDatasetFunctionLibrary(flib, topo_end, device_channel_name, fn_geop_dataset, all_options), ERROR); + TF_DO_CHECK_OK(AddGeOpDatasetAndDpGroupDataset(topo_end, fn_geop_dataset, host_channel_name, device_channel_name), ERROR); + } ADP_LOG(INFO) << "End optimize dp_init topological graph"; if (need_print != nullptr && strcmp("1", need_print) == 0) { @@ -867,18 +970,18 @@ Status DpTfToGEConversionPassImpl::ProcessGraph(std::unique_ptr *graph, F if (graph == nullptr) { return Status::OK(); } - std::string queue_name; + std::string channel_name; for (Node *n : graph->get()->nodes()) { REQUIRES_NOT_NULL(n); if (n->type_string() == "Iterator" || n->type_string() == "IteratorV2") { - queue_name = n->name(); + channel_name = n->name(); } if (n->attrs().Find("_NoNeedOptimize")) { ADP_LOG(INFO) << "Found mark of noneed optimize on node [" << n->name() << "], skip DpTfToGEConversionPass."; return Status::OK(); } } - NpuAttrs::SetUseAdpStatus(queue_name, false); + NpuAttrs::SetUseAdpStatus(channel_name, false); std::map all_options; std::map pass_options; @@ -889,10 +992,10 @@ Status DpTfToGEConversionPassImpl::ProcessGraph(std::unique_ptr *graph, F if (n->type_string() == "DvppDataset") { uint32_t device_id = 0; (void)GetEnvDeviceID(device_id); - n->AddAttr("queue_name", "device" + std::to_string(device_id) + "_" + queue_name); - NpuAttrs::SetUseAdpStatus(queue_name, true); - ADP_LOG(INFO) << "The graph include DvppDataset, set queue_name:" - << queue_name << ", skip DpTfToGEConversionPass."; + n->AddAttr("channel_name", "device" + std::to_string(device_id) + "_" + channel_name); + NpuAttrs::SetUseAdpStatus(channel_name, true); + ADP_LOG(INFO) << "The graph include DvppDataset, set channel_name:" + << channel_name << ", skip DpTfToGEConversionPass."; return Status::OK(); } if (n->attrs().Find("_NpuOptimizer")) { diff --git a/tf_adapter/optimizers/om_partition_subgraphs_pass.cc b/tf_adapter/optimizers/om_partition_subgraphs_pass.cc index ed3c57bd7..19cf6eb49 100644 --- a/tf_adapter/optimizers/om_partition_subgraphs_pass.cc +++ b/tf_adapter/optimizers/om_partition_subgraphs_pass.cc @@ -520,7 +520,6 @@ Status FindNpuSupportCandidates(const Graph &graph, OrderedNodeSet *candidates, bool hasIteratorOp = false; bool hasMakeIteratorOp = false; bool hasOutfeedDequeueOp = false; - bool hasStopOutfeedDequeueOp = false; for (Node *node : graph.op_nodes()) { 
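    // One pass over all op nodes: record each node in traversal order and flag
    // whether the graph contains Iterator/MakeIterator bootstrap ops or an
    // OutfeedDequeueOp; an outfeed op keeps the hostcall subgraph on the host
    // (the candidate set is cleared just below).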
sortedNodes.push_back(node); if (node->type_string().find("MakeIterator") != string::npos) { @@ -530,12 +529,10 @@ Status FindNpuSupportCandidates(const Graph &graph, OrderedNodeSet *candidates, hasIteratorOp = true; } else if (node->type_string() == "OutfeedDequeueOp") { hasOutfeedDequeueOp = true; - } else if (node->type_string() == "StopOutfeedDequeueOp") { - hasStopOutfeedDequeueOp = true; } } - if (hasStopOutfeedDequeueOp || hasOutfeedDequeueOp) { + if (hasOutfeedDequeueOp) { candidates->clear(); ADP_LOG(INFO) << "hostcall subgraph will run on host."; return Status::OK(); @@ -925,7 +922,6 @@ Status MarkForPartition(std::unique_ptr *graphIn, int &clusterNum, bool m || !NodeIsCandidateForClustering(dst, &npuSupportCandidates)) { continue; } - if (is_set_lazy_recompile && src->type_string() == "IteratorGetNext" && enable_dp) { graph_options["is_dynamic_getnext"] = "1"; continue; diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_hook.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_hook.py index 5880cb37a..59a04b162 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_hook.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_hook.py @@ -172,7 +172,6 @@ class _SIGNAL(object): class _OpQueueContext(object): """Manages work queue and thread for a infeed/outfeed thread.""" - def __init__(self, name, target, args): self._name = name self._queue = Queue.Queue() diff --git a/tf_adapter/tests/CMakeLists.txt b/tf_adapter/tests/CMakeLists.txt index 4115712e7..b8bb2ff2d 100644 --- a/tf_adapter/tests/CMakeLists.txt +++ b/tf_adapter/tests/CMakeLists.txt @@ -33,6 +33,7 @@ add_subdirectory(depends/alog) add_subdirectory(depends/datatransfer) add_subdirectory(depends/ge_runner) add_subdirectory(depends/aoe) +add_subdirectory(depends/ascendcl) if (ENABLE_TFADAPTER_UT) add_subdirectory(ut) diff --git a/tf_adapter/tests/depends/ascendcl/CMakeLists.txt b/tf_adapter/tests/depends/ascendcl/CMakeLists.txt new file mode 100644 index 000000000..5143d4bae --- /dev/null +++ b/tf_adapter/tests/depends/ascendcl/CMakeLists.txt @@ -0,0 +1,29 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +project(ascendcl_stub) + +file(GLOB_RECURSE SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} + "src/*.cc" +) + +include_directories(${TFADAPTER_DIR}/inc/ + ${CMAKE_CURRENT_SOURCE_DIR}/src +) + +add_library(ascendcl_stub SHARED ${SRC_FILES}) +target_link_libraries(ascendcl_stub PRIVATE + $ +) \ No newline at end of file diff --git a/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.cc b/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.cc new file mode 100644 index 000000000..4b99a32d8 --- /dev/null +++ b/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.cc @@ -0,0 +1,268 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "acl/acl_tdt.h" +#include "ascendcl_stub.h" +#include "acl/acl_rt.h" +#include +#include + +namespace { + std::mutex aclChannleMutex; + std::map aclChannleMap; + std::map aclDataTypeStrMap = + { + {"bool", ACL_BOOL}, + {"int8", ACL_INT8}, + {"uint8", ACL_UINT8}, + {"half", ACL_FLOAT16}, + {"int16", ACL_INT16}, + {"uint16", ACL_UINT16}, + {"float", ACL_FLOAT}, + {"int32", ACL_INT32}, + {"uint32", ACL_UINT32}, + {"int64", ACL_INT64}, + {"uint64", ACL_UINT64}, + {"double", ACL_DOUBLE}, + {"string", ACL_STRING} + }; +} + +namespace acl { + void GetTensorDimsString(const int64_t *dims, size_t dimNum, std::string &dimsStr) + { + for (size_t i = 0; i < dimNum; ++i) { + dimsStr += std::to_string(dims[i]); + if (i + 1 == dimNum) { + break; + } + dimsStr.push_back(','); + } + dimsStr += "]"; + } +} + +aclError acltdtDestroyChannel(acltdtChannelHandle *handle) { + if (handle == nullptr) { + return ACL_ERROR_INVALID_PARAM; + } + return ACL_SUCCESS; +} + +acltdtChannelHandle *acltdtCreateChannel(uint32_t deviceId, const char *name) { + acltdtChannelHandle *handle = new(std::nothrow) acltdtChannelHandle(deviceId, name); + { + std::unique_lock lk(aclChannleMutex); + aclChannleMap[name] = handle; + } + return handle; +} + +aclError aclrtSetDevice(int32_t deviceId){ + return ACL_SUCCESS; +} + +acltdtChannelHandle *acltdtCreateChannelWithCapacity(uint32_t deviceId, + const char *name, + size_t capacity) { + acltdtChannelHandle *handle = new(std::nothrow) acltdtChannelHandle(deviceId, name); + { + std::unique_lock lk(aclChannleMutex); + aclChannleMap[name] = handle; + } + return handle; +} + +acltdtDataItem *acltdtGetDataItem(const acltdtDataset *dataset, size_t index) { + if ((dataset == nullptr) || (index >= dataset->blobs.size())) { + return nullptr; + } + + return dataset->blobs[index]; +} + +aclError acltdtDestroyDataItem(acltdtDataItem *dataItem) { + if (dataItem == nullptr) { + return ACL_ERROR_INVALID_PARAM; + } + delete dataItem; + return ACL_SUCCESS; +} + +size_t acltdtGetDatasetSize(const acltdtDataset *dataset) { + if (dataset == nullptr) { + return 0; + } + return dataset->blobs.size(); +} + +aclError acltdtDestroyDataset(acltdtDataset *dataset) { + if (dataset == nullptr) { + return ACL_ERROR_INVALID_PARAM; + } + delete dataset; + return ACL_SUCCESS; +} + +acltdtDataset *acltdtCreateDataset() { + return new(std::nothrow) acltdtDataset(); +} + +aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, + acltdtDataset *dataset, + int32_t timeout) { + if (handle->recvName.empty()) { + return ACL_ERROR_INVALID_PARAM; + } + if (handle->recvName == "train") { + acltdtDataItem *acl_data = acltdtCreateDataItem(ACL_TENSOR_DATA_END_OF_SEQUENCE, nullptr, 0, ACL_BOOL /* whatever */, nullptr, 0); + if (acltdtAddDataItem(dataset, acl_data) != ACL_ERROR_NONE) { + if (acltdtDestroyDataItem(acl_data) != ACL_ERROR_NONE) { + return ACL_ERROR_FAILURE; + } + } + } else { + std::string vaue_str = "print message!!"; + std::string *value = &vaue_str; + // for scalar type, *dims is nullptr and dim_num is 0 + acltdtDataItem *acl_data = acltdtCreateDataItem(ACL_TENSOR_DATA_TENSOR, 
nullptr, 0, ACL_STRING, + const_cast(value->c_str()), value->size()); + if (acltdtAddDataItem(dataset, acl_data) != ACL_ERROR_NONE) { + if (acltdtDestroyDataItem(acl_data) != ACL_ERROR_NONE) { + return ACL_ERROR_FAILURE; + } + } + int32_t value_int = 1; + acltdtDataItem *acl_int_data = acltdtCreateDataItem( + ACL_TENSOR_DATA_TENSOR, nullptr, + 0, ACL_INT32, &value_int, 4); + if (acltdtAddDataItem(dataset, acl_int_data) != ACL_ERROR_NONE) { + if (acltdtDestroyDataItem(acl_int_data) != ACL_ERROR_NONE) { + return ACL_ERROR_FAILURE; + } + } + } + return ACL_SUCCESS; +} + +acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType, + const int64_t *dims, + size_t dimNum, + aclDataType dataType, + void *data, + size_t size) { + if ((dims == nullptr && dimNum != 0) || (dims != nullptr && dimNum == 0)) { + return nullptr; + } + std::string dimsStr = "["; + acl::GetTensorDimsString(dims, dimNum, dimsStr); + std::string typeStr; + for (const auto &item: aclDataTypeStrMap) { + if (item.second == dataType) { + typeStr = item.first; + break; + } + } + if (typeStr.empty()) { + return nullptr; + } + std::shared_ptr dataPtr; + dataPtr.reset(data, [](const void *p) {}); + return new(std::nothrow) acltdtDataItem(tdtType, dims, dimNum, dimsStr, dataType, typeStr, dataPtr, size); +} + +aclError acltdtAddDataItem(acltdtDataset *dataset, acltdtDataItem *dataItem) { + if (dataset == nullptr || dataItem == nullptr) { + return ACL_ERROR_INVALID_PARAM; + } + if (dataset->freeSelf) { + return ACL_ERROR_FEATURE_UNSUPPORTED; + } + dataset->blobs.push_back(dataItem); + return ACL_SUCCESS; +} + +aclError acltdtSendTensor(const acltdtChannelHandle *handle, + const acltdtDataset *dataset, + int32_t timeout) { + if (dataset == nullptr || handle == nullptr) { + return ACL_ERROR_INVALID_PARAM; + } + return ACL_SUCCESS; +} + +acltdtTensorType acltdtGetTensorTypeFromItem(const acltdtDataItem *dataItem) { + if (dataItem == nullptr) { + return ACL_TENSOR_DATA_UNDEFINED; + } + return dataItem->tdtType; +} + +aclDataType acltdtGetDataTypeFromItem(const acltdtDataItem *dataItem) { + if (dataItem == nullptr) { + return ACL_DT_UNDEFINED; + } + return dataItem->dataType; +} + +size_t acltdtGetDimNumFromItem(const acltdtDataItem *dataItem) { + if (dataItem == nullptr) { + return 0; + } + return dataItem->dims.size(); +} + +size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataItem) { + if (dataItem == nullptr) { + return 0; + } + return dataItem->dataLen; +} + +void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataItem) { + if (dataItem == nullptr) { + return nullptr; + } + return dataItem->dataPtr.get(); +} + +aclError acltdtGetDimsFromItem(const acltdtDataItem *dataItem, int64_t *dims, size_t dimNum) { + if (dataItem == nullptr) { + return ACL_TENSOR_DATA_UNDEFINED; + } + // check dims and dimNum + if ((dims == nullptr && dimNum != 0) || (dims != nullptr && dimNum == 0)) { + return ACL_ERROR_INVALID_PARAM; + } + + if (dimNum < dataItem->dims.size()) { + return ACL_ERROR_INVALID_PARAM; + } + + for (size_t i = 0; i < dataItem->dims.size(); ++i) { + dims[i] = dataItem->dims[i]; + } + + return ACL_SUCCESS; +} + +aclError acltdtStopChannel(acltdtChannelHandle *handle) +{ + if (handle == nullptr) { + return ACL_TENSOR_DATA_UNDEFINED; + } + return ACL_SUCCESS; +} \ No newline at end of file diff --git a/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.h b/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.h new file mode 100644 index 000000000..c418e4d15 --- /dev/null +++ 
b/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.h @@ -0,0 +1,82 @@ +/** +* @file tensor_data_transfer.h +* +* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ + +#ifndef DEPENDS_ASCENDCL_STUB_H +#define DEPENDS_ASCENDCL_STUB_H +#include +#include +#include +#include + +#include "acl/acl_base.h" +#include "acl/acl_tdt.h" + +struct acltdtDataItem { + acltdtDataItem(acltdtTensorType tdtType, + const int64_t *dims, size_t dimNum, const std::string &dimsStr, + aclDataType type, const std::string &typeStr, + std::shared_ptr tensorData, size_t size) + { + this->tdtType = tdtType; + for (size_t i = 0; i < dimNum; ++i) { + this->dims.push_back(dims[i]); + } + this->dimsStr = dimsStr; + this->dataType = type; + this->dataTypeStr = typeStr; + this->dataLen = size; + this->dataPtr = tensorData; + } + acltdtDataItem() = default; + ~acltdtDataItem() = default; + acltdtTensorType tdtType; + std::vector dims; + std::string dimsStr; + aclDataType dataType; + std::string dataTypeStr; + size_t dataLen; + std::shared_ptr dataPtr; +}; + +struct acltdtDataset { + acltdtDataset() : freeSelf(false) {}; + ~acltdtDataset() + { + if (freeSelf) { + for (auto iter = blobs.begin(); iter != blobs.end(); ++iter) { + (void)acltdtDestroyDataItem(*iter); + } + } + } + std::vector blobs; + bool freeSelf; +}; + +struct acltdtChannelHandle { + acltdtChannelHandle(uint32_t deviceId, const char *channelName) + { + devId = deviceId; + if (channelName != nullptr) { + name = channelName; + size_t prefixLen = sizeof("TF_RECEIVE_") - 1; + if (0 == strncmp(channelName, "TF_RECEIVE_", prefixLen)) { + recvName = channelName + prefixLen; + } + } + } + acltdtChannelHandle() = default; + ~acltdtChannelHandle() = default; + std::string name; + std::string recvName; + uint32_t devId; +}; + +#endif //ACL_TENSOR_DATA_TRANSFER_H + diff --git a/tf_adapter/tests/st/CMakeLists.txt b/tf_adapter/tests/st/CMakeLists.txt index 302442238..b7462d700 100644 --- a/tf_adapter/tests/st/CMakeLists.txt +++ b/tf_adapter/tests/st/CMakeLists.txt @@ -25,12 +25,17 @@ file(GLOB_RECURSE ST_SOURCES ${TFADAPTER_DIR}/tf_adapter/kernels/*.cc "optimizers/testcase/om_partition_subgraphs_pass_test.cc" "optimizers/testcase/get_attr_optimize_pass_test.cc" + "optimizers/testcase/dp_tf_ge_conversion_pass_test.cc" "kernels/testcase/ocr_ops_test.cc" "kernels/testcase/non_zero_with_value_ops_test.cc" "kernels/testcase/geop_npu_test.cc" - "util/testcase/*.cc" + "kernels/testcase/infeed_outfeed_test.cc" "kernels/testcase/layer_norm_ops_test.cc" "kernels/testcase/layer_norm_grad_ops_test.cc" + "kernels/testcase/dataset/host_queue_dats_set_st.cc" + "kernels/testcase/dataset/function_testlib.cc" + "kernels/testcase/dataset/dataset_test_base.cc" + "util/testcase/*.cc" ) add_executable(tfadapter_stest @@ -65,7 +70,6 @@ add_dependencies(tfadapter_stest aoe_tuning) target_link_libraries(tfadapter_stest PUBLIC $ - gtest gtest_main c_sec mmpa_stub indextransform_stub alog_stub datatransfer_stub ge_runner_stub + gtest gtest_main c_sec mmpa_stub indextransform_stub alog_stub datatransfer_stub ge_runner_stub ascendcl_stub ${PYTHON_LIB_PATH} -lrt -ldl -lgcov ) - diff --git a/tf_adapter/tests/st/kernels/pbtxt/geop_dpop.pbtxt b/tf_adapter/tests/st/kernels/pbtxt/geop_dpop.pbtxt new file mode 100644 index 000000000..2b67e2a6d --- /dev/null +++ 
b/tf_adapter/tests/st/kernels/pbtxt/geop_dpop.pbtxt @@ -0,0 +1,697 @@ +node { + name: "GeOp1_0_dp" + op: "GeOp" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "Tin" + value { + list { + } + } + } + attr { + key: "Tout" + value { + list { + } + } + } + attr { + key: "_NpuOptimizer" + value { + s: "NpuOptimizer" + } + } + attr { + key: "_auto_tune_mode" + value { + s: "" + } + } + attr { + key: "_buffer_optimize" + value { + s: "l2_optimize" + } + } + attr { + key: "_compress_weight_conf" + value { + s: "" + } + } + attr { + key: "_debug_dir" + value { + s: "" + } + } + attr { + key: "_distribute_config" + value { + s: "" + } + } + attr { + key: "_do_npu_optimizer" + value { + s: "1" + } + } + attr { + key: "_dump_debug_mode" + value { + s: "all" + } + } + attr { + key: "_dump_mode" + value { + s: "output" + } + } + attr { + key: "_dump_path" + value { + s: "./" + } + } + attr { + key: "_dump_step" + value { + s: "1" + } + } + attr { + key: "_dynamic_dims" + value { + s: "" + } + } + attr { + key: "_dynamic_graph_execute_mode" + value { + s: "dynamic_execute" + } + } + attr { + key: "_dynamic_input" + value { + s: "0" + } + } + attr { + key: "_dynamic_node_type" + value { + s: "" + } + } + attr { + key: "_enable_compress_weight" + value { + s: "0" + } + } + attr { + key: "_enable_data_pre_proc" + value { + s: "0" + } + } + attr { + key: "_enable_dump" + value { + s: "1" + } + } + attr { + key: "_enable_dump_debug" + value { + s: "1" + } + } + attr { + key: "_enable_exception_dump" + value { + s: "" + } + } + attr { + key: "_enable_scope_fusion_passes" + value { + s: "" + } + } + attr { + key: "_enable_small_channel" + value { + s: "0" + } + } + attr { + key: "_fusion_switch_file" + value { + s: "" + } + } + attr { + key: "_graph_run_mode" + value { + s: "1" + } + } + attr { + key: "_hcom_multi_mode" + value { + s: "" + } + } + attr { + key: "_hcom_parallel" + value { + s: "0" + } + } + attr { + key: "_in_out_pair" + value { + s: "" + } + } + attr { + key: "_in_out_pair_flag" + value { + s: "1" + } + } + attr { + key: "_input_shape" + value { + s: "" + } + } + attr { + key: "_is_tailing_optimization" + value { + s: "0" + } + } + attr { + key: "_iterations_per_loop" + value { + s: "1" + } + } + attr { + key: "_job" + value { + s: "localhost" + } + } + attr { + key: "_local_device_list" + value { + s: "" + } + } + attr { + key: "_local_rank_id" + value { + s: "-1" + } + } + attr { + key: "_lower_functional_ops" + value { + s: "0" + } + } + attr { + key: "_mix_compile_mode" + value { + s: "0" + } + } + attr { + key: "_mstune_mode" + value { + s: "" + } + } + attr { + key: "_op_compiler_cache_dir" + value { + s: "" + } + } + attr { + key: "_op_compiler_cache_mode" + value { + s: "" + } + } + attr { + key: "_op_debug_level" + value { + s: "0" + } + } + attr { + key: "_op_select_implmode" + value { + s: "" + } + } + attr { + key: "_op_tune_mode" + value { + s: "" + } + } + attr { + key: "_optypelist_for_implmode" + value { + s: "" + } + } + attr { + key: "_precision_mode" + value { + s: "" + } + } + attr { + key: "_profiling_mode" + value { + s: "0" + } + } + attr { + key: "_profiling_options" + value { + s: "" + } + } + attr { + key: "_session_device_id" + value { + s: "" + } + } + attr { + key: "_stream_max_parallel_num" + value { + s: "" + } + } + attr { + key: "_task_index" + value { + s: "0" + } + } + attr { + key: "_use_off_line" + value { + s: "1" + } + } + attr { + key: "_variable_format_optimize" + value { + s: "1" + } + } + attr { + key: "_work_path" + value { + 
s: "" + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } + attr { + key: "function" + value { + func { + name: "geop_function_D9x45pM0kZ0" + } + } + } +} +library { + function { + signature { + name: "geop_function_D9x45pM0kZ0" + } + node_def { + name: "dpop_function_FFvj93e0XnN" + op: "DPOP" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "Tin" + value { + list { + } + } + } + attr { + key: "Tout" + value { + list { + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } + attr { + key: "func_def" + value { + s: "\n\271\001\n;\n\037__inference_Dataset_map_func_11\022\n\n\006args_0\030\002\032\014\n\010identity\030\002\0326\n\005add/y\022\005Const*\031\n\005value\022\020B\016\010\002\022\0002\010\000\000\000\000\000\000\360?*\013\n\005dtype\022\0020\002\032-\n\003add\022\005AddV2\032\016add/y:output:0\032\006args_0*\007\n\001T\022\0020\002\"\023\n\010identity\022\007add:z:0\n\356\016\n\033\n\031dpop_function_FFvj93e0XnN\032p\n\nbatch_size\022\005Const\",/job:localhost/replica:0/task:0/device:CPU:0*\022\n\005value\022\tB\007\010\t\022\000R\001\002*\013\n\005dtype\022\0020\t2\014\n\nbatch_size\032x\n\016drop_remainder\022\005Const\",/job:localhost/replica:0/task:0/device:CPU:0*\022\n\005value\022\tB\007\010\n\022\000Z\001\000*\013\n\005dtype\022\0020\n2\020\n\016drop_remainder\032\365\002\n\nIteratorV2\022\nIteratorV2\",/job:localhost/replica:0/task:0/device:CPU:0*\033\n\024_iterations_per_loop\022\003\022\0011*\024\n\r_use_off_line\022\003\022\0011*\030\n\021_mix_compile_mode\022\003\022\0010*\021\n\013shared_name\022\002\022\000*\"\n\routput_shapes\022\021\n\017:\r\022\013\010\377\377\377\377\377\377\377\377\377\001*\037\n\006_class\022\025\n\023\022\021loc:@MakeIterator*\037\n\r_NpuOptimizer\022\016\022\014NpuOptimizer*\017\n\tcontainer\022\002\022\000*\025\n\014output_types\022\005\n\0032\001\002*\023\n\004_job\022\013\022\tlocalhost*\034\n\025_enable_data_pre_proc\022\003\022\00112\014\n\nIteratorV2\032\357\002\n\016BatchDatasetV2\022\016BatchDatasetV2\032^DeviceQueue_Queue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1:handle:0\032\023batch_size:output:0\032\027drop_remainder:output:0\",/job:localhost/replica:0/task:0/device:CPU:0*\017\n\007_kernel\022\004\022\002dp*\036\n\016_iterator_name\022\014\022\nIteratorV2*\025\n\014output_types\022\005\n\0032\001\002*\023\n\rparallel_copy\022\002(\000*\"\n\routput_shapes\022\021\n\017:\r\022\013\010\377\377\377\377\377\377\377\377\377\0012\020\n\016BatchDatasetV2\032\240\002\n\nMapDataset\022\nMapDataset\032\027BatchDatasetV2:handle:0\",/job:localhost/replica:0/task:0/device:CPU:0*\"\n\routput_shapes\022\021\n\017:\r\022\013\010\377\377\377\377\377\377\377\377\377\001*(\n\001f\022#R!\n\037__inference_Dataset_map_func_11*\036\n\030use_inter_op_parallelism\022\002(\001*\025\n\014output_types\022\005\n\0032\001\002*\020\n\nTarguments\022\002\n\000*\032\n\024preserve_cardinality\022\002(\0002\014\n\nMapDataset\032\265\001\n\014MakeIterator\022\014MakeIterator\032\023MapDataset:handle:0\032\023IteratorV2:handle:0\",/job:localhost/replica:0/task:0/device:CPU:0*\017\n\007_kernel\022\004\022\002dp*\036\n\016_iterator_name\022\014\022\nIteratorV22\016\n\014MakeIterator\032\235\003\nUDeviceQueue_Queue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1\022\022DeviceQueueDataset\",/job:localhost/replica:0/task:0/device:CPU:0*\025\n\routput_shapes\022\004\n\002:\000*[\n\014channel_name\022K\022IQueue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09
W8p1*\036\n\016_iterator_name\022\014\022\nIteratorV2*\025\n\014output_types\022\005\n\0032\001\0022W\nUDeviceQueue_Queue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1" + } + } + attr { + key: "function" + value { + func { + name: "dpop_function_FFvj93e0XnN" + } + } + } + } + } + function { + signature { + name: "dpop_function_FFvj93e0XnN" + } + node_def { + name: "batch_size" + op: "Const" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 2 + } + } + } + experimental_debug_info { + original_node_names: "batch_size" + } + } + node_def { + name: "drop_remainder" + op: "Const" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_BOOL + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_BOOL + tensor_shape { + } + bool_val: false + } + } + } + experimental_debug_info { + original_node_names: "drop_remainder" + } + } + node_def { + name: "IteratorV2" + op: "IteratorV2" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_NpuOptimizer" + value { + s: "NpuOptimizer" + } + } + attr { + key: "_class" + value { + list { + s: "loc:@MakeIterator" + } + } + } + attr { + key: "_enable_data_pre_proc" + value { + s: "1" + } + } + attr { + key: "_iterations_per_loop" + value { + s: "1" + } + } + attr { + key: "_job" + value { + s: "localhost" + } + } + attr { + key: "_mix_compile_mode" + value { + s: "0" + } + } + attr { + key: "_use_off_line" + value { + s: "1" + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_DOUBLE + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } + experimental_debug_info { + original_node_names: "IteratorV2" + } + } + node_def { + name: "BatchDatasetV2" + op: "BatchDatasetV2" + input: "DeviceQueue_Queue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1:handle:0" + input: "batch_size:output:0" + input: "drop_remainder:output:0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_iterator_name" + value { + s: "IteratorV2" + } + } + attr { + key: "_kernel" + value { + s: "dp" + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_DOUBLE + } + } + } + attr { + key: "parallel_copy" + value { + b: false + } + } + experimental_debug_info { + original_node_names: "BatchDatasetV2" + } + } + node_def { + name: "MapDataset" + op: "MapDataset" + input: "BatchDatasetV2:handle:0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "Targuments" + value { + list { + } + } + } + attr { + key: "f" + value { + func { + name: "__inference_Dataset_map_func_11" + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_DOUBLE + } + } + } + attr { + key: "preserve_cardinality" + value { + b: false + } + } + attr { + key: "use_inter_op_parallelism" + value { + b: true + } + } + experimental_debug_info { + original_node_names: "MapDataset" + } + } + node_def { + name: "MakeIterator" + op: "MakeIterator" + input: "MapDataset:handle:0" + input: "IteratorV2:handle:0" + device: 
"/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_iterator_name" + value { + s: "IteratorV2" + } + } + attr { + key: "_kernel" + value { + s: "dp" + } + } + experimental_debug_info { + original_node_names: "MakeIterator" + } + } + node_def { + name: "DeviceQueue_Queue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1" + op: "DeviceQueueDataset" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_iterator_name" + value { + s: "IteratorV2" + } + } + attr { + key: "channel_name" + value { + s: "Queue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1" + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_DOUBLE + } + } + } + experimental_debug_info { + original_node_names: "DeviceQueue_Queue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1" + } + } + } +} +versions { + producer: 134 +} diff --git a/tf_adapter/tests/st/kernels/testcase/dataset/dataset_test_base.cc b/tf_adapter/tests/st/kernels/testcase/dataset/dataset_test_base.cc new file mode 100644 index 000000000..ba2f05661 --- /dev/null +++ b/tf_adapter/tests/st/kernels/testcase/dataset/dataset_test_base.cc @@ -0,0 +1,704 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+
+#include "tensorflow/core/kernels/data/dataset_test_base.h"
+
+#include "tensorflow/core/common_runtime/executor.h"
+#include "tensorflow/core/framework/cancellation.h"
+#include "tensorflow/core/framework/versions.pb.h"
+#include "tensorflow/core/lib/io/record_writer.h"
+#include <iostream>
+using namespace std;
+namespace tensorflow {
+namespace data {
+
+string ToString(CompressionType compression_type) {
+  switch (compression_type) {
+    case CompressionType::ZLIB:
+      return "ZLIB";
+    case CompressionType::GZIP:
+      return "GZIP";
+    case CompressionType::RAW:
+      return "RAW";
+    case CompressionType::UNCOMPRESSED:
+      return "";
+  }
+}
+
+io::ZlibCompressionOptions GetZlibCompressionOptions(
+    CompressionType compression_type) {
+  switch (compression_type) {
+    case CompressionType::ZLIB:
+      return io::ZlibCompressionOptions::DEFAULT();
+    case CompressionType::GZIP:
+      return io::ZlibCompressionOptions::GZIP();
+    case CompressionType::RAW:
+      return io::ZlibCompressionOptions::RAW();
+    case CompressionType::UNCOMPRESSED:
+      LOG(WARNING) << "ZlibCompressionOptions does not have an option for "
+                   << ToString(compression_type);
+      return io::ZlibCompressionOptions::DEFAULT();
+  }
+}
+
+Status WriteDataToFile(const string& filename, const char* data) {
+  return WriteDataToFile(filename, data, CompressionParams());
+}
+
+Status WriteDataToFile(const string& filename, const char* data,
+                       const CompressionParams& params) {
+  Env* env = Env::Default();
+  std::unique_ptr<WritableFile> file_writer;
+  TF_RETURN_IF_ERROR(env->NewWritableFile(filename, &file_writer));
+  if (params.compression_type == CompressionType::UNCOMPRESSED) {
+    TF_RETURN_IF_ERROR(file_writer->Append(data));
+  } else if (params.compression_type == CompressionType::ZLIB ||
+             params.compression_type == CompressionType::GZIP ||
+             params.compression_type == CompressionType::RAW) {
+    auto zlib_compression_options =
+        GetZlibCompressionOptions(params.compression_type);
+    io::ZlibOutputBuffer out(file_writer.get(), params.input_buffer_size,
+                             params.output_buffer_size,
+                             zlib_compression_options);
+    TF_RETURN_IF_ERROR(out.Init());
+    TF_RETURN_IF_ERROR(out.Append(data));
+    TF_RETURN_IF_ERROR(out.Flush());
+    TF_RETURN_IF_ERROR(out.Close());
+  } else {
+    return tensorflow::errors::InvalidArgument(
+        "Unsupported compression_type: ", ToString(params.compression_type));
+  }
+
+  TF_RETURN_IF_ERROR(file_writer->Flush());
+  TF_RETURN_IF_ERROR(file_writer->Close());
+
+  return Status::OK();
+}
+
+Status WriteDataToTFRecordFile(const string& filename,
+                               const std::vector<absl::string_view>& records,
+                               const CompressionParams& params) {
+  Env* env = Env::Default();
+  std::unique_ptr<WritableFile> file_writer;
+  TF_RETURN_IF_ERROR(env->NewWritableFile(filename, &file_writer));
+  auto options = io::RecordWriterOptions::CreateRecordWriterOptions(
+      ToString(params.compression_type));
+  options.zlib_options.input_buffer_size = params.input_buffer_size;
+  io::RecordWriter record_writer(file_writer.get(), options);
+  for (const auto& record : records) {
+    TF_RETURN_IF_ERROR(record_writer.WriteRecord(record));
+  }
+  TF_RETURN_IF_ERROR(record_writer.Flush());
+  TF_RETURN_IF_ERROR(record_writer.Close());
+  TF_RETURN_IF_ERROR(file_writer->Flush());
+  TF_RETURN_IF_ERROR(file_writer->Close());
+  return Status::OK();
+}
+
+template <typename T>
+Status IsEqual(const Tensor& t1, const Tensor& t2) {
+  if (t1.dtype() != t2.dtype()) {
+    return tensorflow::errors::Internal(
+        "Two tensors have different dtypes: ", DataTypeString(t1.dtype()),
+        " vs. ", DataTypeString(t2.dtype()));
+  }
+  if (!t1.IsSameSize(t2)) {
+    return tensorflow::errors::Internal(
+        "Two tensors have different shapes: ", t1.shape().DebugString(),
+        " vs. ", t2.shape().DebugString());
+  }
+
+  auto flat_t1 = t1.flat<T>();
+  auto flat_t2 = t2.flat<T>();
+  auto length = flat_t1.size();
+
+  for (int i = 0; i < length; ++i) {
+    if (flat_t1(i) != flat_t2(i)) {
+      return tensorflow::errors::Internal(
+          "Two tensors have different values "
+          "at [",
+          i, "]: ", flat_t1(i), " vs. ", flat_t2(i));
+    }
+  }
+  return Status::OK();
+}
+
+Status DatasetOpsTestBase::ExpectEqual(const Tensor& a, const Tensor& b) {
+  switch (a.dtype()) {
+#define CASE(DT)                           \
+  case DataTypeToEnum<DT>::value:          \
+    TF_RETURN_IF_ERROR(IsEqual<DT>(a, b)); \
+    break;
+    TF_CALL_NUMBER_TYPES(CASE);
+    TF_CALL_tstring(CASE);
+    TF_CALL_uint32(CASE);
+    TF_CALL_uint64(CASE);
+    // TODO(feihugis): figure out how to support variant tensors.
+#undef CASE
+    default:
+      return errors::Internal("Unsupported dtype: ", a.dtype());
+  }
+  return Status::OK();
+}
+
+template <typename T>
+bool compare(const Tensor& t1, const Tensor& t2) {
+  auto flat_t1 = t1.flat<T>();
+  auto flat_t2 = t2.flat<T>();
+  auto length = std::min(flat_t1.size(), flat_t2.size());
+  for (int i = 0; i < length; ++i) {
+    if (flat_t1(i) < flat_t2(i)) return true;
+    if (flat_t1(i) > flat_t2(i)) return false;
+  }
+  return flat_t1.size() < length;
+}
+
+Status DatasetOpsTestBase::ExpectEqual(std::vector<Tensor> produced_tensors,
+                                       std::vector<Tensor> expected_tensors,
+                                       bool compare_order) {
+  if (produced_tensors.size() != expected_tensors.size()) {
+    return Status(tensorflow::errors::Internal(
+        "The two tensor vectors have different size (", produced_tensors.size(),
+        " v.s. ", expected_tensors.size(), ")"));
+  }
+
+  if (produced_tensors.empty()) return Status::OK();
+  if (produced_tensors[0].dtype() != expected_tensors[0].dtype()) {
+    return Status(tensorflow::errors::Internal(
+        "The two tensor vectors have different dtypes (",
+        produced_tensors[0].dtype(), " v.s. ", expected_tensors[0].dtype(),
+        ")"));
+  }
+
+  if (!compare_order) {
+    const DataType& dtype = produced_tensors[0].dtype();
+    switch (dtype) {
+#define CASE(DT)                                                \
+  case DT:                                                      \
+    std::sort(produced_tensors.begin(), produced_tensors.end(), \
+              compare<EnumToDataType<DT>::Type>);               \
+    std::sort(expected_tensors.begin(), expected_tensors.end(), \
+              compare<EnumToDataType<DT>::Type>);               \
+    break;
+      CASE(DT_FLOAT);
+      CASE(DT_DOUBLE);
+      CASE(DT_INT32);
+      CASE(DT_UINT8);
+      CASE(DT_INT16);
+      CASE(DT_INT8);
+      CASE(DT_STRING);
+      CASE(DT_INT64);
+      CASE(DT_BOOL);
+      CASE(DT_QINT8);
+      CASE(DT_QUINT8);
+      CASE(DT_QINT32);
+      CASE(DT_QINT16);
+      CASE(DT_QUINT16);
+      CASE(DT_UINT16);
+      CASE(DT_HALF);
+      CASE(DT_UINT32);
+      CASE(DT_UINT64);
+      // TODO(feihugis): support other dtypes.
+#undef CASE
+      default:
+        return errors::Internal("Unsupported dtype: ", dtype);
+    }
+  }
+
+  for (int i = 0; i < produced_tensors.size(); ++i) {
+    TF_RETURN_IF_ERROR(DatasetOpsTestBase::ExpectEqual(produced_tensors[i],
+                                                       expected_tensors[i]));
+  }
+  return Status::OK();
+}
+
+Status DatasetOpsTestBase::CreateTensorSliceDatasetKernel(
+    StringPiece node_name, const DataTypeVector& dtypes,
+    const std::vector<PartialTensorShape>& shapes,
+    std::unique_ptr<OpKernel>* tensor_slice_dataset_kernel) {
+  std::vector<string> components;
+  components.reserve(dtypes.size());
+  for (int i = 0; i < dtypes.size(); ++i) {
+    // Create the placeholder names for the input components of
+    // `TensorSliceDataset`.
+    components.emplace_back(strings::StrCat("component_", i));
+  }
+  NodeDef node_def = test::function::NDef(
+      node_name, "TensorSliceDataset", components,
+      {{"Toutput_types", dtypes}, {"output_shapes", shapes}});
+  TF_RETURN_IF_ERROR(CreateOpKernel(node_def, tensor_slice_dataset_kernel));
+  return Status::OK();
+}
+
+Status DatasetOpsTestBase::CreateTensorSliceDataset(
+    StringPiece node_name, std::vector<Tensor>* const components,
+    DatasetBase** tensor_slice_dataset) {
+  std::unique_ptr<OpKernel> tensor_slice_dataset_kernel;
+  DataTypeVector dtypes;
+  dtypes.reserve(components->size());
+  std::vector<PartialTensorShape> shapes;
+  shapes.reserve(components->size());
+  for (const auto& t : *components) {
+    dtypes.push_back(t.dtype());
+    gtl::InlinedVector<int64, 4> partial_dim_sizes;
+    for (int i = 1; i < t.dims(); ++i) {
+      partial_dim_sizes.push_back(t.dim_size(i));
+    }
+    shapes.emplace_back(std::move(partial_dim_sizes));
+  }
+  TF_RETURN_IF_ERROR(CreateTensorSliceDatasetKernel(
+      node_name, dtypes, shapes, &tensor_slice_dataset_kernel));
+  gtl::InlinedVector<TensorValue, 4> inputs;
+  for (auto& tensor : *components) {
+    inputs.emplace_back(&tensor);
+  }
+  TF_RETURN_IF_ERROR(CheckOpKernelInput(*tensor_slice_dataset_kernel, inputs));
+  std::unique_ptr<OpKernelContext> context;
+  TF_RETURN_IF_ERROR(CreateOpKernelContext(tensor_slice_dataset_kernel.get(),
+                                           &inputs, &context));
+  TF_RETURN_IF_ERROR(
+      RunOpKernel(tensor_slice_dataset_kernel.get(), context.get()));
+  TF_RETURN_IF_ERROR(
+      GetDatasetFromContext(context.get(), 0, tensor_slice_dataset));
+  return Status::OK();
+}
+
+// Create a `RangeDataset` dataset as a variant tensor.
+Status DatasetOpsTestBase::MakeRangeDataset(
+    const Tensor& start, const Tensor& stop, const Tensor& step,
+    const DataTypeVector& output_types,
+    const std::vector<PartialTensorShape>& output_shapes,
+    Tensor* range_dataset) {
+  GraphConstructorOptions graph_opts;
+  graph_opts.allow_internal_ops = true;
+  graph_opts.expect_device_spec = false;
+  TF_RETURN_IF_ERROR(
+      RunFunction(test::function::MakeRangeDataset(),
+                  /*attrs*/
+                  {{RangeDatasetOp::kOutputTypes, output_types},
+                   {RangeDatasetOp::kOutputShapes, output_shapes}},
+                  /*inputs*/ {start, stop, step}, graph_opts,
+                  /*rets*/ {range_dataset}));
+  return Status::OK();
+}
+
+// Create a `RangeDataset` dataset as a variant tensor.
+Status DatasetOpsTestBase::MakeRangeDataset(
+    const RangeDatasetParams& range_dataset_params, Tensor* range_dataset) {
+  GraphConstructorOptions graph_opts;
+  graph_opts.allow_internal_ops = true;
+  graph_opts.expect_device_spec = false;
+  TF_RETURN_IF_ERROR(RunFunction(
+      test::function::MakeRangeDataset(),
+      /*attrs*/
+      {{RangeDatasetOp::kOutputTypes, range_dataset_params.output_dtypes},
+       {RangeDatasetOp::kOutputShapes, range_dataset_params.output_shapes}},
+      /*inputs*/
+      {range_dataset_params.start, range_dataset_params.stop,
+       range_dataset_params.step},
+      graph_opts,
+      /*rets*/ {range_dataset}));
+  return Status::OK();
+}
+
+// Create a `TakeDataset` dataset as a variant tensor.
+Status DatasetOpsTestBase::MakeTakeDataset(
+    const Tensor& input_dataset, int64 count,
+    const DataTypeVector& output_types,
+    const std::vector<PartialTensorShape>& output_shapes,
+    Tensor* take_dataset) {
+  GraphConstructorOptions graph_opts;
+  graph_opts.allow_internal_ops = true;
+  graph_opts.expect_device_spec = false;
+
+  Tensor count_tensor = CreateTensor<int64>(TensorShape({}), {count});
+  TF_RETURN_IF_ERROR(
+      RunFunction(test::function::MakeTakeDataset(),
+                  /*attrs*/
+                  {{TakeDatasetOp::kOutputTypes, output_types},
+                   {TakeDatasetOp::kOutputShapes, output_shapes}},
+                  /*inputs*/ {input_dataset, count_tensor}, graph_opts,
+                  /*rets*/ {take_dataset}));
+  return Status::OK();
+}
+
+Status DatasetOpsTestBase::CreateOpKernel(
+    const NodeDef& node_def, std::unique_ptr<OpKernel>* op_kernel) {
+  OpKernel* kernel;
+  TF_RETURN_IF_ERROR(tensorflow::CreateOpKernel(device_type_, device_.get(),
+                                                allocator_, flr_, node_def,
+                                                TF_GRAPH_DEF_VERSION, &kernel));
+  op_kernel->reset(kernel);
+  return Status::OK();
+}
+
+Status DatasetOpsTestBase::CreateDatasetContext(
+    OpKernel* const dataset_kernel,
+    gtl::InlinedVector<TensorValue, 4>* const inputs,
+    std::unique_ptr<OpKernelContext>* dataset_context) {
+  TF_RETURN_IF_ERROR(CheckOpKernelInput(*dataset_kernel, *inputs));
+  TF_RETURN_IF_ERROR(
+      CreateOpKernelContext(dataset_kernel, inputs, dataset_context));
+  return Status::OK();
+}
+
+Status DatasetOpsTestBase::CreateDataset(OpKernel* kernel,
+                                         OpKernelContext* context,
+                                         DatasetBase** const dataset) {
+  TF_RETURN_IF_ERROR(RunOpKernel(kernel, context));
+  // Assume that DatasetOp has only one output.
+  DCHECK_EQ(context->num_outputs(), 1);
+  TF_RETURN_IF_ERROR(GetDatasetFromContext(context, 0, dataset));
+  return Status::OK();
+}
+
+Status DatasetOpsTestBase::RestoreIterator(
+    IteratorContext* ctx, IteratorStateReader* reader,
+    const string& output_prefix, const DatasetBase& dataset,
+    std::unique_ptr<IteratorBase>* iterator) {
+  TF_RETURN_IF_ERROR(dataset.MakeIterator(ctx, output_prefix, iterator));
+  TF_RETURN_IF_ERROR((*iterator)->Restore(ctx, reader));
+  return Status::OK();
+}
+
+Status DatasetOpsTestBase::CreateIteratorContext(
+    OpKernelContext* const op_context,
+    std::unique_ptr<IteratorContext>* iterator_context) {
+  IteratorContext::Params params(op_context);
+  params.resource_mgr = op_context->resource_manager();
+  function_handle_cache_ = absl::make_unique<FunctionHandleCache>(flr_);
+  params.function_handle_cache = function_handle_cache_.get();
+  params.cancellation_manager = cancellation_manager_.get();
+  *iterator_context = absl::make_unique<IteratorContext>(params);
+  return Status::OK();
+}
+
+Status DatasetOpsTestBase::GetDatasetFromContext(OpKernelContext* context,
+                                                 int output_index,
+                                                 DatasetBase** const dataset) {
+  Tensor* output = context->mutable_output(output_index);
+  Status status = GetDatasetFromVariantTensor(*output, dataset);
+  (*dataset)->Ref();
+  return status;
+}
+
+Status DatasetOpsTestBase::InitThreadPool(int thread_num) {
+  if (thread_num < 1) {
+    return errors::InvalidArgument(
+        "The `thread_num` argument should be positive but got: ", thread_num);
+  }
+  thread_pool_ = absl::make_unique<thread::ThreadPool>(
+      Env::Default(), ThreadOptions(), "test_thread_pool", thread_num);
+  return Status::OK();
+}
+
+Status DatasetOpsTestBase::InitFunctionLibraryRuntime(
+    const std::vector<FunctionDef>& flib, int cpu_num) {
+  if (cpu_num < 1) {
+    return errors::InvalidArgument(
+        "The `cpu_num` argument should be positive but got: ", cpu_num);
+  }
+  SessionOptions options;
+  auto* device_count = options.config.mutable_device_count();
+  device_count->insert({"CPU", cpu_num});
+  std::vector<std::unique_ptr<Device>> devices;
+  TF_RETURN_IF_ERROR(DeviceFactory::AddDevices(
+      options, "/job:localhost/replica:0/task:0", &devices));
+  device_mgr_ = absl::make_unique<DeviceMgr>(std::move(devices));
+  resource_mgr_ = absl::make_unique<ResourceMgr>("default_container");
+
+  FunctionDefLibrary proto;
+  for (const auto& fdef : flib) *(proto.add_function()) = fdef;
+  lib_def_ =
+      absl::make_unique<FunctionLibraryDefinition>(OpRegistry::Global(), proto);
+
+  OptimizerOptions opts;
+  pflr_ = absl::make_unique<ProcessFunctionLibraryRuntime>(
+      device_mgr_.get(), Env::Default(), TF_GRAPH_DEF_VERSION, lib_def_.get(),
+      opts, thread_pool_.get(), nullptr /* cluster_flr */);
+  flr_ = pflr_->GetFLR("/job:localhost/replica:0/task:0/cpu:0");
+  if (thread_pool_ == nullptr) {
+    runner_ = [](std::function<void()> fn) { fn(); };
+  } else {
+    runner_ = [this](std::function<void()> fn) {
+      thread_pool_->Schedule(std::move(fn));
+    };
+  }
+  return Status::OK();
+}
+
+Status DatasetOpsTestBase::RunOpKernel(OpKernel* op_kernel,
+                                       OpKernelContext* context) {
+  device_->Compute(op_kernel, context);
+  return context->status();
+}
+
+Status DatasetOpsTestBase::RunFunction(
+    const FunctionDef& fdef, test::function::Attrs attrs,
+    const std::vector<Tensor>& args,
+    const GraphConstructorOptions& graph_options, std::vector<Tensor*> rets) {
+  std::unique_ptr<Executor> exec;
+  InstantiationResult result;
+  auto GetOpSig = [](const string& op, const OpDef** sig) {
+    return OpRegistry::Global()->LookUpOpDef(op, sig);
+  };
+  TF_RETURN_IF_ERROR(InstantiateFunction(fdef, attrs, GetOpSig, &result));
+
+  DataTypeVector arg_types = result.arg_types;
+  DataTypeVector ret_types = result.ret_types;
+
+  std::unique_ptr<Graph> g(new Graph(OpRegistry::Global()));
+  TF_RETURN_IF_ERROR(
+      ConvertNodeDefsToGraph(graph_options, result.nodes, g.get()));
+
+  const int version = g->versions().producer();
+  LocalExecutorParams params;
+  params.function_library = flr_;
+  params.device = device_.get();
+  params.create_kernel = [this, version](const NodeDef& ndef,
+                                         OpKernel** kernel) {
+    return CreateNonCachedKernel(device_.get(), this->flr_, ndef, version,
+                                 kernel);
+  };
+  params.delete_kernel = [](OpKernel* kernel) {
+    DeleteNonCachedKernel(kernel);
+  };
+  params.rendezvous_factory = [](const int64, const DeviceMgr* device_mgr,
+                                 Rendezvous** r) {
+    *r = new IntraProcessRendezvous(device_mgr);
+    return Status::OK();
+  };
+
+  Executor* cur_exec;
+  TF_RETURN_IF_ERROR(NewLocalExecutor(params, std::move(g), &cur_exec));
+  exec.reset(cur_exec);
+  FunctionCallFrame frame(arg_types, ret_types);
+  TF_RETURN_IF_ERROR(frame.SetArgs(args));
+  Executor::Args exec_args;
+  exec_args.call_frame = &frame;
+  exec_args.runner = runner_;
+  TF_RETURN_IF_ERROR(exec->Run(exec_args));
+  std::vector<Tensor> computed;
+  TF_RETURN_IF_ERROR(frame.GetRetvals(&computed));
+  if (computed.size() != rets.size()) {
+    return errors::InvalidArgument(
+        "The result does not match the expected number of return outputs",
+        ". Expected: ", rets.size(), ". Actual: ", computed.size());
Actual: ", computed.size()); + } + for (int i = 0; i < rets.size(); ++i) { + *(rets[i]) = computed[i]; + } + return Status::OK(); +} + +Status DatasetOpsTestBase::CreateOpKernelContext( + OpKernel* kernel, gtl::InlinedVector* inputs, + std::unique_ptr* context) { + params_ = absl::make_unique(); + cancellation_manager_ = absl::make_unique(); + params_->cancellation_manager = cancellation_manager_.get(); + params_->device = device_.get(); + params_->frame_iter = FrameAndIter(0, 0); + params_->function_library = flr_; + params_->inputs = inputs; + params_->op_kernel = kernel; + params_->resource_manager = resource_mgr_.get(); + params_->runner = &runner_; + checkpoint::TensorSliceReaderCacheWrapper slice_reader_cache_wrapper; + slice_reader_cache_ = + absl::make_unique(); + params_->slice_reader_cache = slice_reader_cache_.get(); + step_container_ = + absl::make_unique(0, [](const string&) {}); + params_->step_container = step_container_.get(); + + // Set the allocator attributes for the outputs. + allocator_attrs_.clear(); + for (int index = 0; index < params_->op_kernel->num_outputs(); index++) { + AllocatorAttributes attr; + const bool on_host = + (params_->op_kernel->output_memory_types()[index] == HOST_MEMORY); + attr.set_on_host(on_host); + allocator_attrs_.emplace_back(attr); + } + params_->output_attr_array = gtl::vector_as_array(&allocator_attrs_); + + *context = absl::make_unique(params_.get()); + return Status::OK(); +} + +Status DatasetOpsTestBase::CreateSerializationContext( + std::unique_ptr* context) { + *context = + absl::make_unique(SerializationContext::Params{}); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckOpKernelInput( + const OpKernel& kernel, const gtl::InlinedVector& inputs) { + if (kernel.input_types().size() != inputs.size()) { + cout<<"++"<* inputs, DataTypeVector input_types, + DataType dtype, const TensorShape& shape) { + if (input_types.size() < inputs->size()) { + return errors::InvalidArgument("Adding more inputs than types: ", + inputs->size(), " vs. ", input_types.size()); + } + bool is_ref = IsRefType(input_types[inputs->size()]); + std::unique_ptr input = + absl::make_unique(allocator_, dtype, shape); + + if (is_ref) { + DataType expected_dtype = RemoveRefType(input_types[inputs->size()]); + if (expected_dtype != dtype) { + return errors::InvalidArgument("The input data type is ", dtype, + " , but expected: ", expected_dtype); + } + inputs->push_back({&lock_for_refs_, input.get()}); + } else { + if (input_types[inputs->size()] != dtype) { + return errors::InvalidArgument( + "The input data type is ", dtype, + " , but expected: ", input_types[inputs->size()]); + } + inputs->push_back({nullptr, input.get()}); + } + + // TODO(jsimsa): Figure out how to avoid using a member variable to garbage + // collect the inputs. 
+ tensors_.push_back(std::move(input)); + + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckIteratorGetNext( + const std::vector& expected_outputs, bool compare_order) { + bool end_of_sequence = false; + std::vector out_tensors; + while (!end_of_sequence) { + std::vector next; + TF_RETURN_IF_ERROR( + iterator_->GetNext(iterator_ctx_.get(), &next, &end_of_sequence)); + out_tensors.insert(out_tensors.end(), next.begin(), next.end()); + } + + TF_EXPECT_OK(ExpectEqual(out_tensors, expected_outputs, + /*compare_order=*/compare_order)); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckDatasetNodeName( + const string& expected_dataset_node_name) { + EXPECT_EQ(dataset_->node_name(), expected_dataset_node_name); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckDatasetTypeString( + const string& expected_type_str) { + EXPECT_EQ(dataset_->type_string(), expected_type_str); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckDatasetOutputDtypes( + const DataTypeVector& expected_output_dtypes) { + TF_EXPECT_OK( + VerifyTypesMatch(dataset_->output_dtypes(), expected_output_dtypes)); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckDatasetOutputShapes( + const std::vector& expected_output_shapes) { + TF_EXPECT_OK(VerifyShapesCompatible(dataset_->output_shapes(), + expected_output_shapes)); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckDatasetCardinality(int expected_cardinality) { + EXPECT_EQ(dataset_->Cardinality(), expected_cardinality); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckIteratorOutputDtypes( + const DataTypeVector& expected_output_dtypes) { + TF_EXPECT_OK( + VerifyTypesMatch(iterator_->output_dtypes(), expected_output_dtypes)); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckIteratorOutputShapes( + const std::vector& expected_output_shapes) { + TF_EXPECT_OK(VerifyShapesCompatible(iterator_->output_shapes(), + expected_output_shapes)); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckIteratorPrefix( + const string& expected_iterator_prefix) { + EXPECT_EQ(iterator_->prefix(), expected_iterator_prefix); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckIteratorSaveAndRestore( + const string& iterator_prefix, const std::vector& expected_outputs, + const std::vector& breakpoints) { + std::unique_ptr iterator; + TF_RETURN_IF_ERROR( + dataset_->MakeIterator(iterator_ctx_.get(), iterator_prefix, &iterator)); + std::unique_ptr serialization_ctx; + TF_RETURN_IF_ERROR(CreateSerializationContext(&serialization_ctx)); + bool end_of_sequence = false; + std::vector out_tensors; + int cur_iteration = 0; + auto expected_outputs_it = expected_outputs.begin(); + for (int breakpoint : breakpoints) { + VariantTensorData data; + VariantTensorDataWriter writer(&data); + TF_EXPECT_OK(iterator->Save(serialization_ctx.get(), &writer)); + TF_RETURN_IF_ERROR(writer.Flush()); + VariantTensorDataReader reader(&data); + TF_EXPECT_OK(RestoreIterator(iterator_ctx_.get(), &reader, iterator_prefix, + *dataset_, &iterator)); + + while (cur_iteration <= breakpoint) { + TF_RETURN_IF_ERROR(iterator->GetNext(iterator_ctx_.get(), &out_tensors, + &end_of_sequence)); + if (!end_of_sequence) { + EXPECT_NE(expected_outputs_it, expected_outputs.end()); + TF_EXPECT_OK(ExpectEqual(out_tensors.back(), *expected_outputs_it)); + expected_outputs_it++; + } + cur_iteration++; + } + + if (breakpoint >= expected_outputs.size()) { + EXPECT_TRUE(end_of_sequence); + EXPECT_EQ(expected_outputs_it, expected_outputs.end()); + } 
+  }
+  return Status::OK();
+}
+
+}  // namespace data
+}  // namespace tensorflow
diff --git a/tf_adapter/tests/st/kernels/testcase/dataset/function_testlib.cc b/tf_adapter/tests/st/kernels/testcase/dataset/function_testlib.cc
new file mode 100644
index 000000000..ad82bb6f6
--- /dev/null
+++ b/tf_adapter/tests/st/kernels/testcase/dataset/function_testlib.cc
@@ -0,0 +1,649 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/function_testlib.h"
+
+#include "tensorflow/core/framework/function.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/framework/versions.pb.h"
+#include "tensorflow/core/lib/core/threadpool.h"
+#include "tensorflow/core/public/version.h"
+
+namespace tensorflow {
+namespace test {
+namespace function {
+
+typedef FunctionDefHelper FDH;
+
+GraphDef GDef(gtl::ArraySlice<NodeDef> nodes,
+              gtl::ArraySlice<FunctionDef> funcs) {
+  GraphDef g;
+  VersionDef* versions = g.mutable_versions();
+  versions->set_producer(TF_GRAPH_DEF_VERSION);
+  versions->set_min_consumer(TF_GRAPH_DEF_VERSION_MIN_CONSUMER);
+  for (const auto& n : nodes) {
+    *(g.add_node()) = n;
+  }
+  auto lib = g.mutable_library();
+  for (const auto& f : funcs) {
+    *(lib->add_function()) = f;
+  }
+  return g;
+}
+
+// Helper to construct a NodeDef.
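+// For example (illustrative only; the names and device string below are
+// arbitrary):
+//   NodeDef n = NDef("add", "Add", {"x", "y"}, {{"T", DT_FLOAT}},
+//                    "/job:localhost/replica:0/task:0/device:CPU:0");
+// builds an `Add` node with two inputs, a `T` attr, and an explicit device.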
+NodeDef NDef(StringPiece name, StringPiece op, gtl::ArraySlice<string> inputs,
+             gtl::ArraySlice<std::pair<string, FDH::AttrValueWrapper>> attrs,
+             const string& device) {
+  NodeDef n;
+  n.set_name(string(name));
+  n.set_op(string(op));
+  for (const auto& in : inputs) n.add_input(in);
+  n.set_device(device);
+  for (auto na : attrs) n.mutable_attr()->insert({na.first, na.second.proto});
+  return n;
+}
+
+FunctionDef NonZero() {
+  return FDH::Define(
+      // Name
+      "NonZero",
+      // Args
+      {"x:T"},
+      // Return values
+      {"y:T"},
+      // Attr def
+      {"T:{float, double, int32, int64, string}"},
+      // Nodes
+      {
+          {{"y"}, "Identity", {"x"}, {{"T", "$T"}}},
+      });
+}
+
+FunctionDef IsZero() {
+  const Tensor kZero = test::AsScalar<int64>(0);
+  return FDH::Define(
+      // Name
+      "IsZero",
+      // Args
+      {"x: T"},
+      // Return values
+      {"equal: bool"},
+      // Attr def
+      {"T:{float, double, int32, int64, string}"},
+      {
+          {{"zero"}, "Const", {}, {{"value", kZero}, {"dtype", DT_INT64}}},
+          {{"cast"}, "Cast", {"zero"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}},
+          {{"equal"}, "Equal", {"x", "cast"}, {{"T", "$T"}}},
+      });
+}
+
+FunctionDef RandomUniform() {
+  const Tensor kZero = test::AsScalar<int64>(0);
+
+  return FDH::Define(
+      // Name
+      "RandomUniform",
+      // Args
+      {"x: T"},
+      // Return values
+      {"random_uniform: int64"},
+      // Attr def
+      {"T:{float, double, int32, int64, string}"},
+      {{{"random_uniform/shape"},
+        "Const",
+        {},
+        {{"value", kZero}, {"dtype", DT_INT64}}},
+       {{"random_uniform"},
+        "RandomUniform",
+        {"random_uniform/shape"},
+        {{"T", DT_INT32},
+         {"Tout", DT_FLOAT},
+         {"seed", 87654321},
+         {"seed2", 42}}}});
+}
+
+FunctionDef XTimesTwo() {
+  const Tensor kTwo = test::AsScalar<int64>(2);
+  return FDH::Define(
+      // Name
+      "XTimesTwo",
+      // Args
+      {"x: T"},
+      // Return values
+      {"y: T"},
+      // Attr def
+      {"T: {float, double, int32, int64}"},
+      // Nodes
+      {
+          {{"two"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_INT64}}},
+          {{"scale"}, "Cast", {"two"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}},
+          {{"y"}, "Mul", {"x", "scale"}, {{"T", "$T"}}},
+      });
+}
+
+FunctionDef TwoDeviceMult() {
+  const Tensor kTwo = test::AsScalar<int64>(2);
+  const Tensor kThree = test::AsScalar<int64>(3);
+  return FDH::Create(
+      // Name
+      "TwoDeviceMult",
+      // Args
+      {"x: T"},
+      // Return values
+      {"y_cpu: T", "y_gpu: T"},
+      // Attr def
+      {"T: {float, double, int32, int64}"},
+      // Nodes
+      {
+          {{"num_2"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_INT64}}},
+          {{"num_3"}, "Const", {}, {{"value", kThree}, {"dtype", DT_INT64}}},
+          {{"factor_2"},
+           "Cast",
+           {"num_2:output:0"},
+           {{"SrcT", DT_INT64}, {"DstT", "$T"}}},
+          {{"factor_3"},
+           "Cast",
+           {"num_3:output:0"},
+           {{"SrcT", DT_INT64}, {"DstT", "$T"}}},
+          {{"y_cpu"},
+           "Mul",
+           {"x", "factor_2:y:0"},
+           {{"T", "$T"}},
+           {},
+           "/device:CPU:0"},
+          {{"y_gpu"},
+           "Mul",
+           {"x", "factor_3:y:0"},
+           {{"T", "$T"}},
+           {},
+           "/device:GPU:0"},
+      },
+      {{"y_cpu", "y_cpu:z:0"}, {"y_gpu", "y_gpu:z:0"}});
+}
+
+FunctionDef TwoDeviceInputOutput() {
+  const Tensor kTwo = test::AsScalar<float>(2);
+  const Tensor kThree = test::AsScalar<float>(3);
+  return FDH::Create(
+      // Name
+      "TwoDeviceInputOutput",
+      // Args
+      {"x1: T", "x2: T"},
+      // Return values
+      {"y_cpu: T", "y_gpu: T"},
+      // Attr def
+      {"T: {float}"},
+      // Nodes
+      {
+          {{"num_2"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_FLOAT}}},
+          {{"num_3"}, "Const", {}, {{"value", kThree}, {"dtype", DT_FLOAT}}},
+          {{"y_cpu"},
+           "Mul",
+           {"x1", "num_2:output:0"},
+           {{"T", "$T"}},
+           {},
+           "/device:CPU:0"},
+          {{"y_gpu"},
+           "Mul",
+           {"x2", "num_3:output:0"},
+           {{"T", "$T"}},
+           {},
+           "/device:GPU:0"},
+      },
+      {{"y_cpu", "y_cpu:z:0"}, {"y_gpu", "y_gpu:z:0"}});
+}
+
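+// Note (added for readability): each node row above follows
+// FunctionDefHelper's {{outputs}, op, {inputs}, {attrs}, {control deps},
+// device} layout; the control-dependency list and device string are optional,
+// which is why the single-device helpers below omit them.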
+FunctionDef FuncWithListInput() {
+  const Tensor kTwo = test::AsScalar<float>(2);
+  return FDH::Create(
+      // Name
+      "FuncWithListInput",
+      // Args
+      {"x1: N * T"},
+      // Return values
+      {},
+      // Attr def
+      {"T: {float}", "N: int >= 1"},
+      // Nodes
+      {
+          {{"num_2"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_FLOAT}}},
+      },
+      {});
+}
+
+FunctionDef FuncWithListOutput() {
+  const Tensor kTwo = test::AsScalar<float>(2);
+  return FDH::Create(
+      // Name
+      "FuncWithListOutput",
+      // Args
+      {},
+      // Return values
+      {"y: N * T"},
+      // Attr def
+      {"T: {float}", "N: int >= 1"},
+      // Nodes
+      {
+          {{"num_2"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_FLOAT}}},
+      },
+      {{"y", "num_2:output:0"}});
+}
+
+FunctionDef XAddX() {
+  return FDH::Define(
+      // Name
+      "XAddX",
+      // Args
+      {"x: T"},
+      // Return values
+      {"y: T"},
+      // Attr def
+      {"T: {float, double, int32, int64}"},
+      // Nodes
+      {
+          {{"y"}, "Add", {"x", "x"}, {{"T", "$T"}}},
+      });
+}
+
+FunctionDef XAddY() {
+  return FDH::Define(
+      // Name
+      "XAddY",
+      // Args
+      {"x: T", "y: T"},
+      // Return values
+      {"z: T"},
+      // Attr def
+      {"T: {float, double, int32, int64}"},
+      // Nodes
+      {
+          {{"z"}, "Add", {"x", "y"}, {{"T", "$T"}}},
+      });
+}
+
+FunctionDef XTimesTwoInt32() {
+  const Tensor kTwo = test::AsScalar<int64>(2);
+  return FDH::Define(
+      // Name
+      "XTimesTwoInt32",
+      // Args
+      {"x: int32"},
+      // Return values
+      {"y: int32"}, {},
+      // Nodes
+      {
+          {{"two"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_INT64}}},
+          {{"scale"},
+           "Cast",
+           {"two"},
+           {{"SrcT", DT_INT64}, {"DstT", DT_INT32}}},
+          {{"y"}, "Mul", {"x", "scale"}, {{"T", DT_INT32}}},
+      });
+}
+
+FunctionDef XTimesFour() {
+  return FDH::Create(
+      // Name
+      "XTimesFour",
+      // Args
+      {"x: T"},
+      // Return values
+      {"y: T"},
+      // Attr def
+      {"T: {float, double, int32, int64}"},
+      // Nodes
+      {
+          {{"x2"}, "XTimesTwo", {"x"}, {{"T", "$T"}}},
+          {{"y"}, "XTimesTwo", {"x2:y:0"}, {{"T", "$T"}}},
+      },
+      {{"y", "y:y:0"}});
+}
+
+FunctionDef XTimes16() {
+  return FDH::Create(
+      // Name
+      "XTimes16",
+      // Args
+      {"x: T"},
+      // Return values
+      {"y: T"},
+      // Attr def
+      {"T: {float, double, int32, int64}"},
+      // Nodes
+      {
+          {{"x4"}, "XTimesFour", {"x"}, {{"T", "$T"}}},
+          {{"y"}, "XTimesFour", {"x4:y:0"}, {{"T", "$T"}}},
+      },
+      {{"y", "y:y:0"}});
+}
+
+FunctionDef WXPlusB() {
+  return FDH::Define(
+      // Name
+      "WXPlusB",
+      // Args
+      {"w: T", "x: T", "b: T"},
+      // Return values
+      {"y: T"},
+      // Attr def
+      {"T: {float, double}"},
+      // Nodes
+      {{{"mm"},
+        "MatMul",
+        {"w", "x"},
+        {{"T", "$T"},
+         {"transpose_a", false},
+         {"transpose_b", false},
+         {"_kernel", "eigen"}}},
+       {{"y"}, "Add", {"mm", "b"}, {{"T", "$T"}}}});
+}
+
+FunctionDef Swap() {
+  return FDH::Define(
+      // Name
+      "Swap",
+      // Args
+      {"i0: T", "i1: T"},
+      // Return values
+      {"o0: T", "o1: T"},
+      // Attr def
+      {"T: {float, double, resource}"},
+      // Nodes
+      {{{"o0"}, "Identity", {"i1"}, {{"T", "$T"}}},
+       {{"o1"}, "Identity", {"i0"}, {{"T", "$T"}}}});
+}
+
+FunctionDef EmptyBodySwap() {
+  return FDH::Create(
+      // Name
+      "EmptyBodySwap",
+      // Args
+      {"i0: T", "i1: T"},
+      // Return values
+      {"o0: T", "o1: T"},
+      // Attr def
+      {"T: {float, double, resource}"},
+      // Nodes
+      {},
+      // Output mapping
+      {{"o0", "i1"}, {"o1", "i0"}});
+}
+
+FunctionDef ResourceOutput() {
+  const Tensor kTwo = test::AsScalar<float>(2);
+  return FDH::Create(
+      // Name
+      "ResourceOutput",
+      // Args
+      {"x: float", "y: resource"},
+      // Return values
+      {"y_out: resource", "two_x: float"},
+      // Attr def
+      {},
+      // Nodes
+      {
+          {{"two"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_FLOAT}}},
+          {{"mul"}, "Mul", {"x", "two:output:0"}, {{"T", DT_FLOAT}}, {}},
+      },
+      {{"y_out", "y"}, {"two_x", "mul:z:0"}});
+}
+
+FunctionDef ResourceIdentity() {
+  return FDH::Create(
+      // Name
+      "ResourceIdentity",
+      // Args
+      {"x: resource"},
+      // Return values
+      {"y: resource"},
+      // Attr def
+      {},
+      // Nodes
+      {},
+      // Output mapping
+      {{"y", "x"}});
+}
+
+FunctionDef ReadResourceVariable() {
+  return FDH::Create(
+      // Name
+      "ReadResourceVariable",
+      // Args
+      {"x: resource"},
+      // Return values
+      {"y: float"},
+      // Attr def
+      {},
+      // Nodes
+      {
+          {{"read"}, "ReadVariableOp", {"x"}, {{"dtype", DT_FLOAT}}, {}},
+      },
+      {{"y", "read:value:0"}});
+}
+
+FunctionDef InvalidControlFlow() {
+  return FDH::Create(
+      // Name
+      "InvalidControlFlow",
+      // Args
+      {"i: int32"},
+      // Return values
+      {"o: int32"},
+      // Attr def
+      {},
+      // Nodes
+      {{{"enter"}, "Enter", {"i"}, {{"T", DT_INT32}, {"frame_name", "while"}}},
+       {{"add"}, "Add", {"enter:output", "i"}, {{"T", DT_INT32}}}},
+      // Output mapping
+      {{"o", "add:z"}});
+}
+
+FunctionDef LessThanOrEqualToN(int64 N) {
+  const Tensor kN = test::AsScalar<int64>(N);
+  return FDH::Define(
+      // Name
+      "LessThanOrEqualToN",
+      // Args
+      {"x: T"},
+      // Return values
+      {"z: bool"},
+      // Attr def
+      {"T: {float, double, int32, int64}"},
+      // Nodes
+      {
+          {{"N"}, "Const", {}, {{"value", kN}, {"dtype", DT_INT64}}},
+          {{"y"}, "Cast", {"N"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}},
+          {{"z"}, "LessEqual", {"x", "y"}, {{"T", "$T"}}},
+      });
+}
+
+FunctionDef XPlusOneXTimesY() {
+  const Tensor kOne = test::AsScalar<int64>(1);
+  return FDH::Define(
+      // Name
+      "XPlusOneXTimesY",
+      // Args
+      {"x: T", "y: T"},
+      // Return values
+      {"s: T", "t: T"},
+      // Attr def
+      {"T: {float, double, int32, int64}"},
+      // Nodes
+      {{{"one"}, "Const", {}, {{"value", kOne}, {"dtype", DT_INT64}}},
+       {{"increment"}, "Cast", {"one"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}},
+       {{"s"}, "Add", {"x", "increment"}, {{"T", "$T"}}},
+       {{"t"}, "Mul", {"x", "y"}, {{"T", "$T"}}}});
+}
+
+FunctionDef XYXLessThanOrEqualToN(int64 N) {
+  const Tensor kN = test::AsScalar<int64>(N);
+  return FDH::Define(
+      // Name
+      "XYXLessThanOrEqualToN",
+      // Args
+      {"x: T", "y: T"},
+      // Return values
+      {"z: bool"},
+      // Attr def
+      {"T: {float, double, int32, int64}"},
+      // Nodes
+      {
+          {{"N"}, "Const", {}, {{"value", kN}, {"dtype", DT_INT64}}},
+          {{"N1"}, "Cast", {"N"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}},
+          {{"z"}, "LessEqual", {"x", "N1"}, {{"T", "$T"}}},
+      });
+}
+
+FunctionDef RandomUniformLess() {
+  const Tensor kZero = test::AsScalar<int32>(0);
+  const Tensor kOne = test::AsScalar<int32>(1);
+  const Tensor k005 = test::AsScalar<float>(0.05);
+
+  return FDH::Define(
+      // Name
+      "RandomUniformLess",
+      // Args
+      {"arg0: int64"},
+      // Return values
+      {"strided_slice: bool"},
+      // Attr def
+      {"T:{float, double, int32, int64, string}"},
+      {{{"random_uniform/shape"},
+        "Const",
+        {},
+        {{"value", kZero}, {"dtype", DT_INT32}}},
+
+       {{"random_uniform/RandomUniform"},
+        "RandomUniform",
+        {"random_uniform/shape"},
+        {{"T", DT_INT32}, {"Tout", DT_FLOAT}, {"seed", 0}, {"seed2", 0}}},
+
+       {{"Less/y"}, "Const", {}, {{"value", k005}, {"dtype", DT_FLOAT}}},
+
+       {{"Less"},
+        "Less",
+        {"random_uniform/RandomUniform", "Less/y"},
+        {{"T", DT_FLOAT}}},
+
+       {{"strided_slice/stack"},
+        "Const",
+        {},
+        {{"value", kZero}, {"dtype", DT_INT32}}},
+
+       {{"strided_slice/stack_1"},
+        "Const",
+        {},
+        {{"value", kOne}, {"dtype", DT_INT32}}},
+
+       {{"strided_slice/stack_2"},
+        "Const",
+        {},
+        {{"value", kOne}, {"dtype", DT_INT32}}},
+
+       {{"strided_slice"},
+        "StridedSlice",
+        {"Less", "strided_slice/stack", "strided_slice/stack_1",
+         "strided_slice/stack_2"},
+        {{"Index", DT_INT32},
+         {"T", DT_BOOL},
+         {"begin_mask", 0},
+         {"ellipsis_mask", 0},
+         {"end_mask", 0},
+         {"new_axis_mask", 0},
+         {"shrink_axis_mask", 0}}}});
+}
+
+FunctionDef MakeRangeDataset() {
+  return FDH::Define(
+      // Name
+      "MakeRangeDataset",
+      // Args
+      {"start: int64", "stop: int64", "step: int64"},
+      // Return values
+      {"y:variant"},
+      // Attr def
+      {"output_types: list(type) >= 1", "output_shapes: list(shape) >= 1"},
+      // Nodes
+      {{{"y"},
+        "RangeDataset",
+        {"start", "stop", "step"},
+        {{"output_types", "$output_types"},
+         {"output_shapes", "$output_shapes"}}}});
+}
+
+FunctionDef MakeTakeDataset() {
+  return FDH::Define(
+      // Name
+      "TakeDataset",
+      // Args
+      {"input_dataset: variant", "count: int64"},
+      // Return values
+      {"y:variant"},
+      // Attr def
+      {"output_types: list(type) >= 1", "output_shapes: list(shape) >= 1"},
+      // Nodes
+      {{{"y"},
+        "TakeDataset",
+        {"input_dataset", "count"},
+        {{"output_types", "$output_types"},
+         {"output_shapes", "$output_shapes"}}}});
+}
+
+FunctionDef MakeTensorSliceDataset() {
+  return FDH::Define(
+      // Name
+      "MakeTensorSliceDataset",
+      // Args
+      {"x: Toutput_types"},
+      // Return values
+      {"y: variant"},
+      // Attr def
+      {"Toutput_types: list(type) >= 1", "output_shapes: list(shape) >= 1"},
+      // Nodes
+      {{{"y"},
+        "TensorSliceDataset",
+        {"x"},
+        {{"Toutput_types", "$Toutput_types"},
+         {"output_shapes", "$output_shapes"}}}});
+}
+
+FunctionDef Unique() {
+  return FDH::Create(
+      // Name
+      "GetUnique",
+      // Args
+      {"x:T"},
+      // Return values
+      {"y:T", "idx: out_idx"},
+      // Attr def
+      {"T: type", "out_idx: {int32, int64} = DT_INT32"},
+      // Nodes
+      {
+          {{"result"}, "Unique", {"x"}, {{"T", "$T"}, {"out_idx", "$out_idx"}}},
+      },
+      {{"y", "result:y:0"}, {"idx", "result:idx:0"}});
+}
+
+void FunctionTestSchedClosure(std::function<void()> fn) {
+  static thread::ThreadPool* w =
+      new thread::ThreadPool(Env::Default(), "Test", 8);
+  w->Schedule(std::move(fn));
+}
+
+}  // end namespace function
+}  // end namespace test
+}  // end namespace tensorflow
diff --git a/tf_adapter/tests/st/kernels/testcase/dataset/host_queue_dats_set_st.cc b/tf_adapter/tests/st/kernels/testcase/dataset/host_queue_dats_set_st.cc
new file mode 100644
index 000000000..05088e851
--- /dev/null
+++ b/tf_adapter/tests/st/kernels/testcase/dataset/host_queue_dats_set_st.cc
@@ -0,0 +1,203 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#define protected public
+#include "gtest/gtest.h"
+#include "tensorflow/core/graph/graph_def_builder.h"
+#include "tensorflow/core/kernels/data/dataset_test_base.h"
+class HostQueueDatasetOp;
+namespace tensorflow {
+namespace data {
+namespace {
+
+static constexpr char kNodeName[] = "host_queue_dataset";
+static constexpr const char *const kChannelName = "channel_name";
+static constexpr const char *const kOutputTypes = "output_types";
+static constexpr const char *const kOutputShapes = "output_shapes";
+
+class HostQueueDatasetOpTest : public DatasetOpsTestBase {
+ protected:
+  // Creates `TensorSliceDataset` variant tensor from the input vector of
+  // tensors.
+  Status CreateTensorSliceDatasetTensor(
+      std::vector<Tensor> *const tensor_vector, Tensor *dataset_tensor) {
+    DatasetBase *tensor_slice_dataset;
+    TF_RETURN_IF_ERROR(CreateTensorSliceDataset(
+        "tensor_slice_node", tensor_vector, &tensor_slice_dataset));
+    TF_RETURN_IF_ERROR(
+        StoreDatasetInVariantTensor(tensor_slice_dataset, dataset_tensor));
+    return Status::OK();
+  }
+
+  // Create a new `HostQueueDataset` op kernel.
+  Status CreateHostQueueDatasetKernel(
+      const DataTypeVector &output_types,
+      const std::vector<PartialTensorShape> &output_shapes,
+      std::unique_ptr<OpKernel> *op_kernel, std::string _local_rank_id) {
+    name_utils::OpNameParams params;
+
+    NodeDef node_def =
+        test::function::NDef(kNodeName, name_utils::OpName("HostQueue", params),
+                             {"geop_dataset", "input_dataset"},
+                             {{"channel_name", "channel_001"},
+                              {"output_types", output_types},
+                              {"_local_rank_id", _local_rank_id},
+                              {"_local_device_list", "{0,-1}"},
+                              {"output_shapes", output_shapes}});
+    TF_RETURN_IF_ERROR(CreateOpKernel(node_def, op_kernel));
+    return Status::OK();
+  }
+
+  // Create a new `HostQueueDataset` op kernel context.
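+  // (Added note: the helper below first validates the supplied inputs against
+  // the kernel's declared input types via CheckOpKernelInput, then wraps them
+  // in an OpKernelContext.)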
+  Status CreateHostQueueDatasetContext(
+      OpKernel *op_kernel, gtl::InlinedVector<TensorValue, 4> *const inputs,
+      std::unique_ptr<OpKernelContext> *context) {
+    TF_RETURN_IF_ERROR(CheckOpKernelInput(*op_kernel, *inputs));
+    TF_RETURN_IF_ERROR(CreateOpKernelContext(op_kernel, inputs, context));
+    return Status::OK();
+  }
+
+ protected:
+  virtual void SetUp() {}
+  virtual void TearDown() {}
+};
+
+struct TestCase {
+  std::vector<Tensor> input_tensors;
+  std::vector<Tensor> expected_outputs;
+  DataTypeVector expected_output_dtypes;
+  std::vector<PartialTensorShape> expected_output_shapes;
+};
+
+TestCase NormalizeTestCase() {
+  return {
+      /// input_tensors , expected_outputs , expected_output_dtypes,
+      /// expected_output_shapes
+      {CreateTensor<int64>(TensorShape{10, 1}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9})},
+      {CreateTensor<int64>(TensorShape{1}, {0})},
+      {DT_INT64},
+      {PartialTensorShape({1})},
+  };
+}
+
+TEST_F(HostQueueDatasetOpTest, iterator_getnext) {
+  int thread_num = 2, cpu_num = 2;
+  TF_ASSERT_OK(InitThreadPool(thread_num));
+  TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num));
+
+  const TestCase &test_case = NormalizeTestCase();
+  Tensor tensor_slice_dataset_tensor(DT_VARIANT, TensorShape({}));
+  std::vector<Tensor> inputs_for_tensor_slice_dataset = test_case.input_tensors;
+  TF_ASSERT_OK(CreateTensorSliceDatasetTensor(&inputs_for_tensor_slice_dataset,
+                                              &tensor_slice_dataset_tensor));
+
+  gtl::InlinedVector<TensorValue, 4> inputs_for_host_queue_dataset(
+      {TensorValue(&tensor_slice_dataset_tensor),
+       TensorValue(&tensor_slice_dataset_tensor)});
+
+  std::unique_ptr<OpKernel> host_queue_dataset_kernel;
+  TF_ASSERT_OK(CreateHostQueueDatasetKernel(test_case.expected_output_dtypes,
+                                            test_case.expected_output_shapes,
+                                            &host_queue_dataset_kernel, "-1"));
+  std::unique_ptr<OpKernelContext> host_queue_dataset_context;
+  TF_ASSERT_OK(CreateHostQueueDatasetContext(host_queue_dataset_kernel.get(),
+                                             &inputs_for_host_queue_dataset,
+                                             &host_queue_dataset_context));
+  DatasetBase *host_queue_dataset;
+  TF_ASSERT_OK(CreateDataset(host_queue_dataset_kernel.get(),
+                             host_queue_dataset_context.get(),
+                             &host_queue_dataset));
+  core::ScopedUnref scoped_unref(host_queue_dataset);
+
+  EXPECT_EQ(host_queue_dataset->node_name(), kNodeName);
+
+  host_queue_dataset->output_dtypes();
+  host_queue_dataset->output_shapes();
+  host_queue_dataset->DebugString();
+
+  SerializationContext context(SerializationContext::Params{});
+  GraphDefBuilder b;
+  DatasetBase::DatasetGraphDefBuilder db(&b);
+  Node *output;
+  host_queue_dataset->AsGraphDefInternal(&context, &db, &output);
+
+  std::unique_ptr<IteratorContext> iterator_context;
+  TF_ASSERT_OK(CreateIteratorContext(host_queue_dataset_context.get(),
+                                     &iterator_context));
+  std::unique_ptr<IteratorBase> iterator;
+  TF_ASSERT_OK(host_queue_dataset->MakeIterator(iterator_context.get(),
+                                                "Iterator", &iterator));
+
+  bool end_of_sequence = false;
+  std::vector<Tensor> out_tensors;
+  sleep(2);
+  TF_EXPECT_OK(iterator->GetNext(iterator_context.get(), &out_tensors,
+                                 &end_of_sequence));
+}
+
+TEST_F(HostQueueDatasetOpTest, iterator_getnext02) {
+  int thread_num = 2, cpu_num = 2;
+  TF_ASSERT_OK(InitThreadPool(thread_num));
+  TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num));
+
+  const TestCase &test_case = NormalizeTestCase();
+  Tensor tensor_slice_dataset_tensor(DT_VARIANT, TensorShape({}));
+  std::vector<Tensor> inputs_for_tensor_slice_dataset = test_case.input_tensors;
+  TF_ASSERT_OK(CreateTensorSliceDatasetTensor(&inputs_for_tensor_slice_dataset,
+                                              &tensor_slice_dataset_tensor));
+
+  gtl::InlinedVector<TensorValue, 4> inputs_for_host_queue_dataset(
+      {TensorValue(&tensor_slice_dataset_tensor),
+       TensorValue(&tensor_slice_dataset_tensor)});
+
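+  // HostQueueDataset takes two variant inputs, a GEOP dataset handle and the
+  // actual data source; as in the first test case, the same TensorSliceDataset
+  // tensor stands in for both here.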
+  std::unique_ptr<OpKernel> host_queue_dataset_kernel;
+  TF_ASSERT_OK(CreateHostQueueDatasetKernel(test_case.expected_output_dtypes,
+                                            test_case.expected_output_shapes,
+                                            &host_queue_dataset_kernel, "0"));
+  std::unique_ptr<OpKernelContext> host_queue_dataset_context;
+  TF_ASSERT_OK(CreateHostQueueDatasetContext(host_queue_dataset_kernel.get(),
+                                             &inputs_for_host_queue_dataset,
+                                             &host_queue_dataset_context));
+  DatasetBase *host_queue_dataset;
+  TF_ASSERT_OK(CreateDataset(host_queue_dataset_kernel.get(),
+                             host_queue_dataset_context.get(),
+                             &host_queue_dataset));
+  core::ScopedUnref scoped_unref(host_queue_dataset);
+
+  EXPECT_EQ(host_queue_dataset->node_name(), kNodeName);
+
+  host_queue_dataset->output_dtypes();
+  host_queue_dataset->output_shapes();
+  host_queue_dataset->DebugString();
+
+  SerializationContext context(SerializationContext::Params{});
+  GraphDefBuilder b;
+  DatasetBase::DatasetGraphDefBuilder db(&b);
+  Node *output;
+  host_queue_dataset->AsGraphDefInternal(&context, &db, &output);
+
+  std::unique_ptr<IteratorContext> iterator_context;
+  TF_ASSERT_OK(CreateIteratorContext(host_queue_dataset_context.get(),
+                                     &iterator_context));
+  std::unique_ptr<IteratorBase> iterator;
+  TF_ASSERT_OK(host_queue_dataset->MakeIterator(iterator_context.get(),
+                                                "Iterator", &iterator));
+
+  bool end_of_sequence = false;
+  std::vector<Tensor> out_tensors;
+  sleep(2);
+  TF_EXPECT_OK(iterator->GetNext(iterator_context.get(), &out_tensors,
+                                 &end_of_sequence));
+}
+
+}  // namespace
+}  // namespace data
+}  // namespace tensorflow
diff --git a/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc b/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc
index 2645fee89..6bc6eddec 100644
--- a/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc
+++ b/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc
@@ -142,6 +142,12 @@ TEST_F(GeOpTest, GeOpDynamicInputTest) {
   EXPECT_TRUE(attrs.find("_dynamic_input") != attrs.end());
   EXPECT_TRUE(!attrs["_dynamic_input"].s().empty());
 }
+TEST_F(GeOpTest, GeOpDynamicInputGetNextTest) {
+  NodeDef node_def;
+  std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_dynamic_input_lazy_recompile.pbtxt";
+  gtl::InlinedVector<TensorValue, 4> inputs;
+  EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp11_0").ok());
+}
 TEST_F(GeOpTest, GeOpDynamicInput1Test) {
   NodeDef node_def;
   std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_dynamic_execute.pbtxt";
@@ -294,6 +300,11 @@ TEST_F(GeOpTest, GeOpNpuOnnxGraphOpNoModelTest) {
   gtl::InlinedVector<TensorValue, 4> inputs{TensorValue(&in)};
   EXPECT_TRUE(GeOpRunGraphAsync(grph_pbtxt_path, inputs, node_def, "GeOp91_0").ok());
 }
-
+TEST_F(GeOpTest, GeOpDpOpTest) {
+  NodeDef node_def;
+  std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_dpop.pbtxt";
+  gtl::InlinedVector<TensorValue, 4> inputs;
+  EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp1_0_dp").ok());
+}
 }
 } //end tensorflow
\ No newline at end of file
diff --git a/tf_adapter/tests/st/kernels/testcase/infeed_outfeed_test.cc b/tf_adapter/tests/st/kernels/testcase/infeed_outfeed_test.cc
new file mode 100644
index 000000000..21bc49f19
--- /dev/null
+++ b/tf_adapter/tests/st/kernels/testcase/infeed_outfeed_test.cc
@@ -0,0 +1,76 @@
+#include "tensorflow/core/common_runtime/device_factory.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/public/version.h"
+#include <memory>
+#include "gtest/gtest.h"
+
+
+namespace tensorflow {
+namespace {
+
+#define TF_ASSERT_OK(statement) \
+  ASSERT_EQ(::tensorflow::Status::OK(), (statement))
+
+#define TF_EXPECT_OK(statement) \
+  EXPECT_EQ(::tensorflow::Status::OK(), (statement))
+
+class DummyDevice : public DeviceBase {
+ public:
+  DummyDevice(Env* env, bool save) : DeviceBase(env), save_(save) {}
+  bool RequiresRecordingAccessedTensors() const override { return save_; }
+  Allocator* GetAllocator(AllocatorAttributes /*attr*/) override { return cpu_allocator(); }
+ private:
+  bool save_;
+};
+}
+class InfeedOutfeedTest : public testing::Test {
+ protected:
+  virtual void SetUp() {}
+  virtual void TearDown() {}
+};
+
+TEST_F(InfeedOutfeedTest, LogSummaryTest) {
+  DataType data_type = DT_INT32;
+  std::initializer_list<int64> dims = {};
+  TensorShapeProto shape_proto;
+  TensorShape(dims).AsProto(&shape_proto);
+
+  std::string channel_name = "_npu_log";
+
+  NodeDef outfeed_node;
+  tensorflow::AttrValue output_shapes;
+  tensorflow::AttrValue output_types;
+  *(output_shapes.mutable_list()->add_shape()) = shape_proto;
+  *(output_shapes.mutable_list()->add_shape()) = shape_proto;
+  output_types.mutable_list()->add_type(DT_STRING);
+  output_types.mutable_list()->add_type(DT_INT32);
+  TF_ASSERT_OK(NodeDefBuilder("out_feed", "OutfeedDequeueOp")
+                   .Attr("channel_name", channel_name)
+                   .Attr("output_types", output_types)
+                   .Attr("output_shapes", output_shapes)
+                   .Finalize(&outfeed_node));
+
+  DeviceType device_type = DEVICE_CPU;
+  Env* env = Env::Default();
+  auto device = absl::make_unique<DummyDevice>(env, false);
+
+  Status status;
+  std::unique_ptr<OpKernel> op(CreateOpKernel(device_type, device.get(),
+                                              cpu_allocator(), outfeed_node,
+                                              TF_GRAPH_DEF_VERSION, &status));
+  TF_ASSERT_OK(status);
+
+  OpKernelContext::Params params;
+  params.device = device.get();
+  params.op_kernel = op.get();
+  std::unique_ptr<CancellationManager> cancellation_manager = absl::make_unique<CancellationManager>();
+  params.cancellation_manager = cancellation_manager.get();
+
+  OpKernelContext ctx(&params);
+  op->Compute(&ctx);
+  TF_EXPECT_OK(ctx.status());
+
+}
+} //end tensorflow
\ No newline at end of file
diff --git a/tf_adapter/tests/st/optimizers/testcase/dp_tf_ge_conversion_pass_test.cc b/tf_adapter/tests/st/optimizers/testcase/dp_tf_ge_conversion_pass_test.cc
index 5a69c0b3e..b77674cfb 100644
--- a/tf_adapter/tests/st/optimizers/testcase/dp_tf_ge_conversion_pass_test.cc
+++ b/tf_adapter/tests/st/optimizers/testcase/dp_tf_ge_conversion_pass_test.cc
@@ -6,6 +6,7 @@
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/logging.h"
+#include <stdlib.h>
 
 namespace tensorflow {
 namespace {
@@ -95,5 +96,16 @@ TEST_F(DpOptimizationPassTest, DatasetNotInDeviceTest) {
       "HostQueueDataset->DPGroupDataset;GEOPDataset->HostQueueDataset;DPGroupDataset->MakeIterator";
   EXPECT_EQ(DoRunDpOptimizationPassTest(), target_graph);
 }
+TEST_F(DpOptimizationPassTest, NewDatasetNotInDeviceTest) {
+  string org_graph_def_path = "tf_adapter/tests/ut/optimizers/pbtxt/dp_test_no_dataset_in_device.pbtxt";
+  setenv("IS_NEW", "1", true);
+  InitGraph(org_graph_def_path);
+  std::string target_graph = "Const->TensorSliceDataset;TensorSliceDataset->BatchDatasetV2;Const->BatchDatasetV2:1;"\
+      "Const->BatchDatasetV2:2;BatchDatasetV2->RepeatDataset;Const->RepeatDataset:1;RepeatDataset->OptimizeDataset;"\
+      "Const->OptimizeDataset:1;OptimizeDataset->ModelDataset;IteratorV2->MakeIterator:1;ModelDataset->HostQueueDataset:1;"\
+      "HostQueueDataset->DPGroupDataset;GEOPDataset->HostQueueDataset;DPGroupDataset->MakeIterator";
+  EXPECT_EQ(DoRunDpOptimizationPassTest(), target_graph);
+  unsetenv("IS_NEW");
+}
 } // end
namespace } // end tensorflow diff --git a/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc b/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc index 0d0b7c0a0..0ad6bfb3c 100644 --- a/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc +++ b/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc @@ -82,7 +82,6 @@ TEST_F(GePluginTest, MallocSharedMemOKTest) { EXPECT_EQ(ret, 0); } TEST_F(GePluginTest, NpuCloseTest) { - NpuAttrs::SetUseTdtStatus(0, true); NpuClose(); } TEST_F(GePluginTest, RdmaInitAndRegisterFail1Test) { diff --git a/tf_adapter/tests/ut/CMakeLists.txt b/tf_adapter/tests/ut/CMakeLists.txt index 0bc4fa6ed..f8e65d2c5 100644 --- a/tf_adapter/tests/ut/CMakeLists.txt +++ b/tf_adapter/tests/ut/CMakeLists.txt @@ -23,9 +23,11 @@ file(GLOB_RECURSE UT_SOURCES ${TFADAPTER_DIR}/tf_adapter/optimizers/*.cc ${TFADAPTER_DIR}/tf_adapter/util/*.cc ${TFADAPTER_DIR}/tf_adapter/kernels/*.cc + "optimizers/testcase/*.cc" "util/testcase/*.cc" "kernels/testcase/*.cc" + "kernels/testcase/dataset/*.cc" "common/testcase/*.cc" ) @@ -61,7 +63,6 @@ add_dependencies(tfadapter_utest aoe_tuning) target_link_libraries(tfadapter_utest PUBLIC $ - gtest gtest_main c_sec mmpa_stub indextransform_stub alog_stub datatransfer_stub ge_runner_stub + gtest gtest_main c_sec mmpa_stub indextransform_stub alog_stub datatransfer_stub ge_runner_stub ascendcl_stub ${PYTHON_LIB_PATH} -lrt -ldl -lgcov ) - diff --git a/tf_adapter/tests/ut/kernels/pbtxt/geop_dpop.pbtxt b/tf_adapter/tests/ut/kernels/pbtxt/geop_dpop.pbtxt new file mode 100644 index 000000000..2b67e2a6d --- /dev/null +++ b/tf_adapter/tests/ut/kernels/pbtxt/geop_dpop.pbtxt @@ -0,0 +1,697 @@ +node { + name: "GeOp1_0_dp" + op: "GeOp" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "Tin" + value { + list { + } + } + } + attr { + key: "Tout" + value { + list { + } + } + } + attr { + key: "_NpuOptimizer" + value { + s: "NpuOptimizer" + } + } + attr { + key: "_auto_tune_mode" + value { + s: "" + } + } + attr { + key: "_buffer_optimize" + value { + s: "l2_optimize" + } + } + attr { + key: "_compress_weight_conf" + value { + s: "" + } + } + attr { + key: "_debug_dir" + value { + s: "" + } + } + attr { + key: "_distribute_config" + value { + s: "" + } + } + attr { + key: "_do_npu_optimizer" + value { + s: "1" + } + } + attr { + key: "_dump_debug_mode" + value { + s: "all" + } + } + attr { + key: "_dump_mode" + value { + s: "output" + } + } + attr { + key: "_dump_path" + value { + s: "./" + } + } + attr { + key: "_dump_step" + value { + s: "1" + } + } + attr { + key: "_dynamic_dims" + value { + s: "" + } + } + attr { + key: "_dynamic_graph_execute_mode" + value { + s: "dynamic_execute" + } + } + attr { + key: "_dynamic_input" + value { + s: "0" + } + } + attr { + key: "_dynamic_node_type" + value { + s: "" + } + } + attr { + key: "_enable_compress_weight" + value { + s: "0" + } + } + attr { + key: "_enable_data_pre_proc" + value { + s: "0" + } + } + attr { + key: "_enable_dump" + value { + s: "1" + } + } + attr { + key: "_enable_dump_debug" + value { + s: "1" + } + } + attr { + key: "_enable_exception_dump" + value { + s: "" + } + } + attr { + key: "_enable_scope_fusion_passes" + value { + s: "" + } + } + attr { + key: "_enable_small_channel" + value { + s: "0" + } + } + attr { + key: "_fusion_switch_file" + value { + s: "" + } + } + attr { + key: "_graph_run_mode" + value { + s: "1" + } + } + attr { + key: "_hcom_multi_mode" + value { + s: "" + } + } + attr { + key: "_hcom_parallel" + value { + s: "0" + } + } + attr { + key: "_in_out_pair" + value { 
+ s: "" + } + } + attr { + key: "_in_out_pair_flag" + value { + s: "1" + } + } + attr { + key: "_input_shape" + value { + s: "" + } + } + attr { + key: "_is_tailing_optimization" + value { + s: "0" + } + } + attr { + key: "_iterations_per_loop" + value { + s: "1" + } + } + attr { + key: "_job" + value { + s: "localhost" + } + } + attr { + key: "_local_device_list" + value { + s: "" + } + } + attr { + key: "_local_rank_id" + value { + s: "-1" + } + } + attr { + key: "_lower_functional_ops" + value { + s: "0" + } + } + attr { + key: "_mix_compile_mode" + value { + s: "0" + } + } + attr { + key: "_mstune_mode" + value { + s: "" + } + } + attr { + key: "_op_compiler_cache_dir" + value { + s: "" + } + } + attr { + key: "_op_compiler_cache_mode" + value { + s: "" + } + } + attr { + key: "_op_debug_level" + value { + s: "0" + } + } + attr { + key: "_op_select_implmode" + value { + s: "" + } + } + attr { + key: "_op_tune_mode" + value { + s: "" + } + } + attr { + key: "_optypelist_for_implmode" + value { + s: "" + } + } + attr { + key: "_precision_mode" + value { + s: "" + } + } + attr { + key: "_profiling_mode" + value { + s: "0" + } + } + attr { + key: "_profiling_options" + value { + s: "" + } + } + attr { + key: "_session_device_id" + value { + s: "" + } + } + attr { + key: "_stream_max_parallel_num" + value { + s: "" + } + } + attr { + key: "_task_index" + value { + s: "0" + } + } + attr { + key: "_use_off_line" + value { + s: "1" + } + } + attr { + key: "_variable_format_optimize" + value { + s: "1" + } + } + attr { + key: "_work_path" + value { + s: "" + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } + attr { + key: "function" + value { + func { + name: "geop_function_D9x45pM0kZ0" + } + } + } +} +library { + function { + signature { + name: "geop_function_D9x45pM0kZ0" + } + node_def { + name: "dpop_function_FFvj93e0XnN" + op: "DPOP" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "Tin" + value { + list { + } + } + } + attr { + key: "Tout" + value { + list { + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } + attr { + key: "func_def" + value { + s: 
"\n\271\001\n;\n\037__inference_Dataset_map_func_11\022\n\n\006args_0\030\002\032\014\n\010identity\030\002\0326\n\005add/y\022\005Const*\031\n\005value\022\020B\016\010\002\022\0002\010\000\000\000\000\000\000\360?*\013\n\005dtype\022\0020\002\032-\n\003add\022\005AddV2\032\016add/y:output:0\032\006args_0*\007\n\001T\022\0020\002\"\023\n\010identity\022\007add:z:0\n\356\016\n\033\n\031dpop_function_FFvj93e0XnN\032p\n\nbatch_size\022\005Const\",/job:localhost/replica:0/task:0/device:CPU:0*\022\n\005value\022\tB\007\010\t\022\000R\001\002*\013\n\005dtype\022\0020\t2\014\n\nbatch_size\032x\n\016drop_remainder\022\005Const\",/job:localhost/replica:0/task:0/device:CPU:0*\022\n\005value\022\tB\007\010\n\022\000Z\001\000*\013\n\005dtype\022\0020\n2\020\n\016drop_remainder\032\365\002\n\nIteratorV2\022\nIteratorV2\",/job:localhost/replica:0/task:0/device:CPU:0*\033\n\024_iterations_per_loop\022\003\022\0011*\024\n\r_use_off_line\022\003\022\0011*\030\n\021_mix_compile_mode\022\003\022\0010*\021\n\013shared_name\022\002\022\000*\"\n\routput_shapes\022\021\n\017:\r\022\013\010\377\377\377\377\377\377\377\377\377\001*\037\n\006_class\022\025\n\023\022\021loc:@MakeIterator*\037\n\r_NpuOptimizer\022\016\022\014NpuOptimizer*\017\n\tcontainer\022\002\022\000*\025\n\014output_types\022\005\n\0032\001\002*\023\n\004_job\022\013\022\tlocalhost*\034\n\025_enable_data_pre_proc\022\003\022\00112\014\n\nIteratorV2\032\357\002\n\016BatchDatasetV2\022\016BatchDatasetV2\032^DeviceQueue_Queue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1:handle:0\032\023batch_size:output:0\032\027drop_remainder:output:0\",/job:localhost/replica:0/task:0/device:CPU:0*\017\n\007_kernel\022\004\022\002dp*\036\n\016_iterator_name\022\014\022\nIteratorV2*\025\n\014output_types\022\005\n\0032\001\002*\023\n\rparallel_copy\022\002(\000*\"\n\routput_shapes\022\021\n\017:\r\022\013\010\377\377\377\377\377\377\377\377\377\0012\020\n\016BatchDatasetV2\032\240\002\n\nMapDataset\022\nMapDataset\032\027BatchDatasetV2:handle:0\",/job:localhost/replica:0/task:0/device:CPU:0*\"\n\routput_shapes\022\021\n\017:\r\022\013\010\377\377\377\377\377\377\377\377\377\001*(\n\001f\022#R!\n\037__inference_Dataset_map_func_11*\036\n\030use_inter_op_parallelism\022\002(\001*\025\n\014output_types\022\005\n\0032\001\002*\020\n\nTarguments\022\002\n\000*\032\n\024preserve_cardinality\022\002(\0002\014\n\nMapDataset\032\265\001\n\014MakeIterator\022\014MakeIterator\032\023MapDataset:handle:0\032\023IteratorV2:handle:0\",/job:localhost/replica:0/task:0/device:CPU:0*\017\n\007_kernel\022\004\022\002dp*\036\n\016_iterator_name\022\014\022\nIteratorV22\016\n\014MakeIterator\032\235\003\nUDeviceQueue_Queue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1\022\022DeviceQueueDataset\",/job:localhost/replica:0/task:0/device:CPU:0*\025\n\routput_shapes\022\004\n\002:\000*[\n\014channel_name\022K\022IQueue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1*\036\n\016_iterator_name\022\014\022\nIteratorV2*\025\n\014output_types\022\005\n\0032\001\0022W\nUDeviceQueue_Queue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1" + } + } + attr { + key: "function" + value { + func { + name: "dpop_function_FFvj93e0XnN" + } + } + } + } + } + function { + signature { + name: "dpop_function_FFvj93e0XnN" + } + node_def { + name: "batch_size" + op: "Const" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { 
+ dtype: DT_INT64 + tensor_shape { + } + int64_val: 2 + } + } + } + experimental_debug_info { + original_node_names: "batch_size" + } + } + node_def { + name: "drop_remainder" + op: "Const" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_BOOL + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_BOOL + tensor_shape { + } + bool_val: false + } + } + } + experimental_debug_info { + original_node_names: "drop_remainder" + } + } + node_def { + name: "IteratorV2" + op: "IteratorV2" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_NpuOptimizer" + value { + s: "NpuOptimizer" + } + } + attr { + key: "_class" + value { + list { + s: "loc:@MakeIterator" + } + } + } + attr { + key: "_enable_data_pre_proc" + value { + s: "1" + } + } + attr { + key: "_iterations_per_loop" + value { + s: "1" + } + } + attr { + key: "_job" + value { + s: "localhost" + } + } + attr { + key: "_mix_compile_mode" + value { + s: "0" + } + } + attr { + key: "_use_off_line" + value { + s: "1" + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_DOUBLE + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } + experimental_debug_info { + original_node_names: "IteratorV2" + } + } + node_def { + name: "BatchDatasetV2" + op: "BatchDatasetV2" + input: "DeviceQueue_Queue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1:handle:0" + input: "batch_size:output:0" + input: "drop_remainder:output:0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_iterator_name" + value { + s: "IteratorV2" + } + } + attr { + key: "_kernel" + value { + s: "dp" + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_DOUBLE + } + } + } + attr { + key: "parallel_copy" + value { + b: false + } + } + experimental_debug_info { + original_node_names: "BatchDatasetV2" + } + } + node_def { + name: "MapDataset" + op: "MapDataset" + input: "BatchDatasetV2:handle:0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "Targuments" + value { + list { + } + } + } + attr { + key: "f" + value { + func { + name: "__inference_Dataset_map_func_11" + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_DOUBLE + } + } + } + attr { + key: "preserve_cardinality" + value { + b: false + } + } + attr { + key: "use_inter_op_parallelism" + value { + b: true + } + } + experimental_debug_info { + original_node_names: "MapDataset" + } + } + node_def { + name: "MakeIterator" + op: "MakeIterator" + input: "MapDataset:handle:0" + input: "IteratorV2:handle:0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_iterator_name" + value { + s: "IteratorV2" + } + } + attr { + key: "_kernel" + value { + s: "dp" + } + } + experimental_debug_info { + original_node_names: "MakeIterator" + } + } + node_def { + name: "DeviceQueue_Queue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1" + op: "DeviceQueueDataset" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_iterator_name" + value { + s: "IteratorV2" + } + } + attr { + key: "channel_name" + value { + s: 
"Queue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1" + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_DOUBLE + } + } + } + experimental_debug_info { + original_node_names: "DeviceQueue_Queue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1" + } + } + } +} +versions { + producer: 134 +} diff --git a/tf_adapter/tests/ut/kernels/testcase/dataset/dataset_test_base.cc b/tf_adapter/tests/ut/kernels/testcase/dataset/dataset_test_base.cc new file mode 100644 index 000000000..ba2f05661 --- /dev/null +++ b/tf_adapter/tests/ut/kernels/testcase/dataset/dataset_test_base.cc @@ -0,0 +1,704 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/kernels/data/dataset_test_base.h" + +#include "tensorflow/core/common_runtime/executor.h" +#include "tensorflow/core/framework/cancellation.h" +#include "tensorflow/core/framework/versions.pb.h" +#include "tensorflow/core/lib/io/record_writer.h" +#include "iostream" +using namespace std; +namespace tensorflow { +namespace data { + +string ToString(CompressionType compression_type) { + switch (compression_type) { + case CompressionType::ZLIB: + return "ZLIB"; + case CompressionType::GZIP: + return "GZIP"; + case CompressionType::RAW: + return "RAW"; + case CompressionType::UNCOMPRESSED: + return ""; + } +} + +io::ZlibCompressionOptions GetZlibCompressionOptions( + CompressionType compression_type) { + switch (compression_type) { + case CompressionType::ZLIB: + return io::ZlibCompressionOptions::DEFAULT(); + case CompressionType::GZIP: + return io::ZlibCompressionOptions::GZIP(); + case CompressionType::RAW: + return io::ZlibCompressionOptions::RAW(); + case CompressionType::UNCOMPRESSED: + LOG(WARNING) << "ZlibCompressionOptions does not have an option for " + << ToString(compression_type); + return io::ZlibCompressionOptions::DEFAULT(); + } +} + +Status WriteDataToFile(const string& filename, const char* data) { + return WriteDataToFile(filename, data, CompressionParams()); +} + +Status WriteDataToFile(const string& filename, const char* data, + const CompressionParams& params) { + Env* env = Env::Default(); + std::unique_ptr file_writer; + TF_RETURN_IF_ERROR(env->NewWritableFile(filename, &file_writer)); + if (params.compression_type == CompressionType::UNCOMPRESSED) { + TF_RETURN_IF_ERROR(file_writer->Append(data)); + } else if (params.compression_type == CompressionType::ZLIB || + params.compression_type == CompressionType::GZIP || + params.compression_type == CompressionType::RAW) { + auto zlib_compression_options = + GetZlibCompressionOptions(params.compression_type); + io::ZlibOutputBuffer out(file_writer.get(), params.input_buffer_size, + params.output_buffer_size, + zlib_compression_options); + TF_RETURN_IF_ERROR(out.Init()); + TF_RETURN_IF_ERROR(out.Append(data)); + 
TF_RETURN_IF_ERROR(out.Flush()); + TF_RETURN_IF_ERROR(out.Close()); + } else { + return tensorflow::errors::InvalidArgument( + "Unsupported compression_type: ", ToString(params.compression_type)); + } + + TF_RETURN_IF_ERROR(file_writer->Flush()); + TF_RETURN_IF_ERROR(file_writer->Close()); + + return Status::OK(); +} + +Status WriteDataToTFRecordFile(const string& filename, + const std::vector& records, + const CompressionParams& params) { + Env* env = Env::Default(); + std::unique_ptr file_writer; + TF_RETURN_IF_ERROR(env->NewWritableFile(filename, &file_writer)); + auto options = io::RecordWriterOptions::CreateRecordWriterOptions( + ToString(params.compression_type)); + options.zlib_options.input_buffer_size = params.input_buffer_size; + io::RecordWriter record_writer(file_writer.get(), options); + for (const auto& record : records) { + TF_RETURN_IF_ERROR(record_writer.WriteRecord(record)); + } + TF_RETURN_IF_ERROR(record_writer.Flush()); + TF_RETURN_IF_ERROR(record_writer.Close()); + TF_RETURN_IF_ERROR(file_writer->Flush()); + TF_RETURN_IF_ERROR(file_writer->Close()); + return Status::OK(); +} + +template +Status IsEqual(const Tensor& t1, const Tensor& t2) { + if (t1.dtype() != t2.dtype()) { + return tensorflow::errors::Internal( + "Two tensors have different dtypes: ", DataTypeString(t1.dtype()), + " vs. ", DataTypeString(t2.dtype())); + } + if (!t1.IsSameSize(t2)) { + return tensorflow::errors::Internal( + "Two tensors have different shapes: ", t1.shape().DebugString(), + " vs. ", t2.shape().DebugString()); + } + + auto flat_t1 = t1.flat(); + auto flat_t2 = t2.flat(); + auto length = flat_t1.size(); + + for (int i = 0; i < length; ++i) { + if (flat_t1(i) != flat_t2(i)) { + return tensorflow::errors::Internal( + "Two tensors have different values " + "at [", + i, "]: ", flat_t1(i), " vs. ", flat_t2(i)); + } + } + return Status::OK(); +} + +Status DatasetOpsTestBase::ExpectEqual(const Tensor& a, const Tensor& b) { + switch (a.dtype()) { +#define CASE(DT) \ + case DataTypeToEnum
<DT>::value: \ + TF_RETURN_IF_ERROR(IsEqual<DT>
(a, b)); \ + break; + TF_CALL_NUMBER_TYPES(CASE); + TF_CALL_tstring(CASE); + TF_CALL_uint32(CASE); + TF_CALL_uint64(CASE); + // TODO(feihugis): figure out how to support variant tensors. +#undef CASE + default: + return errors::Internal("Unsupported dtype: ", a.dtype()); + } + return Status::OK(); +} + +template +bool compare(const Tensor& t1, const Tensor& t2) { + auto flat_t1 = t1.flat(); + auto flat_t2 = t2.flat(); + auto length = std::min(flat_t1.size(), flat_t2.size()); + for (int i = 0; i < length; ++i) { + if (flat_t1(i) < flat_t2(i)) return true; + if (flat_t1(i) > flat_t2(i)) return false; + } + return flat_t1.size() < length; +} + +Status DatasetOpsTestBase::ExpectEqual(std::vector produced_tensors, + std::vector expected_tensors, + bool compare_order) { + if (produced_tensors.size() != expected_tensors.size()) { + return Status(tensorflow::errors::Internal( + "The two tensor vectors have different size (", produced_tensors.size(), + " v.s. ", expected_tensors.size(), ")")); + } + + if (produced_tensors.empty()) return Status::OK(); + if (produced_tensors[0].dtype() != expected_tensors[0].dtype()) { + return Status(tensorflow::errors::Internal( + "The two tensor vectors have different dtypes (", + produced_tensors[0].dtype(), " v.s. ", expected_tensors[0].dtype(), + ")")); + } + + if (!compare_order) { + const DataType& dtype = produced_tensors[0].dtype(); + switch (dtype) { +#define CASE(DT) \ + case DT: \ + std::sort(produced_tensors.begin(), produced_tensors.end(), \ + compare::Type>); \ + std::sort(expected_tensors.begin(), expected_tensors.end(), \ + compare::Type>); \ + break; + CASE(DT_FLOAT); + CASE(DT_DOUBLE); + CASE(DT_INT32); + CASE(DT_UINT8); + CASE(DT_INT16); + CASE(DT_INT8); + CASE(DT_STRING); + CASE(DT_INT64); + CASE(DT_BOOL); + CASE(DT_QINT8); + CASE(DT_QUINT8); + CASE(DT_QINT32); + CASE(DT_QINT16); + CASE(DT_QUINT16); + CASE(DT_UINT16); + CASE(DT_HALF); + CASE(DT_UINT32); + CASE(DT_UINT64); + // TODO(feihugis): support other dtypes. +#undef CASE + default: + return errors::Internal("Unsupported dtype: ", dtype); + } + } + + for (int i = 0; i < produced_tensors.size(); ++i) { + TF_RETURN_IF_ERROR(DatasetOpsTestBase::ExpectEqual(produced_tensors[i], + expected_tensors[i])); + } + return Status::OK(); +} + +Status DatasetOpsTestBase::CreateTensorSliceDatasetKernel( + StringPiece node_name, const DataTypeVector& dtypes, + const std::vector& shapes, + std::unique_ptr* tensor_slice_dataset_kernel) { + std::vector components; + components.reserve(dtypes.size()); + for (int i = 0; i < dtypes.size(); ++i) { + // Create the placeholder names for the input components of + // `TensorSliceDataset`. 
+ components.emplace_back(strings::StrCat("component_", i)); + } + NodeDef node_def = test::function::NDef( + node_name, "TensorSliceDataset", components, + {{"Toutput_types", dtypes}, {"output_shapes", shapes}}); + TF_RETURN_IF_ERROR(CreateOpKernel(node_def, tensor_slice_dataset_kernel)); + return Status::OK(); +} + +Status DatasetOpsTestBase::CreateTensorSliceDataset( + StringPiece node_name, std::vector* const components, + DatasetBase** tensor_slice_dataset) { + std::unique_ptr tensor_slice_dataset_kernel; + DataTypeVector dtypes; + dtypes.reserve(components->size()); + std::vector shapes; + shapes.reserve(components->size()); + for (const auto& t : *components) { + dtypes.push_back(t.dtype()); + gtl::InlinedVector partial_dim_sizes; + for (int i = 1; i < t.dims(); ++i) { + partial_dim_sizes.push_back(t.dim_size(i)); + } + shapes.emplace_back(std::move(partial_dim_sizes)); + } + TF_RETURN_IF_ERROR(CreateTensorSliceDatasetKernel( + node_name, dtypes, shapes, &tensor_slice_dataset_kernel)); + gtl::InlinedVector inputs; + for (auto& tensor : *components) { + inputs.emplace_back(&tensor); + } + TF_RETURN_IF_ERROR(CheckOpKernelInput(*tensor_slice_dataset_kernel, inputs)); + std::unique_ptr context; + TF_RETURN_IF_ERROR(CreateOpKernelContext(tensor_slice_dataset_kernel.get(), + &inputs, &context)); + TF_RETURN_IF_ERROR( + RunOpKernel(tensor_slice_dataset_kernel.get(), context.get())); + TF_RETURN_IF_ERROR( + GetDatasetFromContext(context.get(), 0, tensor_slice_dataset)); + return Status::OK(); +} + +// Create a `RangeDataset` dataset as a variant tensor. +Status DatasetOpsTestBase::MakeRangeDataset( + const Tensor& start, const Tensor& stop, const Tensor& step, + const DataTypeVector& output_types, + const std::vector& output_shapes, + Tensor* range_dataset) { + GraphConstructorOptions graph_opts; + graph_opts.allow_internal_ops = true; + graph_opts.expect_device_spec = false; + TF_RETURN_IF_ERROR( + RunFunction(test::function::MakeRangeDataset(), + /*attrs*/ + {{RangeDatasetOp::kOutputTypes, output_types}, + {RangeDatasetOp::kOutputShapes, output_shapes}}, + /*inputs*/ {start, stop, step}, graph_opts, + /*rets*/ {range_dataset})); + return Status::OK(); +} + +// Create a `RangeDataset` dataset as a variant tensor. +Status DatasetOpsTestBase::MakeRangeDataset( + const RangeDatasetParams& range_dataset_params, Tensor* range_dataset) { + GraphConstructorOptions graph_opts; + graph_opts.allow_internal_ops = true; + graph_opts.expect_device_spec = false; + TF_RETURN_IF_ERROR(RunFunction( + test::function::MakeRangeDataset(), + /*attrs*/ + {{RangeDatasetOp::kOutputTypes, range_dataset_params.output_dtypes}, + {RangeDatasetOp::kOutputShapes, range_dataset_params.output_shapes}}, + /*inputs*/ + {range_dataset_params.start, range_dataset_params.stop, + range_dataset_params.step}, + graph_opts, + /*rets*/ {range_dataset})); + return Status::OK(); +} + +// Create a `TakeDataset` dataset as a variant tensor. 
+Status DatasetOpsTestBase::MakeTakeDataset( + const Tensor& input_dataset, int64 count, + const DataTypeVector& output_types, + const std::vector& output_shapes, + Tensor* take_dataset) { + GraphConstructorOptions graph_opts; + graph_opts.allow_internal_ops = true; + graph_opts.expect_device_spec = false; + + Tensor count_tensor = CreateTensor(TensorShape({}), {count}); + TF_RETURN_IF_ERROR( + RunFunction(test::function::MakeTakeDataset(), + /*attrs*/ + {{TakeDatasetOp::kOutputTypes, output_types}, + {TakeDatasetOp::kOutputShapes, output_shapes}}, + /*inputs*/ {input_dataset, count_tensor}, graph_opts, + /*rets*/ {take_dataset})); + return Status::OK(); +} + +Status DatasetOpsTestBase::CreateOpKernel( + const NodeDef& node_def, std::unique_ptr* op_kernel) { + OpKernel* kernel; + TF_RETURN_IF_ERROR(tensorflow::CreateOpKernel(device_type_, device_.get(), + allocator_, flr_, node_def, + TF_GRAPH_DEF_VERSION, &kernel)); + op_kernel->reset(kernel); + return Status::OK(); +} + +Status DatasetOpsTestBase::CreateDatasetContext( + OpKernel* const dateset_kernel, + gtl::InlinedVector* const inputs, + std::unique_ptr* dataset_context) { + TF_RETURN_IF_ERROR(CheckOpKernelInput(*dateset_kernel, *inputs)); + TF_RETURN_IF_ERROR( + CreateOpKernelContext(dateset_kernel, inputs, dataset_context)); + return Status::OK(); +} + +Status DatasetOpsTestBase::CreateDataset(OpKernel* kernel, + OpKernelContext* context, + DatasetBase** const dataset) { + TF_RETURN_IF_ERROR(RunOpKernel(kernel, context)); + // Assume that DatasetOp has only one output. + DCHECK_EQ(context->num_outputs(), 1); + TF_RETURN_IF_ERROR(GetDatasetFromContext(context, 0, dataset)); + return Status::OK(); +} + +Status DatasetOpsTestBase::RestoreIterator( + IteratorContext* ctx, IteratorStateReader* reader, + const string& output_prefix, const DatasetBase& dataset, + std::unique_ptr* iterator) { + TF_RETURN_IF_ERROR(dataset.MakeIterator(ctx, output_prefix, iterator)); + TF_RETURN_IF_ERROR((*iterator)->Restore(ctx, reader)); + return Status::OK(); +} + +Status DatasetOpsTestBase::CreateIteratorContext( + OpKernelContext* const op_context, + std::unique_ptr* iterator_context) { + IteratorContext::Params params(op_context); + params.resource_mgr = op_context->resource_manager(); + function_handle_cache_ = absl::make_unique(flr_); + params.function_handle_cache = function_handle_cache_.get(); + params.cancellation_manager = cancellation_manager_.get(); + *iterator_context = absl::make_unique(params); + return Status::OK(); +} + +Status DatasetOpsTestBase::GetDatasetFromContext(OpKernelContext* context, + int output_index, + DatasetBase** const dataset) { + Tensor* output = context->mutable_output(output_index); + Status status = GetDatasetFromVariantTensor(*output, dataset); + (*dataset)->Ref(); + return status; +} + +Status DatasetOpsTestBase::InitThreadPool(int thread_num) { + if (thread_num < 1) { + return errors::InvalidArgument( + "The `thread_num` argument should be positive but got: ", thread_num); + } + thread_pool_ = absl::make_unique( + Env::Default(), ThreadOptions(), "test_thread_pool", thread_num); + return Status::OK(); +} + +Status DatasetOpsTestBase::InitFunctionLibraryRuntime( + const std::vector& flib, int cpu_num) { + if (cpu_num < 1) { + return errors::InvalidArgument( + "The `cpu_num` argument should be positive but got: ", cpu_num); + } + SessionOptions options; + auto* device_count = options.config.mutable_device_count(); + device_count->insert({"CPU", cpu_num}); + std::vector> devices; + 
TF_RETURN_IF_ERROR(DeviceFactory::AddDevices( + options, "/job:localhost/replica:0/task:0", &devices)); + device_mgr_ = absl::make_unique(std::move(devices)); + resource_mgr_ = absl::make_unique("default_container"); + + FunctionDefLibrary proto; + for (const auto& fdef : flib) *(proto.add_function()) = fdef; + lib_def_ = + absl::make_unique(OpRegistry::Global(), proto); + + OptimizerOptions opts; + pflr_ = absl::make_unique( + device_mgr_.get(), Env::Default(), TF_GRAPH_DEF_VERSION, lib_def_.get(), + opts, thread_pool_.get(), nullptr /* cluster_flr */); + flr_ = pflr_->GetFLR("/job:localhost/replica:0/task:0/cpu:0"); + if (thread_pool_ == nullptr) { + runner_ = [](std::function fn) { fn(); }; + } else { + runner_ = [this](std::function fn) { + thread_pool_->Schedule(std::move(fn)); + }; + } + return Status::OK(); +} + +Status DatasetOpsTestBase::RunOpKernel(OpKernel* op_kernel, + OpKernelContext* context) { + device_->Compute(op_kernel, context); + return context->status(); +} + +Status DatasetOpsTestBase::RunFunction( + const FunctionDef& fdef, test::function::Attrs attrs, + const std::vector& args, + const GraphConstructorOptions& graph_options, std::vector rets) { + std::unique_ptr exec; + InstantiationResult result; + auto GetOpSig = [](const string& op, const OpDef** sig) { + return OpRegistry::Global()->LookUpOpDef(op, sig); + }; + TF_RETURN_IF_ERROR(InstantiateFunction(fdef, attrs, GetOpSig, &result)); + + DataTypeVector arg_types = result.arg_types; + DataTypeVector ret_types = result.ret_types; + + std::unique_ptr g(new Graph(OpRegistry::Global())); + TF_RETURN_IF_ERROR( + ConvertNodeDefsToGraph(graph_options, result.nodes, g.get())); + + const int version = g->versions().producer(); + LocalExecutorParams params; + params.function_library = flr_; + params.device = device_.get(); + params.create_kernel = [this, version](const NodeDef& ndef, + OpKernel** kernel) { + return CreateNonCachedKernel(device_.get(), this->flr_, ndef, version, + kernel); + }; + params.delete_kernel = [](OpKernel* kernel) { + DeleteNonCachedKernel(kernel); + }; + params.rendezvous_factory = [](const int64, const DeviceMgr* device_mgr, + Rendezvous** r) { + *r = new IntraProcessRendezvous(device_mgr); + return Status::OK(); + }; + + Executor* cur_exec; + TF_RETURN_IF_ERROR(NewLocalExecutor(params, std::move(g), &cur_exec)); + exec.reset(cur_exec); + FunctionCallFrame frame(arg_types, ret_types); + TF_RETURN_IF_ERROR(frame.SetArgs(args)); + Executor::Args exec_args; + exec_args.call_frame = &frame; + exec_args.runner = runner_; + TF_RETURN_IF_ERROR(exec->Run(exec_args)); + std::vector computed; + TF_RETURN_IF_ERROR(frame.GetRetvals(&computed)); + if (computed.size() != rets.size()) { + return errors::InvalidArgument( + "The result does not match the expected number of return outpus", + ". Expected: ", rets.size(), ". 
Actual: ", computed.size()); + } + for (int i = 0; i < rets.size(); ++i) { + *(rets[i]) = computed[i]; + } + return Status::OK(); +} + +Status DatasetOpsTestBase::CreateOpKernelContext( + OpKernel* kernel, gtl::InlinedVector* inputs, + std::unique_ptr* context) { + params_ = absl::make_unique(); + cancellation_manager_ = absl::make_unique(); + params_->cancellation_manager = cancellation_manager_.get(); + params_->device = device_.get(); + params_->frame_iter = FrameAndIter(0, 0); + params_->function_library = flr_; + params_->inputs = inputs; + params_->op_kernel = kernel; + params_->resource_manager = resource_mgr_.get(); + params_->runner = &runner_; + checkpoint::TensorSliceReaderCacheWrapper slice_reader_cache_wrapper; + slice_reader_cache_ = + absl::make_unique(); + params_->slice_reader_cache = slice_reader_cache_.get(); + step_container_ = + absl::make_unique(0, [](const string&) {}); + params_->step_container = step_container_.get(); + + // Set the allocator attributes for the outputs. + allocator_attrs_.clear(); + for (int index = 0; index < params_->op_kernel->num_outputs(); index++) { + AllocatorAttributes attr; + const bool on_host = + (params_->op_kernel->output_memory_types()[index] == HOST_MEMORY); + attr.set_on_host(on_host); + allocator_attrs_.emplace_back(attr); + } + params_->output_attr_array = gtl::vector_as_array(&allocator_attrs_); + + *context = absl::make_unique(params_.get()); + return Status::OK(); +} + +Status DatasetOpsTestBase::CreateSerializationContext( + std::unique_ptr* context) { + *context = + absl::make_unique(SerializationContext::Params{}); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckOpKernelInput( + const OpKernel& kernel, const gtl::InlinedVector& inputs) { + if (kernel.input_types().size() != inputs.size()) { + cout<<"++"<* inputs, DataTypeVector input_types, + DataType dtype, const TensorShape& shape) { + if (input_types.size() < inputs->size()) { + return errors::InvalidArgument("Adding more inputs than types: ", + inputs->size(), " vs. ", input_types.size()); + } + bool is_ref = IsRefType(input_types[inputs->size()]); + std::unique_ptr input = + absl::make_unique(allocator_, dtype, shape); + + if (is_ref) { + DataType expected_dtype = RemoveRefType(input_types[inputs->size()]); + if (expected_dtype != dtype) { + return errors::InvalidArgument("The input data type is ", dtype, + " , but expected: ", expected_dtype); + } + inputs->push_back({&lock_for_refs_, input.get()}); + } else { + if (input_types[inputs->size()] != dtype) { + return errors::InvalidArgument( + "The input data type is ", dtype, + " , but expected: ", input_types[inputs->size()]); + } + inputs->push_back({nullptr, input.get()}); + } + + // TODO(jsimsa): Figure out how to avoid using a member variable to garbage + // collect the inputs. 
+ tensors_.push_back(std::move(input)); + + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckIteratorGetNext( + const std::vector& expected_outputs, bool compare_order) { + bool end_of_sequence = false; + std::vector out_tensors; + while (!end_of_sequence) { + std::vector next; + TF_RETURN_IF_ERROR( + iterator_->GetNext(iterator_ctx_.get(), &next, &end_of_sequence)); + out_tensors.insert(out_tensors.end(), next.begin(), next.end()); + } + + TF_EXPECT_OK(ExpectEqual(out_tensors, expected_outputs, + /*compare_order=*/compare_order)); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckDatasetNodeName( + const string& expected_dataset_node_name) { + EXPECT_EQ(dataset_->node_name(), expected_dataset_node_name); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckDatasetTypeString( + const string& expected_type_str) { + EXPECT_EQ(dataset_->type_string(), expected_type_str); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckDatasetOutputDtypes( + const DataTypeVector& expected_output_dtypes) { + TF_EXPECT_OK( + VerifyTypesMatch(dataset_->output_dtypes(), expected_output_dtypes)); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckDatasetOutputShapes( + const std::vector& expected_output_shapes) { + TF_EXPECT_OK(VerifyShapesCompatible(dataset_->output_shapes(), + expected_output_shapes)); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckDatasetCardinality(int expected_cardinality) { + EXPECT_EQ(dataset_->Cardinality(), expected_cardinality); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckIteratorOutputDtypes( + const DataTypeVector& expected_output_dtypes) { + TF_EXPECT_OK( + VerifyTypesMatch(iterator_->output_dtypes(), expected_output_dtypes)); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckIteratorOutputShapes( + const std::vector& expected_output_shapes) { + TF_EXPECT_OK(VerifyShapesCompatible(iterator_->output_shapes(), + expected_output_shapes)); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckIteratorPrefix( + const string& expected_iterator_prefix) { + EXPECT_EQ(iterator_->prefix(), expected_iterator_prefix); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckIteratorSaveAndRestore( + const string& iterator_prefix, const std::vector& expected_outputs, + const std::vector& breakpoints) { + std::unique_ptr iterator; + TF_RETURN_IF_ERROR( + dataset_->MakeIterator(iterator_ctx_.get(), iterator_prefix, &iterator)); + std::unique_ptr serialization_ctx; + TF_RETURN_IF_ERROR(CreateSerializationContext(&serialization_ctx)); + bool end_of_sequence = false; + std::vector out_tensors; + int cur_iteration = 0; + auto expected_outputs_it = expected_outputs.begin(); + for (int breakpoint : breakpoints) { + VariantTensorData data; + VariantTensorDataWriter writer(&data); + TF_EXPECT_OK(iterator->Save(serialization_ctx.get(), &writer)); + TF_RETURN_IF_ERROR(writer.Flush()); + VariantTensorDataReader reader(&data); + TF_EXPECT_OK(RestoreIterator(iterator_ctx_.get(), &reader, iterator_prefix, + *dataset_, &iterator)); + + while (cur_iteration <= breakpoint) { + TF_RETURN_IF_ERROR(iterator->GetNext(iterator_ctx_.get(), &out_tensors, + &end_of_sequence)); + if (!end_of_sequence) { + EXPECT_NE(expected_outputs_it, expected_outputs.end()); + TF_EXPECT_OK(ExpectEqual(out_tensors.back(), *expected_outputs_it)); + expected_outputs_it++; + } + cur_iteration++; + } + + if (breakpoint >= expected_outputs.size()) { + EXPECT_TRUE(end_of_sequence); + EXPECT_EQ(expected_outputs_it, expected_outputs.end()); + } 
else { + EXPECT_FALSE(end_of_sequence); + } + } + return Status::OK(); +} + +} // namespace data +} // namespace tensorflow diff --git a/tf_adapter/tests/ut/kernels/testcase/dataset/function_testlib.cc b/tf_adapter/tests/ut/kernels/testcase/dataset/function_testlib.cc new file mode 100644 index 000000000..ad82bb6f6 --- /dev/null +++ b/tf_adapter/tests/ut/kernels/testcase/dataset/function_testlib.cc @@ -0,0 +1,649 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/function_testlib.h" + +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/framework/versions.pb.h" +#include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/public/version.h" + +namespace tensorflow { +namespace test { +namespace function { + +typedef FunctionDefHelper FDH; + +GraphDef GDef(gtl::ArraySlice nodes, + gtl::ArraySlice funcs) { + GraphDef g; + VersionDef* versions = g.mutable_versions(); + versions->set_producer(TF_GRAPH_DEF_VERSION); + versions->set_min_consumer(TF_GRAPH_DEF_VERSION_MIN_CONSUMER); + for (const auto& n : nodes) { + *(g.add_node()) = n; + } + auto lib = g.mutable_library(); + for (const auto& f : funcs) { + *(lib->add_function()) = f; + } + return g; +} + +// Helper to construct a NodeDef. 
+NodeDef NDef(StringPiece name, StringPiece op, gtl::ArraySlice inputs, + gtl::ArraySlice> attrs, + const string& device) { + NodeDef n; + n.set_name(string(name)); + n.set_op(string(op)); + for (const auto& in : inputs) n.add_input(in); + n.set_device(device); + for (auto na : attrs) n.mutable_attr()->insert({na.first, na.second.proto}); + return n; +} + +FunctionDef NonZero() { + return FDH::Define( + // Name + "NonZero", + // Args + {"x:T"}, + // Return values + {"y:T"}, + // Attr def + {"T:{float, double, int32, int64, string}"}, + // Nodes + { + {{"y"}, "Identity", {"x"}, {{"T", "$T"}}}, + }); +} + +FunctionDef IsZero() { + const Tensor kZero = test::AsScalar(0); + return FDH::Define( + // Name + "IsZero", + // Args + {"x: T"}, + // Return values + {"equal: bool"}, + // Attr def + {"T:{float, double, int32, int64, string}"}, + { + {{"zero"}, "Const", {}, {{"value", kZero}, {"dtype", DT_INT64}}}, + {{"cast"}, "Cast", {"zero"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}}, + {{"equal"}, "Equal", {"x", "cast"}, {{"T", "$T"}}}, + }); +} + +FunctionDef RandomUniform() { + const Tensor kZero = test::AsScalar(0); + + return FDH::Define( + // Name + "RandomUniform", + // Args + {"x: T"}, + // Return values + {"random_uniform: int64"}, + // Attr def + {"T:{float, double, int32, int64, string}"}, + {{{"random_uniform/shape"}, + "Const", + {}, + {{"value", kZero}, {"dtype", DT_INT64}}}, + {{"random_uniform"}, + "RandomUniform", + {"random_uniform/shape"}, + {{"T", DT_INT32}, + {"Tout", DT_FLOAT}, + {"seed", 87654321}, + {"seed2", 42}}}}); +} + +FunctionDef XTimesTwo() { + const Tensor kTwo = test::AsScalar(2); + return FDH::Define( + // Name + "XTimesTwo", + // Args + {"x: T"}, + // Return values + {"y: T"}, + // Attr def + {"T: {float, double, int32, int64}"}, + // Nodes + { + {{"two"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_INT64}}}, + {{"scale"}, "Cast", {"two"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}}, + {{"y"}, "Mul", {"x", "scale"}, {{"T", "$T"}}}, + }); +} + +FunctionDef TwoDeviceMult() { + const Tensor kTwo = test::AsScalar(2); + const Tensor kThree = test::AsScalar(3); + return FDH::Create( + // Name + "TwoDeviceMult", + // Args + {"x: T"}, + // Return values + {"y_cpu: T", "y_gpu: T"}, + // Attr def + {"T: {float, double, int32, int64}"}, + // Nodes + { + {{"num_2"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_INT64}}}, + {{"num_3"}, "Const", {}, {{"value", kThree}, {"dtype", DT_INT64}}}, + {{"factor_2"}, + "Cast", + {"num_2:output:0"}, + {{"SrcT", DT_INT64}, {"DstT", "$T"}}}, + {{"factor_3"}, + "Cast", + {"num_3:output:0"}, + {{"SrcT", DT_INT64}, {"DstT", "$T"}}}, + {{"y_cpu"}, + "Mul", + {"x", "factor_2:y:0"}, + {{"T", "$T"}}, + {}, + "/device:CPU:0"}, + {{"y_gpu"}, + "Mul", + {"x", "factor_3:y:0"}, + {{"T", "$T"}}, + {}, + "/device:GPU:0"}, + }, + {{"y_cpu", "y_cpu:z:0"}, {"y_gpu", "y_gpu:z:0"}}); +} + +FunctionDef TwoDeviceInputOutput() { + const Tensor kTwo = test::AsScalar(2); + const Tensor kThree = test::AsScalar(3); + return FDH::Create( + // Name + "TwoDeviceInputOutput", + // Args + {"x1: T", "x2: T"}, + // Return values + {"y_cpu: T", "y_gpu: T"}, + // Attr def + {"T: {float}"}, + // Nodes + { + {{"num_2"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_FLOAT}}}, + {{"num_3"}, "Const", {}, {{"value", kThree}, {"dtype", DT_FLOAT}}}, + {{"y_cpu"}, + "Mul", + {"x1", "num_2:output:0"}, + {{"T", "$T"}}, + {}, + "/device:CPU:0"}, + {{"y_gpu"}, + "Mul", + {"x2", "num_3:output:0"}, + {{"T", "$T"}}, + {}, + "/device:GPU:0"}, + }, + {{"y_cpu", "y_cpu:z:0"}, {"y_gpu", "y_gpu:z:0"}}); +} + 
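+// Usage sketch (illustrative only): the FunctionDef factories in this file
+// are consumed by packing them into a FunctionDefLibrary and handing that
+// to a FunctionLibraryDefinition, mirroring what
+// DatasetOpsTestBase::InitFunctionLibraryRuntime in dataset_test_base.cc
+// does:
+//
+//   FunctionDefLibrary proto;
+//   *proto.add_function() = test::function::XTimesTwo();
+//   FunctionLibraryDefinition lib_def(OpRegistry::Global(), proto);
+//
+// Attr placeholders such as "$T" in these definitions are bound only when a
+// function is instantiated with concrete attrs.
+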
+FunctionDef FuncWithListInput() { + const Tensor kTwo = test::AsScalar(2); + return FDH::Create( + // Name + "FuncWithListInput", + // Args + {"x1: N * T"}, + // Return values + {}, + // Attr def + {"T: {float}", "N: int >= 1"}, + // Nodes + { + {{"num_2"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_FLOAT}}}, + }, + {}); +} + +FunctionDef FuncWithListOutput() { + const Tensor kTwo = test::AsScalar(2); + return FDH::Create( + // Name + "FuncWithListOutput", + // Args + {}, + // Return values + {"y: N * T"}, + // Attr def + {"T: {float}", "N: int >= 1"}, + // Nodes + { + {{"num_2"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_FLOAT}}}, + }, + {{"y", "num_2:output:0"}}); +} + +FunctionDef XAddX() { + return FDH::Define( + // Name + "XAddX", + // Args + {"x: T"}, + // Return values + {"y: T"}, + // Attr def + {"T: {float, double, int32, int64}"}, + // Nodes + { + {{"y"}, "Add", {"x", "x"}, {{"T", "$T"}}}, + }); +} + +FunctionDef XAddY() { + return FDH::Define( + // Name + "XAddY", + // Args + {"x: T", "y: T"}, + // Return values + {"z: T"}, + // Attr def + {"T: {float, double, int32, int64}"}, + // Nodes + { + {{"z"}, "Add", {"x", "y"}, {{"T", "$T"}}}, + }); +} + +FunctionDef XTimesTwoInt32() { + const Tensor kTwo = test::AsScalar(2); + return FDH::Define( + // Name + "XTimesTwoInt32", + // Args + {"x: int32"}, + // Return values + {"y: int32"}, {}, + // Nodes + { + {{"two"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_INT64}}}, + {{"scale"}, + "Cast", + {"two"}, + {{"SrcT", DT_INT64}, {"DstT", DT_INT32}}}, + {{"y"}, "Mul", {"x", "scale"}, {{"T", DT_INT32}}}, + }); +} + +FunctionDef XTimesFour() { + return FDH::Create( + // Name + "XTimesFour", + // Args + {"x: T"}, + // Return values + {"y: T"}, + // Attr def + {"T: {float, double, int32, int64}"}, + // Nodes + { + {{"x2"}, "XTimesTwo", {"x"}, {{"T", "$T"}}}, + {{"y"}, "XTimesTwo", {"x2:y:0"}, {{"T", "$T"}}}, + }, + {{"y", "y:y:0"}}); +} + +FunctionDef XTimes16() { + return FDH::Create( + // Name + "XTimes16", + // Args + {"x: T"}, + // Return values + {"y: T"}, + // Attr def + {"T: {float, double, int32, int64}"}, + // Nodes + { + {{"x4"}, "XTimesFour", {"x"}, {{"T", "$T"}}}, + {{"y"}, "XTimesFour", {"x4:y:0"}, {{"T", "$T"}}}, + }, + {{"y", "y:y:0"}}); +} + +FunctionDef WXPlusB() { + return FDH::Define( + // Name + "WXPlusB", + // Args + {"w: T", "x: T", "b: T"}, + // Return values + {"y: T"}, + // Attr def + {"T: {float, double}"}, + // Nodes + {{{"mm"}, + "MatMul", + {"w", "x"}, + {{"T", "$T"}, + {"transpose_a", false}, + {"transpose_b", false}, + {"_kernel", "eigen"}}}, + {{"y"}, "Add", {"mm", "b"}, {{"T", "$T"}}}}); +} + +FunctionDef Swap() { + return FDH::Define( + // Name + "Swap", + // Args + {"i0: T", "i1: T"}, + // Return values + {"o0: T", "o1: T"}, + // Attr def + {"T: {float, double, resource}"}, + // Nodes + {{{"o0"}, "Identity", {"i1"}, {{"T", "$T"}}}, + {{"o1"}, "Identity", {"i0"}, {{"T", "$T"}}}}); +} + +FunctionDef EmptyBodySwap() { + return FDH::Create( + // Name + "EmptyBodySwap", + // Args + {"i0: T", "i1: T"}, + // Return values + {"o0: T", "o1: T"}, + // Attr def + {"T: {float, double, resource}"}, + // Nodes + {}, + // Output mapping + {{"o0", "i1"}, {"o1", "i0"}}); +} + +FunctionDef ResourceOutput() { + const Tensor kTwo = test::AsScalar(2); + return FDH::Create( + // Name + "ResourceOutput", + // Args + {"x: float", "y: resource"}, + // Return values + {"y_out: resource", "two_x: float"}, + // Attr def + {}, + // Nodes + { + {{"two"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_FLOAT}}}, + {{"mul"}, "Mul", {"x", 
"two:output:0"}, {{"T", DT_FLOAT}}, {}}, + }, + {{"y_out", "y"}, {"two_x", "mul:z:0"}}); +} + +FunctionDef ResourceIdentity() { + return FDH::Create( + // Name + "ResourceIdentity", + // Args + {"x: resource"}, + // Return values + {"y: resource"}, + // Attr def + {}, + // Nodes + {}, + // Output mapping + {{"y", "x"}}); +} + +FunctionDef ReadResourceVariable() { + return FDH::Create( + // Name + "ReadResourceVariable", + // Args + {"x: resource"}, + // Return values + {"y: float"}, + // Attr def + {}, + // Nodes + { + {{"read"}, "ReadVariableOp", {"x"}, {{"dtype", DT_FLOAT}}, {}}, + }, + {{"y", "read:value:0"}}); +} + +FunctionDef InvalidControlFlow() { + return FDH::Create( + // Name + "InvalidControlFlow", + // Args + {"i: int32"}, + // Return values + {"o: int32"}, + // Attr def + {}, + // Nodes + {{{"enter"}, "Enter", {"i"}, {{"T", DT_INT32}, {"frame_name", "while"}}}, + {{"add"}, "Add", {"enter:output", "i"}, {{"T", DT_INT32}}}}, + // Output mapping + {{"o", "add:z"}}); +} + +FunctionDef LessThanOrEqualToN(int64 N) { + const Tensor kN = test::AsScalar(N); + return FDH::Define( + // Name + "LessThanOrEqualToN", + // Args + {"x: T"}, + // Return values + {"z: bool"}, + // Attr def + {"T: {float, double, int32, int64}"}, + // Nodes + { + {{"N"}, "Const", {}, {{"value", kN}, {"dtype", DT_INT64}}}, + {{"y"}, "Cast", {"N"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}}, + {{"z"}, "LessEqual", {"x", "y"}, {{"T", "$T"}}}, + }); +} + +FunctionDef XPlusOneXTimesY() { + const Tensor kOne = test::AsScalar(1); + return FDH::Define( + // Name + "XPlusOneXTimesY", + // Args + {"x: T", "y: T"}, + // Return values + {"s: T", "t: T"}, + // Attr def + {"T: {float, double, int32, int64}"}, + // Nodes + {{{"one"}, "Const", {}, {{"value", kOne}, {"dtype", DT_INT64}}}, + {{"increment"}, "Cast", {"one"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}}, + {{"s"}, "Add", {"x", "increment"}, {{"T", "$T"}}}, + {{"t"}, "Mul", {"x", "y"}, {{"T", "$T"}}}}); +} + +FunctionDef XYXLessThanOrEqualToN(int64 N) { + const Tensor kN = test::AsScalar(N); + return FDH::Define( + // Name + "XYXLessThanOrEqualToN", + // Args + {"x: T", "y: T"}, + // Return values + {"z: bool"}, + // Attr def + {"T: {float, double, int32, int64}"}, + // Nodes + { + {{"N"}, "Const", {}, {{"value", kN}, {"dtype", DT_INT64}}}, + {{"N1"}, "Cast", {"N"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}}, + {{"z"}, "LessEqual", {"x", "N1"}, {{"T", "$T"}}}, + }); +} + +FunctionDef RandomUniformLess() { + const Tensor kZero = test::AsScalar(0); + const Tensor kOne = test::AsScalar(1); + const Tensor k005 = test::AsScalar(0.05); + + return FDH::Define( + // Name + "RandomUniformLess", + // Args + {"arg0: int64"}, + // Return values + {"strided_slice: bool"}, + // Attr def + {"T:{float, double, int32, int64, string}"}, + {{{"random_uniform/shape"}, + "Const", + {}, + {{"value", kZero}, {"dtype", DT_INT32}}}, + + {{"random_uniform/RandomUniform"}, + "RandomUniform", + {"random_uniform/shape"}, + {{"T", DT_INT32}, {"Tout", DT_FLOAT}, {"seed", 0}, {"seed2", 0}}}, + + {{"Less/y"}, "Const", {}, {{"value", k005}, {"dtype", DT_FLOAT}}}, + + {{"Less"}, + "Less", + {"random_uniform/RandomUniform", "Less/y"}, + {{"T", DT_FLOAT}}}, + + {{"strided_slice/stack"}, + "Const", + {}, + {{"value", kZero}, {"dtype", DT_INT32}}}, + + {{"strided_slice/stack_1"}, + "Const", + {}, + {{"value", kOne}, {"dtype", DT_INT32}}}, + + {{"strided_slice/stack_2"}, + "Const", + {}, + {{"value", kOne}, {"dtype", DT_INT32}}}, + + {{"strided_slice"}, + "StridedSlice", + {"Less", "strided_slice/stack", 
"strided_slice/stack_1", + "strided_slice/stack_2"}, + {{"Index", DT_INT32}, + {"T", DT_BOOL}, + {"begin_mask", 0}, + {"ellipsis_mask", 0}, + {"end_mask", 0}, + {"new_axis_mask", 0}, + {"shrink_axis_mask", 0}}}}); +} + +FunctionDef MakeRangeDataset() { + return FDH::Define( + // Name + "MakeRangeDataset", + // Args + {"start: int64", "stop: int64", "step: int64"}, + // Return values + {"y:variant"}, + // Attr def + {"output_types: list(type) >= 1", "output_shapes: list(shape) >= 1"}, + // Nodes + {{{"y"}, + "RangeDataset", + {"start", "stop", "step"}, + {{"output_types", "$output_types"}, + {"output_shapes", "$output_shapes"}}}}); +} + +FunctionDef MakeTakeDataset() { + return FDH::Define( + // Name + "TakeDataset", + // Args + {"input_dataset: variant", "count: int64"}, + // Return values + {"y:variant"}, + // Attr def + {"output_types: list(type) >= 1", "output_shapes: list(shape) >= 1"}, + // Nodes + {{{"y"}, + "TakeDataset", + {"input_dataset", "count"}, + {{"output_types", "$output_types"}, + {"output_shapes", "$output_shapes"}}}}); +} + +FunctionDef MakeTensorSliceDataset() { + return FDH::Define( + // Name + "MakeTensorSliceDataset", + // Args + {"x: Toutput_types"}, + // Return values + {"y: variant"}, + // Attr def + {"Toutput_types: list(type) >= 1", "output_shapes: list(shape) >= 1"}, + // Nodes + {{{"y"}, + "TensorSliceDataset", + {"x"}, + {{"Toutput_types", "$Toutput_types"}, + {"output_shapes", "$output_shapes"}}}}); +} + +FunctionDef Unique() { + return FDH::Create( + // Name + "GetUnique", + // Args + {"x:T"}, + // Return values + {"y:T", "idx: out_idx"}, + // Attr def + {"T: type", "out_idx: {int32, int64} = DT_INT32"}, + // Nodes + { + {{"result"}, "Unique", {"x"}, {{"T", "$T"}, {"out_idx", "$out_idx"}}}, + }, + {{"y", "result:y:0"}, {"idx", "result:idx:0"}}); +} + +void FunctionTestSchedClosure(std::function fn) { + static thread::ThreadPool* w = + new thread::ThreadPool(Env::Default(), "Test", 8); + w->Schedule(std::move(fn)); +} + +} // end namespace function +} // end namespace test +} // end namespace tensorflow diff --git a/tf_adapter/tests/ut/kernels/testcase/dataset/host_queue_dats_set_ut.cc b/tf_adapter/tests/ut/kernels/testcase/dataset/host_queue_dats_set_ut.cc new file mode 100644 index 000000000..ce3a9d7fc --- /dev/null +++ b/tf_adapter/tests/ut/kernels/testcase/dataset/host_queue_dats_set_ut.cc @@ -0,0 +1,203 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#define protected public +#include "gtest/gtest.h" +#include "tensorflow/core/graph/graph_def_builder.h" +#include "tensorflow/core/kernels/data/dataset_test_base.h" +class HostQueueDatasetOp; +namespace tensorflow { +namespace data { +namespace { + +static constexpr char kNodeName[] = "host_queue_dataset"; +static constexpr const char *const kChannelName = "channel_name"; +static constexpr const char *const kOutputTypes = "output_types"; +static constexpr const char *const kOutputShapes = "output_shapes"; + +class HostQueueDatasetOpTest : public DatasetOpsTestBase { + protected: + // Creates `TensorSliceDataset` variant tensor from the input vector of + // tensors. + Status CreateTensorSliceDatasetTensor( + std::vector *const tensor_vector, Tensor *dataset_tensor) { + DatasetBase *tensor_slice_dataset; + TF_RETURN_IF_ERROR(CreateTensorSliceDataset( + "tensor_slice_node", tensor_vector, &tensor_slice_dataset)); + TF_RETURN_IF_ERROR( + StoreDatasetInVariantTensor(tensor_slice_dataset, dataset_tensor)); + return Status::OK(); + } + + // Create a new `HostQueueDataset` op kernel. + Status CreateHostQueueDatasetKernel( + const DataTypeVector &output_types, + const std::vector &output_shapes, + std::unique_ptr *op_kernel, std::string _local_rank_id) { + name_utils::OpNameParams params; + + NodeDef node_def = + test::function::NDef(kNodeName, name_utils::OpName("HostQueue", params), + {"geop_dataset", "input_dataset"}, + {{"channel_name", "channel_001"}, + {"output_types", output_types}, + {"_local_rank_id", _local_rank_id}, + {"_local_device_list", "{0,-1}"}, + {"output_shapes", output_shapes}}); + TF_RETURN_IF_ERROR(CreateOpKernel(node_def, op_kernel)); + return Status::OK(); + } + + // Create a new `HostQueueDataset` op kernel context. 
+ Status CreateHostQueueDatasetContext( + OpKernel *op_kernel, gtl::InlinedVector *const inputs, + std::unique_ptr *context) { + TF_RETURN_IF_ERROR(CheckOpKernelInput(*op_kernel, *inputs)); + TF_RETURN_IF_ERROR(CreateOpKernelContext(op_kernel, inputs, context)); + return Status::OK(); + } + + protected: + virtual void SetUp() {} + virtual void TearDown() {} +}; + +struct TestCase { + std::vector input_tensors; + std::vector expected_outputs; + DataTypeVector expected_output_dtypes; + std::vector expected_output_shapes; +}; + +TestCase NormalizeTestCase() { + return { + // input_tensors expected_outputs expected_output_dtypes + // expected_output_shapes + {CreateTensor(TensorShape{10, 1}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9})}, + {CreateTensor(TensorShape{1}, {0})}, + {DT_INT64}, + {PartialTensorShape({1})}, + }; +} + +TEST_F(HostQueueDatasetOpTest, iterator_getnext) { + int thread_num = 2, cpu_num = 2; + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + const TestCase &test_case = NormalizeTestCase(); + Tensor tensor_slice_dataset_tensor(DT_VARIANT, TensorShape({})); + std::vector inputs_for_tensor_slice_dataset = test_case.input_tensors; + TF_ASSERT_OK(CreateTensorSliceDatasetTensor(&inputs_for_tensor_slice_dataset, + &tensor_slice_dataset_tensor)); + + gtl::InlinedVector inputs_for_host_queue_dataset( + {TensorValue(&tensor_slice_dataset_tensor), + TensorValue(&tensor_slice_dataset_tensor)}); + + std::unique_ptr host_queue_dataset_kernel; + TF_ASSERT_OK(CreateHostQueueDatasetKernel(test_case.expected_output_dtypes, + test_case.expected_output_shapes, + &host_queue_dataset_kernel, "-1")); + std::unique_ptr host_queue_dataset_context; + TF_ASSERT_OK(CreateHostQueueDatasetContext(host_queue_dataset_kernel.get(), + &inputs_for_host_queue_dataset, + &host_queue_dataset_context)); + DatasetBase *host_queue_dataset; + TF_ASSERT_OK(CreateDataset(host_queue_dataset_kernel.get(), + host_queue_dataset_context.get(), + &host_queue_dataset)); + core::ScopedUnref scoped_unref(host_queue_dataset); + + EXPECT_EQ(host_queue_dataset->node_name(), kNodeName); + + host_queue_dataset->output_dtypes(); + host_queue_dataset->output_shapes(); + host_queue_dataset->DebugString(); + + SerializationContext context(SerializationContext::Params{}); + GraphDefBuilder b; + DatasetBase::DatasetGraphDefBuilder db(&b); + Node *output; + host_queue_dataset->AsGraphDefInternal(&context, &db, &output); + + std::unique_ptr iterator_context; + TF_ASSERT_OK(CreateIteratorContext(host_queue_dataset_context.get(), + &iterator_context)); + std::unique_ptr iterator; + TF_ASSERT_OK(host_queue_dataset->MakeIterator(iterator_context.get(), + "Iterator", &iterator)); + + bool end_of_sequence = false; + std::vector out_tensors; + sleep(2); + TF_EXPECT_OK(iterator->GetNext(iterator_context.get(), &out_tensors, + &end_of_sequence)); +} + +TEST_F(HostQueueDatasetOpTest, iterator_getnext02) { + int thread_num = 2, cpu_num = 2; + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + const TestCase &test_case = NormalizeTestCase(); + Tensor tensor_slice_dataset_tensor(DT_VARIANT, TensorShape({})); + std::vector inputs_for_tensor_slice_dataset = test_case.input_tensors; + TF_ASSERT_OK(CreateTensorSliceDatasetTensor(&inputs_for_tensor_slice_dataset, + &tensor_slice_dataset_tensor)); + + gtl::InlinedVector inputs_for_host_queue_dataset( + {TensorValue(&tensor_slice_dataset_tensor), + TensorValue(&tensor_slice_dataset_tensor)}); + + std::unique_ptr 
host_queue_dataset_kernel; + TF_ASSERT_OK(CreateHostQueueDatasetKernel(test_case.expected_output_dtypes, + test_case.expected_output_shapes, + &host_queue_dataset_kernel, "0")); + std::unique_ptr host_queue_dataset_context; + TF_ASSERT_OK(CreateHostQueueDatasetContext(host_queue_dataset_kernel.get(), + &inputs_for_host_queue_dataset, + &host_queue_dataset_context)); + DatasetBase *host_queue_dataset; + TF_ASSERT_OK(CreateDataset(host_queue_dataset_kernel.get(), + host_queue_dataset_context.get(), + &host_queue_dataset)); + core::ScopedUnref scoped_unref(host_queue_dataset); + + EXPECT_EQ(host_queue_dataset->node_name(), kNodeName); + + host_queue_dataset->output_dtypes(); + host_queue_dataset->output_shapes(); + host_queue_dataset->DebugString(); + + SerializationContext context(SerializationContext::Params{}); + GraphDefBuilder b; + DatasetBase::DatasetGraphDefBuilder db(&b); + Node *output; + host_queue_dataset->AsGraphDefInternal(&context, &db, &output); + + std::unique_ptr iterator_context; + TF_ASSERT_OK(CreateIteratorContext(host_queue_dataset_context.get(), + &iterator_context)); + std::unique_ptr iterator; + TF_ASSERT_OK(host_queue_dataset->MakeIterator(iterator_context.get(), + "Iterator", &iterator)); + + bool end_of_sequence = false; + std::vector out_tensors; + sleep(2); + TF_EXPECT_OK(iterator->GetNext(iterator_context.get(), &out_tensors, + &end_of_sequence)); +} + +} // namespace +} // namespace data +} // namespace tensorflow diff --git a/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc b/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc index 2645fee89..cc5ac3047 100644 --- a/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc +++ b/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc @@ -142,6 +142,12 @@ TEST_F(GeOpTest, GeOpDynamicInputTest) { EXPECT_TRUE(attrs.find("_dynamic_input") != attrs.end()); EXPECT_TRUE(!attrs["_dynamic_input"].s().empty()); } +TEST_F(GeOpTest, GeOpDynamicInputGetNextTest) { + NodeDef node_def; + std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_dynamic_input_lazy_recompile.pbtxt"; + gtl::InlinedVector inputs; + EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp11_0").ok()); +} TEST_F(GeOpTest, GeOpDynamicInput1Test) { NodeDef node_def; std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_dynamic_execute.pbtxt"; @@ -250,7 +256,6 @@ TEST_F(GeOpTest, GeOpFuncSubGraphTest) { gtl::InlinedVector inputs{TensorValue(&a)}; EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp12_0").ok()); } - TEST_F(GeOpTest, GeOpDynamicDimsTest) { NodeDef node_def; std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_dynamic_dims.pbtxt"; @@ -261,14 +266,12 @@ TEST_F(GeOpTest, GeOpDynamicDimsTest) { EXPECT_TRUE(attrs.find("_input_shape") != attrs.end()); EXPECT_TRUE(!attrs["_input_shape"].s().empty()); } - TEST_F(GeOpTest, GeOpWhileLoopV1Test) { NodeDef node_def; std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_while_loop.pbtxt"; gtl::InlinedVector inputs; EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp13_0").ok()); } - TEST_F(GeOpTest, GeOpWhileLoopV2Test) { setenv("ENABLE_FORCE_V2_CONTROL", "1", true); NodeDef node_def; @@ -276,7 +279,6 @@ TEST_F(GeOpTest, GeOpWhileLoopV2Test) { gtl::InlinedVector inputs; EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp13_0").ok()); } - TEST_F(GeOpTest, GeOpNpuOnnxGraphOpTest) { NodeDef node_def; std::string grph_pbtxt_path = 
"tf_adapter/tests/ut/kernels/pbtxt/geop_npu_onnx_graph_op.pbtxt"; @@ -285,7 +287,6 @@ TEST_F(GeOpTest, GeOpNpuOnnxGraphOpTest) { gtl::InlinedVector inputs{TensorValue(&in)}; EXPECT_TRUE(GeOpRunGraphAsync(grph_pbtxt_path, inputs, node_def, "GeOp91_0").ok()); } - TEST_F(GeOpTest, GeOpNpuOnnxGraphOpNoModelTest) { NodeDef node_def; std::string grph_pbtxt_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_npu_onnx_graph_op_parse.pbtxt"; @@ -294,6 +295,11 @@ TEST_F(GeOpTest, GeOpNpuOnnxGraphOpNoModelTest) { gtl::InlinedVector inputs{TensorValue(&in)}; EXPECT_TRUE(GeOpRunGraphAsync(grph_pbtxt_path, inputs, node_def, "GeOp91_0").ok()); } - +TEST_F(GeOpTest, GeOpDpOpTest) { + NodeDef node_def; + std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_dpop.pbtxt"; + gtl::InlinedVector inputs; + EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp1_0_dp").ok()); +} } } //end tensorflow \ No newline at end of file diff --git a/tf_adapter/tests/ut/kernels/testcase/infeed_outfeed_test.cc b/tf_adapter/tests/ut/kernels/testcase/infeed_outfeed_test.cc new file mode 100644 index 000000000..21bc49f19 --- /dev/null +++ b/tf_adapter/tests/ut/kernels/testcase/infeed_outfeed_test.cc @@ -0,0 +1,76 @@ +#include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/public/version.h" +#include +#include "gtest/gtest.h" + + +namespace tensorflow { +namespace { + +#define TF_ASSERT_OK(statement) \ + ASSERT_EQ(::tensorflow::Status::OK(), (statement)) + +#define TF_EXPECT_OK(statement) \ + EXPECT_EQ(::tensorflow::Status::OK(), (statement)) + +class DummyDevice : public DeviceBase { + public: + DummyDevice(Env* env, bool save) : DeviceBase(env), save_(save) {} + bool RequiresRecordingAccessedTensors() const override { return save_; } + Allocator* GetAllocator(AllocatorAttributes /*attr*/) override { return cpu_allocator(); } + private: + bool save_; +}; +} +class InfeedOutfeedTest : public testing::Test { + protected: + virtual void SetUp() {} + virtual void TearDown() {} +}; + +TEST_F(InfeedOutfeedTest, LogSummaryTest) { + DataType data_type = DT_INT32; + std::initializer_list dims = {}; + TensorShapeProto shape_proto; + TensorShape(dims).AsProto(&shape_proto); + + std::string channel_name = "_npu_log"; + + NodeDef outfeed_node; + tensorflow::AttrValue output_shapes; + tensorflow::AttrValue output_types; + *(output_shapes.mutable_list()->add_shape()) = shape_proto; + *(output_shapes.mutable_list()->add_shape()) = shape_proto; + output_types.mutable_list()->add_type(DT_STRING); + output_types.mutable_list()->add_type(DT_INT32); + TF_ASSERT_OK(NodeDefBuilder("out_feed", "OutfeedDequeueOp") + .Attr("channel_name", channel_name) + .Attr("output_types", output_types) + .Attr("output_shapes", output_shapes) + .Finalize(&outfeed_node)); + + DeviceType device_type = DEVICE_CPU; + Env* env = Env::Default(); + auto device = absl::make_unique(env, false); + + Status status; + std::unique_ptr op(CreateOpKernel(device_type, device.get(), + cpu_allocator(), outfeed_node, + TF_GRAPH_DEF_VERSION, &status)); + TF_ASSERT_OK(status); + + OpKernelContext::Params params; + params.device = device.get(); + params.op_kernel = op.get(); + std::unique_ptr cancellation_manager = absl::make_unique(); + params.cancellation_manager = cancellation_manager.get(); + + OpKernelContext ctx(¶ms); + op->Compute(&ctx); + TF_EXPECT_OK(ctx.status()); + +} +} //end 
+  DeviceType device_type = DEVICE_CPU;
+  Env *env = Env::Default();
+  auto device = absl::make_unique<DummyDevice>(env, false);
+
+  Status status;
+  std::unique_ptr<OpKernel> op(CreateOpKernel(device_type, device.get(),
+                                              cpu_allocator(), outfeed_node,
+                                              TF_GRAPH_DEF_VERSION, &status));
+  TF_ASSERT_OK(status);
+
+  OpKernelContext::Params params;
+  params.device = device.get();
+  params.op_kernel = op.get();
+  std::unique_ptr<CancellationManager> cancellation_manager = absl::make_unique<CancellationManager>();
+  params.cancellation_manager = cancellation_manager.get();
+
+  OpKernelContext ctx(&params);
+  op->Compute(&ctx);
+  TF_EXPECT_OK(ctx.status());
+
+}
+} //end tensorflow
\ No newline at end of file
diff --git a/tf_adapter/tests/ut/optimizers/testcase/dp_tf_ge_conversion_pass_test.cc b/tf_adapter/tests/ut/optimizers/testcase/dp_tf_ge_conversion_pass_test.cc
index 5a69c0b3e..47111654a 100644
--- a/tf_adapter/tests/ut/optimizers/testcase/dp_tf_ge_conversion_pass_test.cc
+++ b/tf_adapter/tests/ut/optimizers/testcase/dp_tf_ge_conversion_pass_test.cc
@@ -6,6 +6,7 @@
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/logging.h"
+#include <stdlib.h>
 namespace tensorflow {
 namespace {
@@ -95,5 +96,16 @@ TEST_F(DpOptimizationPassTest, DatasetNotInDeviceTest) {
     "HostQueueDataset->DPGroupDataset;GEOPDataset->HostQueueDataset;DPGroupDataset->MakeIterator";
   EXPECT_EQ(DoRunDpOptimizationPassTest(), target_graph);
 }
+TEST_F(DpOptimizationPassTest, NewDatasetNotInDeviceTest) {
+  string org_graph_def_path = "tf_adapter/tests/ut/optimizers/pbtxt/dp_test_no_dataset_in_device.pbtxt";
+  setenv("IS_NEW", "1", true);
+  InitGraph(org_graph_def_path);
+  std::string target_graph = "Const->TensorSliceDataset;TensorSliceDataset->BatchDatasetV2;Const->BatchDatasetV2:1;"\
+    "Const->BatchDatasetV2:2;BatchDatasetV2->RepeatDataset;Const->RepeatDataset:1;RepeatDataset->OptimizeDataset;"\
+    "Const->OptimizeDataset:1;OptimizeDataset->ModelDataset;IteratorV2->MakeIterator:1;ModelDataset->HostQueueDataset:1;"\
+    "HostQueueDataset->DPGroupDataset;GEOPDataset->HostQueueDataset;DPGroupDataset->MakeIterator";
+  EXPECT_EQ(DoRunDpOptimizationPassTest(), target_graph);
+  unsetenv("IS_NEW");
+}
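+// NewDatasetNotInDeviceTest: with IS_NEW=1 the pass presumably takes the new
+// ACL-channel host queue path; the rewritten graph is expected to be identical
+// to the legacy DatasetNotInDeviceTest above.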
 } // end namespace
 } // end tensorflow
diff --git a/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc b/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc
index 0d0b7c0a0..0ad6bfb3c 100644
--- a/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc
+++ b/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc
@@ -82,7 +82,6 @@ TEST_F(GePluginTest, MallocSharedMemOKTest) {
   EXPECT_EQ(ret, 0);
 }
 TEST_F(GePluginTest, NpuCloseTest) {
-  NpuAttrs::SetUseTdtStatus(0, true);
   NpuClose();
 }
 TEST_F(GePluginTest, RdmaInitAndRegisterFail1Test) {
diff --git a/tf_adapter/util/acl_channel.cc b/tf_adapter/util/acl_channel.cc
new file mode 100644
index 000000000..38b701022
--- /dev/null
+++ b/tf_adapter/util/acl_channel.cc
@@ -0,0 +1,228 @@
+/* Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tf_adapter/common/common.h"
+#include "tf_adapter/common/adp_logger.h"
+#include "tf_adapter/util/acl_channel.h"
+#include "acl/error_codes/rt_error_codes.h"
+#include "securec.h"
+
+namespace tensorflow {
+
+Status MappingTfDtypeToAcl(const tensorflow::DataType tf_type, aclDataType &acl_type) {
+  const static std::map<tensorflow::DataType, aclDataType> type_mapping = {
+      {DT_FLOAT, ACL_FLOAT}, {DT_HALF, ACL_FLOAT16}, {DT_INT8, ACL_INT8}, {DT_INT32, ACL_INT32},
+      {DT_UINT8, ACL_UINT8}, {DT_INT16, ACL_INT16}, {DT_UINT16, ACL_UINT16}, {DT_UINT32, ACL_UINT32},
+      {DT_INT64, ACL_INT64}, {DT_UINT64, ACL_UINT64}, {DT_DOUBLE, ACL_DOUBLE}, {DT_BOOL, ACL_BOOL},
+      {DT_STRING, ACL_STRING}};
+  auto found = type_mapping.find(tf_type);
+  if (found == type_mapping.end()) {
+    return errors::Internal("Unsupported tf data type ", DataTypeString(tf_type), " by acl.");
+  }
+  acl_type = found->second;
+  return Status::OK();
+}
+
+Status MappingAclDtypeToTf(const aclDataType &acl_type, tensorflow::DataType &tf_type) {
+  const static std::map<aclDataType, tensorflow::DataType> type_mapping = {
+      {ACL_FLOAT, DT_FLOAT}, {ACL_FLOAT16, DT_HALF}, {ACL_INT8, DT_INT8}, {ACL_INT32, DT_INT32},
+      {ACL_UINT8, DT_UINT8}, {ACL_INT16, DT_INT16}, {ACL_UINT16, DT_UINT16}, {ACL_UINT32, DT_UINT32},
+      {ACL_INT64, DT_INT64}, {ACL_UINT64, DT_UINT64}, {ACL_DOUBLE, DT_DOUBLE}, {ACL_BOOL, DT_BOOL},
+      {ACL_STRING, DT_STRING}};
+  auto found = type_mapping.find(acl_type);
+  if (found == type_mapping.end()) { return errors::Internal("Acl channel receive unsupported data type ", acl_type); }
+  tf_type = found->second;
+  return Status::OK();
+}
+
+Status AssembleAclTensor2Tensor(acltdtDataItem *item, std::vector<Tensor> &tensors, bool call_by_channel_receive) {
+  acltdtTensorType acl_type = acltdtGetTensorTypeFromItem(item);
+  if (acl_type == ACL_TENSOR_DATA_END_OF_SEQUENCE) {
+    LOG(INFO) << "Acl channel received end-of-sequence for out-feed op.";
+    return Status::OK();
+  } else if (acl_type == ACL_TENSOR_DATA_ABNORMAL) {
+    LOG(INFO) << "Acl channel received abnormal for out-feed op.";
+    return Status::OK();
+  } else if (acl_type == ACL_TENSOR_DATA_UNDEFINED) {
+    LOG(INFO) << "Acl channel received undefined message type for out-feed op.";
+    return errors::Internal("Acl channel received undefined message type for out-feed op.");
+  }
+  tensorflow::DataType tf_type;
+  TF_RETURN_IF_ERROR(MappingAclDtypeToTf(acltdtGetDataTypeFromItem(item), tf_type));
+  size_t dim_num = acltdtGetDimNumFromItem(item);
+  size_t acl_data_len = acltdtGetDataSizeFromItem(item);
+  char *acl_data = reinterpret_cast<char *>(acltdtGetDataAddrFromItem(item));
+  if (acl_data == nullptr) { return errors::Internal("Acl get data addr from item failed when receive tensor data."); }
+  if (tf_type == DT_STRING) {
+    if (dim_num != 0) { return errors::Internal("Acl channel receive unsupported non-scalar string type"); }
+    Tensor tensor(tf_type, TensorShape({}));
+    tensor.scalar<string>()() = std::move(string(acl_data, acl_data_len));
+    tensors.emplace_back(std::move(tensor));
+  } else if (DataTypeCanUseMemcpy(tf_type)) {
+    std::vector<int64_t> dims;
+    dims.resize(dim_num);
+    if (acltdtGetDimsFromItem(item, dims.data(), dim_num) != ACL_ERROR_NONE) {
+      return errors::Internal("Failed get dim-size from acl channel data");
+    }
+    TensorShape tf_shape;
+    for (auto dim : dims) { tf_shape.AddDim(dim); }
+    Tensor tensor = Tensor(tf_type, tf_shape);
+    auto tensor_data = const_cast<char *>(tensor.tensor_data().data());
+    auto tensor_size = tensor.tensor_data().size();
+    if (tensor_size != acl_data_len) {
+      return errors::Internal("Acl channel receive size mismatch tensor size acl:",
+                              acl_data_len, " vs. tf:", tensor_size);
+    }
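+    // memcpy_s rejects a single copy larger than SECUREC_MEM_MAX_LEN, so the
+    // payload is copied in chunks of at most that size.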
return errors::Internal("Acl channel receive size mismatch tensor size acl:", + acl_data_len, "vs. tf:", tensor_size); + } + do { + auto copy_size = (tensor_size > SECUREC_MEM_MAX_LEN) ? SECUREC_MEM_MAX_LEN : tensor_size; + LOG(INFO) << "tensor data:" << reinterpret_cast(tensor_data) << ", tensor_size:" << tensor_size + << ", acl_data:" << reinterpret_cast(acl_data) << ", copy_size:" << copy_size; + if (memcpy_s(tensor_data, tensor_size, acl_data, copy_size) != EOK) { + return errors::Internal("Failed copy acl channel data to tensorflow."); + } + tensor_size -= copy_size; + tensor_data += copy_size; + acl_data += copy_size; + } while (tensor_size > 0); + tensors.emplace_back(std::move(tensor)); + } else { return errors::InvalidArgument("Acl channel receive uncopyable tf data type", DataTypeString(tf_type)); } + return Status::OK(); +} + +Status AssembleAclDataset2Tensors(acltdtDataset *acl_dataset, std::vector &out_tensors, + bool call_by_channel_receive) { + for (size_t i = 0; i < acltdtGetDatasetSize(acl_dataset); i++) { + auto acl_data = acltdtGetDataItem(acl_dataset, i); + if (acl_data == nullptr) { + return errors::Internal("Acl get tensor data from dataset failed when receive tensor data."); + } + TF_RETURN_IF_ERROR(AssembleAclTensor2Tensor(acl_data, out_tensors, call_by_channel_receive)); + } + return Status::OK(); +} + +Status AssembleTensors2AclDataset(acltdtTensorType acl_type, const std::vector &tensors, + acltdtDataset **output_acl_dataset) { + auto acl_dataset = acltdtCreateDataset(); + if (acl_dataset == nullptr) { return errors::Internal("Acl create tensor dataset failed"); } + auto status = AssembleTensors2AclDataset(acl_type, tensors, acl_dataset); + if (!status.ok()) { + ADAPTER_LOG_IF_ERROR(DestroyAclDataset(acl_dataset)); + return status; + } + *output_acl_dataset = acl_dataset; + return Status::OK(); +} + +Status AssembleTensors2AclDataset(acltdtTensorType acl_type, const std::vector &tensors, + acltdtDataset *acl_dataset) { + if (TF_PREDICT_FALSE(acl_type != ACL_TENSOR_DATA_TENSOR)) { + acltdtDataItem *acl_data = acltdtCreateDataItem(acl_type, nullptr, 0, ACL_BOOL /* whatever */, nullptr, 0); + if (acl_data == nullptr) { return errors::Internal("Acl create tensor item failed when send end-of-sequence."); } + if (acltdtAddDataItem(acl_dataset, acl_data) != ACL_ERROR_NONE) { + if (acltdtDestroyDataItem(acl_data) != ACL_ERROR_NONE) { + LOG(ERROR) << "Acl destroy tensor data item failed when send data with type " + << (acl_type == ACL_TENSOR_DATA_END_OF_SEQUENCE ? "ACL_TENSOR_DATA_END_OF_SEQUENCE" + : "ACL_TENSOR_DATA_ABNORMAL"); + } + return errors::Internal("Acl add tensor data to dataset failed when send data with type ", acl_type); + } + return Status::OK(); + } + for (auto &tensor : tensors) { + aclDataType acl_data_type; + TF_RETURN_IF_ERROR(MappingTfDtypeToAcl(tensor.dtype(), acl_data_type)); + acltdtDataItem *acl_data = nullptr; + if (DataTypeCanUseMemcpy(tensor.dtype())) { + auto dims = tensor.shape().dim_sizes(); + acl_data = acltdtCreateDataItem( + ACL_TENSOR_DATA_TENSOR, (dims.empty() ? 
+  for (auto &tensor : tensors) {
+    aclDataType acl_data_type;
+    TF_RETURN_IF_ERROR(MappingTfDtypeToAcl(tensor.dtype(), acl_data_type));
+    acltdtDataItem *acl_data = nullptr;
+    if (DataTypeCanUseMemcpy(tensor.dtype())) {
+      auto dims = tensor.shape().dim_sizes();
+      acl_data = acltdtCreateDataItem(
+          ACL_TENSOR_DATA_TENSOR, (dims.empty() ? nullptr : reinterpret_cast<const int64_t *>(dims.data())),
+          dims.size(), acl_data_type, const_cast<char *>(tensor.tensor_data().data()), tensor.tensor_data().size());
+    } else if (tensor.dtype() == DT_STRING) {
+      if (tensor.dims() != 0) {
+        return errors::Internal("Acl send got unexpected non-scalar string tensor with dim ", tensor.dims());
+      }
+      auto value = reinterpret_cast<string *>(const_cast<char *>(tensor.tensor_data().data()));
+      // for scalar type, *dims is nullptr and dim_num is 0
+      acl_data = acltdtCreateDataItem(ACL_TENSOR_DATA_TENSOR, nullptr, 0, acl_data_type,
+                                      const_cast<char *>(value->c_str()), value->size());
+    } else {
+      return errors::Internal("Acl send got unexpected data type ", DataTypeString(tensor.dtype()));
+    }
+    if (acl_data == nullptr) {
+      return errors::Internal("Acl create tensor item failed when send tensor data ", tensor.DebugString());
+    }
+    if (acltdtAddDataItem(acl_dataset, acl_data) != ACL_ERROR_NONE) {
+      if (acltdtDestroyDataItem(acl_data) != ACL_ERROR_NONE) {
+        ADP_LOG(ERROR) << "Acl destroy tensor data item failed when send data with type ACL_TENSOR_DATA_TENSOR.";
+      }
+      return errors::Internal("Acl add tensor data to dataset failed when send tensor data.");
+    }
+  }
+  return Status::OK();
+}
+
+Status DestroyAclDataset(acltdtDataset *acl_dataset, bool include_data_item) {
+  if (include_data_item) {
+    for (size_t i = 0; i < acltdtGetDatasetSize(acl_dataset); i++) {
+      if (acltdtDestroyDataItem(acltdtGetDataItem(acl_dataset, i)) != ACL_ERROR_NONE) {
+        return errors::Internal("Acl destroy tensor data failed.");
+      }
+    }
+  }
+  if (acltdtDestroyDataset(acl_dataset) != ACL_ERROR_NONE) {
+    return errors::Internal("Acl destroy tensor dataset failed.");
+  }
+  return Status::OK();
+}
+
+Status RecvTensorByAcl(acltdtChannelHandle *acl_handle, std::vector<Tensor> &tensors) {
+  auto acl_dataset = acltdtCreateDataset();
+  if (acl_dataset == nullptr) { return errors::Internal("Failed create acl channel."); }
+  auto acl_status = acltdtReceiveTensor(acl_handle, acl_dataset, -1 /* no timeout */);
+
+  if (acl_status != ACL_ERROR_NONE) {
+    ADAPTER_LOG_IF_ERROR(DestroyAclDataset(acl_dataset, false));
+    return errors::Internal("Failed receive data from acl channel, acl status:", acl_status);
+  }
+
+  auto status = AssembleAclDataset2Tensors(acl_dataset, tensors, true /* call by channel receive */);
+  if (!status.ok()) {
+    ADAPTER_LOG_IF_ERROR(DestroyAclDataset(acl_dataset, false));
+    return status;
+  }
+  TF_RETURN_IF_ERROR(DestroyAclDataset(acl_dataset, false));
+  return Status::OK();
+}
+
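+// Unlike the receive above, which blocks indefinitely (-1), sends use a
+// 1000 ms timeout; assembly and cleanup failures are folded into
+// ACL_ERROR_RT_PARAM_INVALID so callers see a single aclError code.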
+aclError SendTensorsByAcl(const acltdtChannelHandle *acl_handle,
+                          acltdtTensorType acl_type,
+                          const std::vector<Tensor> &tensors) {
+  acltdtDataset *acl_dataset = nullptr;
+  auto assemble_status = AssembleTensors2AclDataset(acl_type, tensors, &acl_dataset);
+  if (!assemble_status.ok()) {
+    ADP_LOG(ERROR) << "Call AssembleTensors2AclDataset failed.";
+    return ACL_ERROR_RT_PARAM_INVALID;
+  }
+  auto acl_status = acltdtSendTensor(acl_handle, acl_dataset, 1000);
+  auto ds_data_status = DestroyAclDataset(acl_dataset);
+  if (!ds_data_status.ok()) {
+    ADP_LOG(ERROR) << "Call DestroyAclDataset failed.";
+    return ACL_ERROR_RT_PARAM_INVALID;
+  }
+  return acl_status;
+}
+
+} // namespace tensorflow
\ No newline at end of file
diff --git a/tf_adapter/util/acl_channel.h b/tf_adapter/util/acl_channel.h
new file mode 100644
index 000000000..721824929
--- /dev/null
+++ b/tf_adapter/util/acl_channel.h
@@ -0,0 +1,46 @@
+/* Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_ACL_CHANNEL_H_
+#define TENSORFLOW_ACL_CHANNEL_H_
+
+#include "acl/acl_tdt.h"
+#include "tensorflow/core/framework/tensor.h"
+
+namespace tensorflow {
+
+Status MappingTfDtypeToAcl(const tensorflow::DataType tf_type, aclDataType &acl_type);
+
+Status MappingAclDtypeToTf(const aclDataType &acl_type, tensorflow::DataType &tf_type);
+
+Status AssembleAclTensor2Tensor(acltdtDataItem *item, std::vector<Tensor> &tensors, bool call_by_channel_receive);
+
+Status AssembleAclDataset2Tensors(acltdtDataset *acl_dataset, std::vector<Tensor> &out_tensors,
+                                  bool call_by_channel_receive);
+
+Status AssembleTensors2AclDataset(acltdtTensorType acl_type, const std::vector<Tensor> &tensors,
+                                  acltdtDataset **acl_dataset);
+
+Status AssembleTensors2AclDataset(acltdtTensorType acl_type, const std::vector<Tensor> &tensors,
+                                  acltdtDataset *acl_dataset);
+
+Status DestroyAclDataset(acltdtDataset *acl_dataset, bool include_data_item = true);
+
+Status RecvTensorByAcl(acltdtChannelHandle *acl_handle, std::vector<Tensor> &tensors);
+
+aclError SendTensorsByAcl(const acltdtChannelHandle *acl_handle, acltdtTensorType acl_type,
+                          const std::vector<Tensor> &tensors);
+
+} // namespace tensorflow
+
+#endif // TENSORFLOW_ACL_CHANNEL_H_
diff --git a/tf_adapter/util/ge_plugin.cc b/tf_adapter/util/ge_plugin.cc
index 35e5210de..3c64b261e 100644
--- a/tf_adapter/util/ge_plugin.cc
+++ b/tf_adapter/util/ge_plugin.cc
@@ -20,7 +20,6 @@
 #include "framework/omg/omg_inner_types.h"
 #include "ge/ge_api.h"
 #include "ge/ge_api_types.h"
-#include "tdt/tdt_host_interface.h"
 #include "tensorflow/core/util/env_var.h"
 #include "tf_adapter/common/adp_logger.h"
 #include "tf_adapter/common/common.h"
@@ -31,7 +30,6 @@
 using json = nlohmann::json;
 using namespace tensorflow;
-using namespace tdt;
 constexpr int kFatalSleepTime = 3000;
 namespace {
 inline string ToString(ge::Status status) { return ::ge::StatusFactory::Instance()->GetErrDesc(status); }
@@ -56,7 +54,6 @@ void GeFinalize() {
 } // namespace
 GePlugin::GePlugin()
-    : device_id_(0), isInit_(false), isGlobal_(false) {
   ADP_LOG(INFO) << "[GePlugin] new constructor";
 }
@@ -217,22 +214,6 @@ void GePlugin::Init(std::map<std::string, std::string> &init_options, bool is_global) {
   ADP_LOG(INFO) << "[GePlugin] optypelist_for_implmode :" << init_options[ge::OPTYPELIST_FOR_IMPLMODE];
-  const char *tdt_uninit_env = std::getenv("ASCEND_TDT_UNINIT");
-  bool tdt_init = true;
-  if (tdt_uninit_env != nullptr && std::atoi(tdt_uninit_env) == 1) {
-    tdt_init = false;
-  }
-  if (tdt_init) {
-    // Open TsdClient first, then call GEInitialize
-    ADP_LOG(INFO) << "[GePlugin] Open TsdClient and Init tdt host.";
-    int32_t ret = tdt::TdtOutFeedInit(static_cast<uint32_t>(device_id_));
-    if (ret != 0) {
-      std::this_thread::sleep_for(std::chrono::milliseconds(kFatalSleepTime));
-      ADP_LOG(FATAL) << "[GePlugin] Tdt host init failed, tdt error code : " << ret;
-      LOG(FATAL) << "[GePlugin] Tdt host init failed, tdt error code : " << ret;
-    }
-  }
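+  // TDT host init is intentionally gone here: infeed/outfeed now presumably
+  // rides on the ACL tdt channel (see tf_adapter/util/acl_channel.cc), so
+  // GEInitialize no longer depends on TsdClient being opened first.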
   // ge Initialize
   ge::Status status = ge::GEInitialize(init_options);
   if (status != ge::SUCCESS) {
@@ -290,20 +271,6 @@ void GePlugin::Finalize() {
   // ge finalize
   GeFinalize();
-
-  const char *tdt_uninit_env = std::getenv("ASCEND_TDT_UNINIT");
-  bool tdt_init = true;
-  if (tdt_uninit_env != nullptr && std::atoi(tdt_uninit_env) == 1) {
-    tdt_init = false;
-  }
-  if (tdt_init) {
-    ADP_LOG(INFO) << "[GePlugin] Close TsdClient and destroy tdt.";
-    int32_t ret = tdt::TdtOutFeedDestroy();
-    if (ret != 0) {
-      LOG(ERROR) << "[GePlugin] Close tdt host failed.";
-      ADP_LOG(ERROR) << "[GePlugin] Close tdt host failed.";
-    }
-  }
   isInit_ = false;
 }
@@ -338,19 +305,6 @@ void PluginFinalize() {
  */
 void NpuClose() {
   GeFinalize();
-  uint32_t device_id = 0;
-  (void)GetEnvDeviceID(device_id);
-  if (NpuAttrs::GetUseTdtStatus(device_id)) {
-    ADP_LOG(INFO) << "[GePlugin] the process has turned on TDT resource, finalize resource at exit.";
-    int32_t tdt_status = TdtInFeedDestroy(device_id);
-    if (tdt_status != 0) {
-      ADP_LOG(ERROR) << "[GePlugin] Tdt client close failed.";
-      LOG(ERROR) << "[GePlugin] Tdt client close failed.";
-    } else {
-      ADP_LOG(INFO) << "[GePlugin] Tdt client close success.";
-      NpuAttrs::SetUseTdtStatus(device_id, false);
-    }
-  }
   ADP_LOG(INFO) << "[GePlugin] npu finalize resource success";
 }
diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc
index 23b93672b..b77a2a31d 100644
--- a/tf_adapter/util/npu_attrs.cc
+++ b/tf_adapter/util/npu_attrs.cc
@@ -27,6 +27,7 @@
 namespace tensorflow {
 std::map<int32_t, bool> NpuAttrs::turn_on_tdt_info_;
 std::map<std::string, bool> NpuAttrs::use_adp_info_;
+std::map<std::string, bool> NpuAttrs::dataset_execute_info_;
 std::string GetDumpPath() {
   char *npu_collect_path = std::getenv("NPU_COLLECT_PATH");
@@ -262,15 +263,6 @@ inline Status checkEnableDp(bool enable_dp) {
   }
 }
-bool NpuAttrs::GetUseTdtStatus(int32_t device_id) {
-  if (turn_on_tdt_info_.count(device_id) > 0) {
-    ADP_LOG(INFO) << "get device: " << device_id << " turn_on_tdt_info_: " << turn_on_tdt_info_[device_id];
-    return turn_on_tdt_info_[device_id];
-  } else {
-    return false;
-  }
-}
-
 void NpuAttrs::SetUseTdtStatus(int32_t device_id, bool is_turn_on_tdt) {
   turn_on_tdt_info_[device_id] = is_turn_on_tdt;
   ADP_LOG(INFO) << "set device: " << device_id << " turn_on_tdt_info_: " << turn_on_tdt_info_[device_id];
@@ -290,6 +282,20 @@ void NpuAttrs::SetUseAdpStatus(std::string iterator_name, bool is_use_adp) {
   ADP_LOG(INFO) << "set iterator: " << iterator_name << " use_adp_info_: " << use_adp_info_[iterator_name];
 }
+bool NpuAttrs::IsDatasetExecuteInDevice(std::string iterator_name) {
+  if (dataset_execute_info_.count(iterator_name) > 0) {
+    ADP_LOG(INFO) << "get data pre-process graph: " << iterator_name
+                  << " dataset_execute_info_: " << dataset_execute_info_[iterator_name];
+    return dataset_execute_info_[iterator_name];
+  } else {
+    return false;
+  }
+}
+
+void NpuAttrs::SetDatasetExecuteInDeviceStatus(std::string iterator_name, bool is_dataset_execute_device) {
+  dataset_execute_info_[iterator_name] = is_dataset_execute_device;
+  ADP_LOG(INFO) << "data pre-process graph: " << iterator_name
+                << " dataset_execute_info_: " << dataset_execute_info_[iterator_name];
+}
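+// The two helpers above presumably let the DP conversion pass record, per
+// iterator, whether its pre-processing graph was sunk onto the device
+// (assumption: the call sites live in dp_tf_ge_conversion_pass.cc, outside
+// this hunk).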
+
 std::map<std::string, std::string> NpuAttrs::GetSessOptions(OpKernelConstruction *ctx) {
   std::map<std::string, std::string> sess_options;
   std::string variable_format_optimize = std::to_string(true);
diff --git a/tf_adapter/util/npu_attrs.h b/tf_adapter/util/npu_attrs.h
index 2ca3e4d4e..e57a21a95 100644
--- a/tf_adapter/util/npu_attrs.h
+++ b/tf_adapter/util/npu_attrs.h
@@ -44,13 +44,15 @@ class NpuAttrs {
   static std::map<std::string, std::string> GetDefaultPassOptions();
   static Status SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options, Node *node);
   static void LogOptions(const std::map<std::string, std::string> &options);
-  static bool GetUseTdtStatus(int32_t device_id);
   static void SetUseTdtStatus(int32_t device_id, bool is_turn_on_tdt);
   static bool GetUseAdpStatus(std::string iterator_name);
   static void SetUseAdpStatus(std::string iterator_name, bool is_use_adp);
+  static bool IsDatasetExecuteInDevice(std::string iterator_name);
+  static void SetDatasetExecuteInDeviceStatus(std::string iterator_name, bool is_dataset_execute_device);
  private:
   static std::map<int32_t, bool> turn_on_tdt_info_;
   static std::map<std::string, bool> use_adp_info_;
+  static std::map<std::string, bool> dataset_execute_info_;
 };
 } // namespace tensorflow
-- 
Gitee