From ac3578676b87cdcd447af50010f596304fd19529 Mon Sep 17 00:00:00 2001 From: huanruizhi Date: Sat, 7 Aug 2021 14:57:51 +0800 Subject: [PATCH] DC&&MDC --- CMakeLists.txt | 5 +- configure.py | 314 ++++---- tf_adapter/BUILD | 6 +- tf_adapter/common/common.h | 9 +- tf_adapter/kernels/geop_npu.cc | 104 ++- tf_adapter/kernels/geop_npu.h | 6 + tf_adapter/kernels/host_queue_dataset_op.cc | 565 +++++++++++--- tf_adapter/kernels/infeed_outfeed_ops.cc | 174 ++--- tf_adapter/ops/npu_dataset_ops.cc | 19 +- tf_adapter/ops/npu_ops.cc | 2 - .../optimizers/dp_tf_ge_conversion_pass.cc | 641 +++++++++------- .../optimizers/om_partition_subgraphs_pass.cc | 6 +- .../npu_bridge/estimator/npu/npu_hook.py | 1 - tf_adapter/tests/CMakeLists.txt | 1 + .../tests/depends/ascendcl/CMakeLists.txt | 29 + .../depends/ascendcl/src/ascendcl_stub.cc | 268 +++++++ .../depends/ascendcl/src/ascendcl_stub.h | 82 ++ tf_adapter/tests/st/CMakeLists.txt | 10 +- .../tests/st/kernels/pbtxt/geop_dpop.pbtxt | 697 +++++++++++++++++ .../testcase/dataset/dataset_test_base.cc | 704 ++++++++++++++++++ .../testcase/dataset/function_testlib.cc | 649 ++++++++++++++++ .../dataset/host_queue_dats_set_st.cc | 203 +++++ .../st/kernels/testcase/geop_npu_test.cc | 13 +- .../kernels/testcase/infeed_outfeed_test.cc | 76 ++ .../testcase/dp_tf_ge_conversion_pass_test.cc | 12 + .../tests/st/util/testcase/ge_plugin_test.cc | 1 - tf_adapter/tests/ut/CMakeLists.txt | 5 +- .../tests/ut/kernels/pbtxt/geop_dpop.pbtxt | 697 +++++++++++++++++ .../testcase/dataset/dataset_test_base.cc | 704 ++++++++++++++++++ .../testcase/dataset/function_testlib.cc | 649 ++++++++++++++++ .../dataset/host_queue_dats_set_ut.cc | 203 +++++ .../ut/kernels/testcase/geop_npu_test.cc | 18 +- .../kernels/testcase/infeed_outfeed_test.cc | 76 ++ .../testcase/dp_tf_ge_conversion_pass_test.cc | 12 + .../tests/ut/util/testcase/ge_plugin_test.cc | 1 - tf_adapter/util/acl_channel.cc | 228 ++++++ tf_adapter/util/acl_channel.h | 46 ++ tf_adapter/util/ge_plugin.cc | 46 -- tf_adapter/util/npu_attrs.cc | 24 +- tf_adapter/util/npu_attrs.h | 4 +- 40 files changed, 6607 insertions(+), 703 deletions(-) create mode 100644 tf_adapter/tests/depends/ascendcl/CMakeLists.txt create mode 100644 tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.cc create mode 100644 tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.h create mode 100644 tf_adapter/tests/st/kernels/pbtxt/geop_dpop.pbtxt create mode 100644 tf_adapter/tests/st/kernels/testcase/dataset/dataset_test_base.cc create mode 100644 tf_adapter/tests/st/kernels/testcase/dataset/function_testlib.cc create mode 100644 tf_adapter/tests/st/kernels/testcase/dataset/host_queue_dats_set_st.cc create mode 100644 tf_adapter/tests/st/kernels/testcase/infeed_outfeed_test.cc create mode 100644 tf_adapter/tests/ut/kernels/pbtxt/geop_dpop.pbtxt create mode 100644 tf_adapter/tests/ut/kernels/testcase/dataset/dataset_test_base.cc create mode 100644 tf_adapter/tests/ut/kernels/testcase/dataset/function_testlib.cc create mode 100644 tf_adapter/tests/ut/kernels/testcase/dataset/host_queue_dats_set_ut.cc create mode 100644 tf_adapter/tests/ut/kernels/testcase/infeed_outfeed_test.cc create mode 100644 tf_adapter/util/acl_channel.cc create mode 100644 tf_adapter/util/acl_channel.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 9a71e499c..932bf5777 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -190,6 +190,8 @@ else() -Wl,--no-as-needed c_sec ge_runner + ascendcl + acl_tdt_channel datatransfer fmk_parser fmk_onnx_parser @@ -197,7 +199,6 @@ else() tensorflow_framework 
pywrap_tensorflow_internal -Wl,--as-needed - -s ) # rename libtf_adapter.so to _tf_adapter.so @@ -231,7 +232,7 @@ else() && cp -r ${CMAKE_CURRENT_BINARY_DIR}/_tf_adapter.so ${CMAKE_CURRENT_BINARY_DIR}/wheel/npu_bridge && cp -r $ ${CMAKE_CURRENT_BINARY_DIR}/wheel/npu_bridge # && cp -r ${CMAKE_CURRENT_BINARY_DIR}/../../../../../soft_dp/libSoftDp.so ${CMAKE_CURRENT_BINARY_DIR}/wheel/npu_bridge - && cd ${CMAKE_CURRENT_BINARY_DIR}/wheel + && cd ${CMAKE_CURRENT_BINARY_DIR}/wheel && ${HI_PYTHON} setup.py bdist_wheel >/dev/null && cp -f dist/npu_bridge-1.15.0-py3-none-any.whl ${CMAKE_CURRENT_BINARY_DIR}/ && echo "package whl end" diff --git a/configure.py b/configure.py index 62f8b8be9..65b95c495 100755 --- a/configure.py +++ b/configure.py @@ -22,10 +22,11 @@ from __future__ import print_function import os import subprocess import sys + try: - from shutil import which + from shutil import which except ImportError: - from distutils.spawn import find_executable as which + from distutils.spawn import find_executable as which _COMPAT_TENSORFLOW_VERSION = "1.15.0" _COMPAT_PYTHON_VERSION = "Python 3.7" @@ -34,176 +35,183 @@ _ASCEND_INSTALL_PATH_ENV = "ASCEND_CUSTOM_PATH" _OPEN_UT = "OPEN_UT" - def run_command(cmd): - output = subprocess.check_output(cmd) - return output.decode('UTF-8').strip() + output = subprocess.check_output(cmd) + return output.decode('UTF-8').strip() def get_input(question): - try: try: - answer = raw_input(question) - except NameError: - answer = input(question) - except EOFError: - answer = '' - return answer + try: + answer = raw_input(question) + except NameError: + answer = input(question) + except EOFError: + answer = '' + return answer + def real_config_path(file): - return os.path.join("tools", file) + return os.path.join("tools", file) + def setup_python(): - """Get python install path.""" - default_python_bin_path = which('python3') - custom_python_bin_path = '' - ask_python_bin_path = '' - if default_python_bin_path: - custom_python_bin_path = default_python_bin_path - compile_args = run_command([ - custom_python_bin_path, '--version']) - if not _COMPAT_PYTHON_VERSION in compile_args: - print('Invalid default python version: %s, only support Python 3.7.' % compile_args) - ask_python_bin_path = ('Please specify the location of python with valid ' - 'tensorflow 1.15.0 site-packages installed. [Default ' - 'is %s]\n(Please enter the correct python path: ') % default_python_bin_path - custom_python_bin_path = '' - else: - ask_python_bin_path = ('Please specify the location of python with valid ' - 'tensorflow 1.15.0 site-packages installed. [Default ' - 'is %s]\n(Please enter the correct python path: ') % default_python_bin_path - - while True: - if not custom_python_bin_path: - python_bin_path = get_input(ask_python_bin_path) - else: - python_bin_path = custom_python_bin_path - custom_python_bin_path = None - if not python_bin_path: - python_bin_path = default_python_bin_path - pass - # Check if the path is valid - if os.path.isfile(python_bin_path) and os.access(python_bin_path, os.X_OK): - pass - elif not os.path.exists(python_bin_path): - print('Invalid python path: %s cannot be found.' 
% python_bin_path) - continue + """Get python install path.""" + default_python_bin_path = which('python3') + custom_python_bin_path = '' + ask_python_bin_path = '' + if default_python_bin_path: + custom_python_bin_path = default_python_bin_path + compile_args = run_command([ + custom_python_bin_path, '--version']) + if not _COMPAT_PYTHON_VERSION in compile_args: + print('Invalid default python version: %s, only support Python 3.7.' % compile_args) + ask_python_bin_path = ('Please specify the location of python with valid ' + 'tensorflow 1.15.0 site-packages installed. [Default ' + 'is %s]\n(Please enter the correct python path: ') % default_python_bin_path + custom_python_bin_path = '' else: - print('%s is not executable. Is it the python binary?' % python_bin_path) - continue - - try: - compile_args = run_command([ - python_bin_path, '-c', - 'import distutils.sysconfig; import tensorflow as tf; print(tf.__version__ + "|" + tf.sysconfig.get_lib(' - ') + "|" + "|".join(tf.sysconfig.get_compile_flags()) + "|" + distutils.sysconfig.get_python_inc())' - ]).split("|") - if not compile_args[0].startswith(_COMPAT_TENSORFLOW_VERSION): - print('Invalid python path: %s compat tensorflow version is %s' - ' got %s.' % (python_bin_path, _COMPAT_TENSORFLOW_VERSION, - compile_args[0])) - continue - except subprocess.CalledProcessError: - print('Invalid python path: %s tensorflow not installed.' % - python_bin_path) - continue - # Write tools/python_bin_path.sh - with open(real_config_path('PYTHON_BIN_PATH'), 'w') as f: - f.write(python_bin_path) - with open(real_config_path('COMPILE_FLAGS'), 'w') as f: - for flag in compile_args[2:-1]: - f.write("".join([flag , '\n'])) - f.write("".join(["-I" , compile_args[-1] , '\n'])) - print('tensorflow path: %s.' % compile_args[1]) - with open(real_config_path('LINK_FLAGS'), 'w') as f: - f.write(os.path.join(compile_args[1], 'libtensorflow_framework.so.1\n')) - f.write(os.path.join(compile_args[1], 'python', '_pywrap_tensorflow_internal.so\n')) - with open(real_config_path('UT_LINK_FLAGS'), 'w') as f: - f.write(os.path.join(compile_args[1], 'libtensorflow_framework.so.1\n')) - f.write(os.path.join(compile_args[1], 'python', '_pywrap_tensorflow_internal.so\n')) - with open(real_config_path('ST_LINK_FLAGS'), 'w') as f: - f.write(os.path.join(compile_args[1], 'libtensorflow_framework.so.1\n')) - f.write(os.path.join(compile_args[1], 'python', '_pywrap_tensorflow_internal.so\n')) - break + ask_python_bin_path = ('Please specify the location of python with valid ' + 'tensorflow 1.15.0 site-packages installed. [Default ' + 'is %s]\n(Please enter the correct python path: ') % default_python_bin_path + + while True: + if not custom_python_bin_path: + python_bin_path = get_input(ask_python_bin_path) + else: + python_bin_path = custom_python_bin_path + custom_python_bin_path = None + if not python_bin_path: + python_bin_path = default_python_bin_path + pass + # Check if the path is valid + if os.path.isfile(python_bin_path) and os.access(python_bin_path, os.X_OK): + pass + elif not os.path.exists(python_bin_path): + print('Invalid python path: %s cannot be found.' % python_bin_path) + continue + else: + print('%s is not executable. Is it the python binary?' 
% python_bin_path) + continue + + try: + compile_args = run_command([ + python_bin_path, '-c', + 'import distutils.sysconfig; import tensorflow as tf; print(tf.__version__ + "|" + tf.sysconfig.get_lib(' + ') + "|" + "|".join(tf.sysconfig.get_compile_flags()) + "|" + distutils.sysconfig.get_python_inc())' + ]).split("|") + if not compile_args[0].startswith(_COMPAT_TENSORFLOW_VERSION): + print('Invalid python path: %s compat tensorflow version is %s' + ' got %s.' % (python_bin_path, _COMPAT_TENSORFLOW_VERSION, + compile_args[0])) + continue + except subprocess.CalledProcessError: + print('Invalid python path: %s tensorflow not installed.' % + python_bin_path) + continue + # Write tools/python_bin_path.sh + with open(real_config_path('PYTHON_BIN_PATH'), 'w') as f: + f.write(python_bin_path) + with open(real_config_path('COMPILE_FLAGS'), 'w') as f: + for flag in compile_args[2:-1]: + f.write("".join([flag, '\n'])) + f.write("".join(["-I", compile_args[-1], '\n'])) + print('tensorflow path: %s.' % compile_args[1]) + with open(real_config_path('LINK_FLAGS'), 'w') as f: + f.write(os.path.join(compile_args[1], 'libtensorflow_framework.so.1\n')) + f.write(os.path.join(compile_args[1], 'python', '_pywrap_tensorflow_internal.so\n')) + with open(real_config_path('UT_LINK_FLAGS'), 'w') as f: + f.write(os.path.join(compile_args[1], 'libtensorflow_framework.so.1\n')) + f.write(os.path.join(compile_args[1], 'python', '_pywrap_tensorflow_internal.so\n')) + with open(real_config_path('ST_LINK_FLAGS'), 'w') as f: + f.write(os.path.join(compile_args[1], 'libtensorflow_framework.so.1\n')) + f.write(os.path.join(compile_args[1], 'python', '_pywrap_tensorflow_internal.so\n')) + break def setup_ascend(env_path): - """Get ascend install path.""" - default_ascend_path = os.path.realpath("/usr/local/Ascend") - custom_ascend_path = env_path - while True: - if not custom_ascend_path: - ascend_path = default_ascend_path - else: - ascend_path = custom_ascend_path - # Check if the path is valid - if os.path.isdir(ascend_path) and os.access(ascend_path, os.X_OK): - break - elif not os.path.exists(ascend_path): - print('Invalid ascend path: %s cannot be found.' % ascend_path) - print('ascend path: %s.' 
% ascend_path) - with open(real_config_path('LINK_FLAGS'), 'a') as f: - if 'ALL_IN_ONE_ENABLE' in os.environ: - f.write(os.path.join(ascend_path, "compiler", "lib64", "libge_runner.so\n")) - f.write(os.path.join(ascend_path, "compiler", "lib64", "libfmk_parser.so\n")) - f.write(os.path.join(ascend_path, "compiler", "lib64", "libfmk_onnx_parser.so\n")) - f.write(os.path.join(ascend_path, "compiler", "lib64", "libdatatransfer.so\n")) - f.write(os.path.join(ascend_path, "compiler", "lib64", "libindextransform.so\n")) - f.write(os.path.join(ascend_path, "compiler", "lib64", "libalog.so\n")) - else: - f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libge_runner.so\n")) - f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libfmk_parser.so\n")) - f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libfmk_onnx_parser.so\n")) - f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libdatatransfer.so\n")) - f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libindextransform.so\n")) - f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libalog.so\n")) + """Get ascend install path.""" + default_ascend_path = os.path.realpath("/usr/local/Ascend") + custom_ascend_path = env_path + while True: + if not custom_ascend_path: + ascend_path = default_ascend_path + else: + ascend_path = custom_ascend_path + # Check if the path is valid + if os.path.isdir(ascend_path) and os.access(ascend_path, os.X_OK): + break + elif not os.path.exists(ascend_path): + print('Invalid ascend path: %s cannot be found.' % ascend_path) + print('ascend path: %s.' % ascend_path) + with open(real_config_path('LINK_FLAGS'), 'a') as f: + if 'ALL_IN_ONE_ENABLE' in os.environ: + f.write(os.path.join(ascend_path, "compiler", "lib64", "libge_runner.so\n")) + f.write(os.path.join(ascend_path, "compiler", "lib64", "libfmk_parser.so\n")) + f.write(os.path.join(ascend_path, "compiler", "lib64", "libfmk_onnx_parser.so\n")) + f.write(os.path.join(ascend_path, "compiler", "lib64", "libdatatransfer.so\n")) + f.write(os.path.join(ascend_path, "compiler", "lib64", "libindextransform.so\n")) + f.write(os.path.join(ascend_path, "runtime", "lib64", "libascendcl.so\n")) + f.write(os.path.join(ascend_path, "compiler", "lib64", "libalog.so\n")) + f.write(os.path.join(ascend_path, "compiler", "lib64", "libacl_tdt_channel.so\n")) + else: + f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libge_runner.so\n")) + f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libfmk_parser.so\n")) + f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libfmk_onnx_parser.so\n")) + f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libdatatransfer.so\n")) + f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libindextransform.so\n")) + f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libalog.so\n")) + f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libascendcl.so\n")) + f.write(os.path.join(ascend_path, "fwkacllib", "lib64", "libacl_tdt_channel.so\n")) + def setup_swig(): - """Get swig install path.""" - default_swig_path = which('swig') - custom_swig_path = '' - ask_swig_path = '' - if default_swig_path: - custom_swig_path = default_swig_path - compile_args = run_command([ - custom_swig_path, '-version']) - if not _COMPAT_SWIG_VERSION in compile_args: - print('Invalid default python version: %s.' % compile_args) - ask_swig_path = ('Please specify the location of swig. 
[Default is ' - '%s]\n(Please enter the correct swig path: ') % default_swig_path - custom_swig_path = '' - else: - ask_swig_path = ('Please specify the location of swig. [Default is ' - '%s]\n(Please enter the correct swig path: ') % default_swig_path - - while True: - if not custom_swig_path: - swig_path = get_input(ask_swig_path) - else: - swig_path = custom_swig_path - custom_swig_path = None - if not swig_path: - swig_path = default_swig_path - # Check if the path is valid - if os.path.isfile(swig_path) and os.access(swig_path, os.X_OK): - break - elif not os.path.exists(swig_path): - print('Invalid swig path: %s cannot be found.' % swig_path) - continue + """Get swig install path.""" + default_swig_path = which('swig') + custom_swig_path = '' + ask_swig_path = '' + if default_swig_path: + custom_swig_path = default_swig_path + compile_args = run_command([ + custom_swig_path, '-version']) + if not _COMPAT_SWIG_VERSION in compile_args: + print('Invalid default python version: %s.' % compile_args) + ask_swig_path = ('Please specify the location of swig. [Default is ' + '%s]\n(Please enter the correct swig path: ') % default_swig_path + custom_swig_path = '' else: - print('%s is not executable. Is it the swig binary?' % swig_path) - continue + ask_swig_path = ('Please specify the location of swig. [Default is ' + '%s]\n(Please enter the correct swig path: ') % default_swig_path + + while True: + if not custom_swig_path: + swig_path = get_input(ask_swig_path) + else: + swig_path = custom_swig_path + custom_swig_path = None + if not swig_path: + swig_path = default_swig_path + # Check if the path is valid + if os.path.isfile(swig_path) and os.access(swig_path, os.X_OK): + break + elif not os.path.exists(swig_path): + print('Invalid swig path: %s cannot be found.' % swig_path) + continue + else: + print('%s is not executable. Is it the swig binary?' % swig_path) + continue + + with open(real_config_path('SWIG_BIN_PATH'), 'w') as f: + f.write(swig_path) - with open(real_config_path('SWIG_BIN_PATH'), 'w') as f: - f.write(swig_path) def main(): - env_snapshot = dict(os.environ) - setup_python() - if not env_snapshot.get(_OPEN_UT): - setup_ascend(env_snapshot.get(_ASCEND_INSTALL_PATH_ENV)) - setup_swig() + env_snapshot = dict(os.environ) + setup_python() + if not env_snapshot.get(_OPEN_UT): + setup_ascend(env_snapshot.get(_ASCEND_INSTALL_PATH_ENV)) + setup_swig() if __name__ == '__main__': - main() + main() diff --git a/tf_adapter/BUILD b/tf_adapter/BUILD index 05a146b5e..6d817a4f1 100644 --- a/tf_adapter/BUILD +++ b/tf_adapter/BUILD @@ -33,9 +33,9 @@ cc_binary( linkopts = [] + select({ # Public introduction of external dependencies on project. 
# External linked libraries, typically, located in out/${product}/host/obj/lib - ":cloud_build": ["-Lexternal/tf_adapter_cloud_host_libs/ -lc_sec -lge_runner -ltsdclient -ldatatransfer -lfmk_parser -lfmk_onnx_parser -lindextransform"], - ":mini_build": ["-Lexternal/tf_adapter_mini_host_libs/ -lc_sec -lge_runner -ltsdclient -ldatatransfer -lfmk_parser -lfmk_onnx_parser -lindextransform",], - ":onetrack_build": ["-Lexternal/tf_adapter_onetrack_host_libs/ -lc_sec -lge_runner -ltsdclient -ldatatransfer -lfmk_parser -lfmk_onnx_parser -lindextransform",], + ":cloud_build": ["-Lexternal/tf_adapter_cloud_host_libs/ -lc_sec -lge_runner -lascendcl -lfmk_parser -lfmk_onnx_parser -lindextransform"], + ":mini_build": ["-Lexternal/tf_adapter_mini_host_libs/ -lc_sec -lge_runner -lascendcl -lfmk_parser -lfmk_onnx_parser -lindextransform",], + ":onetrack_build": ["-Lexternal/tf_adapter_onetrack_host_libs/ -lc_sec -lge_runner -lascendcl -lfmk_parser -lfmk_onnx_parser -lindextransform",], "//conditions:default": [], }) + [ # "-z defs", diff --git a/tf_adapter/common/common.h b/tf_adapter/common/common.h index 35b0c6ed6..5cb93b007 100644 --- a/tf_adapter/common/common.h +++ b/tf_adapter/common/common.h @@ -31,11 +31,18 @@ if ((v) == nullptr) { \ ADP_LOG(ERROR) << #v " is nullptr."; \ LOG(ERROR) << #v " is nullptr."; \ - return errors::InvalidArgument(#v " is nullptr."); \ + return errors::Internal(#v " is nullptr."); \ } #define REQUIRES_STATUS_OK(s) \ if (!s.ok()) { return s; } #define ADAPTER_ENV_MAX_LENTH 1024 * 1024 + +#define ADAPTER_LOG_IF_ERROR(...) \ + do { \ + const ::tensorflow::Status _status = (__VA_ARGS__); \ + if (TF_PREDICT_FALSE(!_status.ok())) LOG(INFO) << _status.ToString(); \ + } while (0) + #endif // TENSORFLOW_COMMON_COMMON_H_ diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc index 687a1f5b5..fad63acdf 100644 --- a/tf_adapter/kernels/geop_npu.cc +++ b/tf_adapter/kernels/geop_npu.cc @@ -809,7 +809,7 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { return; } int64 run_end_time = InferShapeUtil::GetCurrentTimestap(); - ADP_LOG(INFO) << "[GEOP] RunGraphAsync callback, status:" << ge_status << ", kernel_name:" + ADP_LOG(EVENT) << "[GEOP] RunGraphAsync callback, status:" << ge_status << ", kernel_name:" << ctx->op_kernel().name() << "[ " << (run_end_time - run_start_time) << "us]"; done(); }; @@ -839,6 +839,41 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { return; } +void GeOp::ChangeChannelNameAttr(NodeDef &node_def) { + std::hash hash_channel_name; + const std::string pre_channel_name = node_def.attr().at("channel_name").s(); + uint32_t device_id = 0; + (void)GetEnvDeviceID(device_id); + AttrValue channel_name = AttrValue(); + channel_name.set_s(std::to_string(hash_channel_name(tf_session_ + pre_channel_name + + "_device_" + std::to_string(device_id)))); + (*node_def.mutable_attr())["channel_name"] = channel_name; + ADP_LOG(INFO) << "[GEOP] changed the value of channel_name attr of node:" << node_def.name() << " to " << channel_name.s(); +} + +void GeOp::ProcessDpOpFuncDef(Node *node) { + const std::string func_name = node->def().attr().at("function").func().name(); + const std::string org_func_def_lib = node->def().attr().at("func_def").s(); + FunctionDefLibrary func_def_lib; + func_def_lib.ParseFromString(org_func_def_lib); + for (auto &func_def : *func_def_lib.mutable_function()) { + if (func_def.signature().name() == func_name) { + for (auto &node_def : *func_def.mutable_node_def()) { + if (node_def.op() == 
"IteratorV2" || node_def.op() == "Iterator") { + NpuAttrs::SetDatasetExecuteInDeviceStatus(tf_session_ + node_def.name(), true); + } + if (node_def.op() == "DeviceQueueDataset") { ChangeChannelNameAttr(node_def); } + } + } + } + std::string new_func_def_lib; + func_def_lib.SerializeToString(&new_func_def_lib); + AttrValue func_def_value = AttrValue(); + func_def_value.set_s(new_func_def_lib); + NodeDef &node_def = const_cast(node->def()); + (*node_def.mutable_attr())["func_def"] = func_def_value; +} + void GeOp::AddNodeAttrs(Node *node, bool &is_initialize) { // Add dp custom kernel label if (node->type_string() == "IteratorGetNext") { @@ -876,6 +911,68 @@ void GeOp::AddNodeAttrs(Node *node, bool &is_initialize) { } } +void GeOp::HandleDpOpAndGetNextNodes(Graph &graph) { + std::vector remove_nodes; + for (Node *node : graph.nodes()) { + CHECK_NOT_NULL(node); + if (node->type_string() == "DPOP") { + ProcessDpOpFuncDef(node); + } else if (node->type_string() == "IteratorGetNext") { + Node *iterator_node = nullptr; + std::string iterator_name; + NodeDef &node_def = const_cast(node->def()); + for (auto in_edge : node->in_edges()) { + CHECK_NOT_NULL(in_edge); + CHECK_NOT_NULL(in_edge->src()); + if (in_edge->src()->type_string() == "IteratorV2" || in_edge->src()->type_string() == "Iterator") { + iterator_name = in_edge->src()->name(); + iterator_node = in_edge->src(); + } + } + if (dynamic_input_ == "1" && NpuAttrs::IsDatasetExecuteInDevice(tf_session_ + iterator_name)) { + node_def.set_op("DynamicGetNext"); + } + if (dynamic_input_ == "1" && dynamic_graph_execute_mode_ == "lazy_recompile") { + graph_options_["ge.exec.enableCopyOutputAddr"] = "1"; + } + if (!NpuAttrs::IsDatasetExecuteInDevice(tf_session_ + iterator_name)) { + uint32_t device_id = 0; + (void)GetEnvDeviceID(device_id); + Node *aicpu_getnext = nullptr; + std::string aicpu_getnext_name = "aicpu_getnext_" + node->name(); + auto getnext_attrs = node->def().attr(); + std::hash hash_channel_name; + std::string channel_name = std::to_string(hash_channel_name(tf_session_ + iterator_name + + "_device_" + std::to_string(device_id))); + std::string aicpu_getnext_type = dynamic_input_ == "1" ? "DynamicGetNext" : "GetNext"; + TF_CHECK_OK(NodeBuilder(aicpu_getnext_name, aicpu_getnext_type) + .Device(node->def().device()) + .Attr("channel_name", channel_name) + .Attr("output_types", getnext_attrs["output_types"]) + .Attr("output_shapes", getnext_attrs["output_shapes"]) + .Finalize(&graph, &aicpu_getnext)); + for (auto out_edge : node->out_edges()) { + CHECK_NOT_NULL(out_edge); + graph.AddEdge(aicpu_getnext, out_edge->src_output(), out_edge->dst(), out_edge->dst_input()); + } + const OpDef &getnext_op_def = aicpu_getnext->op_def(); + NodeDef &node_def = const_cast(aicpu_getnext->def()); + std::string op_def_s; + getnext_op_def.SerializeToString(&op_def_s); + tensorflow::AttrValue value; + value.set_s(op_def_s); + node_def.mutable_attr()->insert({"op_def", value}); + remove_nodes.push_back(node); + remove_nodes.push_back(iterator_node); + } + } + } + for (Node *node : remove_nodes) { + ADP_LOG(INFO) << "[GEOP] Remove node:" << node->name(); + graph.RemoveNode(node); + } +} + // Build GraphDef from FunctionDef. 
 Status GeOp::BuildGraphDef(FunctionLibraryDefinition &flib_def,
                            const std::vector<Tensor> &input_vec, GraphDef &graph_def,
                            bool &is_initialize) {
@@ -947,6 +1044,7 @@ Status GeOp::BuildGraphDef(FunctionLibraryDefinition &flib_def,
       return ret;
     }
   }
+  HandleDpOpAndGetNextNodes(graph);
   graph.ToGraphDef(&graph_def);
   char *enable_force_v2_control = getenv("ENABLE_FORCE_V2_CONTROL");
   if (enable_force_v2_control != nullptr && strcmp("1", enable_force_v2_control) == 0) {
@@ -1216,10 +1314,6 @@ Status GeOp::GenerateDesc(Node *&node) {
   REQUIRES_NOT_NULL(node);
   NodeDef &node_def = const_cast<NodeDef &>(node->def());
   const OpDef &op_def = node->op_def();
-  if (dynamic_input_ == "1" && node->type_string() == "IteratorGetNext") {
-    node_def.set_op("DynamicGetNext");
-    if (dynamic_graph_execute_mode_ == "lazy_recompile") { graph_options_["ge.exec.enableCopyOutputAddr"] = "1"; }
-  }
   std::string format = this->data_format_;  // format
   int32_t domi_format = domi::domiTensorFormat_t::DOMI_TENSOR_RESERVED;
diff --git a/tf_adapter/kernels/geop_npu.h b/tf_adapter/kernels/geop_npu.h
index f6c1d5d75..bbddc40c9 100644
--- a/tf_adapter/kernels/geop_npu.h
+++ b/tf_adapter/kernels/geop_npu.h
@@ -97,6 +97,12 @@ class GeOp : public AsyncOpKernel {
   void AnalyzeInputDesc(void *tensor_ptr, ge::Tensor &input, ge::DataType type,
                         std::vector &input_shapes);
 
+  void ProcessDpOpFuncDef(Node *node);
+
+  void HandleDpOpAndGetNextNodes(Graph &graph);
+
+  void ChangeChannelNameAttr(NodeDef &node_def);
+
  private:
   static const std::string INPUT_DESC;
   static const std::string OUTPUT_DESC;
diff --git a/tf_adapter/kernels/host_queue_dataset_op.cc b/tf_adapter/kernels/host_queue_dataset_op.cc
index 94c90bfd9..4dd84e1c3 100644
--- a/tf_adapter/kernels/host_queue_dataset_op.cc
+++ b/tf_adapter/kernels/host_queue_dataset_op.cc
@@ -1,19 +1,32 @@
-/*
- * Copyright (c) Huawei Technologies Co., Ltd. 2019-2020. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Copyright (C) 2019-2020. Huawei Technologies Co., Ltd. All rights reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "acl/acl_tdt.h"
+#include "acl/acl.h"
+#include "acl/error_codes/rt_error_codes.h"
 #include "tdt/tdt_host_interface.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
 #include "tensorflow/core/framework/stats_aggregator.h"
@@ -28,20 +41,31 @@
 #include "tf_adapter/common/adp_logger.h"
 #include "tf_adapter/common/common.h"
 #include "tf_adapter/kernels/data_item_deliver.h"
+#include "tf_adapter/kernels/threads_pool.h"
+#include "tf_adapter/util/acl_channel.h"
 #include "tf_adapter/util/npu_attrs.h"
 #include
 #include
+#include
 #include
-
 #include "unistd.h"
-
 namespace tensorflow {
 namespace data {
 namespace {
 using namespace std;
 using namespace tdt;
+inline bool VersionFlag() {
+  char *is_new = getenv("IS_NEW");
+  if (is_new != nullptr && strcmp("1", is_new) == 0) {
+    return true;
+  }
+  // For test: always treat as the new version until the driver code is ready.
+  return true;
+}
 
-const static uint32_t kMaxValue = 128;
+const static uint32_t kMaxValue = 128U;
+const static uint32_t kMaxShape = 2048U;
+const static uint32_t kUnknownShapeDepth = 3U;
 // total memory usage controlled below 2G
 const uint64_t kTotalBytes = 2147483648;
 std::atomic<bool> tdt_release(false);
@@ -58,96 +82,132 @@ class HostQueueDatasetOp : public DatasetOpKernel {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("_local_rank_id", &tmp_rank_id));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("_local_device_list", &tmp_device_list));
-    ADP_LOG(INFO) << "Get local rank id:" << tmp_rank_id << ", local device list:" << tmp_device_list;
+    ADP_LOG(INFO) << "Get local rank id:" << tmp_rank_id
+                  << ", local device list:" << tmp_device_list;
     // local rank id range 0-7
     local_rank_id_ = std::atoi(tmp_rank_id.c_str());
     for (size_t i = 0; i < tmp_device_list.size(); i += 2) {
       int device_id = std::atoi(&tmp_device_list[i]);
-      OP_REQUIRES(ctx, device_id >= 0, errors::InvalidArgument("device id should be >= 0."));
+      OP_REQUIRES(ctx, device_id >= 0,
+                  errors::InvalidArgument("device id should be >= 0."));
       local_device_list_.push_back(device_id);
     }
-
-    ADP_LOG(INFO) << "Start to init tdt.";
+    ADP_LOG(INFO) << "Start to init channel.";
     uint32_t device_id = 0;
     OP_REQUIRES_OK(ctx, GetEnvDeviceID(device_id));
     device_id_ = device_id;
-    int32_t tdt_status = TdtInFeedInit(device_id_);
-    OP_REQUIRES(ctx, tdt_status == 0,
-                errors::InvalidArgument("Tdt client init failed."));
-    ADP_LOG(INFO) << "Init tdt host success.";
+    if (!VersionFlag()) {
+      int32_t tdt_status = TdtInFeedInit(device_id_);
+      OP_REQUIRES(ctx, tdt_status == 0,
+                  errors::InvalidArgument("Tdt client init failed."));
+      ADP_LOG(INFO) << "Init tdt host success.";
+    }
    tdt_release = false;
  }
 
  ~HostQueueDatasetOp() {
    ADP_LOG(INFO) << "Start to destroy tdt.";
+    if (!VersionFlag()) {
-    if (!tdt_release) {
-      int32_t tdt_status = TdtInFeedDestroy(device_id_);
-      if (tdt_status != 0) {
-        ADP_LOG(ERROR) << "Tdt client close failed.";
-        LOG(ERROR) << "Tdt client close failed.";
-      } else {
-        ADP_LOG(INFO) << "Tdt client close success.";
-        tdt_release = true;
-        NpuAttrs::SetUseTdtStatus(device_id_, false);
-      }
+      if (!tdt_release) {
+        int32_t tdt_status = TdtInFeedDestroy(device_id_);
+        if (tdt_status != 0) {
+          ADP_LOG(ERROR) << "Tdt client close failed.";
+          LOG(ERROR) << "Tdt client close failed.";
+        } else {
+          ADP_LOG(INFO) << "Tdt client close success.";
+          tdt_release = true;
+          NpuAttrs::SetUseTdtStatus(device_id_, false);
+        }
+      }
     }
   }
 
   void MakeDataset(OpKernelContext *ctx, DatasetBase **output) override {
     std::vector<DatasetBase *> inputs;
+    tf_session_ = ctx->session_handle();
     CHECK_NOT_NULL(output);
     for (int i = 0; i < ctx->num_inputs(); ++i) {
       DatasetBase *input = nullptr;
       OP_REQUIRES_OK(ctx, GetDatasetFromVariantTensor(ctx->input(i), &input));
       inputs.push_back(input);
     }
-    *output = new (nothrow) Dataset(ctx, inputs, channel_name_, output_types_, output_shapes_,
-                                    local_rank_id_, local_device_list_, device_id_);
+    *output = new (nothrow) Dataset(
+        ctx, inputs, channel_name_, output_types_, output_shapes_,
+        local_rank_id_, local_device_list_, device_id_, tf_session_);
     OP_REQUIRES(ctx, *output != nullptr,
-                errors::InvalidArgument("Data process host queue dataset op: new dataset failed."));
+                errors::InvalidArgument(
+                    "Data process host queue dataset op: new dataset failed."));
   }
 
  private:
   class Dataset : public DatasetBase {
-   public:
-    Dataset(OpKernelContext *ctx, const std::vector<DatasetBase *> &inputs, const string &channelName,
-            const DataTypeVector &outputTypes, const vector<PartialTensorShape> &outputShapes,
-            const int &local_rank_id, const std::vector<uint32_t> &local_device_list,
-            const uint32_t &device_id)
-        : DatasetBase(DatasetContext(ctx)), inputs_(inputs), channel_name_(channelName), output_types_(outputTypes),
-          output_shapes_(outputShapes), local_rank_id_(local_rank_id), local_device_list_(local_device_list),
-          device_id_(device_id) {
-      for (const auto &input : inputs_) { input->Ref(); }
+   public:
+    Dataset(OpKernelContext *ctx, const std::vector<DatasetBase *> &inputs,
+            const string &channelName, const DataTypeVector &outputTypes,
+            const vector<PartialTensorShape> &outputShapes,
+            const int &local_rank_id,
+            const std::vector<uint32_t> &local_device_list,
+            const uint32_t &device_id,
+            const string &tf_session)
+        : DatasetBase(DatasetContext(ctx)),
+          inputs_(inputs),
+          channel_name_(channelName),
+          output_types_(outputTypes),
+          output_shapes_(outputShapes),
+          local_rank_id_(local_rank_id),
+          local_device_list_(local_device_list),
+          device_id_(device_id),
+          tf_session_(tf_session) {
+      for (const auto &input : inputs_) {
+        input->Ref();
+      }
    }
 
    ~Dataset() override {
-      for (const auto &input : inputs_) { input->Unref(); }
+      for (const auto &input : inputs_) {
+        input->Unref();
+      }
    }
 
-    unique_ptr<IteratorBase> MakeIteratorInternal(const string &prefix) const override {
-      return unique_ptr<IteratorBase>(new (nothrow) Iterator({this, strings::StrCat(prefix, "::HostQueue")}));
+    unique_ptr<IteratorBase> MakeIteratorInternal(
+        const string &prefix) const override {
+      return unique_ptr<IteratorBase>(new (nothrow) Iterator(
+          {this, strings::StrCat(prefix, "::HostQueue")}));
    }
 
-    const DataTypeVector &output_dtypes() const override { return output_types_; }
-    const vector<PartialTensorShape> &output_shapes() const override { return output_shapes_; }
+    const DataTypeVector &output_dtypes() const override {
+      return output_types_;
+    }
+    const vector<PartialTensorShape> &output_shapes() const override {
+      return output_shapes_;
+    }
 
-    string DebugString() const override { return "HostQueueDatasetOp::Dataset"; }
+    string DebugString() const override {
+      return "HostQueueDatasetOp::Dataset";
+    }
 
   protected:
-    Status AsGraphDefInternal(SerializationContext *ctx, DatasetGraphDefBuilder *b, Node **output) const override {
+    Status AsGraphDefInternal(SerializationContext *ctx,
+                              DatasetGraphDefBuilder *b,
+                              Node **output) const override {
      return Status::OK();
    }
 
   private:
    class Iterator : public DatasetIterator<Dataset> {
     public:
-     explicit Iterator(const Params &params) : DatasetIterator<Dataset>(params) {
-       data_deliver_ = new DataItemDeliver(
-           dataset()->local_rank_id_, dataset()->device_id_,
-           dataset()->local_device_list_, dataset()->channel_name_);
+     explicit Iterator(const Params &params)
+         : DatasetIterator<Dataset>(params) {
+       if (!VersionFlag()) {
+         data_deliver_ = new DataItemDeliver(
+             dataset()->local_rank_id_, dataset()->device_id_,
+             dataset()->local_device_list_, dataset()->channel_name_);
+       }
      }
 
      ~Iterator() override {
-       std::vector<tdt::DataItem> stop_message;
-       data_deliver_->ParallelSendDataVec(stop_message);
+       if (!VersionFlag()) {
+         std::vector<tdt::DataItem> stop_message;
+         data_deliver_->ParallelSendDataVec(stop_message);
+       }
        {
          mutex_lock lck(mu_);
          finish_send_ = true;
@@ -159,7 +219,39 @@ class HostQueueDatasetOp : public DatasetOpKernel {
          cancelled_ = true;
          cond_var_.notify_all();
        }
-       delete data_deliver_;
+       if (!VersionFlag()) {
+         delete data_deliver_;
+       }
+       if (VersionFlag()) {
+         if (!tdt_release && dataset()->local_rank_id_ == 0) {
+           std::vector<std::future<aclError>> acl_status;
+           int index_handle = 0;
+           for (auto device_id : dataset()->local_device_list_) {
+             acl_status.emplace_back(dataset()->pools_->Enqueue(
+                 acltdtDestroyChannel, acl_handles_[index_handle]));
+             index_handle++;
+           }
+           for (auto &result : acl_status) {
+             if (result.get() != ACL_ERROR_NONE) {
+               ADP_LOG(ERROR) << "Queue destroy failed.";
+             }
+           }
+           ADP_LOG(INFO) << "Queue destroy on all hosts success.";
+           tdt_release = true;
+         } else if (!tdt_release && dataset()->local_rank_id_ == -1) {
+           ADP_LOG(INFO) << "Start to destroy channel.";
+           aclError acl_status = acltdtDestroyChannel(acl_handle_);
+           if (acl_status != ACL_ERROR_NONE) {
+             ADP_LOG(ERROR) << "Queue destroy failed.";
+           } else {
+             ADP_LOG(INFO) << "Queue destroy on all hosts success.";
+             tdt_release = true;
+           }
+         } else {
+           ADP_LOG(INFO) << "Queue do not destroy in slave.";
+           tdt_release = true;
+         }
+       }
        ADP_LOG(INFO) << "HostQueueDatasetOp's iterator is released.";
      }
 
@@ -169,13 +261,16 @@ class HostQueueDatasetOp : public DatasetOpKernel {
        while (true) {
          {
            mutex_lock lck(mu_);
-           while (!cancelled_ && (buffer_.size() >= kMaxValue || total_bytes_ > kTotalBytes)) {
+           while (!cancelled_ && (buffer_.size() >= kMaxValue ||
+                                  total_bytes_ > kTotalBytes)) {
              RecordStop(ctx.get());
              cond_var_.wait(lck);
              RecordStart(ctx.get());
            }
-           if (cancelled_) { return; }
+           if (cancelled_) {
+             return;
+           }
          }
 
          mutex_lock parent_l(parent_mu_);
@@ -186,14 +281,17 @@ class HostQueueDatasetOp : public DatasetOpKernel {
            ADP_LOG(INFO) << "Do not need to GetNext.";
            return;
          } else {
-           buffer_element.status = input_impls_[1]->GetNext(ctx.get(), &args, &end_of_sequence);
+           buffer_element.status =
+               input_impls_[1]->GetNext(ctx.get(), &args, &end_of_sequence);
          }
-         if (!buffer_element.status.ok() || (buffer_element.status.ok() && end_of_sequence)) {
+         if (!buffer_element.status.ok() ||
+             (buffer_element.status.ok() && end_of_sequence)) {
            if (!buffer_element.status.ok()) {
-             ADP_LOG(ERROR) << "Failed to get tensor data, Status:" << buffer_element.status.ToString();
-             LOG(ERROR) << "Failed to get tensor data, Status:" << buffer_element.status.ToString();
+             ADP_LOG(ERROR) << "Failed to get tensor data, Status:"
+                            << buffer_element.status.ToString();
            } else {
-             ADP_LOG(INFO) << "Finish to get tensor data, Status:" << buffer_element.status.ToString()
+             ADP_LOG(INFO) << "Finish to get tensor data, Status:"
+                           << buffer_element.status.ToString()
end_of_sequence:" << end_of_sequence; } mutex_lock lck(mu_); @@ -240,6 +338,169 @@ class HostQueueDatasetOp : public DatasetOpKernel { } ADP_LOG(INFO) << "Slave SendDataThread exit."; } + + void PushDataFront(const vector &args) { + mutex_lock lck(mu_); + BufferElement buffer_element; + buffer_element.status = Status::OK(); + buffer_element.host_thread_finished = false; + buffer_element.value = args; + buffer_.push_front(buffer_element); + cond_var_.notify_all(); + } + + void QueryThreadStatus(const std::shared_ptr &ctx) { + mutex_lock lck(mu_); + while (!cancelled_ && !finish_send_ && buffer_.empty()) { + RecordStop(ctx.get()); + cond_var_.wait(lck); + RecordStart(ctx.get()); + } + if (cancelled_ || finish_send_) { + ADP_LOG(INFO) << "Host queue " << dataset()->channel_name_ + << " push data thread exit with cancelled: " + << cancelled_ << ", finished:" << finish_send_ + << " when wait data."; + return; + } + } + + void SendMultiEndData() { + std::vector> status; + for (auto handle : acl_handles_) { + for (auto &tensor : buffer_.front().value) { + } + status.emplace_back(dataset()->pools_->Enqueue( + SendTensorsByAcl, handle, + (buffer_.front().status.ok() ? ACL_TENSOR_DATA_END_OF_SEQUENCE + : ACL_TENSOR_DATA_ABNORMAL), + buffer_.front().value)); + } + for (auto &result : status) { + if (result.get() != ACL_RT_SUCCESS) { + ADP_LOG(INFO) << "End training as host push end data failed."; + } + } + } + // When calling SendTensorsByAcl and its'return is the queue is full or + // empty (actually no event, drv wants us to treat it as a no event, + // because they cannot return no evnet code , only empty). The above 2 + // cases , we need to push data into dequeue to sent again. (include multi + // and single ) + void SendMultiData(const vector &args) { + uint64_t total_bytes = 0; + for (auto &tensor : args) { + total_bytes += tensor.TotalBytes(); + } + std::map> status_map; + for (auto acl_handle : acl_handles_) { + status_map.insert({acl_handle, dataset()->pools_->Enqueue( + SendTensorsByAcl, acl_handle, ACL_TENSOR_DATA_TENSOR, args)}); + } + bool is_send_success = false; + while (!is_send_success) { + is_send_success = true; + for (auto iter = status_map.begin(); iter != status_map.end();) { + aclError tmp_code = iter->second.get(); + if (tmp_code == ACL_RT_SUCCESS) { + status_map.erase(iter++); + } else if (tmp_code == ACL_ERROR_RT_QUEUE_EMPTY || + tmp_code == ACL_ERROR_RT_QUEUE_FULL) { + sleep(1); + iter->second = dataset()->pools_->Enqueue( + SendTensorsByAcl, iter->first, ACL_TENSOR_DATA_TENSOR, args); + is_send_success = false; + iter++; + } else { + ADP_LOG(INFO) << "End training and host push data finished."; + mutex_lock lck(mu_); + cancelled_ = true; + cond_var_.notify_all(); + return; + } + } + } + { + mutex_lock lck(mu_); + total_bytes_ -= total_bytes; + cond_var_.notify_all(); + } + } + + void SendSingleData(const vector &args) { + uint64_t total_bytes = 0; + for (auto &tensor : args) { + total_bytes += tensor.TotalBytes(); + } + aclError status = + SendTensorsByAcl(acl_handle_, ACL_TENSOR_DATA_TENSOR, args); + if (status == ACL_ERROR_RT_QUEUE_EMPTY || + status == ACL_ERROR_RT_QUEUE_FULL) { + sleep(1); + PushDataFront(args); + return; + } + if (status != ACL_RT_SUCCESS) { + mutex_lock lck(mu_); + cancelled_ = true; + cond_var_.notify_all(); + return; + } + { + mutex_lock lck(mu_); + total_bytes_ -= total_bytes; + cond_var_.notify_all(); + } + } + void SendDataThreadForMbuf(const std::shared_ptr &ctx) { + ADP_LOG(INFO) << "Begin to send data."; + vector args; + while (true) { + { + 
+           mutex_lock lck(mu_);
+           while (!cancelled_ && !finish_send_ && buffer_.empty()) {
+             RecordStop(ctx.get());
+             cond_var_.wait(lck);
+             RecordStart(ctx.get());
+           }
+           if (cancelled_ || finish_send_) {
+             ADP_LOG(INFO)
+                 << "Host queue " << dataset()->channel_name_
+                 << " push data thread exit with cancelled: " << cancelled_
+                 << ", finished:" << finish_send_ << " when wait data.";
+             return;
+           }
+           if (buffer_.front().host_thread_finished) {
+             if (dataset()->local_rank_id_ == 0) {
+               SendMultiEndData();
+             } else {
+               aclError status =
+                   SendTensorsByAcl(acl_handle_,
+                                    (buffer_.front().status.ok()
+                                         ? ACL_TENSOR_DATA_END_OF_SEQUENCE
+                                         : ACL_TENSOR_DATA_ABNORMAL),
+                                    {});
+               if (status != ACL_RT_SUCCESS) {
+                 ADP_LOG(INFO)
+                     << "End training as host push end data failed." << status;
+               }
+             }
+             cancelled_ = true;
+             cond_var_.notify_all();
+             return;
+           } else {
+             args = buffer_.front().value;
+             buffer_.pop_front();
+           }
+         }
+         if (dataset()->local_rank_id_ == 0) {
+           SendMultiData(args);
+         } else {
+           SendSingleData(args);
+         }
+       }
+     }
+
      void SendDataThread(const std::shared_ptr<IteratorContext> &ctx) {
        vector<Tensor> args;
        while (true) {
@@ -344,62 +605,169 @@ class HostQueueDatasetOp : public DatasetOpKernel {
        }
      }
 
-     Status EnsureReceiveThreadStarted(IteratorContext *ctx) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+     Status EnsureReceiveThreadStarted(IteratorContext *ctx)
+         EXCLUSIVE_LOCKS_REQUIRED(mu_) {
        // ctx is not nullptr
        if (!receive_thread_) {
-         std::shared_ptr<IteratorContext> new_ctx(new (std::nothrow) IteratorContext(*ctx));
+         std::shared_ptr<IteratorContext> new_ctx(new (std::nothrow)
+                                                      IteratorContext(*ctx));
          REQUIRES_NOT_NULL(new_ctx);
          REQUIRES_NOT_NULL(ctx->env());
-         receive_thread_.reset(
-             ctx->env()->StartThread({}, "receive_thread", [this, new_ctx]() { GetDataThread(new_ctx); }));
+
+         receive_thread_.reset(ctx->env()->StartThread(
+             {}, "receive_thread",
+             [this, new_ctx]() { GetDataThread(new_ctx); }));
        }
        return Status::OK();
      }
 
-     Status EnsureSendThreadStarted(IteratorContext *ctx) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+     Status EnsureSendThreadStarted(IteratorContext *ctx)
+         EXCLUSIVE_LOCKS_REQUIRED(mu_) {
        if (!send_thread_) {
-         std::shared_ptr<IteratorContext> new_ctx(new (std::nothrow) IteratorContext(*ctx));
+         std::shared_ptr<IteratorContext> new_ctx(new (std::nothrow)
+                                                      IteratorContext(*ctx));
          REQUIRES_NOT_NULL(new_ctx);
          REQUIRES_NOT_NULL(ctx->env());
-         if (dataset()->local_rank_id_ <= 0) {
-           send_thread_.reset(ctx->env()->StartThread(
-               {}, "send_thread",
-               [this, new_ctx]() { SendDataThread(new_ctx); }));
+         if (!VersionFlag()) {
+           if (dataset()->local_rank_id_ <= 0) {
+             send_thread_.reset(ctx->env()->StartThread(
+                 {}, "send_thread",
+                 [this, new_ctx]() { SendDataThread(new_ctx); }));
+           } else {
+             send_thread_.reset(ctx->env()->StartThread(
+                 {}, "send_thread", [this]() { SendDataThread(); }));
+           }
          } else {
            send_thread_.reset(ctx->env()->StartThread(
-               {}, "send_thread", [this]() { SendDataThread(); }));
+               {}, "send_thread",
+               [this, new_ctx]() { SendDataThreadForMbuf(new_ctx); }));
+         }
+       }
+       return Status::OK();
+     }
+     bool IsUnknownShape() {
+       for (auto &out_shape : dataset()->output_shapes_) {
+         auto tmp_shape = const_cast<PartialTensorShape &>(out_shape);
+         if (tmp_shape.unknown_rank()) {
+           return true;
+         }
+       }
+       return false;
+     }
+
+     Status CreateMultiChannel(std::hash<std::string> &hash_channel_name,
+                               std::string &channel_name, uint32_t shape_depth) {
+       ADP_LOG(INFO) << "Start to init all host thread.";
+       dataset()->pools_ = std::make_shared<ThreadPool>();
+       dataset()->pools_->InitThreadPool(dataset()->local_device_list_.size());
+       vector<std::future<acltdtChannelHandle *>> acl_handles;
+       for (auto device_id : dataset()->local_device_list_) {
+         aclError status =
+             aclrtSetDevice(static_cast<int32_t>(device_id));
+         if (status != ACL_SUCCESS) {
+           return errors::InvalidArgument("SetDevice fail");
+         }
+         channel_name = std::to_string(hash_channel_name(
+             dataset()->tf_session_ + dataset()->channel_name_ + "_device_" +
+             std::to_string(device_id)));
+         acl_handles.emplace_back(dataset()->pools_->Enqueue(
+             acltdtCreateChannelWithCapacity, device_id, channel_name.c_str(),
+             shape_depth));
+       }
+       for (auto &&handle : acl_handles) {
+         handle.wait();
+         auto tmp_handle = handle.get();
+         if (tmp_handle == nullptr) {
+           ADP_LOG(ERROR) << "Call acltdtCreateChannelWithCapacity failed";
+           return errors::InvalidArgument(
+               "Call acltdtCreateChannelWithCapacity failed");
+         }
+         acl_handles_.push_back(tmp_handle);
+       }
+       ADP_LOG(INFO) << "Init all host thread success.";
+       return Status::OK();
+     }
+
+     Status CreateChannel() {
+       int32_t out_shape_size = dataset()->output_shapes_.size();
+       if (out_shape_size == 0) {
+         ADP_LOG(ERROR)
+             << "out_shape_size is equal to zero, can not set channel depth";
+         return errors::InvalidArgument(
+             "out_shape_size is equal to zero, can not set channel depth");
+       }
+       uint32_t shape_depth =
+           IsUnknownShape() ? kUnknownShapeDepth : kMaxShape / out_shape_size;
+       shape_depth = std::min(shape_depth, 128U);
+       std::hash<std::string> hash_channel_name;
+       std::string channel_name;
+       if (dataset()->local_rank_id_ == 0) {
+         return CreateMultiChannel(hash_channel_name, channel_name, shape_depth);
+       } else if (dataset()->local_rank_id_ == -1) {
+         ADP_LOG(INFO) << "Start to init one host thread.";
+         aclError status_single =
+             aclrtSetDevice(static_cast<int32_t>(dataset()->device_id_));
+         if (status_single != ACL_SUCCESS) {
+           return errors::InvalidArgument("SetDevice fail");
+         }
+         channel_name = std::to_string(hash_channel_name(
+             dataset()->tf_session_ + dataset()->channel_name_ + "_device_" +
+             std::to_string(dataset()->device_id_)));
+         acl_handle_ = acltdtCreateChannelWithCapacity(
+             dataset()->device_id_, channel_name.c_str(), shape_depth);
+         if (acl_handle_ == nullptr) {
+           ADP_LOG(ERROR) << "Call acltdtCreateChannelWithCapacity failed";
+           return errors::InvalidArgument(
+               "Call acltdtCreateChannelWithCapacity failed");
+         }
+         ADP_LOG(INFO) << "Init tdt one thread success.";
+         return Status::OK();
+       } else {
+         ADP_LOG(INFO) << "Queue do not init in slave.";
+         return Status::OK();
+       }
+     }
+
      Status Initialize(IteratorContext *ctx) override {
-       ADP_LOG(INFO) << "Start to check channel name. channelName: " << dataset()->channel_name_;
        if (dataset()->channel_name_.empty()) {
          return errors::InvalidArgument("HostQueueDataset channel_name is null.");
        }
-
+       Status temp_status = Status::OK();
+       if (VersionFlag()) {
+         temp_status = CreateChannel();
+         if (temp_status != Status::OK()) {
+           return errors::InvalidArgument("Call CreateChannel failed");
+         }
+       }
        ADP_LOG(INFO) << "Start to check receive and send thread.";
        try {
          input_impls_.resize(dataset()->inputs_.size());
        } catch (...) {
          return errors::InvalidArgument("HostQueueDataset resize failed.");
        }
        for (size_t i = 0; i < input_impls_.size(); ++i) {
-         TF_RETURN_IF_ERROR(
-             dataset()->inputs_[i]->MakeIterator(ctx, strings::StrCat(prefix(), "[", i, "]"), &input_impls_[i]));
-       }
-       if (dataset()->local_rank_id_ == 0) {
-         TF_RETURN_IF_ERROR(data_deliver_->ParallelInitSocketClient());
-       } else if (dataset()->local_rank_id_ > 0) {
-         TF_RETURN_IF_ERROR(data_deliver_->InitSocketServer());
+         TF_RETURN_IF_ERROR(dataset()->inputs_[i]->MakeIterator(
+             ctx, strings::StrCat(prefix(), "[", i, "]"), &input_impls_[i]));
        }
+       if (!VersionFlag()) {
+         if (dataset()->local_rank_id_ == 0) {
+           TF_RETURN_IF_ERROR(data_deliver_->ParallelInitSocketClient());
+         } else if (dataset()->local_rank_id_ > 0) {
+           TF_RETURN_IF_ERROR(data_deliver_->InitSocketServer());
+         }
+       }
        {
          mutex_lock lck(mu_);
-         TF_RETURN_IF_ERROR(EnsureReceiveThreadStarted(ctx));
-         TF_RETURN_IF_ERROR(EnsureSendThreadStarted(ctx));
+         if (VersionFlag()) {
+           if (dataset()->local_rank_id_ <= 0) {
+             TF_RETURN_IF_ERROR(EnsureReceiveThreadStarted(ctx));
+             TF_RETURN_IF_ERROR(EnsureSendThreadStarted(ctx));
+           } else {
+             ADP_LOG(INFO) << "HostQueue is not chief, not send data.";
+             return Status::OK();
+           }
+         } else {
+           TF_RETURN_IF_ERROR(EnsureReceiveThreadStarted(ctx));
+           TF_RETURN_IF_ERROR(EnsureSendThreadStarted(ctx));
+         }
        }
-       ADP_LOG(INFO) << "HostQueue success to Initialize. channelName: " << dataset()->channel_name_;
+       ADP_LOG(INFO) << "HostQueue success to Initialize. channelName: "
+                     << dataset()->channel_name_;
        return Status::OK();
      }
 
@@ -435,24 +803,31 @@ class HostQueueDatasetOp : public DatasetOpKernel {
       bool finish_send_ GUARDED_BY(mu_) = false;
       bool host_thread_finished_ GUARDED_BY(mu_) = false;
       uint64_t total_bytes_ GUARDED_BY(mu_) = 0;
-      // The following two thread must be the first member to be destructed, because tensorflow::Thread does not provide
-      // an explicit join function. If the thread is destructed after other members, such as buffer_, when the thread
-      // joins, it will access the already destructed buffer_ , Resulting in an unknown error.
+      // The following two threads must be the first members to be destructed,
+      // because tensorflow::Thread does not provide an explicit join
+      // function. If a thread is destructed after other members, such as
+      // buffer_, it will access the already destructed buffer_ when it
+      // joins, resulting in an unknown error.
       std::unique_ptr<Thread> receive_thread_ GUARDED_BY(mu_);
       std::unique_ptr<Thread> send_thread_ GUARDED_BY(mu_);
       DataItemDeliver *data_deliver_;
+      acltdtChannelHandle *acl_handle_;
+      std::vector<acltdtChannelHandle *> acl_handles_;
    };
    const std::vector<DatasetBase *> inputs_;
    std::string channel_name_;
+    std::string tf_session_;
    const DataTypeVector output_types_;
    const vector<PartialTensorShape> output_shapes_;
    int local_rank_id_;
+    mutable std::shared_ptr<ThreadPool> pools_;
    std::vector<uint32_t> local_device_list_;
    uint32_t device_id_;
  };
  std::string channel_name_;
  DataTypeVector output_types_;
  vector<PartialTensorShape> output_shapes_;
+  std::string tf_session_;
  int local_rank_id_;
  std::vector<uint32_t> local_device_list_;
  uint32_t device_id_;
diff --git a/tf_adapter/kernels/infeed_outfeed_ops.cc b/tf_adapter/kernels/infeed_outfeed_ops.cc
index d355d66e1..97fe25dec 100644
--- a/tf_adapter/kernels/infeed_outfeed_ops.cc
+++ b/tf_adapter/kernels/infeed_outfeed_ops.cc
@@ -14,74 +14,16 @@
  * limitations under the License.
  */
 
-#include "securec.h"
-#include "tdt/tdt_host_interface.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tf_adapter/common/adp_logger.h"
 #include "tf_adapter/common/common.h"
+#include "tf_adapter/util/acl_channel.h"
+#include "tf_adapter/util/npu_attrs.h"
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include
 
 namespace tensorflow {
 namespace {
-Status GetTensorShape(const string &tensor_shape, TensorShape &shape) {
-  // change "[32,224,224,3]" => "32,224,224,3"
-  // tensor_shape.size() - 2 is the second to last
-  string str = tensor_shape.substr(1, tensor_shape.size() - 2);
-  string::size_type index = 0;
-  if (!str.empty()) {
-    while ((index = str.find(' ', index)) != string::npos) { str.erase(index, 1); }
-  }
-  string split = ",";
-  string::size_type pos2 = str.find(split);
-  string::size_type pos1 = 0;
-  while (pos2 != string::npos) {
-    try {
-      shape.AddDim(std::stoi(str.substr(pos1, pos2 - pos1)));
-    } catch (...) { return errors::InvalidArgument("Invalid shape string: ", tensor_shape); }
-    // string::size_type can store the length of any string object
-    pos1 = pos2 + split.size();
-    pos2 = str.find(split, pos1);
-  }
-  if (pos1 != str.length()) {
-    try {
-      shape.AddDim(std::stoi(str.substr(pos1)));
-    } catch (...) { return errors::InvalidArgument("Invalid shape string: ", tensor_shape); }
-  }
-  return Status::OK();
-}
-
-Status ConvertDataItem2Tensor(const std::vector<tdt::DataItem> &items, std::vector<Tensor> &tensors) {
-  for (auto &item : items) {
-    if (item.dataType_ == tdt::TDT_END_OF_SEQUENCE) {
-      ADP_LOG(INFO) << "End of processing.";
-      return Status::OK();
-    }
-    DataType type = DT_FLOAT;
-    DataTypeFromString(item.tensorType_, &type);
-    if (type == DT_STRING) {
-      Tensor result_tensor(tensorflow::DT_STRING, TensorShape({}));
-      std::shared_ptr<std::string> data_str_ptr = std::static_pointer_cast<std::string>(item.dataPtr_);
-      result_tensor.scalar<string>()() =
-          std::move(string(reinterpret_cast<const char *>(data_str_ptr->c_str()), item.dataLen_));
-      tensors.emplace_back(std::move(result_tensor));
-    } else if (DataTypeCanUseMemcpy(type)) {
-      TensorShape tensorShape;
-      Status s = GetTensorShape(item.tensorShape_, tensorShape);
-      if (!s.ok()) { return s; }
-      Tensor result_tensor = Tensor(type, tensorShape);
-      std::shared_ptr<std::string> data_str_ptr = std::static_pointer_cast<std::string>(item.dataPtr_);
-      errno_t ret = memcpy_s(const_cast<char *>(result_tensor.tensor_data().data()), result_tensor.tensor_data().size(),
-                             data_str_ptr->c_str(), item.dataLen_);
-      if (ret != EOK) { return errors::Unknown("memcpy failed"); }
-      tensors.emplace_back(std::move(result_tensor));
-    } else {
-      return errors::InvalidArgument("Not support this type: ", type);
-    }
-  }
-  return Status::OK();
-}
-
 class OutfeedEnqueueOp : public OpKernel {
  public:
   explicit OutfeedEnqueueOp(OpKernelConstruction *ctx) : OpKernel(ctx) {
@@ -99,56 +41,96 @@ class OutfeedEnqueueOp : public OpKernel {
 class OutfeedDequeueOp : public OpKernel {
  public:
   explicit OutfeedDequeueOp(OpKernelConstruction *ctx) : OpKernel(ctx) {
-    // ctx is not nullptr
     OP_REQUIRES_OK(ctx, ctx->GetAttr("channel_name", &channel_name_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_));
-    OP_REQUIRES(ctx, tdt::TdtHostPreparePopData() == 0, errors::Internal("Prepare Pop Data failed"));
-    ADP_LOG(INFO) << "OutfeedDequeueOp built";
+    // Create log summary acl channel
+    ADP_LOG(INFO) << "Start create acl channel for out-feed dequeue op " << channel_name_;
+    uint32_t device_id = 0;
+    OP_REQUIRES_OK(ctx, GetEnvDeviceID(device_id));
+    const static std::string kReceivePrefix = "TF_RECEIVE_";
+    char *is_new = getenv("IS_NEW");
+    if (is_new != nullptr && strcmp("1", is_new) == 0) {
+      const size_t kDefaultCapacity = 3;
+      acl_handle_ = acltdtCreateChannelWithCapacity(device_id, (kReceivePrefix + channel_name_).c_str(),
+                                                    kDefaultCapacity);
+    } else {
+      acl_handle_ = acltdtCreateChannel(device_id, (kReceivePrefix + channel_name_).c_str());
+    }
+    OP_REQUIRES(ctx, acl_handle_ != nullptr, errors::Internal("Acl create receive channel failed."));
+    ADP_LOG(INFO) << "Succeed create acl channel for out-feed dequeue op " << channel_name_;
   }
-  ~OutfeedDequeueOp() override { ADP_LOG(INFO) << "OutfeedDequeueOp has been destructed"; }
 
-  void Compute(OpKernelContext *ctx) override {
-    CHECK_NOT_NULL(ctx);
-    std::vector<tdt::DataItem> bundle;
-    OP_REQUIRES(ctx, tdt::TdtHostPopData(channel_name_, bundle) == 0,
-                errors::Internal("TdtHostPopData get data failed"));
-    std::vector<Tensor> out_tensors;
-    OP_REQUIRES_OK(ctx, ConvertDataItem2Tensor(bundle, out_tensors));
-    OP_REQUIRES(ctx, !out_tensors.empty(), errors::OutOfRange("Outfeed tensors reach the end"));
-    OP_REQUIRES(
-        ctx, out_tensors.size() == output_shapes_.size(),
-        errors::Internal("Outfeed tensors num mismatch", out_tensors.size(), "vs. expect", output_shapes_.size()));
-    for (int i = 0; i < ctx->num_outputs(); ++i) { ctx->set_output(i, out_tensors[i]); }
+  ~OutfeedDequeueOp() override {
+    ADP_LOG(INFO) << "Start destroy acl channel for out-feed dequeue op " << channel_name_;
+    if (acl_handle_ != nullptr) {
+      if (acltdtDestroyChannel(acl_handle_) != ACL_ERROR_NONE) {
+        ADP_LOG(ERROR) << "Failed destroy acl channel for out-feed dequeue op " << channel_name_;
+      } else {
+        ADP_LOG(INFO) << "Succeed destroy acl channel for out-feed dequeue op " << channel_name_;
+      }
+    }
   }
 
-  bool IsExpensive() override { return false; }
+  void Compute(OpKernelContext *ctx) override {
+    ADP_LOG(INFO) << "Start compute out-feed dequeue op " << channel_name_;
+    CancellationManager *cm = ctx->cancellation_manager();
+    CancellationToken token = cm->get_cancellation_token();
+    bool already_cancelled = !cm->RegisterCallback(token, [this]() {
+      ADP_LOG(INFO) << "Start run cancellation callback of out-feed dequeue op " << channel_name_;
+      char *is_new = getenv("IS_NEW");
+      if (is_new != nullptr && strcmp("1", is_new) == 0) {
+        if (acltdtDestroyChannel(acl_handle_) != ACL_ERROR_NONE) {
+          ADP_LOG(ERROR) << "Failed destroy acl data channel for host queue " << channel_name_;
+        } else {
+          ADP_LOG(INFO) << "Succeed destroy acl data channel for host queue " << channel_name_;
+          acl_handle_ = nullptr;
+        }
+      } else {
+        if (acltdtStopChannel(acl_handle_) != ACL_ERROR_NONE) {
+          ADP_LOG(ERROR) << "Failed stop acl data channel for host queue " << channel_name_;
+        } else {
+          ADP_LOG(INFO) << "Succeed stop acl data channel for host queue " << channel_name_;
+        }
+      }
+    });
 
- private:
-  DataTypeVector output_types_;
-  std::vector<PartialTensorShape> output_shapes_;
-  std::string channel_name_;
-};
+    if (TF_PREDICT_FALSE(already_cancelled)) {
+      ctx->SetStatus(errors::Internal("out-feed op ", channel_name_, " called after cancelled."));
+      return;
+    }
 
-class StopOutfeedDequeueOp : public OpKernel {
- public:
-  explicit StopOutfeedDequeueOp(OpKernelConstruction *ctx) : OpKernel(ctx) {
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("channel_name", &channel_name_));
-    ADP_LOG(INFO) << "StopOutfeedDequeueOp built";
-  }
-  ~StopOutfeedDequeueOp() override { ADP_LOG(INFO) << "StopOutfeedDequeueOp has been destructed"; }
-  void Compute(OpKernelContext *ctx) override {
-    ADP_LOG(INFO) << "StopOutfeedDequeueOp running";
"StopOutfeedDequeueOp running"; - OP_REQUIRES(ctx, tdt::TdtHostStop(channel_name_) == 0, errors::Internal("TdtHostStop failed")); + std::vector tensors; + ADP_LOG(INFO) << "Start recv tensors by acl out-feed dequeue op " << channel_name_; + auto status = RecvTensorByAcl(acl_handle_, tensors); + ADP_LOG(INFO) << "Start de-register callback out-feed dequeue op " << channel_name_; + (void) cm->DeregisterCallback(token); + OP_REQUIRES_OK(ctx, status); + OP_REQUIRES(ctx, !tensors.empty(), errors::OutOfRange("out-feed op ", channel_name_, " received end-of-sequence")); + OP_REQUIRES(ctx, tensors.size() == output_shapes_.size(), + errors::Internal("out-feed op ", channel_name_, " received ", tensors.size(), " tensors but expect ", + output_shapes_.size(), " tensors")); + ADP_LOG(INFO) << "out-feed op output num:" << ctx->num_outputs(); + for (int i = 0; i < ctx->num_outputs(); ++i) { + ADP_LOG(INFO) << "output tensor " << i << ", " << tensors[i].DebugString(); + ctx->set_output(i, tensors[i]); + } } bool IsExpensive() override { return false; } private: + DataTypeVector output_types_; + std::vector output_shapes_; std::string channel_name_; + acltdtChannelHandle *acl_handle_ = nullptr; }; -REGISTER_KERNEL_BUILDER(Name("OutfeedDequeueOp").Device(DEVICE_CPU), OutfeedDequeueOp); - -REGISTER_KERNEL_BUILDER(Name("OutfeedEnqueueOp").Device(DEVICE_CPU), OutfeedEnqueueOp); +REGISTER_KERNEL_BUILDER(Name("OutfeedDequeueOp") +. +Device(DEVICE_CPU), OutfeedDequeueOp +); -REGISTER_KERNEL_BUILDER(Name("StopOutfeedDequeueOp").Device(DEVICE_CPU), StopOutfeedDequeueOp); +REGISTER_KERNEL_BUILDER(Name("OutfeedEnqueueOp") +. +Device(DEVICE_CPU), OutfeedEnqueueOp +); } // namespace } // namespace tensorflow diff --git a/tf_adapter/ops/npu_dataset_ops.cc b/tf_adapter/ops/npu_dataset_ops.cc index 50177a8b8..082445d4b 100644 --- a/tf_adapter/ops/npu_dataset_ops.cc +++ b/tf_adapter/ops/npu_dataset_ops.cc @@ -65,5 +65,22 @@ REGISTER_OP("AdpGetNext") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") .Attr("queue_name: string") - .SetShapeFn(shape_inference::ScalarShape); + .SetIsStateful() + .SetShapeFn(tensorflow::shape_inference::ScalarShape); + +REGISTER_OP("GetNext") + .Output("components: output_types") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .Attr("channel_name: string") + .SetIsStateful() + .SetShapeFn(tensorflow::shape_inference::ScalarShape); + +REGISTER_OP("DynamicGetNext") + .Output("components: output_types") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .Attr("channel_name: string") + .SetIsStateful() + .SetShapeFn(tensorflow::shape_inference::ScalarShape); } // namespace tensorflow diff --git a/tf_adapter/ops/npu_ops.cc b/tf_adapter/ops/npu_ops.cc index 1da33c991..8a7100055 100644 --- a/tf_adapter/ops/npu_ops.cc +++ b/tf_adapter/ops/npu_ops.cc @@ -131,8 +131,6 @@ REGISTER_OP("OutfeedDequeueOp") .SetIsStateful() .SetShapeFn(OutfeedDequeueShapeFn); -REGISTER_OP("StopOutfeedDequeueOp").Attr("channel_name: string").SetIsStateful().SetShapeFn(shape_inference::NoOutputs); - REGISTER_OP("DropOutDoMask") .Input("x: T") .Input("mask: uint8") diff --git a/tf_adapter/optimizers/dp_tf_ge_conversion_pass.cc b/tf_adapter/optimizers/dp_tf_ge_conversion_pass.cc index aff5e2495..b160e02bb 100644 --- a/tf_adapter/optimizers/dp_tf_ge_conversion_pass.cc +++ b/tf_adapter/optimizers/dp_tf_ge_conversion_pass.cc @@ -146,6 +146,7 @@ class DpTfToGEConversionPassImpl { inline bool IsDeviceQueueDatasetNode() const; inline 
bool IsIteratorNode(const Node *n) const; inline bool IsSkipDataset(const Node *n) const; + inline bool IsGeSupportDataset(const Node *n) const; inline std::string GetEdgeName(const Edge *e) const; inline std::string GetRandomName(const std::string &prefix) const; std::string GetRandomName() const; @@ -161,6 +162,20 @@ class DpTfToGEConversionPassImpl { bool RemoveIsolatedNode(Graph *g, std::unordered_set visited); Status RemoveNotSupportDataset(Graph *g, const std::string &device_queue_dataset, const std::string &make_iterator) const; + Status AddDataTransDatasets(Node *topo_end, std::string &host_channel_name, std::string &device_channel_name, + std::map &all_options); + void GetTopoEndsNodes(std::vector &topo_ends); + Status BuildDeviceDpGraph(Node *topo_end, Graph *device_graph, const std::string device_channel_name); + Status AddAttr2DeviceNodes(Node *topo_end, Graph *device_graph); + Status AddGeopNodeFunctionDef(FunctionDefLibrary &fdeflib, const std::string &fn_geop, const std::string &fn_dpop, + const string &default_device); + Status AddGeopDatasetFunctionDef(FunctionDefLibrary &fdeflib, const std::string &fn_geop, + const std::string &fn_geop_dataset, const string &default_device, std::map all_options); + Status BuildGeOpDatasetFunction(FunctionDefLibrary &fdeflib, Graph *device_graph, + const std::string &fn_geop_dataset, const string &default_device, std::map all_options); + Status AddGeOpDatasetFunctionLibrary(FunctionLibraryDefinition *flib, Node *topo_end, const std::string &device_channel_name, + const std::string &fn_geop_dataset, std::map &all_options); + Status AddGeOpDatasetAndDpGroupDataset(Node *topo_end, const std::string &fn_geop_dataset, const std::string &host_channel_name, const std::string &device_channel_name); // graph num int graph_run_num_; @@ -197,6 +212,10 @@ inline bool DpTfToGEConversionPassImpl::IsSkipDataset(const Node *n) const { return std::find(SKIP_DATASET_LIST.begin(), SKIP_DATASET_LIST.end(), n->type_string()) != SKIP_DATASET_LIST.end(); } +inline bool DpTfToGEConversionPassImpl::IsGeSupportDataset(const Node *n) const { + return std::find(GE_OPS_WHITELIST.begin(), GE_OPS_WHITELIST.end(), n->type_string()) != GE_OPS_WHITELIST.end(); +} + inline std::string DpTfToGEConversionPassImpl::GetEdgeName(const Edge *e) const { if (e == nullptr || e->src() == nullptr || e->dst() == nullptr) { return "invalid_edge"; } return strings::StrCat("Edge_from_", e->src()->name(), "_out", e->src_output(), "_To_", e->dst()->name(), "_in", @@ -350,21 +369,37 @@ Status DpTfToGEConversionPassImpl::InsertChannelQueue(Node *topo_end, std::strin std::string &device_queue_name, std::map &all_options) const { ADP_LOG(INFO) << "Start to insert HostQueueDataset and DeviceQueueDataset."; + REQUIRES_NOT_NULL(topo_end); + char *is_new = std::getenv("IS_NEW"); + const Node *iterator_node = nullptr; + if (IsMakeIteratorNode(topo_end)) { + topo_end->input_node(1, &iterator_node); + } + + uint32_t device_id = 0; + GetEnvDeviceID(device_id); for (const Edge *e : split_edges_.at(topo_end)) { REQUIRES_NOT_NULL(e); REQUIRES_NOT_NULL(e->src()); REQUIRES_NOT_NULL(e->dst()); + bool need_add_device_dataset = false; + if (is_new != nullptr && strcmp("1", is_new) == 0 && IsGeSupportDataset(e->dst())) { need_add_device_dataset = true; } + std::string local_rank_id = all_options["local_rank_id"]; std::string local_device_list = all_options["local_device_list"]; - std::string queue_name; + std::string channel_name; if (local_rank_id == "-1") { - queue_name = strings::StrCat("Queue_", 
GetEdgeName(e), "_", GetRandomName()); + REQUIRES_NOT_NULL(iterator_node); + if (is_new != nullptr && strcmp("1", is_new) == 0 && !need_add_device_dataset) { + channel_name = iterator_node->name(); + } else { + channel_name = "Queue_" + GetEdgeName(e) + "_" + GetRandomName(); + } } else { - queue_name = strings::StrCat(e->src()->name(), "_index_", std::to_string(g_channel_index)); + channel_name = strings::StrCat(e->src()->name(), "_index_", std::to_string(g_channel_index)); g_channel_index += 1; } - host_queue_name = strings::StrCat("Host", queue_name); - device_queue_name = strings::StrCat("Device", queue_name); + host_queue_name = "HostQueue_" + channel_name; ADP_LOG(INFO) << "Add_" << host_queue_name; // Host and Device queue should save type and shape auto m_src = e->src()->def().attr(); @@ -372,27 +407,31 @@ Status DpTfToGEConversionPassImpl::InsertChannelQueue(Node *topo_end, std::strin string::size_type idx = SummarizeAttrValue(m_src["output_types"]).find("Unknown AttrValue"); if (idx == string::npos) { type_status = true; } Node *queue_node_host = nullptr; - // Make sure that 'queue_name' of host and device queue be same + // Make sure that 'channel_name' of host and device queue be same TF_CHECK_OK(NodeBuilder(host_queue_name, "HostQueueDataset") .Input(e->src(), e->src_output()) // Will be replaced by GEOPDataset later .Input(e->src(), e->src_output()) .Device(e->src()->def().device()) - .Attr("channel_name", queue_name) + .Attr("channel_name", channel_name) .Attr("output_types", type_status ? m_src["output_types"] : m_src["Toutput_types"]) .Attr("output_shapes", m_src["output_shapes"]) .Attr("_local_rank_id", local_rank_id) .Attr("_local_device_list", local_device_list) - .Finalize(&*graph_, &queue_node_host)); + .Finalize(graph_, &queue_node_host)); REQUIRES_NOT_NULL(queue_node_host); + + if (is_new != nullptr && strcmp("1", is_new) == 0 && !need_add_device_dataset) { return Status::OK(); } + + device_queue_name = "DeviceQueue_" + channel_name; ADP_LOG(INFO) << "Add_" << device_queue_name; Node *queue_node_device = nullptr; - // Make sure that 'queue_name' of host and device queue be same + // Make sure that 'channel_name' of host and device queue be same TF_CHECK_OK(NodeBuilder(device_queue_name, "DeviceQueueDataset") .Device(e->dst()->def().device()) - .Attr("channel_name", queue_name) + .Attr("channel_name", channel_name) .Attr("output_types", type_status ? 
m_src["output_types"] : m_src["Toutput_types"]) .Attr("output_shapes", m_src["output_shapes"]) - .Finalize(&*graph_, &queue_node_device)); + .Finalize(graph_, &queue_node_device)); REQUIRES_NOT_NULL(queue_node_device); // 0 means the the 0th output of queue_node_device REQUIRES_NOT_NULL(graph_->AddEdge(queue_node_device, 0, e->dst(), e->dst_input())); @@ -451,6 +490,7 @@ Status DpTfToGEConversionPassImpl::RemoveNotSupportDataset(Graph *g, const std:: } void DpTfToGEConversionPassImpl::RemoveSplitEdges(Node *topo_end) { + ADP_LOG(INFO) << "Start to remove split edges"; for (const Edge *e : split_edges_.at(topo_end)) { ADP_LOG(INFO) << "Remove_" << GetEdgeName(e); graph_->RemoveEdge(e); @@ -488,16 +528,7 @@ bool DpTfToGEConversionPassImpl::GetNodeFuncs(const FunctionLibraryDefinition *f return !node_funcs.empty(); } -bool DpTfToGEConversionPassImpl::RunPass(std::unique_ptr *g, FunctionLibraryDefinition *flib, - std::map all_options) { - ADP_LOG(INFO) << ">>>> DpTfToGEConversionPassImpl::RunPass <<<<"; - // Convert just for convenient access - split_edges_.clear(); - graph_ = &**g; - flib_def_ = &(*g)->flib_def(); - - // Find split edges from subgraphs, which MakeIterator connect to Itearator op - std::vector topo_ends; +void DpTfToGEConversionPassImpl::GetTopoEndsNodes(std::vector &topo_ends) { for (Node *node : graph_->op_nodes()) { if (IsMakeIteratorNode(node)) { for (Node *in_node : node->in_nodes()) { @@ -509,279 +540,351 @@ bool DpTfToGEConversionPassImpl::RunPass(std::unique_ptr *g, FunctionLibr } } } - // After traversal, topo_ends should store MakeIterator Nodes. - if (topo_ends.empty()) { - ADP_LOG(INFO) << "Do not find MakeIterator <- IteratorV2 connects in the graph," - << " pass datapreprocess pass."; - return true; - } - ADP_LOG(INFO) << "Start to write graph's pbtxt before optimization."; +} - const char *need_print = getenv("PRINT_MODEL"); - if (need_print != nullptr && strcmp("1", need_print) == 0) { - GraphDef before_graphdef; - (*g)->ToGraphDef(&before_graphdef); - string pre_model_path = GetDumpPath() + "BeforeSubGraph_dp_"; - string pmodel_path = pre_model_path + std::to_string(graph_run_num_) + ".pbtxt"; - TF_DO_CHECK_OK(WriteTextProto(Env::Default(), pmodel_path, before_graphdef), ERROR); - } +Status DpTfToGEConversionPassImpl::AddDataTransDatasets(Node *topo_end, std::string &host_channel_name, + std::string &device_channel_name, std::map &all_options) { + const Edge *tmp_edge = nullptr; + Status ret = GetSplitEdges(topo_end, split_edges_[topo_end], tmp_edge); + if (!ret.ok()) { return ret; } - ADP_LOG(INFO) << "Start to optimize dp_init topological graph"; - for (Node *topo_end : topo_ends) { - // Get all edges that should be replace with HostQueue->DeviceQueue - ADP_LOG(INFO) << "Start to find split edges, topo_end node is : " << topo_end->name() << ", op is " - << topo_end->type_string(); - const Edge *tmp_edge = nullptr; - TF_DO_CHECK_OK(GetSplitEdges(topo_end, split_edges_[topo_end], tmp_edge), ERROR); - - const string DEFAULT_DEVICE = topo_end->def().device(); - // Start optimize graph - // Insert Host and Device queue - ADP_LOG(INFO) << "Start to add host and device queue on split edges"; - std::string host_queue_name; - std::string device_queue_name; - TF_DO_CHECK_OK(InsertChannelQueue(topo_end, host_queue_name, device_queue_name, all_options), ERROR); - ADP_LOG(INFO) << "host queue name is " << host_queue_name; - ADP_LOG(INFO) << "device queue name is " << device_queue_name; - // Remove all split edges - ADP_LOG(INFO) << "Start to remove split edges"; - 
RemoveSplitEdges(topo_end); - - // Make a copy of graph for pruned GE - ADP_LOG(INFO) << "Start to prune GE graph"; - std::unique_ptr graph_ge(new (std::nothrow) Graph(OpRegistry::Global())); - if (graph_ge == nullptr) { - ADP_LOG(ERROR) << "new graph ge failed"; - LOG(ERROR) << "new graph ge failed"; - return false; - } - CopyGraph(*graph_, &*graph_ge); - // Prune visiable GE graph - std::unordered_set visiable_ge; - for (const Node *n : graph_ge->op_nodes()) { - if (IsMakeIteratorNode(n) && n->name() == topo_end->name()) { - visiable_ge.emplace(n); - break; - } - } - TF_DO_CHECK_OK(RemoveNotSupportDataset(&*graph_ge, device_queue_name, topo_end->name()), ERROR); + // Start optimize graph + // Insert Host and Device queue + ADP_LOG(INFO) << "Start to add host and device queue on split edges"; + ret = InsertChannelQueue(topo_end, host_channel_name, device_channel_name, all_options); + if (!ret.ok()) { return ret; } + ADP_LOG(INFO) << "host queue name is " << host_channel_name << ", device queue name is " << device_channel_name; - ADP_LOG(INFO) << "Start to to PruneForReverseReachability."; - PruneForReverseReachability(&*graph_ge, visiable_ge); - // add function_def begin - ADP_LOG(INFO) << "Start to add function_def for GEOP's func"; - FunctionDefLibrary fdeflib; - for (auto node : graph_ge->nodes()) { - std::vector node_funcs; - if (GetNodeFuncs(flib, node, node_funcs)) { - ADP_LOG(INFO) << "Node [" << node->name() << "] has func:"; - for (const auto &func : node_funcs) { - FunctionDef *fdef = fdeflib.add_function(); - if (flib->Find(func) == nullptr) { - ADP_LOG(ERROR) << "function def is nullptr"; - LOG(ERROR) << "function def is nullptr"; - return false; - } - *fdef = *(flib->Find(func)); - } - } - } + RemoveSplitEdges(topo_end); + return ret; +} - // Add required function for GEOPDataset->func(GEOP->func2) topo graph - std::string fn_dpop = GetRandomName("dpop_function"); - std::string fn_geop = GetRandomName("geop_function"); - std::string fn_geop_dataset = GetRandomName("geopdataset_function"); - std::string iterator_name = ""; - for (auto in_node : topo_end->in_nodes()) { - if (in_node == nullptr) { - ADP_LOG(ERROR) << "topo end node is nullptr"; - LOG(ERROR) << "topo end node is nullptr"; - return false; - } - ADP_LOG(INFO) << "in_node name is " << in_node->name(); - if (IsIteratorNode(in_node)) { - iterator_name = in_node->name(); - ADP_LOG(INFO) << "iterator name is " << iterator_name; - break; - } +Status DpTfToGEConversionPassImpl::BuildDeviceDpGraph(Node *topo_end, Graph *device_graph, + const std::string device_channel_name) { + // Make a copy of graph for pruned GE + ADP_LOG(INFO) << "Start to prune GE graph"; + CopyGraph(*graph_, device_graph); + // Prune visiable GE graph + std::unordered_set visiable_ge; + for (const Node *n : device_graph->op_nodes()) { + if (IsMakeIteratorNode(n) && n->name() == topo_end->name()) { + visiable_ge.emplace(n); + break; } - if (iterator_name.empty()) { - ADP_LOG(ERROR) << "There is no connection between MakeIteraotr and IteratorV2"; - LOG(ERROR) << "There is no connection between MakeIteraotr and IteratorV2"; - return false; + } + Status ret = RemoveNotSupportDataset(device_graph, device_channel_name, topo_end->name()); + if (!ret.ok()) { return ret; } + + ADP_LOG(INFO) << "Start to to PruneForReverseReachability."; + PruneForReverseReachability(device_graph, visiable_ge); + return ret; +} + +Status DpTfToGEConversionPassImpl::AddAttr2DeviceNodes(Node *topo_end, Graph *device_graph) { + std::string iterator_name; + for (auto in_node : 
topo_end->in_nodes()) {
+    REQUIRES_NOT_NULL(in_node);
+    ADP_LOG(INFO) << "in_node name is " << in_node->name();
+    if (IsIteratorNode(in_node)) {
+      iterator_name = in_node->name();
+      ADP_LOG(INFO) << "iterator name is " << iterator_name;
+      break;
     }
-    // Add dp custom kernel label
-    for (auto node : graph_ge->nodes()) {
-      if (node->type_string() == "DeviceQueueDataset") { node->AddAttr(DP_ITERATOR_MARK, iterator_name); }
-      if (std::find(CUSTOMIZE_DATASET_LIST.begin(), CUSTOMIZE_DATASET_LIST.end(), node->type_string())
-          != CUSTOMIZE_DATASET_LIST.end()) {
-        ADP_LOG(INFO) << node->name() << " is " << node->type_string() << ", need to add label.";
-        node->AddAttr("_kernel", "dp");
-        node->AddAttr(DP_ITERATOR_MARK, iterator_name);
-      }
+  }
+  if (iterator_name.empty()) {
+    ADP_LOG(ERROR) << "There is no connection between MakeIterator and IteratorV2";
+    return errors::Internal("There is no connection between MakeIterator and IteratorV2");
+  }
+  // Add dp custom kernel label
+  for (auto node : device_graph->nodes()) {
+    REQUIRES_NOT_NULL(node);
+    if (node->type_string() == "DeviceQueueDataset") { node->AddAttr(DP_ITERATOR_MARK, iterator_name); }
+    if (std::find(CUSTOMIZE_DATASET_LIST.begin(), CUSTOMIZE_DATASET_LIST.end(), node->type_string())
+        != CUSTOMIZE_DATASET_LIST.end()) {
+      ADP_LOG(INFO) << node->name() << " is " << node->type_string() << ", need to add label.";
+      node->AddAttr("_kernel", "dp");
+      node->AddAttr(DP_ITERATOR_MARK, iterator_name);
     }
+  }
+  return Status::OK();
+}
+
+Status DpTfToGEConversionPassImpl::AddGeopNodeFunctionDef(FunctionDefLibrary &fdeflib,
+                                                          const std::string &fn_geop,
+                                                          const std::string &fn_dpop,
+                                                          const string &default_device) {
+  // Add the DPOP node (visible only inside the geop function)
+  string func_def_str;
+  fdeflib.SerializeToString(&func_def_str);
+
+  // The DPOP node should be created by the geop function
+  ADP_LOG(INFO) << "Start to convert dpop node to geop function";
+  FunctionDef *fd = fdeflib.add_function();
+  REQUIRES_NOT_NULL(fd);
+  REQUIRES_NOT_NULL(fd->mutable_signature());
+  fd->mutable_signature()->set_name(fn_geop);
+  NodeDef *n = fd->add_node_def();
+  REQUIRES_NOT_NULL(n);
+  NameAttrList f_attr;
+  f_attr.set_name(fn_dpop);
+  *f_attr.mutable_attr() = n->attr();
+  TF_CHECK_OK(NodeDefBuilder(fn_dpop, "DPOP")
+                  .Input(EMPTY_DEF_INPUT)  // No partition dp_init graph on GE
+                  .Device(default_device)
+                  .Attr("function", f_attr)  // dpop function
+                  .Attr("func_def", func_def_str)
+                  .Attr("Tin", EMPTY_TYPE)
+                  .Attr("Tout", EMPTY_TYPE)
+                  .Finalize(n));  // n is created inside the geop function
+  return Status::OK();
+}
+
+Status DpTfToGEConversionPassImpl::AddGeopDatasetFunctionDef(FunctionDefLibrary &fdeflib,
+                                                             const std::string &fn_geop,
+                                                             const std::string &fn_geop_dataset,
+                                                             const string &default_device,
+                                                             std::map<std::string, std::string> all_options) {
+  // The GEOP node should be created by the geopDataset function
+  ADP_LOG(INFO) << "Start to convert geop node to geopdataset function";
+  FunctionDef *fd = fdeflib.add_function();
+  REQUIRES_NOT_NULL(fd);
+  REQUIRES_NOT_NULL(fd->mutable_signature());
+  fd->mutable_signature()->set_name(fn_geop_dataset);
+
+  NodeDef *n = fd->add_node_def();
+  REQUIRES_NOT_NULL(n);
+  NameAttrList f_attr;
+  f_attr.set_name(fn_geop);
+  *f_attr.mutable_attr() = n->attr();
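+  // A sketch of the function nesting assembled here, for orientation only
+  // (the function names are the random ones generated by GetRandomName):
+  //   GEOPDataset --attr "f"--> fn_geop_dataset { GeOp }
+  //     GeOp --attr "function"--> fn_geop { DPOP }
+  //       DPOP --attr "func_def"--> serialized device-side dataset graph
+  // so GE unwraps three nested functions to reach the device dataset graph.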
TF_CHECK_OK(NodeDefBuilder(GetRandomName("GeOp"), "GeOp") + .Input(EMPTY_DEF_INPUT) // No partition dp_init graph on GE + .Device(default_device) + .Attr("function", f_attr) // geop funcion + .Attr("Tin", EMPTY_TYPE) + .Attr("Tout", EMPTY_TYPE) + .Attr("Tout", EMPTY_TYPE) + .Attr("_enableDP", true) + .Finalize(n)); // n is created by function of geopDataset function + std::string attr_name; + for (auto option : all_options) { + attr_name = std::string("_") + option.first; + AddNodeAttr(attr_name, option.second, n); + } + AddNodeAttr("_NpuOptimizer", "NpuOptimizer", n); + return Status::OK(); +} + +Status DpTfToGEConversionPassImpl::BuildGeOpDatasetFunction(FunctionDefLibrary &fdeflib, + Graph *device_graph, + const std::string &fn_geop_dataset, + const string &default_device, + std::map all_options) { + // Convert GE graph to GEOP function body + Status ret = Status::OK(); + std::string fn_dpop = GetRandomName("dpop_function"); + { + ADP_LOG(INFO) << "Start to convert GE graph to geop function"; + FunctionDef *fd = fdeflib.add_function(); + ret = GraphToFunctionDef(*device_graph, fn_dpop, fd); + if (!ret.ok()) { + ADP_LOG(ERROR) << "GraphToFunctionDef failed:" << ret.error_message(); + return ret; } - // Add DPOP node(visable only by function of geop) - { - string func_def_str; - fdeflib.SerializeToString(&func_def_str); + } + std::string fn_geop = GetRandomName("geop_function"); + ret = AddGeopNodeFunctionDef(fdeflib, fn_geop, fn_dpop, default_device); + if (!ret.ok()) { return ret; } + ret = AddGeopDatasetFunctionDef(fdeflib, fn_geop, fn_geop_dataset, default_device, all_options); + if (!ret.ok()) { return ret; } + return ret; +} - // DPOP node should created by function of geop - ADP_LOG(INFO) << "Start to convert dpop node to geop function"; - FunctionDef *fd = fdeflib.add_function(); - if (fd == nullptr || fd->mutable_signature() == nullptr) { - ADP_LOG(ERROR) << "fd is nullptr"; - LOG(ERROR) << "fd is nullptr"; - return false; - } - fd->mutable_signature()->set_name(fn_geop); - NodeDef *n = fd->add_node_def(); - if (n == nullptr) { - ADP_LOG(ERROR) << "fd node def is nullptr"; - LOG(ERROR) << "fd node def is nullptr"; - return false; - } - NameAttrList f_attr; - f_attr.set_name(fn_dpop); - *f_attr.mutable_attr() = n->attr(); - TF_CHECK_OK(NodeDefBuilder(fn_dpop, "DPOP") - .Input(EMPTY_DEF_INPUT) // No partition dp_init graph on GE - .Device(DEFAULT_DEVICE) - .Attr("function", f_attr) // dpop funcion - .Attr("func_def", func_def_str) - .Attr("Tin", EMPTY_TYPE) - .Attr("Tout", EMPTY_TYPE) - .Attr("Tout", EMPTY_TYPE) - .Finalize(n)); // n is created by function of geop function - } - { +Status DpTfToGEConversionPassImpl::AddGeOpDatasetFunctionLibrary(FunctionLibraryDefinition *flib, + Node *topo_end, + const std::string &device_channel_name, + const std::string &fn_geop_dataset, + std::map &all_options) { + FunctionDefLibrary fdeflib; + char *is_new = std::getenv("IS_NEW"); + if (is_new != nullptr && strcmp("1", is_new) == 0 && device_channel_name.empty()) { // GEOP node should created by function of geopDataset - ADP_LOG(INFO) << "Start to convert geop node to geopdataset function"; + ADP_LOG(INFO) << "No Dataset node can be computed in device, GeOpDataset func is null."; FunctionDef *fd = fdeflib.add_function(); - if (fd == nullptr || fd->mutable_signature() == nullptr) { - ADP_LOG(ERROR) << "fd is nullptr"; - LOG(ERROR) << "fd is nullptr"; - return false; - } + REQUIRES_NOT_NULL(fd); + REQUIRES_NOT_NULL(fd->mutable_signature()); fd->mutable_signature()->set_name(fn_geop_dataset); - 
NodeDef *n = fd->add_node_def(); - if (n == nullptr) { - ADP_LOG(ERROR) << "fd node def is nullptr"; - LOG(ERROR) << "fd node def is nullptr"; - return false; - } - NameAttrList f_attr; - f_attr.set_name(fn_geop); - *f_attr.mutable_attr() = n->attr(); - TF_CHECK_OK(NodeDefBuilder(GetRandomName("GeOp"), "GeOp") - .Input(EMPTY_DEF_INPUT) // No partition dp_init graph on GE - .Device(DEFAULT_DEVICE) - .Attr("function", f_attr) // geop funcion - .Attr("Tin", EMPTY_TYPE) - .Attr("Tout", EMPTY_TYPE) - .Attr("Tout", EMPTY_TYPE) - .Attr("_enableDP", true) - .Finalize(n)); // n is created by function of geopDataset function - std::string attr_name = ""; - for (auto option : all_options) { - attr_name = std::string("_") + option.first; - AddNodeAttr(attr_name, option.second, n); + } else { + // Make a copy of graph for pruned GE + ADP_LOG(INFO) << "Start to prune GE graph"; + std::unique_ptr device_graph(new (std::nothrow) Graph(OpRegistry::Global())); + REQUIRES_NOT_NULL(device_graph); + Status ret = BuildDeviceDpGraph(topo_end, device_graph.get(), device_channel_name); + if (!ret.ok()) { return ret; } + + // add function_def begin + ADP_LOG(INFO) << "Start to add function_def for GEOP's func"; + for (auto node : device_graph->nodes()) { + std::vector node_funcs; + if (GetNodeFuncs(flib, node, node_funcs)) { + REQUIRES_NOT_NULL(flib); + ADP_LOG(INFO) << "Node [" << node->name() << "] has func:"; + for (const auto &func : node_funcs) { + FunctionDef *fdef = fdeflib.add_function(); + REQUIRES_NOT_NULL(flib->Find(func)); + *fdef = *(flib->Find(func)); + } + } } - AddNodeAttr("_NpuOptimizer", "NpuOptimizer", n); + ret = AddAttr2DeviceNodes(topo_end, device_graph.get()); + if (!ret.ok()) { return ret; } + + const string kDefaultDevice = topo_end->def().device(); + ret = BuildGeOpDatasetFunction(fdeflib, device_graph.get(), fn_geop_dataset, kDefaultDevice, all_options); + if (!ret.ok()) { return ret; } } + // Update graph function libray ADP_LOG(INFO) << "Start to add geop and geopdataset function in graph library"; // Not a must, just for Tensorbord viewing convenience graph_->AddFunctionLibrary(fdeflib); flib->AddLibrary(fdeflib); - // Add GEOPDataset node to graph_ - std::vector topo_end_input_edges(topo_end->in_edges().begin(), topo_end->in_edges().end()); - Node *dpgroup_dataset_node = nullptr; - Node *geop_dataset_node = nullptr; - std::unordered_set isolated_nodes; - { - ADP_LOG(INFO) << "Start to add geopdataset node in graph"; - const Node *n = nullptr; - for (const Edge *e : topo_end_input_edges) { - if (IsIteratorNode(e->src())) { n = e->src(); } - } - if (n == nullptr) { - ADP_LOG(ERROR) << "edge src is nullptr"; - LOG(ERROR) << "edge src is nullptr"; - return false; - } - auto m_src = n->def().attr(); - - NameAttrList f_attr; - f_attr.set_name(fn_geop_dataset); - // Combine all host queue dataset with GEOPDataset - std::vector inputs; - for (Node *n : graph_->op_nodes()) { - // host tf makeiterator add dp label - if (IsMakeIteratorNode(n)) { n->AddAttr("_kernel", "dp"); } - if (n->type_string() == "HostQueueDataset" && n->name() == host_queue_name) { - // 0: Host queue always generate one dataset - ADP_LOG(INFO) << "inputs add node : name is " << n->name() << ", op is " << n->type_string(); - inputs.push_back(NodeBuilder::NodeOut(n, 0)); - } - if (n->type_string().compare("DeviceQueueDataset") == 0 && n->name() == device_queue_name) { - isolated_nodes.insert(n); - } - } + return Status::OK(); +} - TF_CHECK_OK(NodeBuilder(GetRandomName("DPGroupDataset"), "DPGroupDataset") - .Input(inputs) // 
All host queue flow into geopDataset for driver - .Device(DEFAULT_DEVICE) - .Attr("output_types", m_src["output_types"]) - .Attr("output_shapes", m_src["output_shapes"]) - .Finalize(&*graph_, - &dpgroup_dataset_node)); // Finalize geopDataset in graph_ - TF_CHECK_OK(NodeBuilder(GetRandomName("GeopDataset"), "GEOPDataset") - .Device(DEFAULT_DEVICE) - .Attr("f", f_attr) // geopDataset function - .Finalize(&*graph_, - &geop_dataset_node)); // Finalize geopDataset in graph_ - - for (Node *n : graph_->op_nodes()) { - if (n->type_string() == "HostQueueDataset" && n->name() == host_queue_name) { - graph_->RemoveEdge(*(n->in_edges().begin())); - graph_->AddEdge(geop_dataset_node, 0, n, 0); - } - } +Status DpTfToGEConversionPassImpl::AddGeOpDatasetAndDpGroupDataset(Node *topo_end, + const std::string &fn_geop_dataset, + const std::string &host_channel_name, + const std::string &device_channel_name) { + // Add GEOPDataset node to graph_ + std::vector topo_end_input_edges(topo_end->in_edges().begin(), topo_end->in_edges().end()); + + ADP_LOG(INFO) << "Start to add geopdataset node in graph"; + const Node *iterator_node = nullptr; + for (const Edge *e : topo_end_input_edges) { + REQUIRES_NOT_NULL(e); + if (IsIteratorNode(e->src())) { iterator_node = e->src(); } + } + + // Combine all host queue dataset with GEOPDataset + std::vector inputs; + std::unordered_set isolated_nodes; + for (Node *n : graph_->op_nodes()) { + REQUIRES_NOT_NULL(n); + // host tf makeiterator add dp label + if (IsMakeIteratorNode(n)) { n->AddAttr("_kernel", "dp"); } + if (n->type_string() == "HostQueueDataset" && n->name() == host_channel_name) { + // 0: Host queue always generate one dataset + ADP_LOG(INFO) << "inputs add node : name is " << n->name() << ", op is " << n->type_string(); + inputs.push_back(NodeBuilder::NodeOut(n, 0)); + } + if (n->type_string() == "DeviceQueueDataset" && n->name() == device_channel_name) { + isolated_nodes.insert(n); } - // Remove all edges flow to MakeIterator except the one from IteratorV2 - ADP_LOG(INFO) << "Start to combine geopdataset with iterator node and remove " - "orignal edges"; + } + + Node *dpgroup_dataset_node = nullptr; + REQUIRES_NOT_NULL(iterator_node); + auto m_src = iterator_node->def().attr(); + TF_CHECK_OK(NodeBuilder(GetRandomName("DPGroupDataset"), "DPGroupDataset") + .Input(inputs) // All host queue flow into geopDataset for driver + .Device(iterator_node->def().device()) + .Attr("output_types", m_src["output_types"]) + .Attr("output_shapes", m_src["output_shapes"]) + .Finalize(graph_, + &dpgroup_dataset_node)); // Finalize geopDataset in graph_ + + NameAttrList f_attr; + f_attr.set_name(fn_geop_dataset); + Node *geop_dataset_node = nullptr; + TF_CHECK_OK(NodeBuilder(GetRandomName("GeopDataset"), "GEOPDataset") + .Device(iterator_node->def().device()) + .Attr("f", f_attr) // geopDataset function + .Finalize(graph_, + &geop_dataset_node)); // Finalize geopDataset in graph_ - // We must copy all topoend input edges as we can't modify it when combine - // geopdataset an topoend + for (Node *n : graph_->op_nodes()) { + if (n->type_string() == "HostQueueDataset" && n->name() == host_channel_name) { + graph_->RemoveEdge(*(n->in_edges().begin())); + graph_->AddEdge(geop_dataset_node, 0, n, 0); + } + } + // Remove all edges flow to MakeIterator except the one from IteratorV2 + ADP_LOG(INFO) << "Start to combine geopdataset with iterator node and remove " + "orignal edges"; + + // We must copy all topoend input edges as we can't modify it when combine + // geopdataset an topoend + 
char *is_new = std::getenv("IS_NEW");
+  if (is_new != nullptr && strcmp("1", is_new) == 0 && device_channel_name.empty()) {
+    CHECK_NOTNULL(graph_->AddEdge(dpgroup_dataset_node, 0, topo_end, 0));
+  } else {
     for (const Edge *e : topo_end_input_edges) {
+      ADP_LOG(INFO) << "node:" << topo_end->name() << ", input node is:" << e->src()->name();
       if (!IsIteratorNode(e->src())) {
         CHECK_NOTNULL(graph_->AddEdge(dpgroup_dataset_node, 0, e->dst(), e->dst_input()));
         ADP_LOG(INFO) << "Remove_" << GetEdgeName(e);
         graph_->RemoveEdge(e);
       }
     }
-    // Prune for the final optimized graph
-    ADP_LOG(INFO) << "Start to prune final optimized graph";
-
-    RemoveIsolatedNode(&*graph_, isolated_nodes);
-    ADP_LOG(INFO) << "Start to assign unassigned node on default device";
-    // We do pass after assign, so we must assign all new added nodes
-    for (Node *n : (*g)->op_nodes()) {
-      if (n->assigned_device_name().empty()) {
-        // Use device of MakeIterator node as default
-        n->set_assigned_device_name(DEFAULT_DEVICE);
-        ADP_LOG(INFO) << "Assigned node [" << n->name() << "] on device [" << n->assigned_device_name() << "]";
-      }
+  }
+
+  // Prune for the final optimized graph
+  ADP_LOG(INFO) << "Start to prune final optimized graph";
+
+  RemoveIsolatedNode(graph_, isolated_nodes);
+  ADP_LOG(INFO) << "Start to assign unassigned node on default device";
+  // We do this pass after assignment, so we must assign all newly added nodes
+  for (Node *n : graph_->op_nodes()) {
+    if (n->assigned_device_name().empty()) {
+      // Use the device of the MakeIterator node as the default
+      n->set_assigned_device_name(iterator_node->def().device());
+      ADP_LOG(INFO) << "Assigned node [" << n->name() << "] on device [" << n->assigned_device_name() << "]";
     }
   }
+  return Status::OK();
+}
+
+bool DpTfToGEConversionPassImpl::RunPass(std::unique_ptr<Graph> *g, FunctionLibraryDefinition *flib,
+                                         std::map<std::string, std::string> all_options) {
+  ADP_LOG(INFO) << ">>>> DpTfToGEConversionPassImpl::RunPass <<<<";
+  // Convert just for convenient access
+  split_edges_.clear();
+  graph_ = &**g;
+  flib_def_ = &(*g)->flib_def();
+
+  // Find split edges from subgraphs where MakeIterator connects to the Iterator op
+  std::vector<Node *> topo_ends;
+  GetTopoEndsNodes(topo_ends);
+  // After traversal, topo_ends should store MakeIterator Nodes.
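+  // For orientation, a sketch of the typical input this pass matches (op names
+  // taken from the geop_dpop.pbtxt test data added later in this patch, shown
+  // only as an assumed example):
+  //   TensorSliceDataset -> BatchDatasetV2 -> MapDataset -> MakeIterator <- IteratorV2
+  // Each MakeIterator fed by an Iterator/IteratorV2 becomes one topo_end; the
+  // dataset edges above it are later split into a HostQueueDataset /
+  // DeviceQueueDataset pair that shares a channel_name.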
+ if (topo_ends.empty()) { + ADP_LOG(INFO) << "Do not find MakeIterator <- IteratorV2 connects in the graph," + << " pass datapreprocess pass."; + return true; + } + + const char *need_print = getenv("PRINT_MODEL"); + if (nullptr != need_print && strcmp("1", need_print) == 0) { + GraphDef before_graphdef; + (*g)->ToGraphDef(&before_graphdef); + string pre_model_path = GetDumpPath() + "BeforeSubGraph_dp_"; + string pmodel_path = pre_model_path + std::to_string(graph_run_num_) + ".pbtxt"; + TF_DO_CHECK_OK(WriteTextProto(Env::Default(), pmodel_path, before_graphdef), ERROR); + } + + ADP_LOG(INFO) << "Start to optimize dp_init topological graph"; + for (Node *topo_end : topo_ends) { + // Get all edges that should be replace with HostQueue->DeviceQueue + ADP_LOG(INFO) << "Start to find split edges, topo_end node is : " << topo_end->name() << ", op is " + << topo_end->type_string(); + std::string host_channel_name; + std::string device_channel_name; + TF_DO_CHECK_OK(AddDataTransDatasets(topo_end, host_channel_name, device_channel_name, all_options), ERROR); + std::string fn_geop_dataset = GetRandomName("geopdataset_function"); + TF_DO_CHECK_OK(AddGeOpDatasetFunctionLibrary(flib, topo_end, device_channel_name, fn_geop_dataset, all_options), ERROR); + TF_DO_CHECK_OK(AddGeOpDatasetAndDpGroupDataset(topo_end, fn_geop_dataset, host_channel_name, device_channel_name), ERROR); + } ADP_LOG(INFO) << "End optimize dp_init topological graph"; if (need_print != nullptr && strcmp("1", need_print) == 0) { @@ -867,18 +970,18 @@ Status DpTfToGEConversionPassImpl::ProcessGraph(std::unique_ptr *graph, F if (graph == nullptr) { return Status::OK(); } - std::string queue_name; + std::string channel_name; for (Node *n : graph->get()->nodes()) { REQUIRES_NOT_NULL(n); if (n->type_string() == "Iterator" || n->type_string() == "IteratorV2") { - queue_name = n->name(); + channel_name = n->name(); } if (n->attrs().Find("_NoNeedOptimize")) { ADP_LOG(INFO) << "Found mark of noneed optimize on node [" << n->name() << "], skip DpTfToGEConversionPass."; return Status::OK(); } } - NpuAttrs::SetUseAdpStatus(queue_name, false); + NpuAttrs::SetUseAdpStatus(channel_name, false); std::map all_options; std::map pass_options; @@ -889,10 +992,10 @@ Status DpTfToGEConversionPassImpl::ProcessGraph(std::unique_ptr *graph, F if (n->type_string() == "DvppDataset") { uint32_t device_id = 0; (void)GetEnvDeviceID(device_id); - n->AddAttr("queue_name", "device" + std::to_string(device_id) + "_" + queue_name); - NpuAttrs::SetUseAdpStatus(queue_name, true); - ADP_LOG(INFO) << "The graph include DvppDataset, set queue_name:" - << queue_name << ", skip DpTfToGEConversionPass."; + n->AddAttr("channel_name", "device" + std::to_string(device_id) + "_" + channel_name); + NpuAttrs::SetUseAdpStatus(channel_name, true); + ADP_LOG(INFO) << "The graph include DvppDataset, set channel_name:" + << channel_name << ", skip DpTfToGEConversionPass."; return Status::OK(); } if (n->attrs().Find("_NpuOptimizer")) { diff --git a/tf_adapter/optimizers/om_partition_subgraphs_pass.cc b/tf_adapter/optimizers/om_partition_subgraphs_pass.cc index ed3c57bd7..19cf6eb49 100644 --- a/tf_adapter/optimizers/om_partition_subgraphs_pass.cc +++ b/tf_adapter/optimizers/om_partition_subgraphs_pass.cc @@ -520,7 +520,6 @@ Status FindNpuSupportCandidates(const Graph &graph, OrderedNodeSet *candidates, bool hasIteratorOp = false; bool hasMakeIteratorOp = false; bool hasOutfeedDequeueOp = false; - bool hasStopOutfeedDequeueOp = false; for (Node *node : graph.op_nodes()) { 
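    // One pass over all op nodes: record each node in traversal order and flag
    // whether the graph contains Iterator/MakeIterator bootstrap ops or an
    // OutfeedDequeueOp; an outfeed op keeps the hostcall subgraph on the host
    // (the candidate set is cleared just below).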
sortedNodes.push_back(node); if (node->type_string().find("MakeIterator") != string::npos) { @@ -530,12 +529,10 @@ Status FindNpuSupportCandidates(const Graph &graph, OrderedNodeSet *candidates, hasIteratorOp = true; } else if (node->type_string() == "OutfeedDequeueOp") { hasOutfeedDequeueOp = true; - } else if (node->type_string() == "StopOutfeedDequeueOp") { - hasStopOutfeedDequeueOp = true; } } - if (hasStopOutfeedDequeueOp || hasOutfeedDequeueOp) { + if (hasOutfeedDequeueOp) { candidates->clear(); ADP_LOG(INFO) << "hostcall subgraph will run on host."; return Status::OK(); @@ -925,7 +922,6 @@ Status MarkForPartition(std::unique_ptr *graphIn, int &clusterNum, bool m || !NodeIsCandidateForClustering(dst, &npuSupportCandidates)) { continue; } - if (is_set_lazy_recompile && src->type_string() == "IteratorGetNext" && enable_dp) { graph_options["is_dynamic_getnext"] = "1"; continue; diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_hook.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_hook.py index 5880cb37a..59a04b162 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_hook.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_hook.py @@ -172,7 +172,6 @@ class _SIGNAL(object): class _OpQueueContext(object): """Manages work queue and thread for a infeed/outfeed thread.""" - def __init__(self, name, target, args): self._name = name self._queue = Queue.Queue() diff --git a/tf_adapter/tests/CMakeLists.txt b/tf_adapter/tests/CMakeLists.txt index 4115712e7..b8bb2ff2d 100644 --- a/tf_adapter/tests/CMakeLists.txt +++ b/tf_adapter/tests/CMakeLists.txt @@ -33,6 +33,7 @@ add_subdirectory(depends/alog) add_subdirectory(depends/datatransfer) add_subdirectory(depends/ge_runner) add_subdirectory(depends/aoe) +add_subdirectory(depends/ascendcl) if (ENABLE_TFADAPTER_UT) add_subdirectory(ut) diff --git a/tf_adapter/tests/depends/ascendcl/CMakeLists.txt b/tf_adapter/tests/depends/ascendcl/CMakeLists.txt new file mode 100644 index 000000000..5143d4bae --- /dev/null +++ b/tf_adapter/tests/depends/ascendcl/CMakeLists.txt @@ -0,0 +1,29 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +project(ascendcl_stub) + +file(GLOB_RECURSE SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} + "src/*.cc" +) + +include_directories(${TFADAPTER_DIR}/inc/ + ${CMAKE_CURRENT_SOURCE_DIR}/src +) + +add_library(ascendcl_stub SHARED ${SRC_FILES}) +target_link_libraries(ascendcl_stub PRIVATE + $ +) \ No newline at end of file diff --git a/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.cc b/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.cc new file mode 100644 index 000000000..4b99a32d8 --- /dev/null +++ b/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.cc @@ -0,0 +1,268 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "acl/acl_tdt.h" +#include "ascendcl_stub.h" +#include "acl/acl_rt.h" +#include +#include + +namespace { + std::mutex aclChannleMutex; + std::map aclChannleMap; + std::map aclDataTypeStrMap = + { + {"bool", ACL_BOOL}, + {"int8", ACL_INT8}, + {"uint8", ACL_UINT8}, + {"half", ACL_FLOAT16}, + {"int16", ACL_INT16}, + {"uint16", ACL_UINT16}, + {"float", ACL_FLOAT}, + {"int32", ACL_INT32}, + {"uint32", ACL_UINT32}, + {"int64", ACL_INT64}, + {"uint64", ACL_UINT64}, + {"double", ACL_DOUBLE}, + {"string", ACL_STRING} + }; +} + +namespace acl { + void GetTensorDimsString(const int64_t *dims, size_t dimNum, std::string &dimsStr) + { + for (size_t i = 0; i < dimNum; ++i) { + dimsStr += std::to_string(dims[i]); + if (i + 1 == dimNum) { + break; + } + dimsStr.push_back(','); + } + dimsStr += "]"; + } +} + +aclError acltdtDestroyChannel(acltdtChannelHandle *handle) { + if (handle == nullptr) { + return ACL_ERROR_INVALID_PARAM; + } + return ACL_SUCCESS; +} + +acltdtChannelHandle *acltdtCreateChannel(uint32_t deviceId, const char *name) { + acltdtChannelHandle *handle = new(std::nothrow) acltdtChannelHandle(deviceId, name); + { + std::unique_lock lk(aclChannleMutex); + aclChannleMap[name] = handle; + } + return handle; +} + +aclError aclrtSetDevice(int32_t deviceId){ + return ACL_SUCCESS; +} + +acltdtChannelHandle *acltdtCreateChannelWithCapacity(uint32_t deviceId, + const char *name, + size_t capacity) { + acltdtChannelHandle *handle = new(std::nothrow) acltdtChannelHandle(deviceId, name); + { + std::unique_lock lk(aclChannleMutex); + aclChannleMap[name] = handle; + } + return handle; +} + +acltdtDataItem *acltdtGetDataItem(const acltdtDataset *dataset, size_t index) { + if ((dataset == nullptr) || (index >= dataset->blobs.size())) { + return nullptr; + } + + return dataset->blobs[index]; +} + +aclError acltdtDestroyDataItem(acltdtDataItem *dataItem) { + if (dataItem == nullptr) { + return ACL_ERROR_INVALID_PARAM; + } + delete dataItem; + return ACL_SUCCESS; +} + +size_t acltdtGetDatasetSize(const acltdtDataset *dataset) { + if (dataset == nullptr) { + return 0; + } + return dataset->blobs.size(); +} + +aclError acltdtDestroyDataset(acltdtDataset *dataset) { + if (dataset == nullptr) { + return ACL_ERROR_INVALID_PARAM; + } + delete dataset; + return ACL_SUCCESS; +} + +acltdtDataset *acltdtCreateDataset() { + return new(std::nothrow) acltdtDataset(); +} + +aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, + acltdtDataset *dataset, + int32_t timeout) { + if (handle->recvName.empty()) { + return ACL_ERROR_INVALID_PARAM; + } + if (handle->recvName == "train") { + acltdtDataItem *acl_data = acltdtCreateDataItem(ACL_TENSOR_DATA_END_OF_SEQUENCE, nullptr, 0, ACL_BOOL /* whatever */, nullptr, 0); + if (acltdtAddDataItem(dataset, acl_data) != ACL_ERROR_NONE) { + if (acltdtDestroyDataItem(acl_data) != ACL_ERROR_NONE) { + return ACL_ERROR_FAILURE; + } + } + } else { + std::string vaue_str = "print message!!"; + std::string *value = &vaue_str; + // for scalar type, *dims is nullptr and dim_num is 0 + acltdtDataItem *acl_data = acltdtCreateDataItem(ACL_TENSOR_DATA_TENSOR, 
nullptr, 0, ACL_STRING, + const_cast(value->c_str()), value->size()); + if (acltdtAddDataItem(dataset, acl_data) != ACL_ERROR_NONE) { + if (acltdtDestroyDataItem(acl_data) != ACL_ERROR_NONE) { + return ACL_ERROR_FAILURE; + } + } + int32_t value_int = 1; + acltdtDataItem *acl_int_data = acltdtCreateDataItem( + ACL_TENSOR_DATA_TENSOR, nullptr, + 0, ACL_INT32, &value_int, 4); + if (acltdtAddDataItem(dataset, acl_int_data) != ACL_ERROR_NONE) { + if (acltdtDestroyDataItem(acl_int_data) != ACL_ERROR_NONE) { + return ACL_ERROR_FAILURE; + } + } + } + return ACL_SUCCESS; +} + +acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType, + const int64_t *dims, + size_t dimNum, + aclDataType dataType, + void *data, + size_t size) { + if ((dims == nullptr && dimNum != 0) || (dims != nullptr && dimNum == 0)) { + return nullptr; + } + std::string dimsStr = "["; + acl::GetTensorDimsString(dims, dimNum, dimsStr); + std::string typeStr; + for (const auto &item: aclDataTypeStrMap) { + if (item.second == dataType) { + typeStr = item.first; + break; + } + } + if (typeStr.empty()) { + return nullptr; + } + std::shared_ptr dataPtr; + dataPtr.reset(data, [](const void *p) {}); + return new(std::nothrow) acltdtDataItem(tdtType, dims, dimNum, dimsStr, dataType, typeStr, dataPtr, size); +} + +aclError acltdtAddDataItem(acltdtDataset *dataset, acltdtDataItem *dataItem) { + if (dataset == nullptr || dataItem == nullptr) { + return ACL_ERROR_INVALID_PARAM; + } + if (dataset->freeSelf) { + return ACL_ERROR_FEATURE_UNSUPPORTED; + } + dataset->blobs.push_back(dataItem); + return ACL_SUCCESS; +} + +aclError acltdtSendTensor(const acltdtChannelHandle *handle, + const acltdtDataset *dataset, + int32_t timeout) { + if (dataset == nullptr || handle == nullptr) { + return ACL_ERROR_INVALID_PARAM; + } + return ACL_SUCCESS; +} + +acltdtTensorType acltdtGetTensorTypeFromItem(const acltdtDataItem *dataItem) { + if (dataItem == nullptr) { + return ACL_TENSOR_DATA_UNDEFINED; + } + return dataItem->tdtType; +} + +aclDataType acltdtGetDataTypeFromItem(const acltdtDataItem *dataItem) { + if (dataItem == nullptr) { + return ACL_DT_UNDEFINED; + } + return dataItem->dataType; +} + +size_t acltdtGetDimNumFromItem(const acltdtDataItem *dataItem) { + if (dataItem == nullptr) { + return 0; + } + return dataItem->dims.size(); +} + +size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataItem) { + if (dataItem == nullptr) { + return 0; + } + return dataItem->dataLen; +} + +void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataItem) { + if (dataItem == nullptr) { + return nullptr; + } + return dataItem->dataPtr.get(); +} + +aclError acltdtGetDimsFromItem(const acltdtDataItem *dataItem, int64_t *dims, size_t dimNum) { + if (dataItem == nullptr) { + return ACL_TENSOR_DATA_UNDEFINED; + } + // check dims and dimNum + if ((dims == nullptr && dimNum != 0) || (dims != nullptr && dimNum == 0)) { + return ACL_ERROR_INVALID_PARAM; + } + + if (dimNum < dataItem->dims.size()) { + return ACL_ERROR_INVALID_PARAM; + } + + for (size_t i = 0; i < dataItem->dims.size(); ++i) { + dims[i] = dataItem->dims[i]; + } + + return ACL_SUCCESS; +} + +aclError acltdtStopChannel(acltdtChannelHandle *handle) +{ + if (handle == nullptr) { + return ACL_TENSOR_DATA_UNDEFINED; + } + return ACL_SUCCESS; +} \ No newline at end of file diff --git a/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.h b/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.h new file mode 100644 index 000000000..c418e4d15 --- /dev/null +++ 
b/tf_adapter/tests/depends/ascendcl/src/ascendcl_stub.h @@ -0,0 +1,82 @@ +/** +* @file tensor_data_transfer.h +* +* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ + +#ifndef DEPENDS_ASCENDCL_STUB_H +#define DEPENDS_ASCENDCL_STUB_H +#include +#include +#include +#include + +#include "acl/acl_base.h" +#include "acl/acl_tdt.h" + +struct acltdtDataItem { + acltdtDataItem(acltdtTensorType tdtType, + const int64_t *dims, size_t dimNum, const std::string &dimsStr, + aclDataType type, const std::string &typeStr, + std::shared_ptr tensorData, size_t size) + { + this->tdtType = tdtType; + for (size_t i = 0; i < dimNum; ++i) { + this->dims.push_back(dims[i]); + } + this->dimsStr = dimsStr; + this->dataType = type; + this->dataTypeStr = typeStr; + this->dataLen = size; + this->dataPtr = tensorData; + } + acltdtDataItem() = default; + ~acltdtDataItem() = default; + acltdtTensorType tdtType; + std::vector dims; + std::string dimsStr; + aclDataType dataType; + std::string dataTypeStr; + size_t dataLen; + std::shared_ptr dataPtr; +}; + +struct acltdtDataset { + acltdtDataset() : freeSelf(false) {}; + ~acltdtDataset() + { + if (freeSelf) { + for (auto iter = blobs.begin(); iter != blobs.end(); ++iter) { + (void)acltdtDestroyDataItem(*iter); + } + } + } + std::vector blobs; + bool freeSelf; +}; + +struct acltdtChannelHandle { + acltdtChannelHandle(uint32_t deviceId, const char *channelName) + { + devId = deviceId; + if (channelName != nullptr) { + name = channelName; + size_t prefixLen = sizeof("TF_RECEIVE_") - 1; + if (0 == strncmp(channelName, "TF_RECEIVE_", prefixLen)) { + recvName = channelName + prefixLen; + } + } + } + acltdtChannelHandle() = default; + ~acltdtChannelHandle() = default; + std::string name; + std::string recvName; + uint32_t devId; +}; + +#endif //ACL_TENSOR_DATA_TRANSFER_H + diff --git a/tf_adapter/tests/st/CMakeLists.txt b/tf_adapter/tests/st/CMakeLists.txt index 302442238..b7462d700 100644 --- a/tf_adapter/tests/st/CMakeLists.txt +++ b/tf_adapter/tests/st/CMakeLists.txt @@ -25,12 +25,17 @@ file(GLOB_RECURSE ST_SOURCES ${TFADAPTER_DIR}/tf_adapter/kernels/*.cc "optimizers/testcase/om_partition_subgraphs_pass_test.cc" "optimizers/testcase/get_attr_optimize_pass_test.cc" + "optimizers/testcase/dp_tf_ge_conversion_pass_test.cc" "kernels/testcase/ocr_ops_test.cc" "kernels/testcase/non_zero_with_value_ops_test.cc" "kernels/testcase/geop_npu_test.cc" - "util/testcase/*.cc" + "kernels/testcase/infeed_outfeed_test.cc" "kernels/testcase/layer_norm_ops_test.cc" "kernels/testcase/layer_norm_grad_ops_test.cc" + "kernels/testcase/dataset/host_queue_dats_set_st.cc" + "kernels/testcase/dataset/function_testlib.cc" + "kernels/testcase/dataset/dataset_test_base.cc" + "util/testcase/*.cc" ) add_executable(tfadapter_stest @@ -65,7 +70,6 @@ add_dependencies(tfadapter_stest aoe_tuning) target_link_libraries(tfadapter_stest PUBLIC $ - gtest gtest_main c_sec mmpa_stub indextransform_stub alog_stub datatransfer_stub ge_runner_stub + gtest gtest_main c_sec mmpa_stub indextransform_stub alog_stub datatransfer_stub ge_runner_stub ascendcl_stub ${PYTHON_LIB_PATH} -lrt -ldl -lgcov ) - diff --git a/tf_adapter/tests/st/kernels/pbtxt/geop_dpop.pbtxt b/tf_adapter/tests/st/kernels/pbtxt/geop_dpop.pbtxt new file mode 100644 index 000000000..2b67e2a6d --- /dev/null +++ 
b/tf_adapter/tests/st/kernels/pbtxt/geop_dpop.pbtxt @@ -0,0 +1,697 @@ +node { + name: "GeOp1_0_dp" + op: "GeOp" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "Tin" + value { + list { + } + } + } + attr { + key: "Tout" + value { + list { + } + } + } + attr { + key: "_NpuOptimizer" + value { + s: "NpuOptimizer" + } + } + attr { + key: "_auto_tune_mode" + value { + s: "" + } + } + attr { + key: "_buffer_optimize" + value { + s: "l2_optimize" + } + } + attr { + key: "_compress_weight_conf" + value { + s: "" + } + } + attr { + key: "_debug_dir" + value { + s: "" + } + } + attr { + key: "_distribute_config" + value { + s: "" + } + } + attr { + key: "_do_npu_optimizer" + value { + s: "1" + } + } + attr { + key: "_dump_debug_mode" + value { + s: "all" + } + } + attr { + key: "_dump_mode" + value { + s: "output" + } + } + attr { + key: "_dump_path" + value { + s: "./" + } + } + attr { + key: "_dump_step" + value { + s: "1" + } + } + attr { + key: "_dynamic_dims" + value { + s: "" + } + } + attr { + key: "_dynamic_graph_execute_mode" + value { + s: "dynamic_execute" + } + } + attr { + key: "_dynamic_input" + value { + s: "0" + } + } + attr { + key: "_dynamic_node_type" + value { + s: "" + } + } + attr { + key: "_enable_compress_weight" + value { + s: "0" + } + } + attr { + key: "_enable_data_pre_proc" + value { + s: "0" + } + } + attr { + key: "_enable_dump" + value { + s: "1" + } + } + attr { + key: "_enable_dump_debug" + value { + s: "1" + } + } + attr { + key: "_enable_exception_dump" + value { + s: "" + } + } + attr { + key: "_enable_scope_fusion_passes" + value { + s: "" + } + } + attr { + key: "_enable_small_channel" + value { + s: "0" + } + } + attr { + key: "_fusion_switch_file" + value { + s: "" + } + } + attr { + key: "_graph_run_mode" + value { + s: "1" + } + } + attr { + key: "_hcom_multi_mode" + value { + s: "" + } + } + attr { + key: "_hcom_parallel" + value { + s: "0" + } + } + attr { + key: "_in_out_pair" + value { + s: "" + } + } + attr { + key: "_in_out_pair_flag" + value { + s: "1" + } + } + attr { + key: "_input_shape" + value { + s: "" + } + } + attr { + key: "_is_tailing_optimization" + value { + s: "0" + } + } + attr { + key: "_iterations_per_loop" + value { + s: "1" + } + } + attr { + key: "_job" + value { + s: "localhost" + } + } + attr { + key: "_local_device_list" + value { + s: "" + } + } + attr { + key: "_local_rank_id" + value { + s: "-1" + } + } + attr { + key: "_lower_functional_ops" + value { + s: "0" + } + } + attr { + key: "_mix_compile_mode" + value { + s: "0" + } + } + attr { + key: "_mstune_mode" + value { + s: "" + } + } + attr { + key: "_op_compiler_cache_dir" + value { + s: "" + } + } + attr { + key: "_op_compiler_cache_mode" + value { + s: "" + } + } + attr { + key: "_op_debug_level" + value { + s: "0" + } + } + attr { + key: "_op_select_implmode" + value { + s: "" + } + } + attr { + key: "_op_tune_mode" + value { + s: "" + } + } + attr { + key: "_optypelist_for_implmode" + value { + s: "" + } + } + attr { + key: "_precision_mode" + value { + s: "" + } + } + attr { + key: "_profiling_mode" + value { + s: "0" + } + } + attr { + key: "_profiling_options" + value { + s: "" + } + } + attr { + key: "_session_device_id" + value { + s: "" + } + } + attr { + key: "_stream_max_parallel_num" + value { + s: "" + } + } + attr { + key: "_task_index" + value { + s: "0" + } + } + attr { + key: "_use_off_line" + value { + s: "1" + } + } + attr { + key: "_variable_format_optimize" + value { + s: "1" + } + } + attr { + key: "_work_path" + value { + 
s: "" + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } + attr { + key: "function" + value { + func { + name: "geop_function_D9x45pM0kZ0" + } + } + } +} +library { + function { + signature { + name: "geop_function_D9x45pM0kZ0" + } + node_def { + name: "dpop_function_FFvj93e0XnN" + op: "DPOP" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "Tin" + value { + list { + } + } + } + attr { + key: "Tout" + value { + list { + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } + attr { + key: "func_def" + value { + s: "\n\271\001\n;\n\037__inference_Dataset_map_func_11\022\n\n\006args_0\030\002\032\014\n\010identity\030\002\0326\n\005add/y\022\005Const*\031\n\005value\022\020B\016\010\002\022\0002\010\000\000\000\000\000\000\360?*\013\n\005dtype\022\0020\002\032-\n\003add\022\005AddV2\032\016add/y:output:0\032\006args_0*\007\n\001T\022\0020\002\"\023\n\010identity\022\007add:z:0\n\356\016\n\033\n\031dpop_function_FFvj93e0XnN\032p\n\nbatch_size\022\005Const\",/job:localhost/replica:0/task:0/device:CPU:0*\022\n\005value\022\tB\007\010\t\022\000R\001\002*\013\n\005dtype\022\0020\t2\014\n\nbatch_size\032x\n\016drop_remainder\022\005Const\",/job:localhost/replica:0/task:0/device:CPU:0*\022\n\005value\022\tB\007\010\n\022\000Z\001\000*\013\n\005dtype\022\0020\n2\020\n\016drop_remainder\032\365\002\n\nIteratorV2\022\nIteratorV2\",/job:localhost/replica:0/task:0/device:CPU:0*\033\n\024_iterations_per_loop\022\003\022\0011*\024\n\r_use_off_line\022\003\022\0011*\030\n\021_mix_compile_mode\022\003\022\0010*\021\n\013shared_name\022\002\022\000*\"\n\routput_shapes\022\021\n\017:\r\022\013\010\377\377\377\377\377\377\377\377\377\001*\037\n\006_class\022\025\n\023\022\021loc:@MakeIterator*\037\n\r_NpuOptimizer\022\016\022\014NpuOptimizer*\017\n\tcontainer\022\002\022\000*\025\n\014output_types\022\005\n\0032\001\002*\023\n\004_job\022\013\022\tlocalhost*\034\n\025_enable_data_pre_proc\022\003\022\00112\014\n\nIteratorV2\032\357\002\n\016BatchDatasetV2\022\016BatchDatasetV2\032^DeviceQueue_Queue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1:handle:0\032\023batch_size:output:0\032\027drop_remainder:output:0\",/job:localhost/replica:0/task:0/device:CPU:0*\017\n\007_kernel\022\004\022\002dp*\036\n\016_iterator_name\022\014\022\nIteratorV2*\025\n\014output_types\022\005\n\0032\001\002*\023\n\rparallel_copy\022\002(\000*\"\n\routput_shapes\022\021\n\017:\r\022\013\010\377\377\377\377\377\377\377\377\377\0012\020\n\016BatchDatasetV2\032\240\002\n\nMapDataset\022\nMapDataset\032\027BatchDatasetV2:handle:0\",/job:localhost/replica:0/task:0/device:CPU:0*\"\n\routput_shapes\022\021\n\017:\r\022\013\010\377\377\377\377\377\377\377\377\377\001*(\n\001f\022#R!\n\037__inference_Dataset_map_func_11*\036\n\030use_inter_op_parallelism\022\002(\001*\025\n\014output_types\022\005\n\0032\001\002*\020\n\nTarguments\022\002\n\000*\032\n\024preserve_cardinality\022\002(\0002\014\n\nMapDataset\032\265\001\n\014MakeIterator\022\014MakeIterator\032\023MapDataset:handle:0\032\023IteratorV2:handle:0\",/job:localhost/replica:0/task:0/device:CPU:0*\017\n\007_kernel\022\004\022\002dp*\036\n\016_iterator_name\022\014\022\nIteratorV22\016\n\014MakeIterator\032\235\003\nUDeviceQueue_Queue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1\022\022DeviceQueueDataset\",/job:localhost/replica:0/task:0/device:CPU:0*\025\n\routput_shapes\022\004\n\002:\000*[\n\014channel_name\022K\022IQueue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09
W8p1*\036\n\016_iterator_name\022\014\022\nIteratorV2*\025\n\014output_types\022\005\n\0032\001\0022W\nUDeviceQueue_Queue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1" + } + } + attr { + key: "function" + value { + func { + name: "dpop_function_FFvj93e0XnN" + } + } + } + } + } + function { + signature { + name: "dpop_function_FFvj93e0XnN" + } + node_def { + name: "batch_size" + op: "Const" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 2 + } + } + } + experimental_debug_info { + original_node_names: "batch_size" + } + } + node_def { + name: "drop_remainder" + op: "Const" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_BOOL + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_BOOL + tensor_shape { + } + bool_val: false + } + } + } + experimental_debug_info { + original_node_names: "drop_remainder" + } + } + node_def { + name: "IteratorV2" + op: "IteratorV2" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_NpuOptimizer" + value { + s: "NpuOptimizer" + } + } + attr { + key: "_class" + value { + list { + s: "loc:@MakeIterator" + } + } + } + attr { + key: "_enable_data_pre_proc" + value { + s: "1" + } + } + attr { + key: "_iterations_per_loop" + value { + s: "1" + } + } + attr { + key: "_job" + value { + s: "localhost" + } + } + attr { + key: "_mix_compile_mode" + value { + s: "0" + } + } + attr { + key: "_use_off_line" + value { + s: "1" + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_DOUBLE + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } + experimental_debug_info { + original_node_names: "IteratorV2" + } + } + node_def { + name: "BatchDatasetV2" + op: "BatchDatasetV2" + input: "DeviceQueue_Queue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1:handle:0" + input: "batch_size:output:0" + input: "drop_remainder:output:0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_iterator_name" + value { + s: "IteratorV2" + } + } + attr { + key: "_kernel" + value { + s: "dp" + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_DOUBLE + } + } + } + attr { + key: "parallel_copy" + value { + b: false + } + } + experimental_debug_info { + original_node_names: "BatchDatasetV2" + } + } + node_def { + name: "MapDataset" + op: "MapDataset" + input: "BatchDatasetV2:handle:0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "Targuments" + value { + list { + } + } + } + attr { + key: "f" + value { + func { + name: "__inference_Dataset_map_func_11" + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_DOUBLE + } + } + } + attr { + key: "preserve_cardinality" + value { + b: false + } + } + attr { + key: "use_inter_op_parallelism" + value { + b: true + } + } + experimental_debug_info { + original_node_names: "MapDataset" + } + } + node_def { + name: "MakeIterator" + op: "MakeIterator" + input: "MapDataset:handle:0" + input: "IteratorV2:handle:0" + device: 
"/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_iterator_name" + value { + s: "IteratorV2" + } + } + attr { + key: "_kernel" + value { + s: "dp" + } + } + experimental_debug_info { + original_node_names: "MakeIterator" + } + } + node_def { + name: "DeviceQueue_Queue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1" + op: "DeviceQueueDataset" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_iterator_name" + value { + s: "IteratorV2" + } + } + attr { + key: "channel_name" + value { + s: "Queue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1" + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_DOUBLE + } + } + } + experimental_debug_info { + original_node_names: "DeviceQueue_Queue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1" + } + } + } +} +versions { + producer: 134 +} diff --git a/tf_adapter/tests/st/kernels/testcase/dataset/dataset_test_base.cc b/tf_adapter/tests/st/kernels/testcase/dataset/dataset_test_base.cc new file mode 100644 index 000000000..ba2f05661 --- /dev/null +++ b/tf_adapter/tests/st/kernels/testcase/dataset/dataset_test_base.cc @@ -0,0 +1,704 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+
+#include "tensorflow/core/kernels/data/dataset_test_base.h"
+
+#include "tensorflow/core/common_runtime/executor.h"
+#include "tensorflow/core/framework/cancellation.h"
+#include "tensorflow/core/framework/versions.pb.h"
+#include "tensorflow/core/lib/io/record_writer.h"
+#include <iostream>
+using namespace std;
+namespace tensorflow {
+namespace data {
+
+string ToString(CompressionType compression_type) {
+  switch (compression_type) {
+    case CompressionType::ZLIB:
+      return "ZLIB";
+    case CompressionType::GZIP:
+      return "GZIP";
+    case CompressionType::RAW:
+      return "RAW";
+    case CompressionType::UNCOMPRESSED:
+      return "";
+  }
+}
+
+io::ZlibCompressionOptions GetZlibCompressionOptions(
+    CompressionType compression_type) {
+  switch (compression_type) {
+    case CompressionType::ZLIB:
+      return io::ZlibCompressionOptions::DEFAULT();
+    case CompressionType::GZIP:
+      return io::ZlibCompressionOptions::GZIP();
+    case CompressionType::RAW:
+      return io::ZlibCompressionOptions::RAW();
+    case CompressionType::UNCOMPRESSED:
+      LOG(WARNING) << "ZlibCompressionOptions does not have an option for "
+                   << ToString(compression_type);
+      return io::ZlibCompressionOptions::DEFAULT();
+  }
+}
+
+Status WriteDataToFile(const string& filename, const char* data) {
+  return WriteDataToFile(filename, data, CompressionParams());
+}
+
+Status WriteDataToFile(const string& filename, const char* data,
+                       const CompressionParams& params) {
+  Env* env = Env::Default();
+  std::unique_ptr<WritableFile> file_writer;
+  TF_RETURN_IF_ERROR(env->NewWritableFile(filename, &file_writer));
+  if (params.compression_type == CompressionType::UNCOMPRESSED) {
+    TF_RETURN_IF_ERROR(file_writer->Append(data));
+  } else if (params.compression_type == CompressionType::ZLIB ||
+             params.compression_type == CompressionType::GZIP ||
+             params.compression_type == CompressionType::RAW) {
+    auto zlib_compression_options =
+        GetZlibCompressionOptions(params.compression_type);
+    io::ZlibOutputBuffer out(file_writer.get(), params.input_buffer_size,
+                             params.output_buffer_size,
+                             zlib_compression_options);
+    TF_RETURN_IF_ERROR(out.Init());
+    TF_RETURN_IF_ERROR(out.Append(data));
+    TF_RETURN_IF_ERROR(out.Flush());
+    TF_RETURN_IF_ERROR(out.Close());
+  } else {
+    return tensorflow::errors::InvalidArgument(
+        "Unsupported compression_type: ", ToString(params.compression_type));
+  }
+
+  TF_RETURN_IF_ERROR(file_writer->Flush());
+  TF_RETURN_IF_ERROR(file_writer->Close());
+
+  return Status::OK();
+}
+
+Status WriteDataToTFRecordFile(const string& filename,
+                               const std::vector<absl::string_view>& records,
+                               const CompressionParams& params) {
+  Env* env = Env::Default();
+  std::unique_ptr<WritableFile> file_writer;
+  TF_RETURN_IF_ERROR(env->NewWritableFile(filename, &file_writer));
+  auto options = io::RecordWriterOptions::CreateRecordWriterOptions(
+      ToString(params.compression_type));
+  options.zlib_options.input_buffer_size = params.input_buffer_size;
+  io::RecordWriter record_writer(file_writer.get(), options);
+  for (const auto& record : records) {
+    TF_RETURN_IF_ERROR(record_writer.WriteRecord(record));
+  }
+  TF_RETURN_IF_ERROR(record_writer.Flush());
+  TF_RETURN_IF_ERROR(record_writer.Close());
+  TF_RETURN_IF_ERROR(file_writer->Flush());
+  TF_RETURN_IF_ERROR(file_writer->Close());
+  return Status::OK();
+}
+
+template <typename T>
+Status IsEqual(const Tensor& t1, const Tensor& t2) {
+  if (t1.dtype() != t2.dtype()) {
+    return tensorflow::errors::Internal(
+        "Two tensors have different dtypes: ", DataTypeString(t1.dtype()),
+        " vs. ", DataTypeString(t2.dtype()));
+  }
+  if (!t1.IsSameSize(t2)) {
+    return tensorflow::errors::Internal(
+        "Two tensors have different shapes: ", t1.shape().DebugString(),
+        " vs. ", t2.shape().DebugString());
+  }
+
+  auto flat_t1 = t1.flat<T>();
+  auto flat_t2 = t2.flat<T>();
+  auto length = flat_t1.size();
+
+  for (int i = 0; i < length; ++i) {
+    if (flat_t1(i) != flat_t2(i)) {
+      return tensorflow::errors::Internal(
+          "Two tensors have different values "
+          "at [",
+          i, "]: ", flat_t1(i), " vs. ", flat_t2(i));
+    }
+  }
+  return Status::OK();
+}
+
+Status DatasetOpsTestBase::ExpectEqual(const Tensor& a, const Tensor& b) {
+  switch (a.dtype()) {
+#define CASE(DT)                           \
+  case DataTypeToEnum<DT>::value:          \
+    TF_RETURN_IF_ERROR(IsEqual<DT>(a, b)); \
+    break;
+    TF_CALL_NUMBER_TYPES(CASE);
+    TF_CALL_tstring(CASE);
+    TF_CALL_uint32(CASE);
+    TF_CALL_uint64(CASE);
+    // TODO(feihugis): figure out how to support variant tensors.
+#undef CASE
+    default:
+      return errors::Internal("Unsupported dtype: ", a.dtype());
+  }
+  return Status::OK();
+}
+
+template <typename T>
+bool compare(const Tensor& t1, const Tensor& t2) {
+  auto flat_t1 = t1.flat<T>();
+  auto flat_t2 = t2.flat<T>();
+  auto length = std::min(flat_t1.size(), flat_t2.size());
+  for (int i = 0; i < length; ++i) {
+    if (flat_t1(i) < flat_t2(i)) return true;
+    if (flat_t1(i) > flat_t2(i)) return false;
+  }
+  return flat_t1.size() < length;
+}
+
+Status DatasetOpsTestBase::ExpectEqual(std::vector<Tensor> produced_tensors,
+                                       std::vector<Tensor> expected_tensors,
+                                       bool compare_order) {
+  if (produced_tensors.size() != expected_tensors.size()) {
+    return Status(tensorflow::errors::Internal(
+        "The two tensor vectors have different size (", produced_tensors.size(),
+        " v.s. ", expected_tensors.size(), ")"));
+  }
+
+  if (produced_tensors.empty()) return Status::OK();
+  if (produced_tensors[0].dtype() != expected_tensors[0].dtype()) {
+    return Status(tensorflow::errors::Internal(
+        "The two tensor vectors have different dtypes (",
+        produced_tensors[0].dtype(), " v.s. ", expected_tensors[0].dtype(),
+        ")"));
+  }
+
+  if (!compare_order) {
+    const DataType& dtype = produced_tensors[0].dtype();
+    switch (dtype) {
+#define CASE(DT)                                                \
+  case DT:                                                      \
+    std::sort(produced_tensors.begin(), produced_tensors.end(), \
+              compare<EnumToDataType<DT>::Type>);               \
+    std::sort(expected_tensors.begin(), expected_tensors.end(), \
+              compare<EnumToDataType<DT>::Type>);               \
+    break;
+      CASE(DT_FLOAT);
+      CASE(DT_DOUBLE);
+      CASE(DT_INT32);
+      CASE(DT_UINT8);
+      CASE(DT_INT16);
+      CASE(DT_INT8);
+      CASE(DT_STRING);
+      CASE(DT_INT64);
+      CASE(DT_BOOL);
+      CASE(DT_QINT8);
+      CASE(DT_QUINT8);
+      CASE(DT_QINT32);
+      CASE(DT_QINT16);
+      CASE(DT_QUINT16);
+      CASE(DT_UINT16);
+      CASE(DT_HALF);
+      CASE(DT_UINT32);
+      CASE(DT_UINT64);
+      // TODO(feihugis): support other dtypes.
+#undef CASE
+      default:
+        return errors::Internal("Unsupported dtype: ", dtype);
+    }
+  }
+
+  for (int i = 0; i < produced_tensors.size(); ++i) {
+    TF_RETURN_IF_ERROR(DatasetOpsTestBase::ExpectEqual(produced_tensors[i],
+                                                       expected_tensors[i]));
+  }
+  return Status::OK();
+}
+
+Status DatasetOpsTestBase::CreateTensorSliceDatasetKernel(
+    StringPiece node_name, const DataTypeVector& dtypes,
+    const std::vector<PartialTensorShape>& shapes,
+    std::unique_ptr<OpKernel>* tensor_slice_dataset_kernel) {
+  std::vector<string> components;
+  components.reserve(dtypes.size());
+  for (int i = 0; i < dtypes.size(); ++i) {
+    // Create the placeholder names for the input components of
+    // `TensorSliceDataset`.
+    components.emplace_back(strings::StrCat("component_", i));
+  }
+  NodeDef node_def = test::function::NDef(
+      node_name, "TensorSliceDataset", components,
+      {{"Toutput_types", dtypes}, {"output_shapes", shapes}});
+  TF_RETURN_IF_ERROR(CreateOpKernel(node_def, tensor_slice_dataset_kernel));
+  return Status::OK();
+}
+
+Status DatasetOpsTestBase::CreateTensorSliceDataset(
+    StringPiece node_name, std::vector<Tensor>* const components,
+    DatasetBase** tensor_slice_dataset) {
+  std::unique_ptr<OpKernel> tensor_slice_dataset_kernel;
+  DataTypeVector dtypes;
+  dtypes.reserve(components->size());
+  std::vector<PartialTensorShape> shapes;
+  shapes.reserve(components->size());
+  for (const auto& t : *components) {
+    dtypes.push_back(t.dtype());
+    gtl::InlinedVector<int64, 4> partial_dim_sizes;
+    for (int i = 1; i < t.dims(); ++i) {
+      partial_dim_sizes.push_back(t.dim_size(i));
+    }
+    shapes.emplace_back(std::move(partial_dim_sizes));
+  }
+  TF_RETURN_IF_ERROR(CreateTensorSliceDatasetKernel(
+      node_name, dtypes, shapes, &tensor_slice_dataset_kernel));
+  gtl::InlinedVector<TensorValue, 4> inputs;
+  for (auto& tensor : *components) {
+    inputs.emplace_back(&tensor);
+  }
+  TF_RETURN_IF_ERROR(CheckOpKernelInput(*tensor_slice_dataset_kernel, inputs));
+  std::unique_ptr<OpKernelContext> context;
+  TF_RETURN_IF_ERROR(CreateOpKernelContext(tensor_slice_dataset_kernel.get(),
+                                           &inputs, &context));
+  TF_RETURN_IF_ERROR(
+      RunOpKernel(tensor_slice_dataset_kernel.get(), context.get()));
+  TF_RETURN_IF_ERROR(
+      GetDatasetFromContext(context.get(), 0, tensor_slice_dataset));
+  return Status::OK();
+}
+
+// Create a `RangeDataset` dataset as a variant tensor.
+Status DatasetOpsTestBase::MakeRangeDataset(
+    const Tensor& start, const Tensor& stop, const Tensor& step,
+    const DataTypeVector& output_types,
+    const std::vector<PartialTensorShape>& output_shapes,
+    Tensor* range_dataset) {
+  GraphConstructorOptions graph_opts;
+  graph_opts.allow_internal_ops = true;
+  graph_opts.expect_device_spec = false;
+  TF_RETURN_IF_ERROR(
+      RunFunction(test::function::MakeRangeDataset(),
+                  /*attrs*/
+                  {{RangeDatasetOp::kOutputTypes, output_types},
+                   {RangeDatasetOp::kOutputShapes, output_shapes}},
+                  /*inputs*/ {start, stop, step}, graph_opts,
+                  /*rets*/ {range_dataset}));
+  return Status::OK();
+}
+
+// Create a `RangeDataset` dataset as a variant tensor.
+Status DatasetOpsTestBase::MakeRangeDataset(
+    const RangeDatasetParams& range_dataset_params, Tensor* range_dataset) {
+  GraphConstructorOptions graph_opts;
+  graph_opts.allow_internal_ops = true;
+  graph_opts.expect_device_spec = false;
+  TF_RETURN_IF_ERROR(RunFunction(
+      test::function::MakeRangeDataset(),
+      /*attrs*/
+      {{RangeDatasetOp::kOutputTypes, range_dataset_params.output_dtypes},
+       {RangeDatasetOp::kOutputShapes, range_dataset_params.output_shapes}},
+      /*inputs*/
+      {range_dataset_params.start, range_dataset_params.stop,
+       range_dataset_params.step},
+      graph_opts,
+      /*rets*/ {range_dataset}));
+  return Status::OK();
+}
+
+// Create a `TakeDataset` dataset as a variant tensor.
+Status DatasetOpsTestBase::MakeTakeDataset(
+    const Tensor& input_dataset, int64 count,
+    const DataTypeVector& output_types,
+    const std::vector<PartialTensorShape>& output_shapes,
+    Tensor* take_dataset) {
+  GraphConstructorOptions graph_opts;
+  graph_opts.allow_internal_ops = true;
+  graph_opts.expect_device_spec = false;
+
+  Tensor count_tensor = CreateTensor<int64>(TensorShape({}), {count});
+  TF_RETURN_IF_ERROR(
+      RunFunction(test::function::MakeTakeDataset(),
+                  /*attrs*/
+                  {{TakeDatasetOp::kOutputTypes, output_types},
+                   {TakeDatasetOp::kOutputShapes, output_shapes}},
+                  /*inputs*/ {input_dataset, count_tensor}, graph_opts,
+                  /*rets*/ {take_dataset}));
+  return Status::OK();
+}
+
+Status DatasetOpsTestBase::CreateOpKernel(
+    const NodeDef& node_def, std::unique_ptr<OpKernel>* op_kernel) {
+  OpKernel* kernel;
+  TF_RETURN_IF_ERROR(tensorflow::CreateOpKernel(device_type_, device_.get(),
+                                                allocator_, flr_, node_def,
+                                                TF_GRAPH_DEF_VERSION, &kernel));
+  op_kernel->reset(kernel);
+  return Status::OK();
+}
+
+Status DatasetOpsTestBase::CreateDatasetContext(
+    OpKernel* const dataset_kernel,
+    gtl::InlinedVector<TensorValue, 4>* const inputs,
+    std::unique_ptr<OpKernelContext>* dataset_context) {
+  TF_RETURN_IF_ERROR(CheckOpKernelInput(*dataset_kernel, *inputs));
+  TF_RETURN_IF_ERROR(
+      CreateOpKernelContext(dataset_kernel, inputs, dataset_context));
+  return Status::OK();
+}
+
+Status DatasetOpsTestBase::CreateDataset(OpKernel* kernel,
+                                         OpKernelContext* context,
+                                         DatasetBase** const dataset) {
+  TF_RETURN_IF_ERROR(RunOpKernel(kernel, context));
+  // Assume that DatasetOp has only one output.
+  DCHECK_EQ(context->num_outputs(), 1);
+  TF_RETURN_IF_ERROR(GetDatasetFromContext(context, 0, dataset));
+  return Status::OK();
+}
+
+Status DatasetOpsTestBase::RestoreIterator(
+    IteratorContext* ctx, IteratorStateReader* reader,
+    const string& output_prefix, const DatasetBase& dataset,
+    std::unique_ptr<IteratorBase>* iterator) {
+  TF_RETURN_IF_ERROR(dataset.MakeIterator(ctx, output_prefix, iterator));
+  TF_RETURN_IF_ERROR((*iterator)->Restore(ctx, reader));
+  return Status::OK();
+}
+
+Status DatasetOpsTestBase::CreateIteratorContext(
+    OpKernelContext* const op_context,
+    std::unique_ptr<IteratorContext>* iterator_context) {
+  IteratorContext::Params params(op_context);
+  params.resource_mgr = op_context->resource_manager();
+  function_handle_cache_ = absl::make_unique<FunctionHandleCache>(flr_);
+  params.function_handle_cache = function_handle_cache_.get();
+  params.cancellation_manager = cancellation_manager_.get();
+  *iterator_context = absl::make_unique<IteratorContext>(params);
+  return Status::OK();
+}
+
+Status DatasetOpsTestBase::GetDatasetFromContext(OpKernelContext* context,
+                                                 int output_index,
+                                                 DatasetBase** const dataset) {
+  Tensor* output = context->mutable_output(output_index);
+  Status status = GetDatasetFromVariantTensor(*output, dataset);
+  (*dataset)->Ref();
+  return status;
+}
+
+Status DatasetOpsTestBase::InitThreadPool(int thread_num) {
+  if (thread_num < 1) {
+    return errors::InvalidArgument(
+        "The `thread_num` argument should be positive but got: ", thread_num);
+  }
+  thread_pool_ = absl::make_unique<thread::ThreadPool>(
+      Env::Default(), ThreadOptions(), "test_thread_pool", thread_num);
+  return Status::OK();
+}
+
+Status DatasetOpsTestBase::InitFunctionLibraryRuntime(
+    const std::vector<FunctionDef>& flib, int cpu_num) {
+  if (cpu_num < 1) {
+    return errors::InvalidArgument(
+        "The `cpu_num` argument should be positive but got: ", cpu_num);
+  }
+  SessionOptions options;
+  auto* device_count = options.config.mutable_device_count();
+  device_count->insert({"CPU", cpu_num});
+  std::vector<std::unique_ptr<Device>> devices;
+  TF_RETURN_IF_ERROR(DeviceFactory::AddDevices(
+      options, "/job:localhost/replica:0/task:0", &devices));
+  device_mgr_ = absl::make_unique<DeviceMgr>(std::move(devices));
+  resource_mgr_ = absl::make_unique<ResourceMgr>("default_container");
+
+  FunctionDefLibrary proto;
+  for (const auto& fdef : flib) *(proto.add_function()) = fdef;
+  lib_def_ =
+      absl::make_unique<FunctionLibraryDefinition>(OpRegistry::Global(), proto);
+
+  OptimizerOptions opts;
+  pflr_ = absl::make_unique<ProcessFunctionLibraryRuntime>(
+      device_mgr_.get(), Env::Default(), TF_GRAPH_DEF_VERSION, lib_def_.get(),
+      opts, thread_pool_.get(), nullptr /* cluster_flr */);
+  flr_ = pflr_->GetFLR("/job:localhost/replica:0/task:0/cpu:0");
+  if (thread_pool_ == nullptr) {
+    runner_ = [](std::function<void()> fn) { fn(); };
+  } else {
+    runner_ = [this](std::function<void()> fn) {
+      thread_pool_->Schedule(std::move(fn));
+    };
+  }
+  return Status::OK();
+}
+
+Status DatasetOpsTestBase::RunOpKernel(OpKernel* op_kernel,
+                                       OpKernelContext* context) {
+  device_->Compute(op_kernel, context);
+  return context->status();
+}
+
+Status DatasetOpsTestBase::RunFunction(
+    const FunctionDef& fdef, test::function::Attrs attrs,
+    const std::vector<Tensor>& args,
+    const GraphConstructorOptions& graph_options, std::vector<Tensor*> rets) {
+  std::unique_ptr<Executor> exec;
+  InstantiationResult result;
+  auto GetOpSig = [](const string& op, const OpDef** sig) {
+    return OpRegistry::Global()->LookUpOpDef(op, sig);
+  };
+  TF_RETURN_IF_ERROR(InstantiateFunction(fdef, attrs, GetOpSig, &result));
+
+  DataTypeVector arg_types = result.arg_types;
+  DataTypeVector ret_types = result.ret_types;
+
+  std::unique_ptr<Graph> g(new Graph(OpRegistry::Global()));
+  TF_RETURN_IF_ERROR(
+      ConvertNodeDefsToGraph(graph_options, result.nodes, g.get()));
+
+  const int version = g->versions().producer();
+  LocalExecutorParams params;
+  params.function_library = flr_;
+  params.device = device_.get();
+  params.create_kernel = [this, version](const NodeDef& ndef,
+                                         OpKernel** kernel) {
+    return CreateNonCachedKernel(device_.get(), this->flr_, ndef, version,
+                                 kernel);
+  };
+  params.delete_kernel = [](OpKernel* kernel) {
+    DeleteNonCachedKernel(kernel);
+  };
+  params.rendezvous_factory = [](const int64, const DeviceMgr* device_mgr,
+                                 Rendezvous** r) {
+    *r = new IntraProcessRendezvous(device_mgr);
+    return Status::OK();
+  };
+
+  Executor* cur_exec;
+  TF_RETURN_IF_ERROR(NewLocalExecutor(params, std::move(g), &cur_exec));
+  exec.reset(cur_exec);
+  FunctionCallFrame frame(arg_types, ret_types);
+  TF_RETURN_IF_ERROR(frame.SetArgs(args));
+  Executor::Args exec_args;
+  exec_args.call_frame = &frame;
+  exec_args.runner = runner_;
+  TF_RETURN_IF_ERROR(exec->Run(exec_args));
+  std::vector<Tensor> computed;
+  TF_RETURN_IF_ERROR(frame.GetRetvals(&computed));
+  if (computed.size() != rets.size()) {
+    return errors::InvalidArgument(
+        "The result does not match the expected number of return outputs",
+        ". Expected: ", rets.size(), ". Actual: ", computed.size());
Actual: ", computed.size()); + } + for (int i = 0; i < rets.size(); ++i) { + *(rets[i]) = computed[i]; + } + return Status::OK(); +} + +Status DatasetOpsTestBase::CreateOpKernelContext( + OpKernel* kernel, gtl::InlinedVector* inputs, + std::unique_ptr* context) { + params_ = absl::make_unique(); + cancellation_manager_ = absl::make_unique(); + params_->cancellation_manager = cancellation_manager_.get(); + params_->device = device_.get(); + params_->frame_iter = FrameAndIter(0, 0); + params_->function_library = flr_; + params_->inputs = inputs; + params_->op_kernel = kernel; + params_->resource_manager = resource_mgr_.get(); + params_->runner = &runner_; + checkpoint::TensorSliceReaderCacheWrapper slice_reader_cache_wrapper; + slice_reader_cache_ = + absl::make_unique(); + params_->slice_reader_cache = slice_reader_cache_.get(); + step_container_ = + absl::make_unique(0, [](const string&) {}); + params_->step_container = step_container_.get(); + + // Set the allocator attributes for the outputs. + allocator_attrs_.clear(); + for (int index = 0; index < params_->op_kernel->num_outputs(); index++) { + AllocatorAttributes attr; + const bool on_host = + (params_->op_kernel->output_memory_types()[index] == HOST_MEMORY); + attr.set_on_host(on_host); + allocator_attrs_.emplace_back(attr); + } + params_->output_attr_array = gtl::vector_as_array(&allocator_attrs_); + + *context = absl::make_unique(params_.get()); + return Status::OK(); +} + +Status DatasetOpsTestBase::CreateSerializationContext( + std::unique_ptr* context) { + *context = + absl::make_unique(SerializationContext::Params{}); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckOpKernelInput( + const OpKernel& kernel, const gtl::InlinedVector& inputs) { + if (kernel.input_types().size() != inputs.size()) { + cout<<"++"<* inputs, DataTypeVector input_types, + DataType dtype, const TensorShape& shape) { + if (input_types.size() < inputs->size()) { + return errors::InvalidArgument("Adding more inputs than types: ", + inputs->size(), " vs. ", input_types.size()); + } + bool is_ref = IsRefType(input_types[inputs->size()]); + std::unique_ptr input = + absl::make_unique(allocator_, dtype, shape); + + if (is_ref) { + DataType expected_dtype = RemoveRefType(input_types[inputs->size()]); + if (expected_dtype != dtype) { + return errors::InvalidArgument("The input data type is ", dtype, + " , but expected: ", expected_dtype); + } + inputs->push_back({&lock_for_refs_, input.get()}); + } else { + if (input_types[inputs->size()] != dtype) { + return errors::InvalidArgument( + "The input data type is ", dtype, + " , but expected: ", input_types[inputs->size()]); + } + inputs->push_back({nullptr, input.get()}); + } + + // TODO(jsimsa): Figure out how to avoid using a member variable to garbage + // collect the inputs. 
+ tensors_.push_back(std::move(input)); + + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckIteratorGetNext( + const std::vector& expected_outputs, bool compare_order) { + bool end_of_sequence = false; + std::vector out_tensors; + while (!end_of_sequence) { + std::vector next; + TF_RETURN_IF_ERROR( + iterator_->GetNext(iterator_ctx_.get(), &next, &end_of_sequence)); + out_tensors.insert(out_tensors.end(), next.begin(), next.end()); + } + + TF_EXPECT_OK(ExpectEqual(out_tensors, expected_outputs, + /*compare_order=*/compare_order)); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckDatasetNodeName( + const string& expected_dataset_node_name) { + EXPECT_EQ(dataset_->node_name(), expected_dataset_node_name); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckDatasetTypeString( + const string& expected_type_str) { + EXPECT_EQ(dataset_->type_string(), expected_type_str); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckDatasetOutputDtypes( + const DataTypeVector& expected_output_dtypes) { + TF_EXPECT_OK( + VerifyTypesMatch(dataset_->output_dtypes(), expected_output_dtypes)); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckDatasetOutputShapes( + const std::vector& expected_output_shapes) { + TF_EXPECT_OK(VerifyShapesCompatible(dataset_->output_shapes(), + expected_output_shapes)); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckDatasetCardinality(int expected_cardinality) { + EXPECT_EQ(dataset_->Cardinality(), expected_cardinality); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckIteratorOutputDtypes( + const DataTypeVector& expected_output_dtypes) { + TF_EXPECT_OK( + VerifyTypesMatch(iterator_->output_dtypes(), expected_output_dtypes)); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckIteratorOutputShapes( + const std::vector& expected_output_shapes) { + TF_EXPECT_OK(VerifyShapesCompatible(iterator_->output_shapes(), + expected_output_shapes)); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckIteratorPrefix( + const string& expected_iterator_prefix) { + EXPECT_EQ(iterator_->prefix(), expected_iterator_prefix); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckIteratorSaveAndRestore( + const string& iterator_prefix, const std::vector& expected_outputs, + const std::vector& breakpoints) { + std::unique_ptr iterator; + TF_RETURN_IF_ERROR( + dataset_->MakeIterator(iterator_ctx_.get(), iterator_prefix, &iterator)); + std::unique_ptr serialization_ctx; + TF_RETURN_IF_ERROR(CreateSerializationContext(&serialization_ctx)); + bool end_of_sequence = false; + std::vector out_tensors; + int cur_iteration = 0; + auto expected_outputs_it = expected_outputs.begin(); + for (int breakpoint : breakpoints) { + VariantTensorData data; + VariantTensorDataWriter writer(&data); + TF_EXPECT_OK(iterator->Save(serialization_ctx.get(), &writer)); + TF_RETURN_IF_ERROR(writer.Flush()); + VariantTensorDataReader reader(&data); + TF_EXPECT_OK(RestoreIterator(iterator_ctx_.get(), &reader, iterator_prefix, + *dataset_, &iterator)); + + while (cur_iteration <= breakpoint) { + TF_RETURN_IF_ERROR(iterator->GetNext(iterator_ctx_.get(), &out_tensors, + &end_of_sequence)); + if (!end_of_sequence) { + EXPECT_NE(expected_outputs_it, expected_outputs.end()); + TF_EXPECT_OK(ExpectEqual(out_tensors.back(), *expected_outputs_it)); + expected_outputs_it++; + } + cur_iteration++; + } + + if (breakpoint >= expected_outputs.size()) { + EXPECT_TRUE(end_of_sequence); + EXPECT_EQ(expected_outputs_it, expected_outputs.end()); + } 
+  }
+  return Status::OK();
+}
+
+}  // namespace data
+}  // namespace tensorflow
diff --git a/tf_adapter/tests/st/kernels/testcase/dataset/function_testlib.cc b/tf_adapter/tests/st/kernels/testcase/dataset/function_testlib.cc
new file mode 100644
index 000000000..ad82bb6f6
--- /dev/null
+++ b/tf_adapter/tests/st/kernels/testcase/dataset/function_testlib.cc
@@ -0,0 +1,649 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/function_testlib.h"
+
+#include "tensorflow/core/framework/function.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/framework/versions.pb.h"
+#include "tensorflow/core/lib/core/threadpool.h"
+#include "tensorflow/core/public/version.h"
+
+namespace tensorflow {
+namespace test {
+namespace function {
+
+typedef FunctionDefHelper FDH;
+
+GraphDef GDef(gtl::ArraySlice<NodeDef> nodes,
+              gtl::ArraySlice<FunctionDef> funcs) {
+  GraphDef g;
+  VersionDef* versions = g.mutable_versions();
+  versions->set_producer(TF_GRAPH_DEF_VERSION);
+  versions->set_min_consumer(TF_GRAPH_DEF_VERSION_MIN_CONSUMER);
+  for (const auto& n : nodes) {
+    *(g.add_node()) = n;
+  }
+  auto lib = g.mutable_library();
+  for (const auto& f : funcs) {
+    *(lib->add_function()) = f;
+  }
+  return g;
+}
+
+// Helper to construct a NodeDef.
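+// For example (illustrative only; the names and device string below are
+// arbitrary):
+//   NodeDef n = NDef("add", "Add", {"x", "y"}, {{"T", DT_FLOAT}},
+//                    "/job:localhost/replica:0/task:0/device:CPU:0");
+// builds an `Add` node with two inputs, a `T` attr, and an explicit device.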
+NodeDef NDef(StringPiece name, StringPiece op, gtl::ArraySlice<string> inputs,
+             gtl::ArraySlice<std::pair<string, FDH::AttrValueWrapper>> attrs,
+             const string& device) {
+  NodeDef n;
+  n.set_name(string(name));
+  n.set_op(string(op));
+  for (const auto& in : inputs) n.add_input(in);
+  n.set_device(device);
+  for (auto na : attrs) n.mutable_attr()->insert({na.first, na.second.proto});
+  return n;
+}
+
+FunctionDef NonZero() {
+  return FDH::Define(
+      // Name
+      "NonZero",
+      // Args
+      {"x:T"},
+      // Return values
+      {"y:T"},
+      // Attr def
+      {"T:{float, double, int32, int64, string}"},
+      // Nodes
+      {
+          {{"y"}, "Identity", {"x"}, {{"T", "$T"}}},
+      });
+}
+
+FunctionDef IsZero() {
+  const Tensor kZero = test::AsScalar<int64>(0);
+  return FDH::Define(
+      // Name
+      "IsZero",
+      // Args
+      {"x: T"},
+      // Return values
+      {"equal: bool"},
+      // Attr def
+      {"T:{float, double, int32, int64, string}"},
+      {
+          {{"zero"}, "Const", {}, {{"value", kZero}, {"dtype", DT_INT64}}},
+          {{"cast"}, "Cast", {"zero"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}},
+          {{"equal"}, "Equal", {"x", "cast"}, {{"T", "$T"}}},
+      });
+}
+
+FunctionDef RandomUniform() {
+  const Tensor kZero = test::AsScalar<int64>(0);
+
+  return FDH::Define(
+      // Name
+      "RandomUniform",
+      // Args
+      {"x: T"},
+      // Return values
+      {"random_uniform: int64"},
+      // Attr def
+      {"T:{float, double, int32, int64, string}"},
+      {{{"random_uniform/shape"},
+        "Const",
+        {},
+        {{"value", kZero}, {"dtype", DT_INT64}}},
+       {{"random_uniform"},
+        "RandomUniform",
+        {"random_uniform/shape"},
+        {{"T", DT_INT32},
+         {"Tout", DT_FLOAT},
+         {"seed", 87654321},
+         {"seed2", 42}}}});
+}
+
+FunctionDef XTimesTwo() {
+  const Tensor kTwo = test::AsScalar<int64>(2);
+  return FDH::Define(
+      // Name
+      "XTimesTwo",
+      // Args
+      {"x: T"},
+      // Return values
+      {"y: T"},
+      // Attr def
+      {"T: {float, double, int32, int64}"},
+      // Nodes
+      {
+          {{"two"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_INT64}}},
+          {{"scale"}, "Cast", {"two"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}},
+          {{"y"}, "Mul", {"x", "scale"}, {{"T", "$T"}}},
+      });
+}
+
+FunctionDef TwoDeviceMult() {
+  const Tensor kTwo = test::AsScalar<int64>(2);
+  const Tensor kThree = test::AsScalar<int64>(3);
+  return FDH::Create(
+      // Name
+      "TwoDeviceMult",
+      // Args
+      {"x: T"},
+      // Return values
+      {"y_cpu: T", "y_gpu: T"},
+      // Attr def
+      {"T: {float, double, int32, int64}"},
+      // Nodes
+      {
+          {{"num_2"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_INT64}}},
+          {{"num_3"}, "Const", {}, {{"value", kThree}, {"dtype", DT_INT64}}},
+          {{"factor_2"},
+           "Cast",
+           {"num_2:output:0"},
+           {{"SrcT", DT_INT64}, {"DstT", "$T"}}},
+          {{"factor_3"},
+           "Cast",
+           {"num_3:output:0"},
+           {{"SrcT", DT_INT64}, {"DstT", "$T"}}},
+          {{"y_cpu"},
+           "Mul",
+           {"x", "factor_2:y:0"},
+           {{"T", "$T"}},
+           {},
+           "/device:CPU:0"},
+          {{"y_gpu"},
+           "Mul",
+           {"x", "factor_3:y:0"},
+           {{"T", "$T"}},
+           {},
+           "/device:GPU:0"},
+      },
+      {{"y_cpu", "y_cpu:z:0"}, {"y_gpu", "y_gpu:z:0"}});
+}
+
+FunctionDef TwoDeviceInputOutput() {
+  const Tensor kTwo = test::AsScalar<float>(2);
+  const Tensor kThree = test::AsScalar<float>(3);
+  return FDH::Create(
+      // Name
+      "TwoDeviceInputOutput",
+      // Args
+      {"x1: T", "x2: T"},
+      // Return values
+      {"y_cpu: T", "y_gpu: T"},
+      // Attr def
+      {"T: {float}"},
+      // Nodes
+      {
+          {{"num_2"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_FLOAT}}},
+          {{"num_3"}, "Const", {}, {{"value", kThree}, {"dtype", DT_FLOAT}}},
+          {{"y_cpu"},
+           "Mul",
+           {"x1", "num_2:output:0"},
+           {{"T", "$T"}},
+           {},
+           "/device:CPU:0"},
+          {{"y_gpu"},
+           "Mul",
+           {"x2", "num_3:output:0"},
+           {{"T", "$T"}},
+           {},
+           "/device:GPU:0"},
+      },
+      {{"y_cpu", "y_cpu:z:0"}, {"y_gpu", "y_gpu:z:0"}});
+}
+
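+// Note (added for readability): each node row above follows
+// FunctionDefHelper's {{outputs}, op, {inputs}, {attrs}, {control deps},
+// device} layout; the control-dependency list and device string are optional,
+// which is why the single-device helpers below omit them.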
+FunctionDef FuncWithListInput() {
+  const Tensor kTwo = test::AsScalar<float>(2);
+  return FDH::Create(
+      // Name
+      "FuncWithListInput",
+      // Args
+      {"x1: N * T"},
+      // Return values
+      {},
+      // Attr def
+      {"T: {float}", "N: int >= 1"},
+      // Nodes
+      {
+          {{"num_2"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_FLOAT}}},
+      },
+      {});
+}
+
+FunctionDef FuncWithListOutput() {
+  const Tensor kTwo = test::AsScalar<float>(2);
+  return FDH::Create(
+      // Name
+      "FuncWithListOutput",
+      // Args
+      {},
+      // Return values
+      {"y: N * T"},
+      // Attr def
+      {"T: {float}", "N: int >= 1"},
+      // Nodes
+      {
+          {{"num_2"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_FLOAT}}},
+      },
+      {{"y", "num_2:output:0"}});
+}
+
+FunctionDef XAddX() {
+  return FDH::Define(
+      // Name
+      "XAddX",
+      // Args
+      {"x: T"},
+      // Return values
+      {"y: T"},
+      // Attr def
+      {"T: {float, double, int32, int64}"},
+      // Nodes
+      {
+          {{"y"}, "Add", {"x", "x"}, {{"T", "$T"}}},
+      });
+}
+
+FunctionDef XAddY() {
+  return FDH::Define(
+      // Name
+      "XAddY",
+      // Args
+      {"x: T", "y: T"},
+      // Return values
+      {"z: T"},
+      // Attr def
+      {"T: {float, double, int32, int64}"},
+      // Nodes
+      {
+          {{"z"}, "Add", {"x", "y"}, {{"T", "$T"}}},
+      });
+}
+
+FunctionDef XTimesTwoInt32() {
+  const Tensor kTwo = test::AsScalar<int64>(2);
+  return FDH::Define(
+      // Name
+      "XTimesTwoInt32",
+      // Args
+      {"x: int32"},
+      // Return values
+      {"y: int32"}, {},
+      // Nodes
+      {
+          {{"two"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_INT64}}},
+          {{"scale"},
+           "Cast",
+           {"two"},
+           {{"SrcT", DT_INT64}, {"DstT", DT_INT32}}},
+          {{"y"}, "Mul", {"x", "scale"}, {{"T", DT_INT32}}},
+      });
+}
+
+FunctionDef XTimesFour() {
+  return FDH::Create(
+      // Name
+      "XTimesFour",
+      // Args
+      {"x: T"},
+      // Return values
+      {"y: T"},
+      // Attr def
+      {"T: {float, double, int32, int64}"},
+      // Nodes
+      {
+          {{"x2"}, "XTimesTwo", {"x"}, {{"T", "$T"}}},
+          {{"y"}, "XTimesTwo", {"x2:y:0"}, {{"T", "$T"}}},
+      },
+      {{"y", "y:y:0"}});
+}
+
+FunctionDef XTimes16() {
+  return FDH::Create(
+      // Name
+      "XTimes16",
+      // Args
+      {"x: T"},
+      // Return values
+      {"y: T"},
+      // Attr def
+      {"T: {float, double, int32, int64}"},
+      // Nodes
+      {
+          {{"x4"}, "XTimesFour", {"x"}, {{"T", "$T"}}},
+          {{"y"}, "XTimesFour", {"x4:y:0"}, {{"T", "$T"}}},
+      },
+      {{"y", "y:y:0"}});
+}
+
+FunctionDef WXPlusB() {
+  return FDH::Define(
+      // Name
+      "WXPlusB",
+      // Args
+      {"w: T", "x: T", "b: T"},
+      // Return values
+      {"y: T"},
+      // Attr def
+      {"T: {float, double}"},
+      // Nodes
+      {{{"mm"},
+        "MatMul",
+        {"w", "x"},
+        {{"T", "$T"},
+         {"transpose_a", false},
+         {"transpose_b", false},
+         {"_kernel", "eigen"}}},
+       {{"y"}, "Add", {"mm", "b"}, {{"T", "$T"}}}});
+}
+
+FunctionDef Swap() {
+  return FDH::Define(
+      // Name
+      "Swap",
+      // Args
+      {"i0: T", "i1: T"},
+      // Return values
+      {"o0: T", "o1: T"},
+      // Attr def
+      {"T: {float, double, resource}"},
+      // Nodes
+      {{{"o0"}, "Identity", {"i1"}, {{"T", "$T"}}},
+       {{"o1"}, "Identity", {"i0"}, {{"T", "$T"}}}});
+}
+
+FunctionDef EmptyBodySwap() {
+  return FDH::Create(
+      // Name
+      "EmptyBodySwap",
+      // Args
+      {"i0: T", "i1: T"},
+      // Return values
+      {"o0: T", "o1: T"},
+      // Attr def
+      {"T: {float, double, resource}"},
+      // Nodes
+      {},
+      // Output mapping
+      {{"o0", "i1"}, {"o1", "i0"}});
+}
+
+FunctionDef ResourceOutput() {
+  const Tensor kTwo = test::AsScalar<float>(2);
+  return FDH::Create(
+      // Name
+      "ResourceOutput",
+      // Args
+      {"x: float", "y: resource"},
+      // Return values
+      {"y_out: resource", "two_x: float"},
+      // Attr def
+      {},
+      // Nodes
+      {
+          {{"two"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_FLOAT}}},
+          {{"mul"}, "Mul", {"x", "two:output:0"}, {{"T", DT_FLOAT}}, {}},
+      },
+      {{"y_out", "y"}, {"two_x", "mul:z:0"}});
+}
+
+FunctionDef ResourceIdentity() {
+  return FDH::Create(
+      // Name
+      "ResourceIdentity",
+      // Args
+      {"x: resource"},
+      // Return values
+      {"y: resource"},
+      // Attr def
+      {},
+      // Nodes
+      {},
+      // Output mapping
+      {{"y", "x"}});
+}
+
+FunctionDef ReadResourceVariable() {
+  return FDH::Create(
+      // Name
+      "ReadResourceVariable",
+      // Args
+      {"x: resource"},
+      // Return values
+      {"y: float"},
+      // Attr def
+      {},
+      // Nodes
+      {
+          {{"read"}, "ReadVariableOp", {"x"}, {{"dtype", DT_FLOAT}}, {}},
+      },
+      {{"y", "read:value:0"}});
+}
+
+FunctionDef InvalidControlFlow() {
+  return FDH::Create(
+      // Name
+      "InvalidControlFlow",
+      // Args
+      {"i: int32"},
+      // Return values
+      {"o: int32"},
+      // Attr def
+      {},
+      // Nodes
+      {{{"enter"}, "Enter", {"i"}, {{"T", DT_INT32}, {"frame_name", "while"}}},
+       {{"add"}, "Add", {"enter:output", "i"}, {{"T", DT_INT32}}}},
+      // Output mapping
+      {{"o", "add:z"}});
+}
+
+FunctionDef LessThanOrEqualToN(int64 N) {
+  const Tensor kN = test::AsScalar<int64>(N);
+  return FDH::Define(
+      // Name
+      "LessThanOrEqualToN",
+      // Args
+      {"x: T"},
+      // Return values
+      {"z: bool"},
+      // Attr def
+      {"T: {float, double, int32, int64}"},
+      // Nodes
+      {
+          {{"N"}, "Const", {}, {{"value", kN}, {"dtype", DT_INT64}}},
+          {{"y"}, "Cast", {"N"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}},
+          {{"z"}, "LessEqual", {"x", "y"}, {{"T", "$T"}}},
+      });
+}
+
+FunctionDef XPlusOneXTimesY() {
+  const Tensor kOne = test::AsScalar<int64>(1);
+  return FDH::Define(
+      // Name
+      "XPlusOneXTimesY",
+      // Args
+      {"x: T", "y: T"},
+      // Return values
+      {"s: T", "t: T"},
+      // Attr def
+      {"T: {float, double, int32, int64}"},
+      // Nodes
+      {{{"one"}, "Const", {}, {{"value", kOne}, {"dtype", DT_INT64}}},
+       {{"increment"}, "Cast", {"one"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}},
+       {{"s"}, "Add", {"x", "increment"}, {{"T", "$T"}}},
+       {{"t"}, "Mul", {"x", "y"}, {{"T", "$T"}}}});
+}
+
+FunctionDef XYXLessThanOrEqualToN(int64 N) {
+  const Tensor kN = test::AsScalar<int64>(N);
+  return FDH::Define(
+      // Name
+      "XYXLessThanOrEqualToN",
+      // Args
+      {"x: T", "y: T"},
+      // Return values
+      {"z: bool"},
+      // Attr def
+      {"T: {float, double, int32, int64}"},
+      // Nodes
+      {
+          {{"N"}, "Const", {}, {{"value", kN}, {"dtype", DT_INT64}}},
+          {{"N1"}, "Cast", {"N"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}},
+          {{"z"}, "LessEqual", {"x", "N1"}, {{"T", "$T"}}},
+      });
+}
+
+FunctionDef RandomUniformLess() {
+  const Tensor kZero = test::AsScalar<int32>(0);
+  const Tensor kOne = test::AsScalar<int32>(1);
+  const Tensor k005 = test::AsScalar<float>(0.05);
+
+  return FDH::Define(
+      // Name
+      "RandomUniformLess",
+      // Args
+      {"arg0: int64"},
+      // Return values
+      {"strided_slice: bool"},
+      // Attr def
+      {"T:{float, double, int32, int64, string}"},
+      {{{"random_uniform/shape"},
+        "Const",
+        {},
+        {{"value", kZero}, {"dtype", DT_INT32}}},
+
+       {{"random_uniform/RandomUniform"},
+        "RandomUniform",
+        {"random_uniform/shape"},
+        {{"T", DT_INT32}, {"Tout", DT_FLOAT}, {"seed", 0}, {"seed2", 0}}},
+
+       {{"Less/y"}, "Const", {}, {{"value", k005}, {"dtype", DT_FLOAT}}},
+
+       {{"Less"},
+        "Less",
+        {"random_uniform/RandomUniform", "Less/y"},
+        {{"T", DT_FLOAT}}},
+
+       {{"strided_slice/stack"},
+        "Const",
+        {},
+        {{"value", kZero}, {"dtype", DT_INT32}}},
+
+       {{"strided_slice/stack_1"},
+        "Const",
+        {},
+        {{"value", kOne}, {"dtype", DT_INT32}}},
+
+       {{"strided_slice/stack_2"},
+        "Const",
+        {},
+        {{"value", kOne}, {"dtype", DT_INT32}}},
+
+       {{"strided_slice"},
+        "StridedSlice",
+        {"Less", "strided_slice/stack", "strided_slice/stack_1",
+         "strided_slice/stack_2"},
+        {{"Index", DT_INT32},
+         {"T", DT_BOOL},
+         {"begin_mask", 0},
+         {"ellipsis_mask", 0},
+         {"end_mask", 0},
+         {"new_axis_mask", 0},
+         {"shrink_axis_mask", 0}}}});
+}
+
+FunctionDef MakeRangeDataset() {
+  return FDH::Define(
+      // Name
+      "MakeRangeDataset",
+      // Args
+      {"start: int64", "stop: int64", "step: int64"},
+      // Return values
+      {"y:variant"},
+      // Attr def
+      {"output_types: list(type) >= 1", "output_shapes: list(shape) >= 1"},
+      // Nodes
+      {{{"y"},
+        "RangeDataset",
+        {"start", "stop", "step"},
+        {{"output_types", "$output_types"},
+         {"output_shapes", "$output_shapes"}}}});
+}
+
+FunctionDef MakeTakeDataset() {
+  return FDH::Define(
+      // Name
+      "TakeDataset",
+      // Args
+      {"input_dataset: variant", "count: int64"},
+      // Return values
+      {"y:variant"},
+      // Attr def
+      {"output_types: list(type) >= 1", "output_shapes: list(shape) >= 1"},
+      // Nodes
+      {{{"y"},
+        "TakeDataset",
+        {"input_dataset", "count"},
+        {{"output_types", "$output_types"},
+         {"output_shapes", "$output_shapes"}}}});
+}
+
+FunctionDef MakeTensorSliceDataset() {
+  return FDH::Define(
+      // Name
+      "MakeTensorSliceDataset",
+      // Args
+      {"x: Toutput_types"},
+      // Return values
+      {"y: variant"},
+      // Attr def
+      {"Toutput_types: list(type) >= 1", "output_shapes: list(shape) >= 1"},
+      // Nodes
+      {{{"y"},
+        "TensorSliceDataset",
+        {"x"},
+        {{"Toutput_types", "$Toutput_types"},
+         {"output_shapes", "$output_shapes"}}}});
+}
+
+FunctionDef Unique() {
+  return FDH::Create(
+      // Name
+      "GetUnique",
+      // Args
+      {"x:T"},
+      // Return values
+      {"y:T", "idx: out_idx"},
+      // Attr def
+      {"T: type", "out_idx: {int32, int64} = DT_INT32"},
+      // Nodes
+      {
+          {{"result"}, "Unique", {"x"}, {{"T", "$T"}, {"out_idx", "$out_idx"}}},
+      },
+      {{"y", "result:y:0"}, {"idx", "result:idx:0"}});
+}
+
+void FunctionTestSchedClosure(std::function<void()> fn) {
+  static thread::ThreadPool* w =
+      new thread::ThreadPool(Env::Default(), "Test", 8);
+  w->Schedule(std::move(fn));
+}
+
+}  // end namespace function
+}  // end namespace test
+}  // end namespace tensorflow
diff --git a/tf_adapter/tests/st/kernels/testcase/dataset/host_queue_dats_set_st.cc b/tf_adapter/tests/st/kernels/testcase/dataset/host_queue_dats_set_st.cc
new file mode 100644
index 000000000..05088e851
--- /dev/null
+++ b/tf_adapter/tests/st/kernels/testcase/dataset/host_queue_dats_set_st.cc
@@ -0,0 +1,203 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#define protected public
+#include "gtest/gtest.h"
+#include "tensorflow/core/graph/graph_def_builder.h"
+#include "tensorflow/core/kernels/data/dataset_test_base.h"
+class HostQueueDatasetOp;
+namespace tensorflow {
+namespace data {
+namespace {
+
+static constexpr char kNodeName[] = "host_queue_dataset";
+static constexpr const char *const kChannelName = "channel_name";
+static constexpr const char *const kOutputTypes = "output_types";
+static constexpr const char *const kOutputShapes = "output_shapes";
+
+class HostQueueDatasetOpTest : public DatasetOpsTestBase {
+ protected:
+  // Creates `TensorSliceDataset` variant tensor from the input vector of
+  // tensors.
+  Status CreateTensorSliceDatasetTensor(
+      std::vector<Tensor> *const tensor_vector, Tensor *dataset_tensor) {
+    DatasetBase *tensor_slice_dataset;
+    TF_RETURN_IF_ERROR(CreateTensorSliceDataset(
+        "tensor_slice_node", tensor_vector, &tensor_slice_dataset));
+    TF_RETURN_IF_ERROR(
+        StoreDatasetInVariantTensor(tensor_slice_dataset, dataset_tensor));
+    return Status::OK();
+  }
+
+  // Create a new `HostQueueDataset` op kernel.
+  Status CreateHostQueueDatasetKernel(
+      const DataTypeVector &output_types,
+      const std::vector<PartialTensorShape> &output_shapes,
+      std::unique_ptr<OpKernel> *op_kernel, std::string _local_rank_id) {
+    name_utils::OpNameParams params;
+
+    NodeDef node_def =
+        test::function::NDef(kNodeName, name_utils::OpName("HostQueue", params),
+                             {"geop_dataset", "input_dataset"},
+                             {{"channel_name", "channel_001"},
+                              {"output_types", output_types},
+                              {"_local_rank_id", _local_rank_id},
+                              {"_local_device_list", "{0,-1}"},
+                              {"output_shapes", output_shapes}});
+    TF_RETURN_IF_ERROR(CreateOpKernel(node_def, op_kernel));
+    return Status::OK();
+  }
+
+  // Create a new `HostQueueDataset` op kernel context.
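+  // (Added note: the helper below first validates the supplied inputs against
+  // the kernel's declared input types via CheckOpKernelInput, then wraps them
+  // in an OpKernelContext.)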
+  Status CreateHostQueueDatasetContext(
+      OpKernel *op_kernel, gtl::InlinedVector<TensorValue, 4> *const inputs,
+      std::unique_ptr<OpKernelContext> *context) {
+    TF_RETURN_IF_ERROR(CheckOpKernelInput(*op_kernel, *inputs));
+    TF_RETURN_IF_ERROR(CreateOpKernelContext(op_kernel, inputs, context));
+    return Status::OK();
+  }
+
+ protected:
+  virtual void SetUp() {}
+  virtual void TearDown() {}
+};
+
+struct TestCase {
+  std::vector<Tensor> input_tensors;
+  std::vector<Tensor> expected_outputs;
+  DataTypeVector expected_output_dtypes;
+  std::vector<PartialTensorShape> expected_output_shapes;
+};
+
+TestCase NormalizeTestCase() {
+  return {
+      /// input_tensors , expected_outputs , expected_output_dtypes,
+      /// expected_output_shapes
+      {CreateTensor<int64>(TensorShape{10, 1}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9})},
+      {CreateTensor<int64>(TensorShape{1}, {0})},
+      {DT_INT64},
+      {PartialTensorShape({1})},
+  };
+}
+
+TEST_F(HostQueueDatasetOpTest, iterator_getnext) {
+  int thread_num = 2, cpu_num = 2;
+  TF_ASSERT_OK(InitThreadPool(thread_num));
+  TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num));
+
+  const TestCase &test_case = NormalizeTestCase();
+  Tensor tensor_slice_dataset_tensor(DT_VARIANT, TensorShape({}));
+  std::vector<Tensor> inputs_for_tensor_slice_dataset = test_case.input_tensors;
+  TF_ASSERT_OK(CreateTensorSliceDatasetTensor(&inputs_for_tensor_slice_dataset,
+                                              &tensor_slice_dataset_tensor));
+
+  gtl::InlinedVector<TensorValue, 4> inputs_for_host_queue_dataset(
+      {TensorValue(&tensor_slice_dataset_tensor),
+       TensorValue(&tensor_slice_dataset_tensor)});
+
+  std::unique_ptr<OpKernel> host_queue_dataset_kernel;
+  TF_ASSERT_OK(CreateHostQueueDatasetKernel(test_case.expected_output_dtypes,
+                                            test_case.expected_output_shapes,
+                                            &host_queue_dataset_kernel, "-1"));
+  std::unique_ptr<OpKernelContext> host_queue_dataset_context;
+  TF_ASSERT_OK(CreateHostQueueDatasetContext(host_queue_dataset_kernel.get(),
+                                             &inputs_for_host_queue_dataset,
+                                             &host_queue_dataset_context));
+  DatasetBase *host_queue_dataset;
+  TF_ASSERT_OK(CreateDataset(host_queue_dataset_kernel.get(),
+                             host_queue_dataset_context.get(),
+                             &host_queue_dataset));
+  core::ScopedUnref scoped_unref(host_queue_dataset);
+
+  EXPECT_EQ(host_queue_dataset->node_name(), kNodeName);
+
+  host_queue_dataset->output_dtypes();
+  host_queue_dataset->output_shapes();
+  host_queue_dataset->DebugString();
+
+  SerializationContext context(SerializationContext::Params{});
+  GraphDefBuilder b;
+  DatasetBase::DatasetGraphDefBuilder db(&b);
+  Node *output;
+  host_queue_dataset->AsGraphDefInternal(&context, &db, &output);
+
+  std::unique_ptr<IteratorContext> iterator_context;
+  TF_ASSERT_OK(CreateIteratorContext(host_queue_dataset_context.get(),
+                                     &iterator_context));
+  std::unique_ptr<IteratorBase> iterator;
+  TF_ASSERT_OK(host_queue_dataset->MakeIterator(iterator_context.get(),
+                                                "Iterator", &iterator));
+
+  bool end_of_sequence = false;
+  std::vector<Tensor> out_tensors;
+  sleep(2);
+  TF_EXPECT_OK(iterator->GetNext(iterator_context.get(), &out_tensors,
+                                 &end_of_sequence));
+}
+
+TEST_F(HostQueueDatasetOpTest, iterator_getnext02) {
+  int thread_num = 2, cpu_num = 2;
+  TF_ASSERT_OK(InitThreadPool(thread_num));
+  TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num));
+
+  const TestCase &test_case = NormalizeTestCase();
+  Tensor tensor_slice_dataset_tensor(DT_VARIANT, TensorShape({}));
+  std::vector<Tensor> inputs_for_tensor_slice_dataset = test_case.input_tensors;
+  TF_ASSERT_OK(CreateTensorSliceDatasetTensor(&inputs_for_tensor_slice_dataset,
+                                              &tensor_slice_dataset_tensor));
+
+  gtl::InlinedVector<TensorValue, 4> inputs_for_host_queue_dataset(
+      {TensorValue(&tensor_slice_dataset_tensor),
+       TensorValue(&tensor_slice_dataset_tensor)});
+
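+  // HostQueueDataset takes two variant inputs, a GEOP dataset handle and the
+  // actual data source; as in the first test case, the same TensorSliceDataset
+  // tensor stands in for both here.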
+  std::unique_ptr<OpKernel> host_queue_dataset_kernel;
+  TF_ASSERT_OK(CreateHostQueueDatasetKernel(test_case.expected_output_dtypes,
+                                            test_case.expected_output_shapes,
+                                            &host_queue_dataset_kernel, "0"));
+  std::unique_ptr<OpKernelContext> host_queue_dataset_context;
+  TF_ASSERT_OK(CreateHostQueueDatasetContext(host_queue_dataset_kernel.get(),
+                                             &inputs_for_host_queue_dataset,
+                                             &host_queue_dataset_context));
+  DatasetBase *host_queue_dataset;
+  TF_ASSERT_OK(CreateDataset(host_queue_dataset_kernel.get(),
+                             host_queue_dataset_context.get(),
+                             &host_queue_dataset));
+  core::ScopedUnref scoped_unref(host_queue_dataset);
+
+  EXPECT_EQ(host_queue_dataset->node_name(), kNodeName);
+
+  host_queue_dataset->output_dtypes();
+  host_queue_dataset->output_shapes();
+  host_queue_dataset->DebugString();
+
+  SerializationContext context(SerializationContext::Params{});
+  GraphDefBuilder b;
+  DatasetBase::DatasetGraphDefBuilder db(&b);
+  Node *output;
+  host_queue_dataset->AsGraphDefInternal(&context, &db, &output);
+
+  std::unique_ptr<IteratorContext> iterator_context;
+  TF_ASSERT_OK(CreateIteratorContext(host_queue_dataset_context.get(),
+                                     &iterator_context));
+  std::unique_ptr<IteratorBase> iterator;
+  TF_ASSERT_OK(host_queue_dataset->MakeIterator(iterator_context.get(),
+                                                "Iterator", &iterator));
+
+  bool end_of_sequence = false;
+  std::vector<Tensor> out_tensors;
+  sleep(2);
+  TF_EXPECT_OK(iterator->GetNext(iterator_context.get(), &out_tensors,
+                                 &end_of_sequence));
+}
+
+}  // namespace
+}  // namespace data
+}  // namespace tensorflow
diff --git a/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc b/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc
index 2645fee89..6bc6eddec 100644
--- a/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc
+++ b/tf_adapter/tests/st/kernels/testcase/geop_npu_test.cc
@@ -142,6 +142,12 @@ TEST_F(GeOpTest, GeOpDynamicInputTest) {
   EXPECT_TRUE(attrs.find("_dynamic_input") != attrs.end());
   EXPECT_TRUE(!attrs["_dynamic_input"].s().empty());
 }
+TEST_F(GeOpTest, GeOpDynamicInputGetNextTest) {
+  NodeDef node_def;
+  std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_dynamic_input_lazy_recompile.pbtxt";
+  gtl::InlinedVector<TensorValue, 4> inputs;
+  EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp11_0").ok());
+}
 TEST_F(GeOpTest, GeOpDynamicInput1Test) {
   NodeDef node_def;
   std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_dynamic_execute.pbtxt";
@@ -294,6 +300,11 @@ TEST_F(GeOpTest, GeOpNpuOnnxGraphOpNoModelTest) {
   gtl::InlinedVector<TensorValue, 4> inputs{TensorValue(&in)};
   EXPECT_TRUE(GeOpRunGraphAsync(grph_pbtxt_path, inputs, node_def, "GeOp91_0").ok());
 }
-
+TEST_F(GeOpTest, GeOpDpOpTest) {
+  NodeDef node_def;
+  std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_dpop.pbtxt";
+  gtl::InlinedVector<TensorValue, 4> inputs;
+  EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp1_0_dp").ok());
+}
 }
 } //end tensorflow
\ No newline at end of file
diff --git a/tf_adapter/tests/st/kernels/testcase/infeed_outfeed_test.cc b/tf_adapter/tests/st/kernels/testcase/infeed_outfeed_test.cc
new file mode 100644
index 000000000..21bc49f19
--- /dev/null
+++ b/tf_adapter/tests/st/kernels/testcase/infeed_outfeed_test.cc
@@ -0,0 +1,76 @@
+#include "tensorflow/core/common_runtime/device_factory.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/public/version.h"
+#include <memory>
+#include "gtest/gtest.h"
+
+
+namespace tensorflow {
+namespace {
+
+#define TF_ASSERT_OK(statement) \
+  ASSERT_EQ(::tensorflow::Status::OK(), (statement))
+
+#define TF_EXPECT_OK(statement) \
+  EXPECT_EQ(::tensorflow::Status::OK(), (statement))
+
+class DummyDevice : public DeviceBase {
+ public:
+  DummyDevice(Env* env, bool save) : DeviceBase(env), save_(save) {}
+  bool RequiresRecordingAccessedTensors() const override { return save_; }
+  Allocator* GetAllocator(AllocatorAttributes /*attr*/) override { return cpu_allocator(); }
+ private:
+  bool save_;
+};
+}
+class InfeedOutfeedTest : public testing::Test {
+ protected:
+  virtual void SetUp() {}
+  virtual void TearDown() {}
+};
+
+TEST_F(InfeedOutfeedTest, LogSummaryTest) {
+  DataType data_type = DT_INT32;
+  std::initializer_list<int64> dims = {};
+  TensorShapeProto shape_proto;
+  TensorShape(dims).AsProto(&shape_proto);
+
+  std::string channel_name = "_npu_log";
+
+  NodeDef outfeed_node;
+  tensorflow::AttrValue output_shapes;
+  tensorflow::AttrValue output_types;
+  *(output_shapes.mutable_list()->add_shape()) = shape_proto;
+  *(output_shapes.mutable_list()->add_shape()) = shape_proto;
+  output_types.mutable_list()->add_type(DT_STRING);
+  output_types.mutable_list()->add_type(DT_INT32);
+  TF_ASSERT_OK(NodeDefBuilder("out_feed", "OutfeedDequeueOp")
+                   .Attr("channel_name", channel_name)
+                   .Attr("output_types", output_types)
+                   .Attr("output_shapes", output_shapes)
+                   .Finalize(&outfeed_node));
+
+  DeviceType device_type = DEVICE_CPU;
+  Env* env = Env::Default();
+  auto device = absl::make_unique<DummyDevice>(env, false);
+
+  Status status;
+  std::unique_ptr<OpKernel> op(CreateOpKernel(device_type, device.get(),
+                                              cpu_allocator(), outfeed_node,
+                                              TF_GRAPH_DEF_VERSION, &status));
+  TF_ASSERT_OK(status);
+
+  OpKernelContext::Params params;
+  params.device = device.get();
+  params.op_kernel = op.get();
+  std::unique_ptr<CancellationManager> cancellation_manager = absl::make_unique<CancellationManager>();
+  params.cancellation_manager = cancellation_manager.get();
+
+  OpKernelContext ctx(&params);
+  op->Compute(&ctx);
+  TF_EXPECT_OK(ctx.status());
+
+}
+} //end tensorflow
\ No newline at end of file
diff --git a/tf_adapter/tests/st/optimizers/testcase/dp_tf_ge_conversion_pass_test.cc b/tf_adapter/tests/st/optimizers/testcase/dp_tf_ge_conversion_pass_test.cc
index 5a69c0b3e..b77674cfb 100644
--- a/tf_adapter/tests/st/optimizers/testcase/dp_tf_ge_conversion_pass_test.cc
+++ b/tf_adapter/tests/st/optimizers/testcase/dp_tf_ge_conversion_pass_test.cc
@@ -6,6 +6,7 @@
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/logging.h"
+#include <stdlib.h>
 
 namespace tensorflow {
 namespace {
@@ -95,5 +96,16 @@ TEST_F(DpOptimizationPassTest, DatasetNotInDeviceTest) {
       "HostQueueDataset->DPGroupDataset;GEOPDataset->HostQueueDataset;DPGroupDataset->MakeIterator";
   EXPECT_EQ(DoRunDpOptimizationPassTest(), target_graph);
 }
+TEST_F(DpOptimizationPassTest, NewDatasetNotInDeviceTest) {
+  string org_graph_def_path = "tf_adapter/tests/ut/optimizers/pbtxt/dp_test_no_dataset_in_device.pbtxt";
+  setenv("IS_NEW", "1", true);
+  InitGraph(org_graph_def_path);
+  std::string target_graph = "Const->TensorSliceDataset;TensorSliceDataset->BatchDatasetV2;Const->BatchDatasetV2:1;"\
+      "Const->BatchDatasetV2:2;BatchDatasetV2->RepeatDataset;Const->RepeatDataset:1;RepeatDataset->OptimizeDataset;"\
+      "Const->OptimizeDataset:1;OptimizeDataset->ModelDataset;IteratorV2->MakeIterator:1;ModelDataset->HostQueueDataset:1;"\
+      "HostQueueDataset->DPGroupDataset;GEOPDataset->HostQueueDataset;DPGroupDataset->MakeIterator";
+  EXPECT_EQ(DoRunDpOptimizationPassTest(), target_graph);
+  unsetenv("IS_NEW");
+}
 } // end
namespace } // end tensorflow diff --git a/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc b/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc index 0d0b7c0a0..0ad6bfb3c 100644 --- a/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc +++ b/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc @@ -82,7 +82,6 @@ TEST_F(GePluginTest, MallocSharedMemOKTest) { EXPECT_EQ(ret, 0); } TEST_F(GePluginTest, NpuCloseTest) { - NpuAttrs::SetUseTdtStatus(0, true); NpuClose(); } TEST_F(GePluginTest, RdmaInitAndRegisterFail1Test) { diff --git a/tf_adapter/tests/ut/CMakeLists.txt b/tf_adapter/tests/ut/CMakeLists.txt index 0bc4fa6ed..f8e65d2c5 100644 --- a/tf_adapter/tests/ut/CMakeLists.txt +++ b/tf_adapter/tests/ut/CMakeLists.txt @@ -23,9 +23,11 @@ file(GLOB_RECURSE UT_SOURCES ${TFADAPTER_DIR}/tf_adapter/optimizers/*.cc ${TFADAPTER_DIR}/tf_adapter/util/*.cc ${TFADAPTER_DIR}/tf_adapter/kernels/*.cc + "optimizers/testcase/*.cc" "util/testcase/*.cc" "kernels/testcase/*.cc" + "kernels/testcase/dataset/*.cc" "common/testcase/*.cc" ) @@ -61,7 +63,6 @@ add_dependencies(tfadapter_utest aoe_tuning) target_link_libraries(tfadapter_utest PUBLIC $ - gtest gtest_main c_sec mmpa_stub indextransform_stub alog_stub datatransfer_stub ge_runner_stub + gtest gtest_main c_sec mmpa_stub indextransform_stub alog_stub datatransfer_stub ge_runner_stub ascendcl_stub ${PYTHON_LIB_PATH} -lrt -ldl -lgcov ) - diff --git a/tf_adapter/tests/ut/kernels/pbtxt/geop_dpop.pbtxt b/tf_adapter/tests/ut/kernels/pbtxt/geop_dpop.pbtxt new file mode 100644 index 000000000..2b67e2a6d --- /dev/null +++ b/tf_adapter/tests/ut/kernels/pbtxt/geop_dpop.pbtxt @@ -0,0 +1,697 @@ +node { + name: "GeOp1_0_dp" + op: "GeOp" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "Tin" + value { + list { + } + } + } + attr { + key: "Tout" + value { + list { + } + } + } + attr { + key: "_NpuOptimizer" + value { + s: "NpuOptimizer" + } + } + attr { + key: "_auto_tune_mode" + value { + s: "" + } + } + attr { + key: "_buffer_optimize" + value { + s: "l2_optimize" + } + } + attr { + key: "_compress_weight_conf" + value { + s: "" + } + } + attr { + key: "_debug_dir" + value { + s: "" + } + } + attr { + key: "_distribute_config" + value { + s: "" + } + } + attr { + key: "_do_npu_optimizer" + value { + s: "1" + } + } + attr { + key: "_dump_debug_mode" + value { + s: "all" + } + } + attr { + key: "_dump_mode" + value { + s: "output" + } + } + attr { + key: "_dump_path" + value { + s: "./" + } + } + attr { + key: "_dump_step" + value { + s: "1" + } + } + attr { + key: "_dynamic_dims" + value { + s: "" + } + } + attr { + key: "_dynamic_graph_execute_mode" + value { + s: "dynamic_execute" + } + } + attr { + key: "_dynamic_input" + value { + s: "0" + } + } + attr { + key: "_dynamic_node_type" + value { + s: "" + } + } + attr { + key: "_enable_compress_weight" + value { + s: "0" + } + } + attr { + key: "_enable_data_pre_proc" + value { + s: "0" + } + } + attr { + key: "_enable_dump" + value { + s: "1" + } + } + attr { + key: "_enable_dump_debug" + value { + s: "1" + } + } + attr { + key: "_enable_exception_dump" + value { + s: "" + } + } + attr { + key: "_enable_scope_fusion_passes" + value { + s: "" + } + } + attr { + key: "_enable_small_channel" + value { + s: "0" + } + } + attr { + key: "_fusion_switch_file" + value { + s: "" + } + } + attr { + key: "_graph_run_mode" + value { + s: "1" + } + } + attr { + key: "_hcom_multi_mode" + value { + s: "" + } + } + attr { + key: "_hcom_parallel" + value { + s: "0" + } + } + attr { + key: "_in_out_pair" + value { 
+ s: "" + } + } + attr { + key: "_in_out_pair_flag" + value { + s: "1" + } + } + attr { + key: "_input_shape" + value { + s: "" + } + } + attr { + key: "_is_tailing_optimization" + value { + s: "0" + } + } + attr { + key: "_iterations_per_loop" + value { + s: "1" + } + } + attr { + key: "_job" + value { + s: "localhost" + } + } + attr { + key: "_local_device_list" + value { + s: "" + } + } + attr { + key: "_local_rank_id" + value { + s: "-1" + } + } + attr { + key: "_lower_functional_ops" + value { + s: "0" + } + } + attr { + key: "_mix_compile_mode" + value { + s: "0" + } + } + attr { + key: "_mstune_mode" + value { + s: "" + } + } + attr { + key: "_op_compiler_cache_dir" + value { + s: "" + } + } + attr { + key: "_op_compiler_cache_mode" + value { + s: "" + } + } + attr { + key: "_op_debug_level" + value { + s: "0" + } + } + attr { + key: "_op_select_implmode" + value { + s: "" + } + } + attr { + key: "_op_tune_mode" + value { + s: "" + } + } + attr { + key: "_optypelist_for_implmode" + value { + s: "" + } + } + attr { + key: "_precision_mode" + value { + s: "" + } + } + attr { + key: "_profiling_mode" + value { + s: "0" + } + } + attr { + key: "_profiling_options" + value { + s: "" + } + } + attr { + key: "_session_device_id" + value { + s: "" + } + } + attr { + key: "_stream_max_parallel_num" + value { + s: "" + } + } + attr { + key: "_task_index" + value { + s: "0" + } + } + attr { + key: "_use_off_line" + value { + s: "1" + } + } + attr { + key: "_variable_format_optimize" + value { + s: "1" + } + } + attr { + key: "_work_path" + value { + s: "" + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } + attr { + key: "function" + value { + func { + name: "geop_function_D9x45pM0kZ0" + } + } + } +} +library { + function { + signature { + name: "geop_function_D9x45pM0kZ0" + } + node_def { + name: "dpop_function_FFvj93e0XnN" + op: "DPOP" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "Tin" + value { + list { + } + } + } + attr { + key: "Tout" + value { + list { + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } + attr { + key: "func_def" + value { + s: 
"\n\271\001\n;\n\037__inference_Dataset_map_func_11\022\n\n\006args_0\030\002\032\014\n\010identity\030\002\0326\n\005add/y\022\005Const*\031\n\005value\022\020B\016\010\002\022\0002\010\000\000\000\000\000\000\360?*\013\n\005dtype\022\0020\002\032-\n\003add\022\005AddV2\032\016add/y:output:0\032\006args_0*\007\n\001T\022\0020\002\"\023\n\010identity\022\007add:z:0\n\356\016\n\033\n\031dpop_function_FFvj93e0XnN\032p\n\nbatch_size\022\005Const\",/job:localhost/replica:0/task:0/device:CPU:0*\022\n\005value\022\tB\007\010\t\022\000R\001\002*\013\n\005dtype\022\0020\t2\014\n\nbatch_size\032x\n\016drop_remainder\022\005Const\",/job:localhost/replica:0/task:0/device:CPU:0*\022\n\005value\022\tB\007\010\n\022\000Z\001\000*\013\n\005dtype\022\0020\n2\020\n\016drop_remainder\032\365\002\n\nIteratorV2\022\nIteratorV2\",/job:localhost/replica:0/task:0/device:CPU:0*\033\n\024_iterations_per_loop\022\003\022\0011*\024\n\r_use_off_line\022\003\022\0011*\030\n\021_mix_compile_mode\022\003\022\0010*\021\n\013shared_name\022\002\022\000*\"\n\routput_shapes\022\021\n\017:\r\022\013\010\377\377\377\377\377\377\377\377\377\001*\037\n\006_class\022\025\n\023\022\021loc:@MakeIterator*\037\n\r_NpuOptimizer\022\016\022\014NpuOptimizer*\017\n\tcontainer\022\002\022\000*\025\n\014output_types\022\005\n\0032\001\002*\023\n\004_job\022\013\022\tlocalhost*\034\n\025_enable_data_pre_proc\022\003\022\00112\014\n\nIteratorV2\032\357\002\n\016BatchDatasetV2\022\016BatchDatasetV2\032^DeviceQueue_Queue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1:handle:0\032\023batch_size:output:0\032\027drop_remainder:output:0\",/job:localhost/replica:0/task:0/device:CPU:0*\017\n\007_kernel\022\004\022\002dp*\036\n\016_iterator_name\022\014\022\nIteratorV2*\025\n\014output_types\022\005\n\0032\001\002*\023\n\rparallel_copy\022\002(\000*\"\n\routput_shapes\022\021\n\017:\r\022\013\010\377\377\377\377\377\377\377\377\377\0012\020\n\016BatchDatasetV2\032\240\002\n\nMapDataset\022\nMapDataset\032\027BatchDatasetV2:handle:0\",/job:localhost/replica:0/task:0/device:CPU:0*\"\n\routput_shapes\022\021\n\017:\r\022\013\010\377\377\377\377\377\377\377\377\377\001*(\n\001f\022#R!\n\037__inference_Dataset_map_func_11*\036\n\030use_inter_op_parallelism\022\002(\001*\025\n\014output_types\022\005\n\0032\001\002*\020\n\nTarguments\022\002\n\000*\032\n\024preserve_cardinality\022\002(\0002\014\n\nMapDataset\032\265\001\n\014MakeIterator\022\014MakeIterator\032\023MapDataset:handle:0\032\023IteratorV2:handle:0\",/job:localhost/replica:0/task:0/device:CPU:0*\017\n\007_kernel\022\004\022\002dp*\036\n\016_iterator_name\022\014\022\nIteratorV22\016\n\014MakeIterator\032\235\003\nUDeviceQueue_Queue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1\022\022DeviceQueueDataset\",/job:localhost/replica:0/task:0/device:CPU:0*\025\n\routput_shapes\022\004\n\002:\000*[\n\014channel_name\022K\022IQueue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1*\036\n\016_iterator_name\022\014\022\nIteratorV2*\025\n\014output_types\022\005\n\0032\001\0022W\nUDeviceQueue_Queue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1" + } + } + attr { + key: "function" + value { + func { + name: "dpop_function_FFvj93e0XnN" + } + } + } + } + } + function { + signature { + name: "dpop_function_FFvj93e0XnN" + } + node_def { + name: "batch_size" + op: "Const" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { 
+ dtype: DT_INT64 + tensor_shape { + } + int64_val: 2 + } + } + } + experimental_debug_info { + original_node_names: "batch_size" + } + } + node_def { + name: "drop_remainder" + op: "Const" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_BOOL + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_BOOL + tensor_shape { + } + bool_val: false + } + } + } + experimental_debug_info { + original_node_names: "drop_remainder" + } + } + node_def { + name: "IteratorV2" + op: "IteratorV2" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_NpuOptimizer" + value { + s: "NpuOptimizer" + } + } + attr { + key: "_class" + value { + list { + s: "loc:@MakeIterator" + } + } + } + attr { + key: "_enable_data_pre_proc" + value { + s: "1" + } + } + attr { + key: "_iterations_per_loop" + value { + s: "1" + } + } + attr { + key: "_job" + value { + s: "localhost" + } + } + attr { + key: "_mix_compile_mode" + value { + s: "0" + } + } + attr { + key: "_use_off_line" + value { + s: "1" + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_DOUBLE + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } + experimental_debug_info { + original_node_names: "IteratorV2" + } + } + node_def { + name: "BatchDatasetV2" + op: "BatchDatasetV2" + input: "DeviceQueue_Queue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1:handle:0" + input: "batch_size:output:0" + input: "drop_remainder:output:0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_iterator_name" + value { + s: "IteratorV2" + } + } + attr { + key: "_kernel" + value { + s: "dp" + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_DOUBLE + } + } + } + attr { + key: "parallel_copy" + value { + b: false + } + } + experimental_debug_info { + original_node_names: "BatchDatasetV2" + } + } + node_def { + name: "MapDataset" + op: "MapDataset" + input: "BatchDatasetV2:handle:0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "Targuments" + value { + list { + } + } + } + attr { + key: "f" + value { + func { + name: "__inference_Dataset_map_func_11" + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_DOUBLE + } + } + } + attr { + key: "preserve_cardinality" + value { + b: false + } + } + attr { + key: "use_inter_op_parallelism" + value { + b: true + } + } + experimental_debug_info { + original_node_names: "MapDataset" + } + } + node_def { + name: "MakeIterator" + op: "MakeIterator" + input: "MapDataset:handle:0" + input: "IteratorV2:handle:0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_iterator_name" + value { + s: "IteratorV2" + } + } + attr { + key: "_kernel" + value { + s: "dp" + } + } + experimental_debug_info { + original_node_names: "MakeIterator" + } + } + node_def { + name: "DeviceQueue_Queue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1" + op: "DeviceQueueDataset" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_iterator_name" + value { + s: "IteratorV2" + } + } + attr { + key: "channel_name" + value { + s: 
"Queue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1" + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_DOUBLE + } + } + } + experimental_debug_info { + original_node_names: "DeviceQueue_Queue_Edge_from_TensorSliceDataset_out0_To_BatchDatasetV2_in0_FcR1b09W8p1" + } + } + } +} +versions { + producer: 134 +} diff --git a/tf_adapter/tests/ut/kernels/testcase/dataset/dataset_test_base.cc b/tf_adapter/tests/ut/kernels/testcase/dataset/dataset_test_base.cc new file mode 100644 index 000000000..ba2f05661 --- /dev/null +++ b/tf_adapter/tests/ut/kernels/testcase/dataset/dataset_test_base.cc @@ -0,0 +1,704 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/kernels/data/dataset_test_base.h" + +#include "tensorflow/core/common_runtime/executor.h" +#include "tensorflow/core/framework/cancellation.h" +#include "tensorflow/core/framework/versions.pb.h" +#include "tensorflow/core/lib/io/record_writer.h" +#include "iostream" +using namespace std; +namespace tensorflow { +namespace data { + +string ToString(CompressionType compression_type) { + switch (compression_type) { + case CompressionType::ZLIB: + return "ZLIB"; + case CompressionType::GZIP: + return "GZIP"; + case CompressionType::RAW: + return "RAW"; + case CompressionType::UNCOMPRESSED: + return ""; + } +} + +io::ZlibCompressionOptions GetZlibCompressionOptions( + CompressionType compression_type) { + switch (compression_type) { + case CompressionType::ZLIB: + return io::ZlibCompressionOptions::DEFAULT(); + case CompressionType::GZIP: + return io::ZlibCompressionOptions::GZIP(); + case CompressionType::RAW: + return io::ZlibCompressionOptions::RAW(); + case CompressionType::UNCOMPRESSED: + LOG(WARNING) << "ZlibCompressionOptions does not have an option for " + << ToString(compression_type); + return io::ZlibCompressionOptions::DEFAULT(); + } +} + +Status WriteDataToFile(const string& filename, const char* data) { + return WriteDataToFile(filename, data, CompressionParams()); +} + +Status WriteDataToFile(const string& filename, const char* data, + const CompressionParams& params) { + Env* env = Env::Default(); + std::unique_ptr file_writer; + TF_RETURN_IF_ERROR(env->NewWritableFile(filename, &file_writer)); + if (params.compression_type == CompressionType::UNCOMPRESSED) { + TF_RETURN_IF_ERROR(file_writer->Append(data)); + } else if (params.compression_type == CompressionType::ZLIB || + params.compression_type == CompressionType::GZIP || + params.compression_type == CompressionType::RAW) { + auto zlib_compression_options = + GetZlibCompressionOptions(params.compression_type); + io::ZlibOutputBuffer out(file_writer.get(), params.input_buffer_size, + params.output_buffer_size, + zlib_compression_options); + TF_RETURN_IF_ERROR(out.Init()); + TF_RETURN_IF_ERROR(out.Append(data)); + 
TF_RETURN_IF_ERROR(out.Flush()); + TF_RETURN_IF_ERROR(out.Close()); + } else { + return tensorflow::errors::InvalidArgument( + "Unsupported compression_type: ", ToString(params.compression_type)); + } + + TF_RETURN_IF_ERROR(file_writer->Flush()); + TF_RETURN_IF_ERROR(file_writer->Close()); + + return Status::OK(); +} + +Status WriteDataToTFRecordFile(const string& filename, + const std::vector& records, + const CompressionParams& params) { + Env* env = Env::Default(); + std::unique_ptr file_writer; + TF_RETURN_IF_ERROR(env->NewWritableFile(filename, &file_writer)); + auto options = io::RecordWriterOptions::CreateRecordWriterOptions( + ToString(params.compression_type)); + options.zlib_options.input_buffer_size = params.input_buffer_size; + io::RecordWriter record_writer(file_writer.get(), options); + for (const auto& record : records) { + TF_RETURN_IF_ERROR(record_writer.WriteRecord(record)); + } + TF_RETURN_IF_ERROR(record_writer.Flush()); + TF_RETURN_IF_ERROR(record_writer.Close()); + TF_RETURN_IF_ERROR(file_writer->Flush()); + TF_RETURN_IF_ERROR(file_writer->Close()); + return Status::OK(); +} + +template +Status IsEqual(const Tensor& t1, const Tensor& t2) { + if (t1.dtype() != t2.dtype()) { + return tensorflow::errors::Internal( + "Two tensors have different dtypes: ", DataTypeString(t1.dtype()), + " vs. ", DataTypeString(t2.dtype())); + } + if (!t1.IsSameSize(t2)) { + return tensorflow::errors::Internal( + "Two tensors have different shapes: ", t1.shape().DebugString(), + " vs. ", t2.shape().DebugString()); + } + + auto flat_t1 = t1.flat(); + auto flat_t2 = t2.flat(); + auto length = flat_t1.size(); + + for (int i = 0; i < length; ++i) { + if (flat_t1(i) != flat_t2(i)) { + return tensorflow::errors::Internal( + "Two tensors have different values " + "at [", + i, "]: ", flat_t1(i), " vs. ", flat_t2(i)); + } + } + return Status::OK(); +} + +Status DatasetOpsTestBase::ExpectEqual(const Tensor& a, const Tensor& b) { + switch (a.dtype()) { +#define CASE(DT) \ + case DataTypeToEnum
<DT>::value: \ + TF_RETURN_IF_ERROR(IsEqual<DT>
(a, b)); \ + break; + TF_CALL_NUMBER_TYPES(CASE); + TF_CALL_tstring(CASE); + TF_CALL_uint32(CASE); + TF_CALL_uint64(CASE); + // TODO(feihugis): figure out how to support variant tensors. +#undef CASE + default: + return errors::Internal("Unsupported dtype: ", a.dtype()); + } + return Status::OK(); +} + +template +bool compare(const Tensor& t1, const Tensor& t2) { + auto flat_t1 = t1.flat(); + auto flat_t2 = t2.flat(); + auto length = std::min(flat_t1.size(), flat_t2.size()); + for (int i = 0; i < length; ++i) { + if (flat_t1(i) < flat_t2(i)) return true; + if (flat_t1(i) > flat_t2(i)) return false; + } + return flat_t1.size() < length; +} + +Status DatasetOpsTestBase::ExpectEqual(std::vector produced_tensors, + std::vector expected_tensors, + bool compare_order) { + if (produced_tensors.size() != expected_tensors.size()) { + return Status(tensorflow::errors::Internal( + "The two tensor vectors have different size (", produced_tensors.size(), + " v.s. ", expected_tensors.size(), ")")); + } + + if (produced_tensors.empty()) return Status::OK(); + if (produced_tensors[0].dtype() != expected_tensors[0].dtype()) { + return Status(tensorflow::errors::Internal( + "The two tensor vectors have different dtypes (", + produced_tensors[0].dtype(), " v.s. ", expected_tensors[0].dtype(), + ")")); + } + + if (!compare_order) { + const DataType& dtype = produced_tensors[0].dtype(); + switch (dtype) { +#define CASE(DT) \ + case DT: \ + std::sort(produced_tensors.begin(), produced_tensors.end(), \ + compare::Type>); \ + std::sort(expected_tensors.begin(), expected_tensors.end(), \ + compare::Type>); \ + break; + CASE(DT_FLOAT); + CASE(DT_DOUBLE); + CASE(DT_INT32); + CASE(DT_UINT8); + CASE(DT_INT16); + CASE(DT_INT8); + CASE(DT_STRING); + CASE(DT_INT64); + CASE(DT_BOOL); + CASE(DT_QINT8); + CASE(DT_QUINT8); + CASE(DT_QINT32); + CASE(DT_QINT16); + CASE(DT_QUINT16); + CASE(DT_UINT16); + CASE(DT_HALF); + CASE(DT_UINT32); + CASE(DT_UINT64); + // TODO(feihugis): support other dtypes. +#undef CASE + default: + return errors::Internal("Unsupported dtype: ", dtype); + } + } + + for (int i = 0; i < produced_tensors.size(); ++i) { + TF_RETURN_IF_ERROR(DatasetOpsTestBase::ExpectEqual(produced_tensors[i], + expected_tensors[i])); + } + return Status::OK(); +} + +Status DatasetOpsTestBase::CreateTensorSliceDatasetKernel( + StringPiece node_name, const DataTypeVector& dtypes, + const std::vector& shapes, + std::unique_ptr* tensor_slice_dataset_kernel) { + std::vector components; + components.reserve(dtypes.size()); + for (int i = 0; i < dtypes.size(); ++i) { + // Create the placeholder names for the input components of + // `TensorSliceDataset`. 
+ components.emplace_back(strings::StrCat("component_", i)); + } + NodeDef node_def = test::function::NDef( + node_name, "TensorSliceDataset", components, + {{"Toutput_types", dtypes}, {"output_shapes", shapes}}); + TF_RETURN_IF_ERROR(CreateOpKernel(node_def, tensor_slice_dataset_kernel)); + return Status::OK(); +} + +Status DatasetOpsTestBase::CreateTensorSliceDataset( + StringPiece node_name, std::vector* const components, + DatasetBase** tensor_slice_dataset) { + std::unique_ptr tensor_slice_dataset_kernel; + DataTypeVector dtypes; + dtypes.reserve(components->size()); + std::vector shapes; + shapes.reserve(components->size()); + for (const auto& t : *components) { + dtypes.push_back(t.dtype()); + gtl::InlinedVector partial_dim_sizes; + for (int i = 1; i < t.dims(); ++i) { + partial_dim_sizes.push_back(t.dim_size(i)); + } + shapes.emplace_back(std::move(partial_dim_sizes)); + } + TF_RETURN_IF_ERROR(CreateTensorSliceDatasetKernel( + node_name, dtypes, shapes, &tensor_slice_dataset_kernel)); + gtl::InlinedVector inputs; + for (auto& tensor : *components) { + inputs.emplace_back(&tensor); + } + TF_RETURN_IF_ERROR(CheckOpKernelInput(*tensor_slice_dataset_kernel, inputs)); + std::unique_ptr context; + TF_RETURN_IF_ERROR(CreateOpKernelContext(tensor_slice_dataset_kernel.get(), + &inputs, &context)); + TF_RETURN_IF_ERROR( + RunOpKernel(tensor_slice_dataset_kernel.get(), context.get())); + TF_RETURN_IF_ERROR( + GetDatasetFromContext(context.get(), 0, tensor_slice_dataset)); + return Status::OK(); +} + +// Create a `RangeDataset` dataset as a variant tensor. +Status DatasetOpsTestBase::MakeRangeDataset( + const Tensor& start, const Tensor& stop, const Tensor& step, + const DataTypeVector& output_types, + const std::vector& output_shapes, + Tensor* range_dataset) { + GraphConstructorOptions graph_opts; + graph_opts.allow_internal_ops = true; + graph_opts.expect_device_spec = false; + TF_RETURN_IF_ERROR( + RunFunction(test::function::MakeRangeDataset(), + /*attrs*/ + {{RangeDatasetOp::kOutputTypes, output_types}, + {RangeDatasetOp::kOutputShapes, output_shapes}}, + /*inputs*/ {start, stop, step}, graph_opts, + /*rets*/ {range_dataset})); + return Status::OK(); +} + +// Create a `RangeDataset` dataset as a variant tensor. +Status DatasetOpsTestBase::MakeRangeDataset( + const RangeDatasetParams& range_dataset_params, Tensor* range_dataset) { + GraphConstructorOptions graph_opts; + graph_opts.allow_internal_ops = true; + graph_opts.expect_device_spec = false; + TF_RETURN_IF_ERROR(RunFunction( + test::function::MakeRangeDataset(), + /*attrs*/ + {{RangeDatasetOp::kOutputTypes, range_dataset_params.output_dtypes}, + {RangeDatasetOp::kOutputShapes, range_dataset_params.output_shapes}}, + /*inputs*/ + {range_dataset_params.start, range_dataset_params.stop, + range_dataset_params.step}, + graph_opts, + /*rets*/ {range_dataset})); + return Status::OK(); +} + +// Create a `TakeDataset` dataset as a variant tensor. 
+Status DatasetOpsTestBase::MakeTakeDataset( + const Tensor& input_dataset, int64 count, + const DataTypeVector& output_types, + const std::vector& output_shapes, + Tensor* take_dataset) { + GraphConstructorOptions graph_opts; + graph_opts.allow_internal_ops = true; + graph_opts.expect_device_spec = false; + + Tensor count_tensor = CreateTensor(TensorShape({}), {count}); + TF_RETURN_IF_ERROR( + RunFunction(test::function::MakeTakeDataset(), + /*attrs*/ + {{TakeDatasetOp::kOutputTypes, output_types}, + {TakeDatasetOp::kOutputShapes, output_shapes}}, + /*inputs*/ {input_dataset, count_tensor}, graph_opts, + /*rets*/ {take_dataset})); + return Status::OK(); +} + +Status DatasetOpsTestBase::CreateOpKernel( + const NodeDef& node_def, std::unique_ptr* op_kernel) { + OpKernel* kernel; + TF_RETURN_IF_ERROR(tensorflow::CreateOpKernel(device_type_, device_.get(), + allocator_, flr_, node_def, + TF_GRAPH_DEF_VERSION, &kernel)); + op_kernel->reset(kernel); + return Status::OK(); +} + +Status DatasetOpsTestBase::CreateDatasetContext( + OpKernel* const dateset_kernel, + gtl::InlinedVector* const inputs, + std::unique_ptr* dataset_context) { + TF_RETURN_IF_ERROR(CheckOpKernelInput(*dateset_kernel, *inputs)); + TF_RETURN_IF_ERROR( + CreateOpKernelContext(dateset_kernel, inputs, dataset_context)); + return Status::OK(); +} + +Status DatasetOpsTestBase::CreateDataset(OpKernel* kernel, + OpKernelContext* context, + DatasetBase** const dataset) { + TF_RETURN_IF_ERROR(RunOpKernel(kernel, context)); + // Assume that DatasetOp has only one output. + DCHECK_EQ(context->num_outputs(), 1); + TF_RETURN_IF_ERROR(GetDatasetFromContext(context, 0, dataset)); + return Status::OK(); +} + +Status DatasetOpsTestBase::RestoreIterator( + IteratorContext* ctx, IteratorStateReader* reader, + const string& output_prefix, const DatasetBase& dataset, + std::unique_ptr* iterator) { + TF_RETURN_IF_ERROR(dataset.MakeIterator(ctx, output_prefix, iterator)); + TF_RETURN_IF_ERROR((*iterator)->Restore(ctx, reader)); + return Status::OK(); +} + +Status DatasetOpsTestBase::CreateIteratorContext( + OpKernelContext* const op_context, + std::unique_ptr* iterator_context) { + IteratorContext::Params params(op_context); + params.resource_mgr = op_context->resource_manager(); + function_handle_cache_ = absl::make_unique(flr_); + params.function_handle_cache = function_handle_cache_.get(); + params.cancellation_manager = cancellation_manager_.get(); + *iterator_context = absl::make_unique(params); + return Status::OK(); +} + +Status DatasetOpsTestBase::GetDatasetFromContext(OpKernelContext* context, + int output_index, + DatasetBase** const dataset) { + Tensor* output = context->mutable_output(output_index); + Status status = GetDatasetFromVariantTensor(*output, dataset); + (*dataset)->Ref(); + return status; +} + +Status DatasetOpsTestBase::InitThreadPool(int thread_num) { + if (thread_num < 1) { + return errors::InvalidArgument( + "The `thread_num` argument should be positive but got: ", thread_num); + } + thread_pool_ = absl::make_unique( + Env::Default(), ThreadOptions(), "test_thread_pool", thread_num); + return Status::OK(); +} + +Status DatasetOpsTestBase::InitFunctionLibraryRuntime( + const std::vector& flib, int cpu_num) { + if (cpu_num < 1) { + return errors::InvalidArgument( + "The `cpu_num` argument should be positive but got: ", cpu_num); + } + SessionOptions options; + auto* device_count = options.config.mutable_device_count(); + device_count->insert({"CPU", cpu_num}); + std::vector> devices; + 
TF_RETURN_IF_ERROR(DeviceFactory::AddDevices( + options, "/job:localhost/replica:0/task:0", &devices)); + device_mgr_ = absl::make_unique(std::move(devices)); + resource_mgr_ = absl::make_unique("default_container"); + + FunctionDefLibrary proto; + for (const auto& fdef : flib) *(proto.add_function()) = fdef; + lib_def_ = + absl::make_unique(OpRegistry::Global(), proto); + + OptimizerOptions opts; + pflr_ = absl::make_unique( + device_mgr_.get(), Env::Default(), TF_GRAPH_DEF_VERSION, lib_def_.get(), + opts, thread_pool_.get(), nullptr /* cluster_flr */); + flr_ = pflr_->GetFLR("/job:localhost/replica:0/task:0/cpu:0"); + if (thread_pool_ == nullptr) { + runner_ = [](std::function fn) { fn(); }; + } else { + runner_ = [this](std::function fn) { + thread_pool_->Schedule(std::move(fn)); + }; + } + return Status::OK(); +} + +Status DatasetOpsTestBase::RunOpKernel(OpKernel* op_kernel, + OpKernelContext* context) { + device_->Compute(op_kernel, context); + return context->status(); +} + +Status DatasetOpsTestBase::RunFunction( + const FunctionDef& fdef, test::function::Attrs attrs, + const std::vector& args, + const GraphConstructorOptions& graph_options, std::vector rets) { + std::unique_ptr exec; + InstantiationResult result; + auto GetOpSig = [](const string& op, const OpDef** sig) { + return OpRegistry::Global()->LookUpOpDef(op, sig); + }; + TF_RETURN_IF_ERROR(InstantiateFunction(fdef, attrs, GetOpSig, &result)); + + DataTypeVector arg_types = result.arg_types; + DataTypeVector ret_types = result.ret_types; + + std::unique_ptr g(new Graph(OpRegistry::Global())); + TF_RETURN_IF_ERROR( + ConvertNodeDefsToGraph(graph_options, result.nodes, g.get())); + + const int version = g->versions().producer(); + LocalExecutorParams params; + params.function_library = flr_; + params.device = device_.get(); + params.create_kernel = [this, version](const NodeDef& ndef, + OpKernel** kernel) { + return CreateNonCachedKernel(device_.get(), this->flr_, ndef, version, + kernel); + }; + params.delete_kernel = [](OpKernel* kernel) { + DeleteNonCachedKernel(kernel); + }; + params.rendezvous_factory = [](const int64, const DeviceMgr* device_mgr, + Rendezvous** r) { + *r = new IntraProcessRendezvous(device_mgr); + return Status::OK(); + }; + + Executor* cur_exec; + TF_RETURN_IF_ERROR(NewLocalExecutor(params, std::move(g), &cur_exec)); + exec.reset(cur_exec); + FunctionCallFrame frame(arg_types, ret_types); + TF_RETURN_IF_ERROR(frame.SetArgs(args)); + Executor::Args exec_args; + exec_args.call_frame = &frame; + exec_args.runner = runner_; + TF_RETURN_IF_ERROR(exec->Run(exec_args)); + std::vector computed; + TF_RETURN_IF_ERROR(frame.GetRetvals(&computed)); + if (computed.size() != rets.size()) { + return errors::InvalidArgument( + "The result does not match the expected number of return outpus", + ". Expected: ", rets.size(), ". 
Actual: ", computed.size()); + } + for (int i = 0; i < rets.size(); ++i) { + *(rets[i]) = computed[i]; + } + return Status::OK(); +} + +Status DatasetOpsTestBase::CreateOpKernelContext( + OpKernel* kernel, gtl::InlinedVector* inputs, + std::unique_ptr* context) { + params_ = absl::make_unique(); + cancellation_manager_ = absl::make_unique(); + params_->cancellation_manager = cancellation_manager_.get(); + params_->device = device_.get(); + params_->frame_iter = FrameAndIter(0, 0); + params_->function_library = flr_; + params_->inputs = inputs; + params_->op_kernel = kernel; + params_->resource_manager = resource_mgr_.get(); + params_->runner = &runner_; + checkpoint::TensorSliceReaderCacheWrapper slice_reader_cache_wrapper; + slice_reader_cache_ = + absl::make_unique(); + params_->slice_reader_cache = slice_reader_cache_.get(); + step_container_ = + absl::make_unique(0, [](const string&) {}); + params_->step_container = step_container_.get(); + + // Set the allocator attributes for the outputs. + allocator_attrs_.clear(); + for (int index = 0; index < params_->op_kernel->num_outputs(); index++) { + AllocatorAttributes attr; + const bool on_host = + (params_->op_kernel->output_memory_types()[index] == HOST_MEMORY); + attr.set_on_host(on_host); + allocator_attrs_.emplace_back(attr); + } + params_->output_attr_array = gtl::vector_as_array(&allocator_attrs_); + + *context = absl::make_unique(params_.get()); + return Status::OK(); +} + +Status DatasetOpsTestBase::CreateSerializationContext( + std::unique_ptr* context) { + *context = + absl::make_unique(SerializationContext::Params{}); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckOpKernelInput( + const OpKernel& kernel, const gtl::InlinedVector& inputs) { + if (kernel.input_types().size() != inputs.size()) { + cout<<"++"<* inputs, DataTypeVector input_types, + DataType dtype, const TensorShape& shape) { + if (input_types.size() < inputs->size()) { + return errors::InvalidArgument("Adding more inputs than types: ", + inputs->size(), " vs. ", input_types.size()); + } + bool is_ref = IsRefType(input_types[inputs->size()]); + std::unique_ptr input = + absl::make_unique(allocator_, dtype, shape); + + if (is_ref) { + DataType expected_dtype = RemoveRefType(input_types[inputs->size()]); + if (expected_dtype != dtype) { + return errors::InvalidArgument("The input data type is ", dtype, + " , but expected: ", expected_dtype); + } + inputs->push_back({&lock_for_refs_, input.get()}); + } else { + if (input_types[inputs->size()] != dtype) { + return errors::InvalidArgument( + "The input data type is ", dtype, + " , but expected: ", input_types[inputs->size()]); + } + inputs->push_back({nullptr, input.get()}); + } + + // TODO(jsimsa): Figure out how to avoid using a member variable to garbage + // collect the inputs. 
+ tensors_.push_back(std::move(input)); + + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckIteratorGetNext( + const std::vector& expected_outputs, bool compare_order) { + bool end_of_sequence = false; + std::vector out_tensors; + while (!end_of_sequence) { + std::vector next; + TF_RETURN_IF_ERROR( + iterator_->GetNext(iterator_ctx_.get(), &next, &end_of_sequence)); + out_tensors.insert(out_tensors.end(), next.begin(), next.end()); + } + + TF_EXPECT_OK(ExpectEqual(out_tensors, expected_outputs, + /*compare_order=*/compare_order)); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckDatasetNodeName( + const string& expected_dataset_node_name) { + EXPECT_EQ(dataset_->node_name(), expected_dataset_node_name); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckDatasetTypeString( + const string& expected_type_str) { + EXPECT_EQ(dataset_->type_string(), expected_type_str); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckDatasetOutputDtypes( + const DataTypeVector& expected_output_dtypes) { + TF_EXPECT_OK( + VerifyTypesMatch(dataset_->output_dtypes(), expected_output_dtypes)); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckDatasetOutputShapes( + const std::vector& expected_output_shapes) { + TF_EXPECT_OK(VerifyShapesCompatible(dataset_->output_shapes(), + expected_output_shapes)); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckDatasetCardinality(int expected_cardinality) { + EXPECT_EQ(dataset_->Cardinality(), expected_cardinality); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckIteratorOutputDtypes( + const DataTypeVector& expected_output_dtypes) { + TF_EXPECT_OK( + VerifyTypesMatch(iterator_->output_dtypes(), expected_output_dtypes)); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckIteratorOutputShapes( + const std::vector& expected_output_shapes) { + TF_EXPECT_OK(VerifyShapesCompatible(iterator_->output_shapes(), + expected_output_shapes)); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckIteratorPrefix( + const string& expected_iterator_prefix) { + EXPECT_EQ(iterator_->prefix(), expected_iterator_prefix); + return Status::OK(); +} + +Status DatasetOpsTestBase::CheckIteratorSaveAndRestore( + const string& iterator_prefix, const std::vector& expected_outputs, + const std::vector& breakpoints) { + std::unique_ptr iterator; + TF_RETURN_IF_ERROR( + dataset_->MakeIterator(iterator_ctx_.get(), iterator_prefix, &iterator)); + std::unique_ptr serialization_ctx; + TF_RETURN_IF_ERROR(CreateSerializationContext(&serialization_ctx)); + bool end_of_sequence = false; + std::vector out_tensors; + int cur_iteration = 0; + auto expected_outputs_it = expected_outputs.begin(); + for (int breakpoint : breakpoints) { + VariantTensorData data; + VariantTensorDataWriter writer(&data); + TF_EXPECT_OK(iterator->Save(serialization_ctx.get(), &writer)); + TF_RETURN_IF_ERROR(writer.Flush()); + VariantTensorDataReader reader(&data); + TF_EXPECT_OK(RestoreIterator(iterator_ctx_.get(), &reader, iterator_prefix, + *dataset_, &iterator)); + + while (cur_iteration <= breakpoint) { + TF_RETURN_IF_ERROR(iterator->GetNext(iterator_ctx_.get(), &out_tensors, + &end_of_sequence)); + if (!end_of_sequence) { + EXPECT_NE(expected_outputs_it, expected_outputs.end()); + TF_EXPECT_OK(ExpectEqual(out_tensors.back(), *expected_outputs_it)); + expected_outputs_it++; + } + cur_iteration++; + } + + if (breakpoint >= expected_outputs.size()) { + EXPECT_TRUE(end_of_sequence); + EXPECT_EQ(expected_outputs_it, expected_outputs.end()); + } 
else { + EXPECT_FALSE(end_of_sequence); + } + } + return Status::OK(); +} + +} // namespace data +} // namespace tensorflow diff --git a/tf_adapter/tests/ut/kernels/testcase/dataset/function_testlib.cc b/tf_adapter/tests/ut/kernels/testcase/dataset/function_testlib.cc new file mode 100644 index 000000000..ad82bb6f6 --- /dev/null +++ b/tf_adapter/tests/ut/kernels/testcase/dataset/function_testlib.cc @@ -0,0 +1,649 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/function_testlib.h" + +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/framework/versions.pb.h" +#include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/public/version.h" + +namespace tensorflow { +namespace test { +namespace function { + +typedef FunctionDefHelper FDH; + +GraphDef GDef(gtl::ArraySlice nodes, + gtl::ArraySlice funcs) { + GraphDef g; + VersionDef* versions = g.mutable_versions(); + versions->set_producer(TF_GRAPH_DEF_VERSION); + versions->set_min_consumer(TF_GRAPH_DEF_VERSION_MIN_CONSUMER); + for (const auto& n : nodes) { + *(g.add_node()) = n; + } + auto lib = g.mutable_library(); + for (const auto& f : funcs) { + *(lib->add_function()) = f; + } + return g; +} + +// Helper to construct a NodeDef. 
+NodeDef NDef(StringPiece name, StringPiece op, gtl::ArraySlice inputs, + gtl::ArraySlice> attrs, + const string& device) { + NodeDef n; + n.set_name(string(name)); + n.set_op(string(op)); + for (const auto& in : inputs) n.add_input(in); + n.set_device(device); + for (auto na : attrs) n.mutable_attr()->insert({na.first, na.second.proto}); + return n; +} + +FunctionDef NonZero() { + return FDH::Define( + // Name + "NonZero", + // Args + {"x:T"}, + // Return values + {"y:T"}, + // Attr def + {"T:{float, double, int32, int64, string}"}, + // Nodes + { + {{"y"}, "Identity", {"x"}, {{"T", "$T"}}}, + }); +} + +FunctionDef IsZero() { + const Tensor kZero = test::AsScalar(0); + return FDH::Define( + // Name + "IsZero", + // Args + {"x: T"}, + // Return values + {"equal: bool"}, + // Attr def + {"T:{float, double, int32, int64, string}"}, + { + {{"zero"}, "Const", {}, {{"value", kZero}, {"dtype", DT_INT64}}}, + {{"cast"}, "Cast", {"zero"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}}, + {{"equal"}, "Equal", {"x", "cast"}, {{"T", "$T"}}}, + }); +} + +FunctionDef RandomUniform() { + const Tensor kZero = test::AsScalar(0); + + return FDH::Define( + // Name + "RandomUniform", + // Args + {"x: T"}, + // Return values + {"random_uniform: int64"}, + // Attr def + {"T:{float, double, int32, int64, string}"}, + {{{"random_uniform/shape"}, + "Const", + {}, + {{"value", kZero}, {"dtype", DT_INT64}}}, + {{"random_uniform"}, + "RandomUniform", + {"random_uniform/shape"}, + {{"T", DT_INT32}, + {"Tout", DT_FLOAT}, + {"seed", 87654321}, + {"seed2", 42}}}}); +} + +FunctionDef XTimesTwo() { + const Tensor kTwo = test::AsScalar(2); + return FDH::Define( + // Name + "XTimesTwo", + // Args + {"x: T"}, + // Return values + {"y: T"}, + // Attr def + {"T: {float, double, int32, int64}"}, + // Nodes + { + {{"two"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_INT64}}}, + {{"scale"}, "Cast", {"two"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}}, + {{"y"}, "Mul", {"x", "scale"}, {{"T", "$T"}}}, + }); +} + +FunctionDef TwoDeviceMult() { + const Tensor kTwo = test::AsScalar(2); + const Tensor kThree = test::AsScalar(3); + return FDH::Create( + // Name + "TwoDeviceMult", + // Args + {"x: T"}, + // Return values + {"y_cpu: T", "y_gpu: T"}, + // Attr def + {"T: {float, double, int32, int64}"}, + // Nodes + { + {{"num_2"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_INT64}}}, + {{"num_3"}, "Const", {}, {{"value", kThree}, {"dtype", DT_INT64}}}, + {{"factor_2"}, + "Cast", + {"num_2:output:0"}, + {{"SrcT", DT_INT64}, {"DstT", "$T"}}}, + {{"factor_3"}, + "Cast", + {"num_3:output:0"}, + {{"SrcT", DT_INT64}, {"DstT", "$T"}}}, + {{"y_cpu"}, + "Mul", + {"x", "factor_2:y:0"}, + {{"T", "$T"}}, + {}, + "/device:CPU:0"}, + {{"y_gpu"}, + "Mul", + {"x", "factor_3:y:0"}, + {{"T", "$T"}}, + {}, + "/device:GPU:0"}, + }, + {{"y_cpu", "y_cpu:z:0"}, {"y_gpu", "y_gpu:z:0"}}); +} + +FunctionDef TwoDeviceInputOutput() { + const Tensor kTwo = test::AsScalar(2); + const Tensor kThree = test::AsScalar(3); + return FDH::Create( + // Name + "TwoDeviceInputOutput", + // Args + {"x1: T", "x2: T"}, + // Return values + {"y_cpu: T", "y_gpu: T"}, + // Attr def + {"T: {float}"}, + // Nodes + { + {{"num_2"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_FLOAT}}}, + {{"num_3"}, "Const", {}, {{"value", kThree}, {"dtype", DT_FLOAT}}}, + {{"y_cpu"}, + "Mul", + {"x1", "num_2:output:0"}, + {{"T", "$T"}}, + {}, + "/device:CPU:0"}, + {{"y_gpu"}, + "Mul", + {"x2", "num_3:output:0"}, + {{"T", "$T"}}, + {}, + "/device:GPU:0"}, + }, + {{"y_cpu", "y_cpu:z:0"}, {"y_gpu", "y_gpu:z:0"}}); +} + 
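+// Usage sketch (illustrative only): the FunctionDef factories in this file
+// are consumed by packing them into a FunctionDefLibrary and handing that
+// to a FunctionLibraryDefinition, mirroring what
+// DatasetOpsTestBase::InitFunctionLibraryRuntime in dataset_test_base.cc
+// does:
+//
+//   FunctionDefLibrary proto;
+//   *proto.add_function() = test::function::XTimesTwo();
+//   FunctionLibraryDefinition lib_def(OpRegistry::Global(), proto);
+//
+// Attr placeholders such as "$T" in these definitions are bound only when a
+// function is instantiated with concrete attrs.
+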
+FunctionDef FuncWithListInput() { + const Tensor kTwo = test::AsScalar(2); + return FDH::Create( + // Name + "FuncWithListInput", + // Args + {"x1: N * T"}, + // Return values + {}, + // Attr def + {"T: {float}", "N: int >= 1"}, + // Nodes + { + {{"num_2"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_FLOAT}}}, + }, + {}); +} + +FunctionDef FuncWithListOutput() { + const Tensor kTwo = test::AsScalar(2); + return FDH::Create( + // Name + "FuncWithListOutput", + // Args + {}, + // Return values + {"y: N * T"}, + // Attr def + {"T: {float}", "N: int >= 1"}, + // Nodes + { + {{"num_2"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_FLOAT}}}, + }, + {{"y", "num_2:output:0"}}); +} + +FunctionDef XAddX() { + return FDH::Define( + // Name + "XAddX", + // Args + {"x: T"}, + // Return values + {"y: T"}, + // Attr def + {"T: {float, double, int32, int64}"}, + // Nodes + { + {{"y"}, "Add", {"x", "x"}, {{"T", "$T"}}}, + }); +} + +FunctionDef XAddY() { + return FDH::Define( + // Name + "XAddY", + // Args + {"x: T", "y: T"}, + // Return values + {"z: T"}, + // Attr def + {"T: {float, double, int32, int64}"}, + // Nodes + { + {{"z"}, "Add", {"x", "y"}, {{"T", "$T"}}}, + }); +} + +FunctionDef XTimesTwoInt32() { + const Tensor kTwo = test::AsScalar(2); + return FDH::Define( + // Name + "XTimesTwoInt32", + // Args + {"x: int32"}, + // Return values + {"y: int32"}, {}, + // Nodes + { + {{"two"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_INT64}}}, + {{"scale"}, + "Cast", + {"two"}, + {{"SrcT", DT_INT64}, {"DstT", DT_INT32}}}, + {{"y"}, "Mul", {"x", "scale"}, {{"T", DT_INT32}}}, + }); +} + +FunctionDef XTimesFour() { + return FDH::Create( + // Name + "XTimesFour", + // Args + {"x: T"}, + // Return values + {"y: T"}, + // Attr def + {"T: {float, double, int32, int64}"}, + // Nodes + { + {{"x2"}, "XTimesTwo", {"x"}, {{"T", "$T"}}}, + {{"y"}, "XTimesTwo", {"x2:y:0"}, {{"T", "$T"}}}, + }, + {{"y", "y:y:0"}}); +} + +FunctionDef XTimes16() { + return FDH::Create( + // Name + "XTimes16", + // Args + {"x: T"}, + // Return values + {"y: T"}, + // Attr def + {"T: {float, double, int32, int64}"}, + // Nodes + { + {{"x4"}, "XTimesFour", {"x"}, {{"T", "$T"}}}, + {{"y"}, "XTimesFour", {"x4:y:0"}, {{"T", "$T"}}}, + }, + {{"y", "y:y:0"}}); +} + +FunctionDef WXPlusB() { + return FDH::Define( + // Name + "WXPlusB", + // Args + {"w: T", "x: T", "b: T"}, + // Return values + {"y: T"}, + // Attr def + {"T: {float, double}"}, + // Nodes + {{{"mm"}, + "MatMul", + {"w", "x"}, + {{"T", "$T"}, + {"transpose_a", false}, + {"transpose_b", false}, + {"_kernel", "eigen"}}}, + {{"y"}, "Add", {"mm", "b"}, {{"T", "$T"}}}}); +} + +FunctionDef Swap() { + return FDH::Define( + // Name + "Swap", + // Args + {"i0: T", "i1: T"}, + // Return values + {"o0: T", "o1: T"}, + // Attr def + {"T: {float, double, resource}"}, + // Nodes + {{{"o0"}, "Identity", {"i1"}, {{"T", "$T"}}}, + {{"o1"}, "Identity", {"i0"}, {{"T", "$T"}}}}); +} + +FunctionDef EmptyBodySwap() { + return FDH::Create( + // Name + "EmptyBodySwap", + // Args + {"i0: T", "i1: T"}, + // Return values + {"o0: T", "o1: T"}, + // Attr def + {"T: {float, double, resource}"}, + // Nodes + {}, + // Output mapping + {{"o0", "i1"}, {"o1", "i0"}}); +} + +FunctionDef ResourceOutput() { + const Tensor kTwo = test::AsScalar(2); + return FDH::Create( + // Name + "ResourceOutput", + // Args + {"x: float", "y: resource"}, + // Return values + {"y_out: resource", "two_x: float"}, + // Attr def + {}, + // Nodes + { + {{"two"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_FLOAT}}}, + {{"mul"}, "Mul", {"x", 
"two:output:0"}, {{"T", DT_FLOAT}}, {}}, + }, + {{"y_out", "y"}, {"two_x", "mul:z:0"}}); +} + +FunctionDef ResourceIdentity() { + return FDH::Create( + // Name + "ResourceIdentity", + // Args + {"x: resource"}, + // Return values + {"y: resource"}, + // Attr def + {}, + // Nodes + {}, + // Output mapping + {{"y", "x"}}); +} + +FunctionDef ReadResourceVariable() { + return FDH::Create( + // Name + "ReadResourceVariable", + // Args + {"x: resource"}, + // Return values + {"y: float"}, + // Attr def + {}, + // Nodes + { + {{"read"}, "ReadVariableOp", {"x"}, {{"dtype", DT_FLOAT}}, {}}, + }, + {{"y", "read:value:0"}}); +} + +FunctionDef InvalidControlFlow() { + return FDH::Create( + // Name + "InvalidControlFlow", + // Args + {"i: int32"}, + // Return values + {"o: int32"}, + // Attr def + {}, + // Nodes + {{{"enter"}, "Enter", {"i"}, {{"T", DT_INT32}, {"frame_name", "while"}}}, + {{"add"}, "Add", {"enter:output", "i"}, {{"T", DT_INT32}}}}, + // Output mapping + {{"o", "add:z"}}); +} + +FunctionDef LessThanOrEqualToN(int64 N) { + const Tensor kN = test::AsScalar(N); + return FDH::Define( + // Name + "LessThanOrEqualToN", + // Args + {"x: T"}, + // Return values + {"z: bool"}, + // Attr def + {"T: {float, double, int32, int64}"}, + // Nodes + { + {{"N"}, "Const", {}, {{"value", kN}, {"dtype", DT_INT64}}}, + {{"y"}, "Cast", {"N"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}}, + {{"z"}, "LessEqual", {"x", "y"}, {{"T", "$T"}}}, + }); +} + +FunctionDef XPlusOneXTimesY() { + const Tensor kOne = test::AsScalar(1); + return FDH::Define( + // Name + "XPlusOneXTimesY", + // Args + {"x: T", "y: T"}, + // Return values + {"s: T", "t: T"}, + // Attr def + {"T: {float, double, int32, int64}"}, + // Nodes + {{{"one"}, "Const", {}, {{"value", kOne}, {"dtype", DT_INT64}}}, + {{"increment"}, "Cast", {"one"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}}, + {{"s"}, "Add", {"x", "increment"}, {{"T", "$T"}}}, + {{"t"}, "Mul", {"x", "y"}, {{"T", "$T"}}}}); +} + +FunctionDef XYXLessThanOrEqualToN(int64 N) { + const Tensor kN = test::AsScalar(N); + return FDH::Define( + // Name + "XYXLessThanOrEqualToN", + // Args + {"x: T", "y: T"}, + // Return values + {"z: bool"}, + // Attr def + {"T: {float, double, int32, int64}"}, + // Nodes + { + {{"N"}, "Const", {}, {{"value", kN}, {"dtype", DT_INT64}}}, + {{"N1"}, "Cast", {"N"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}}, + {{"z"}, "LessEqual", {"x", "N1"}, {{"T", "$T"}}}, + }); +} + +FunctionDef RandomUniformLess() { + const Tensor kZero = test::AsScalar(0); + const Tensor kOne = test::AsScalar(1); + const Tensor k005 = test::AsScalar(0.05); + + return FDH::Define( + // Name + "RandomUniformLess", + // Args + {"arg0: int64"}, + // Return values + {"strided_slice: bool"}, + // Attr def + {"T:{float, double, int32, int64, string}"}, + {{{"random_uniform/shape"}, + "Const", + {}, + {{"value", kZero}, {"dtype", DT_INT32}}}, + + {{"random_uniform/RandomUniform"}, + "RandomUniform", + {"random_uniform/shape"}, + {{"T", DT_INT32}, {"Tout", DT_FLOAT}, {"seed", 0}, {"seed2", 0}}}, + + {{"Less/y"}, "Const", {}, {{"value", k005}, {"dtype", DT_FLOAT}}}, + + {{"Less"}, + "Less", + {"random_uniform/RandomUniform", "Less/y"}, + {{"T", DT_FLOAT}}}, + + {{"strided_slice/stack"}, + "Const", + {}, + {{"value", kZero}, {"dtype", DT_INT32}}}, + + {{"strided_slice/stack_1"}, + "Const", + {}, + {{"value", kOne}, {"dtype", DT_INT32}}}, + + {{"strided_slice/stack_2"}, + "Const", + {}, + {{"value", kOne}, {"dtype", DT_INT32}}}, + + {{"strided_slice"}, + "StridedSlice", + {"Less", "strided_slice/stack", 
"strided_slice/stack_1", + "strided_slice/stack_2"}, + {{"Index", DT_INT32}, + {"T", DT_BOOL}, + {"begin_mask", 0}, + {"ellipsis_mask", 0}, + {"end_mask", 0}, + {"new_axis_mask", 0}, + {"shrink_axis_mask", 0}}}}); +} + +FunctionDef MakeRangeDataset() { + return FDH::Define( + // Name + "MakeRangeDataset", + // Args + {"start: int64", "stop: int64", "step: int64"}, + // Return values + {"y:variant"}, + // Attr def + {"output_types: list(type) >= 1", "output_shapes: list(shape) >= 1"}, + // Nodes + {{{"y"}, + "RangeDataset", + {"start", "stop", "step"}, + {{"output_types", "$output_types"}, + {"output_shapes", "$output_shapes"}}}}); +} + +FunctionDef MakeTakeDataset() { + return FDH::Define( + // Name + "TakeDataset", + // Args + {"input_dataset: variant", "count: int64"}, + // Return values + {"y:variant"}, + // Attr def + {"output_types: list(type) >= 1", "output_shapes: list(shape) >= 1"}, + // Nodes + {{{"y"}, + "TakeDataset", + {"input_dataset", "count"}, + {{"output_types", "$output_types"}, + {"output_shapes", "$output_shapes"}}}}); +} + +FunctionDef MakeTensorSliceDataset() { + return FDH::Define( + // Name + "MakeTensorSliceDataset", + // Args + {"x: Toutput_types"}, + // Return values + {"y: variant"}, + // Attr def + {"Toutput_types: list(type) >= 1", "output_shapes: list(shape) >= 1"}, + // Nodes + {{{"y"}, + "TensorSliceDataset", + {"x"}, + {{"Toutput_types", "$Toutput_types"}, + {"output_shapes", "$output_shapes"}}}}); +} + +FunctionDef Unique() { + return FDH::Create( + // Name + "GetUnique", + // Args + {"x:T"}, + // Return values + {"y:T", "idx: out_idx"}, + // Attr def + {"T: type", "out_idx: {int32, int64} = DT_INT32"}, + // Nodes + { + {{"result"}, "Unique", {"x"}, {{"T", "$T"}, {"out_idx", "$out_idx"}}}, + }, + {{"y", "result:y:0"}, {"idx", "result:idx:0"}}); +} + +void FunctionTestSchedClosure(std::function fn) { + static thread::ThreadPool* w = + new thread::ThreadPool(Env::Default(), "Test", 8); + w->Schedule(std::move(fn)); +} + +} // end namespace function +} // end namespace test +} // end namespace tensorflow diff --git a/tf_adapter/tests/ut/kernels/testcase/dataset/host_queue_dats_set_ut.cc b/tf_adapter/tests/ut/kernels/testcase/dataset/host_queue_dats_set_ut.cc new file mode 100644 index 000000000..ce3a9d7fc --- /dev/null +++ b/tf_adapter/tests/ut/kernels/testcase/dataset/host_queue_dats_set_ut.cc @@ -0,0 +1,203 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#define protected public +#include "gtest/gtest.h" +#include "tensorflow/core/graph/graph_def_builder.h" +#include "tensorflow/core/kernels/data/dataset_test_base.h" +class HostQueueDatasetOp; +namespace tensorflow { +namespace data { +namespace { + +static constexpr char kNodeName[] = "host_queue_dataset"; +static constexpr const char *const kChannelName = "channel_name"; +static constexpr const char *const kOutputTypes = "output_types"; +static constexpr const char *const kOutputShapes = "output_shapes"; + +class HostQueueDatasetOpTest : public DatasetOpsTestBase { + protected: + // Creates `TensorSliceDataset` variant tensor from the input vector of + // tensors. + Status CreateTensorSliceDatasetTensor( + std::vector *const tensor_vector, Tensor *dataset_tensor) { + DatasetBase *tensor_slice_dataset; + TF_RETURN_IF_ERROR(CreateTensorSliceDataset( + "tensor_slice_node", tensor_vector, &tensor_slice_dataset)); + TF_RETURN_IF_ERROR( + StoreDatasetInVariantTensor(tensor_slice_dataset, dataset_tensor)); + return Status::OK(); + } + + // Create a new `HostQueueDataset` op kernel. + Status CreateHostQueueDatasetKernel( + const DataTypeVector &output_types, + const std::vector &output_shapes, + std::unique_ptr *op_kernel, std::string _local_rank_id) { + name_utils::OpNameParams params; + + NodeDef node_def = + test::function::NDef(kNodeName, name_utils::OpName("HostQueue", params), + {"geop_dataset", "input_dataset"}, + {{"channel_name", "channel_001"}, + {"output_types", output_types}, + {"_local_rank_id", _local_rank_id}, + {"_local_device_list", "{0,-1}"}, + {"output_shapes", output_shapes}}); + TF_RETURN_IF_ERROR(CreateOpKernel(node_def, op_kernel)); + return Status::OK(); + } + + // Create a new `HostQueueDataset` op kernel context. 
+ Status CreateHostQueueDatasetContext( + OpKernel *op_kernel, gtl::InlinedVector *const inputs, + std::unique_ptr *context) { + TF_RETURN_IF_ERROR(CheckOpKernelInput(*op_kernel, *inputs)); + TF_RETURN_IF_ERROR(CreateOpKernelContext(op_kernel, inputs, context)); + return Status::OK(); + } + + protected: + virtual void SetUp() {} + virtual void TearDown() {} +}; + +struct TestCase { + std::vector input_tensors; + std::vector expected_outputs; + DataTypeVector expected_output_dtypes; + std::vector expected_output_shapes; +}; + +TestCase NormalizeTestCase() { + return { + // input_tensors expected_outputs expected_output_dtypes + // expected_output_shapes + {CreateTensor(TensorShape{10, 1}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9})}, + {CreateTensor(TensorShape{1}, {0})}, + {DT_INT64}, + {PartialTensorShape({1})}, + }; +} + +TEST_F(HostQueueDatasetOpTest, iterator_getnext) { + int thread_num = 2, cpu_num = 2; + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + const TestCase &test_case = NormalizeTestCase(); + Tensor tensor_slice_dataset_tensor(DT_VARIANT, TensorShape({})); + std::vector inputs_for_tensor_slice_dataset = test_case.input_tensors; + TF_ASSERT_OK(CreateTensorSliceDatasetTensor(&inputs_for_tensor_slice_dataset, + &tensor_slice_dataset_tensor)); + + gtl::InlinedVector inputs_for_host_queue_dataset( + {TensorValue(&tensor_slice_dataset_tensor), + TensorValue(&tensor_slice_dataset_tensor)}); + + std::unique_ptr host_queue_dataset_kernel; + TF_ASSERT_OK(CreateHostQueueDatasetKernel(test_case.expected_output_dtypes, + test_case.expected_output_shapes, + &host_queue_dataset_kernel, "-1")); + std::unique_ptr host_queue_dataset_context; + TF_ASSERT_OK(CreateHostQueueDatasetContext(host_queue_dataset_kernel.get(), + &inputs_for_host_queue_dataset, + &host_queue_dataset_context)); + DatasetBase *host_queue_dataset; + TF_ASSERT_OK(CreateDataset(host_queue_dataset_kernel.get(), + host_queue_dataset_context.get(), + &host_queue_dataset)); + core::ScopedUnref scoped_unref(host_queue_dataset); + + EXPECT_EQ(host_queue_dataset->node_name(), kNodeName); + + host_queue_dataset->output_dtypes(); + host_queue_dataset->output_shapes(); + host_queue_dataset->DebugString(); + + SerializationContext context(SerializationContext::Params{}); + GraphDefBuilder b; + DatasetBase::DatasetGraphDefBuilder db(&b); + Node *output; + host_queue_dataset->AsGraphDefInternal(&context, &db, &output); + + std::unique_ptr iterator_context; + TF_ASSERT_OK(CreateIteratorContext(host_queue_dataset_context.get(), + &iterator_context)); + std::unique_ptr iterator; + TF_ASSERT_OK(host_queue_dataset->MakeIterator(iterator_context.get(), + "Iterator", &iterator)); + + bool end_of_sequence = false; + std::vector out_tensors; + sleep(2); + TF_EXPECT_OK(iterator->GetNext(iterator_context.get(), &out_tensors, + &end_of_sequence)); +} + +TEST_F(HostQueueDatasetOpTest, iterator_getnext02) { + int thread_num = 2, cpu_num = 2; + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + const TestCase &test_case = NormalizeTestCase(); + Tensor tensor_slice_dataset_tensor(DT_VARIANT, TensorShape({})); + std::vector inputs_for_tensor_slice_dataset = test_case.input_tensors; + TF_ASSERT_OK(CreateTensorSliceDatasetTensor(&inputs_for_tensor_slice_dataset, + &tensor_slice_dataset_tensor)); + + gtl::InlinedVector inputs_for_host_queue_dataset( + {TensorValue(&tensor_slice_dataset_tensor), + TensorValue(&tensor_slice_dataset_tensor)}); + + std::unique_ptr 
host_queue_dataset_kernel; + TF_ASSERT_OK(CreateHostQueueDatasetKernel(test_case.expected_output_dtypes, + test_case.expected_output_shapes, + &host_queue_dataset_kernel, "0")); + std::unique_ptr host_queue_dataset_context; + TF_ASSERT_OK(CreateHostQueueDatasetContext(host_queue_dataset_kernel.get(), + &inputs_for_host_queue_dataset, + &host_queue_dataset_context)); + DatasetBase *host_queue_dataset; + TF_ASSERT_OK(CreateDataset(host_queue_dataset_kernel.get(), + host_queue_dataset_context.get(), + &host_queue_dataset)); + core::ScopedUnref scoped_unref(host_queue_dataset); + + EXPECT_EQ(host_queue_dataset->node_name(), kNodeName); + + host_queue_dataset->output_dtypes(); + host_queue_dataset->output_shapes(); + host_queue_dataset->DebugString(); + + SerializationContext context(SerializationContext::Params{}); + GraphDefBuilder b; + DatasetBase::DatasetGraphDefBuilder db(&b); + Node *output; + host_queue_dataset->AsGraphDefInternal(&context, &db, &output); + + std::unique_ptr iterator_context; + TF_ASSERT_OK(CreateIteratorContext(host_queue_dataset_context.get(), + &iterator_context)); + std::unique_ptr iterator; + TF_ASSERT_OK(host_queue_dataset->MakeIterator(iterator_context.get(), + "Iterator", &iterator)); + + bool end_of_sequence = false; + std::vector out_tensors; + sleep(2); + TF_EXPECT_OK(iterator->GetNext(iterator_context.get(), &out_tensors, + &end_of_sequence)); +} + +} // namespace +} // namespace data +} // namespace tensorflow diff --git a/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc b/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc index 2645fee89..cc5ac3047 100644 --- a/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc +++ b/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc @@ -142,6 +142,12 @@ TEST_F(GeOpTest, GeOpDynamicInputTest) { EXPECT_TRUE(attrs.find("_dynamic_input") != attrs.end()); EXPECT_TRUE(!attrs["_dynamic_input"].s().empty()); } +TEST_F(GeOpTest, GeOpDynamicInputGetNextTest) { + NodeDef node_def; + std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_dynamic_input_lazy_recompile.pbtxt"; + gtl::InlinedVector inputs; + EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp11_0").ok()); +} TEST_F(GeOpTest, GeOpDynamicInput1Test) { NodeDef node_def; std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_dynamic_execute.pbtxt"; @@ -250,7 +256,6 @@ TEST_F(GeOpTest, GeOpFuncSubGraphTest) { gtl::InlinedVector inputs{TensorValue(&a)}; EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp12_0").ok()); } - TEST_F(GeOpTest, GeOpDynamicDimsTest) { NodeDef node_def; std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_dynamic_dims.pbtxt"; @@ -261,14 +266,12 @@ TEST_F(GeOpTest, GeOpDynamicDimsTest) { EXPECT_TRUE(attrs.find("_input_shape") != attrs.end()); EXPECT_TRUE(!attrs["_input_shape"].s().empty()); } - TEST_F(GeOpTest, GeOpWhileLoopV1Test) { NodeDef node_def; std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_while_loop.pbtxt"; gtl::InlinedVector inputs; EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp13_0").ok()); } - TEST_F(GeOpTest, GeOpWhileLoopV2Test) { setenv("ENABLE_FORCE_V2_CONTROL", "1", true); NodeDef node_def; @@ -276,7 +279,6 @@ TEST_F(GeOpTest, GeOpWhileLoopV2Test) { gtl::InlinedVector inputs; EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp13_0").ok()); } - TEST_F(GeOpTest, GeOpNpuOnnxGraphOpTest) { NodeDef node_def; std::string grph_pbtxt_path = 
"tf_adapter/tests/ut/kernels/pbtxt/geop_npu_onnx_graph_op.pbtxt"; @@ -285,7 +287,6 @@ TEST_F(GeOpTest, GeOpNpuOnnxGraphOpTest) { gtl::InlinedVector inputs{TensorValue(&in)}; EXPECT_TRUE(GeOpRunGraphAsync(grph_pbtxt_path, inputs, node_def, "GeOp91_0").ok()); } - TEST_F(GeOpTest, GeOpNpuOnnxGraphOpNoModelTest) { NodeDef node_def; std::string grph_pbtxt_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_npu_onnx_graph_op_parse.pbtxt"; @@ -294,6 +295,11 @@ TEST_F(GeOpTest, GeOpNpuOnnxGraphOpNoModelTest) { gtl::InlinedVector inputs{TensorValue(&in)}; EXPECT_TRUE(GeOpRunGraphAsync(grph_pbtxt_path, inputs, node_def, "GeOp91_0").ok()); } - +TEST_F(GeOpTest, GeOpDpOpTest) { + NodeDef node_def; + std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_dpop.pbtxt"; + gtl::InlinedVector inputs; + EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp1_0_dp").ok()); +} } } //end tensorflow \ No newline at end of file diff --git a/tf_adapter/tests/ut/kernels/testcase/infeed_outfeed_test.cc b/tf_adapter/tests/ut/kernels/testcase/infeed_outfeed_test.cc new file mode 100644 index 000000000..21bc49f19 --- /dev/null +++ b/tf_adapter/tests/ut/kernels/testcase/infeed_outfeed_test.cc @@ -0,0 +1,76 @@ +#include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/public/version.h" +#include +#include "gtest/gtest.h" + + +namespace tensorflow { +namespace { + +#define TF_ASSERT_OK(statement) \ + ASSERT_EQ(::tensorflow::Status::OK(), (statement)) + +#define TF_EXPECT_OK(statement) \ + EXPECT_EQ(::tensorflow::Status::OK(), (statement)) + +class DummyDevice : public DeviceBase { + public: + DummyDevice(Env* env, bool save) : DeviceBase(env), save_(save) {} + bool RequiresRecordingAccessedTensors() const override { return save_; } + Allocator* GetAllocator(AllocatorAttributes /*attr*/) override { return cpu_allocator(); } + private: + bool save_; +}; +} +class InfeedOutfeedTest : public testing::Test { + protected: + virtual void SetUp() {} + virtual void TearDown() {} +}; + +TEST_F(InfeedOutfeedTest, LogSummaryTest) { + DataType data_type = DT_INT32; + std::initializer_list dims = {}; + TensorShapeProto shape_proto; + TensorShape(dims).AsProto(&shape_proto); + + std::string channel_name = "_npu_log"; + + NodeDef outfeed_node; + tensorflow::AttrValue output_shapes; + tensorflow::AttrValue output_types; + *(output_shapes.mutable_list()->add_shape()) = shape_proto; + *(output_shapes.mutable_list()->add_shape()) = shape_proto; + output_types.mutable_list()->add_type(DT_STRING); + output_types.mutable_list()->add_type(DT_INT32); + TF_ASSERT_OK(NodeDefBuilder("out_feed", "OutfeedDequeueOp") + .Attr("channel_name", channel_name) + .Attr("output_types", output_types) + .Attr("output_shapes", output_shapes) + .Finalize(&outfeed_node)); + + DeviceType device_type = DEVICE_CPU; + Env* env = Env::Default(); + auto device = absl::make_unique(env, false); + + Status status; + std::unique_ptr op(CreateOpKernel(device_type, device.get(), + cpu_allocator(), outfeed_node, + TF_GRAPH_DEF_VERSION, &status)); + TF_ASSERT_OK(status); + + OpKernelContext::Params params; + params.device = device.get(); + params.op_kernel = op.get(); + std::unique_ptr cancellation_manager = absl::make_unique(); + params.cancellation_manager = cancellation_manager.get(); + + OpKernelContext ctx(¶ms); + op->Compute(&ctx); + TF_EXPECT_OK(ctx.status()); + +} +} //end 
+  DeviceType device_type = DEVICE_CPU;
+  Env *env = Env::Default();
+  auto device = absl::make_unique<DummyDevice>(env, false);
+
+  Status status;
+  std::unique_ptr<OpKernel> op(CreateOpKernel(device_type, device.get(),
+                                              cpu_allocator(), outfeed_node,
+                                              TF_GRAPH_DEF_VERSION, &status));
+  TF_ASSERT_OK(status);
+
+  OpKernelContext::Params params;
+  params.device = device.get();
+  params.op_kernel = op.get();
+  std::unique_ptr<CancellationManager> cancellation_manager = absl::make_unique<CancellationManager>();
+  params.cancellation_manager = cancellation_manager.get();
+
+  OpKernelContext ctx(&params);
+  op->Compute(&ctx);
+  TF_EXPECT_OK(ctx.status());
+
+}
+} //end tensorflow
\ No newline at end of file
diff --git a/tf_adapter/tests/ut/optimizers/testcase/dp_tf_ge_conversion_pass_test.cc b/tf_adapter/tests/ut/optimizers/testcase/dp_tf_ge_conversion_pass_test.cc
index 5a69c0b3e..47111654a 100644
--- a/tf_adapter/tests/ut/optimizers/testcase/dp_tf_ge_conversion_pass_test.cc
+++ b/tf_adapter/tests/ut/optimizers/testcase/dp_tf_ge_conversion_pass_test.cc
@@ -6,6 +6,7 @@
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/logging.h"
+#include <stdlib.h>
 namespace tensorflow {
 namespace {
@@ -95,5 +96,16 @@ TEST_F(DpOptimizationPassTest, DatasetNotInDeviceTest) {
     "HostQueueDataset->DPGroupDataset;GEOPDataset->HostQueueDataset;DPGroupDataset->MakeIterator";
   EXPECT_EQ(DoRunDpOptimizationPassTest(), target_graph);
 }
+TEST_F(DpOptimizationPassTest, NewDatasetNotInDeviceTest) {
+  string org_graph_def_path = "tf_adapter/tests/ut/optimizers/pbtxt/dp_test_no_dataset_in_device.pbtxt";
+  setenv("IS_NEW", "1", true);
+  InitGraph(org_graph_def_path);
+  std::string target_graph = "Const->TensorSliceDataset;TensorSliceDataset->BatchDatasetV2;Const->BatchDatasetV2:1;"\
+    "Const->BatchDatasetV2:2;BatchDatasetV2->RepeatDataset;Const->RepeatDataset:1;RepeatDataset->OptimizeDataset;"\
+    "Const->OptimizeDataset:1;OptimizeDataset->ModelDataset;IteratorV2->MakeIterator:1;ModelDataset->HostQueueDataset:1;"\
+    "HostQueueDataset->DPGroupDataset;GEOPDataset->HostQueueDataset;DPGroupDataset->MakeIterator";
+  EXPECT_EQ(DoRunDpOptimizationPassTest(), target_graph);
+  unsetenv("IS_NEW");
+}
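+// NewDatasetNotInDeviceTest: with IS_NEW=1 the pass presumably takes the new
+// ACL-channel host queue path; the rewritten graph is expected to be identical
+// to the legacy DatasetNotInDeviceTest above.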
 } // end namespace
 } // end tensorflow
diff --git a/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc b/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc
index 0d0b7c0a0..0ad6bfb3c 100644
--- a/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc
+++ b/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc
@@ -82,7 +82,6 @@ TEST_F(GePluginTest, MallocSharedMemOKTest) {
   EXPECT_EQ(ret, 0);
 }
 TEST_F(GePluginTest, NpuCloseTest) {
-  NpuAttrs::SetUseTdtStatus(0, true);
   NpuClose();
 }
 TEST_F(GePluginTest, RdmaInitAndRegisterFail1Test) {
diff --git a/tf_adapter/util/acl_channel.cc b/tf_adapter/util/acl_channel.cc
new file mode 100644
index 000000000..38b701022
--- /dev/null
+++ b/tf_adapter/util/acl_channel.cc
@@ -0,0 +1,228 @@
+/* Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tf_adapter/common/common.h"
+#include "tf_adapter/common/adp_logger.h"
+#include "tf_adapter/util/acl_channel.h"
+#include "acl/error_codes/rt_error_codes.h"
+#include "securec.h"
+
+namespace tensorflow {
+
+Status MappingTfDtypeToAcl(const tensorflow::DataType tf_type, aclDataType &acl_type) {
+  const static std::map<tensorflow::DataType, aclDataType> type_mapping = {
+      {DT_FLOAT, ACL_FLOAT}, {DT_HALF, ACL_FLOAT16}, {DT_INT8, ACL_INT8}, {DT_INT32, ACL_INT32},
+      {DT_UINT8, ACL_UINT8}, {DT_INT16, ACL_INT16}, {DT_UINT16, ACL_UINT16}, {DT_UINT32, ACL_UINT32},
+      {DT_INT64, ACL_INT64}, {DT_UINT64, ACL_UINT64}, {DT_DOUBLE, ACL_DOUBLE}, {DT_BOOL, ACL_BOOL},
+      {DT_STRING, ACL_STRING}};
+  auto found = type_mapping.find(tf_type);
+  if (found == type_mapping.end()) {
+    return errors::Internal("Unsupported tf data type ", DataTypeString(tf_type), " by acl.");
+  }
+  acl_type = found->second;
+  return Status::OK();
+}
+
+Status MappingAclDtypeToTf(const aclDataType &acl_type, tensorflow::DataType &tf_type) {
+  const static std::map<aclDataType, tensorflow::DataType> type_mapping = {
+      {ACL_FLOAT, DT_FLOAT}, {ACL_FLOAT16, DT_HALF}, {ACL_INT8, DT_INT8}, {ACL_INT32, DT_INT32},
+      {ACL_UINT8, DT_UINT8}, {ACL_INT16, DT_INT16}, {ACL_UINT16, DT_UINT16}, {ACL_UINT32, DT_UINT32},
+      {ACL_INT64, DT_INT64}, {ACL_UINT64, DT_UINT64}, {ACL_DOUBLE, DT_DOUBLE}, {ACL_BOOL, DT_BOOL},
+      {ACL_STRING, DT_STRING}};
+  auto found = type_mapping.find(acl_type);
+  if (found == type_mapping.end()) { return errors::Internal("Acl channel receive unsupported data type ", acl_type); }
+  tf_type = found->second;
+  return Status::OK();
+}
+
+Status AssembleAclTensor2Tensor(acltdtDataItem *item, std::vector<Tensor> &tensors, bool call_by_channel_receive) {
+  acltdtTensorType acl_type = acltdtGetTensorTypeFromItem(item);
+  if (acl_type == ACL_TENSOR_DATA_END_OF_SEQUENCE) {
+    LOG(INFO) << "Acl channel received end-of-sequence for out-feed op.";
+    return Status::OK();
+  } else if (acl_type == ACL_TENSOR_DATA_ABNORMAL) {
+    LOG(INFO) << "Acl channel received abnormal for out-feed op.";
+    return Status::OK();
+  } else if (acl_type == ACL_TENSOR_DATA_UNDEFINED) {
+    LOG(INFO) << "Acl channel received undefined message type for out-feed op.";
+    return errors::Internal("Acl channel received undefined message type for out-feed op.");
+  }
+  tensorflow::DataType tf_type;
+  TF_RETURN_IF_ERROR(MappingAclDtypeToTf(acltdtGetDataTypeFromItem(item), tf_type));
+  size_t dim_num = acltdtGetDimNumFromItem(item);
+  size_t acl_data_len = acltdtGetDataSizeFromItem(item);
+  char *acl_data = reinterpret_cast<char *>(acltdtGetDataAddrFromItem(item));
+  if (acl_data == nullptr) { return errors::Internal("Acl get data addr from item failed when receive tensor data."); }
+  if (tf_type == DT_STRING) {
+    if (dim_num != 0) { return errors::Internal("Acl channel receive unsupported non-scalar string type"); }
+    Tensor tensor(tf_type, TensorShape({}));
+    tensor.scalar<string>()() = std::move(string(acl_data, acl_data_len));
+    tensors.emplace_back(std::move(tensor));
+  } else if (DataTypeCanUseMemcpy(tf_type)) {
+    std::vector<int64_t> dims;
+    dims.resize(dim_num);
+    if (acltdtGetDimsFromItem(item, dims.data(), dim_num) != ACL_ERROR_NONE) {
+      return errors::Internal("Failed get dim-size from acl channel data");
+    }
+    TensorShape tf_shape;
+    for (auto dim : dims) { tf_shape.AddDim(dim); }
+    Tensor tensor = Tensor(tf_type, tf_shape);
+    auto tensor_data = const_cast<char *>(tensor.tensor_data().data());
+    auto tensor_size = tensor.tensor_data().size();
+    if (tensor_size != acl_data_len) {
+      return errors::Internal("Acl channel receive size mismatch tensor size acl:",
+                              acl_data_len, " vs. tf:", tensor_size);
+    }
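+    // memcpy_s rejects a single copy larger than SECUREC_MEM_MAX_LEN, so the
+    // payload is copied in chunks of at most that size.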
return errors::Internal("Acl channel receive size mismatch tensor size acl:", + acl_data_len, "vs. tf:", tensor_size); + } + do { + auto copy_size = (tensor_size > SECUREC_MEM_MAX_LEN) ? SECUREC_MEM_MAX_LEN : tensor_size; + LOG(INFO) << "tensor data:" << reinterpret_cast(tensor_data) << ", tensor_size:" << tensor_size + << ", acl_data:" << reinterpret_cast(acl_data) << ", copy_size:" << copy_size; + if (memcpy_s(tensor_data, tensor_size, acl_data, copy_size) != EOK) { + return errors::Internal("Failed copy acl channel data to tensorflow."); + } + tensor_size -= copy_size; + tensor_data += copy_size; + acl_data += copy_size; + } while (tensor_size > 0); + tensors.emplace_back(std::move(tensor)); + } else { return errors::InvalidArgument("Acl channel receive uncopyable tf data type", DataTypeString(tf_type)); } + return Status::OK(); +} + +Status AssembleAclDataset2Tensors(acltdtDataset *acl_dataset, std::vector &out_tensors, + bool call_by_channel_receive) { + for (size_t i = 0; i < acltdtGetDatasetSize(acl_dataset); i++) { + auto acl_data = acltdtGetDataItem(acl_dataset, i); + if (acl_data == nullptr) { + return errors::Internal("Acl get tensor data from dataset failed when receive tensor data."); + } + TF_RETURN_IF_ERROR(AssembleAclTensor2Tensor(acl_data, out_tensors, call_by_channel_receive)); + } + return Status::OK(); +} + +Status AssembleTensors2AclDataset(acltdtTensorType acl_type, const std::vector &tensors, + acltdtDataset **output_acl_dataset) { + auto acl_dataset = acltdtCreateDataset(); + if (acl_dataset == nullptr) { return errors::Internal("Acl create tensor dataset failed"); } + auto status = AssembleTensors2AclDataset(acl_type, tensors, acl_dataset); + if (!status.ok()) { + ADAPTER_LOG_IF_ERROR(DestroyAclDataset(acl_dataset)); + return status; + } + *output_acl_dataset = acl_dataset; + return Status::OK(); +} + +Status AssembleTensors2AclDataset(acltdtTensorType acl_type, const std::vector &tensors, + acltdtDataset *acl_dataset) { + if (TF_PREDICT_FALSE(acl_type != ACL_TENSOR_DATA_TENSOR)) { + acltdtDataItem *acl_data = acltdtCreateDataItem(acl_type, nullptr, 0, ACL_BOOL /* whatever */, nullptr, 0); + if (acl_data == nullptr) { return errors::Internal("Acl create tensor item failed when send end-of-sequence."); } + if (acltdtAddDataItem(acl_dataset, acl_data) != ACL_ERROR_NONE) { + if (acltdtDestroyDataItem(acl_data) != ACL_ERROR_NONE) { + LOG(ERROR) << "Acl destroy tensor data item failed when send data with type " + << (acl_type == ACL_TENSOR_DATA_END_OF_SEQUENCE ? "ACL_TENSOR_DATA_END_OF_SEQUENCE" + : "ACL_TENSOR_DATA_ABNORMAL"); + } + return errors::Internal("Acl add tensor data to dataset failed when send data with type ", acl_type); + } + return Status::OK(); + } + for (auto &tensor : tensors) { + aclDataType acl_data_type; + TF_RETURN_IF_ERROR(MappingTfDtypeToAcl(tensor.dtype(), acl_data_type)); + acltdtDataItem *acl_data = nullptr; + if (DataTypeCanUseMemcpy(tensor.dtype())) { + auto dims = tensor.shape().dim_sizes(); + acl_data = acltdtCreateDataItem( + ACL_TENSOR_DATA_TENSOR, (dims.empty() ? 
+  for (auto &tensor : tensors) {
+    aclDataType acl_data_type;
+    TF_RETURN_IF_ERROR(MappingTfDtypeToAcl(tensor.dtype(), acl_data_type));
+    acltdtDataItem *acl_data = nullptr;
+    if (DataTypeCanUseMemcpy(tensor.dtype())) {
+      auto dims = tensor.shape().dim_sizes();
+      acl_data = acltdtCreateDataItem(
+          ACL_TENSOR_DATA_TENSOR, (dims.empty() ? nullptr : reinterpret_cast<const int64_t *>(dims.data())),
+          dims.size(), acl_data_type, const_cast<char *>(tensor.tensor_data().data()), tensor.tensor_data().size());
+    } else if (tensor.dtype() == DT_STRING) {
+      if (tensor.dims() != 0) {
+        return errors::Internal("Acl send got unexpected non-scalar string tensor with dim ", tensor.dims());
+      }
+      auto value = reinterpret_cast<string *>(const_cast<char *>(tensor.tensor_data().data()));
+      // for scalar type, *dims is nullptr and dim_num is 0
+      acl_data = acltdtCreateDataItem(ACL_TENSOR_DATA_TENSOR, nullptr, 0, acl_data_type,
+                                      const_cast<char *>(value->c_str()), value->size());
+    } else {
+      return errors::Internal("Acl send got unexpected data type ", DataTypeString(tensor.dtype()));
+    }
+    if (acl_data == nullptr) {
+      return errors::Internal("Acl create tensor item failed when send tensor data ", tensor.DebugString());
+    }
+    if (acltdtAddDataItem(acl_dataset, acl_data) != ACL_ERROR_NONE) {
+      if (acltdtDestroyDataItem(acl_data) != ACL_ERROR_NONE) {
+        ADP_LOG(ERROR) << "Acl destroy tensor data item failed when send data with type ACL_TENSOR_DATA_TENSOR.";
+      }
+      return errors::Internal("Acl add tensor data to dataset failed when send tensor data.");
+    }
+  }
+  return Status::OK();
+}
+
+Status DestroyAclDataset(acltdtDataset *acl_dataset, bool include_data_item) {
+  if (include_data_item) {
+    for (size_t i = 0; i < acltdtGetDatasetSize(acl_dataset); i++) {
+      if (acltdtDestroyDataItem(acltdtGetDataItem(acl_dataset, i)) != ACL_ERROR_NONE) {
+        return errors::Internal("Acl destroy tensor data failed.");
+      }
+    }
+  }
+  if (acltdtDestroyDataset(acl_dataset) != ACL_ERROR_NONE) {
+    return errors::Internal("Acl destroy tensor dataset failed.");
+  }
+  return Status::OK();
+}
+
+Status RecvTensorByAcl(acltdtChannelHandle *acl_handle, std::vector<Tensor> &tensors) {
+  auto acl_dataset = acltdtCreateDataset();
+  if (acl_dataset == nullptr) { return errors::Internal("Failed create acl channel."); }
+  auto acl_status = acltdtReceiveTensor(acl_handle, acl_dataset, -1 /* no timeout */);
+
+  if (acl_status != ACL_ERROR_NONE) {
+    ADAPTER_LOG_IF_ERROR(DestroyAclDataset(acl_dataset, false));
+    return errors::Internal("Failed receive data from acl channel, acl status:", acl_status);
+  }
+
+  auto status = AssembleAclDataset2Tensors(acl_dataset, tensors, true /* call by channel receive */);
+  if (!status.ok()) {
+    ADAPTER_LOG_IF_ERROR(DestroyAclDataset(acl_dataset, false));
+    return status;
+  }
+  TF_RETURN_IF_ERROR(DestroyAclDataset(acl_dataset, false));
+  return Status::OK();
+}
+
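+// Unlike the receive above, which blocks indefinitely (-1), sends use a
+// 1000 ms timeout; assembly and cleanup failures are folded into
+// ACL_ERROR_RT_PARAM_INVALID so callers see a single aclError code.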
+aclError SendTensorsByAcl(const acltdtChannelHandle *acl_handle,
+                          acltdtTensorType acl_type,
+                          const std::vector<Tensor> &tensors) {
+  acltdtDataset *acl_dataset = nullptr;
+  auto assemble_status = AssembleTensors2AclDataset(acl_type, tensors, &acl_dataset);
+  if (!assemble_status.ok()) {
+    ADP_LOG(ERROR) << "Call AssembleTensors2AclDataset failed.";
+    return ACL_ERROR_RT_PARAM_INVALID;
+  }
+  auto acl_status = acltdtSendTensor(acl_handle, acl_dataset, 1000);
+  auto ds_data_status = DestroyAclDataset(acl_dataset);
+  if (!ds_data_status.ok()) {
+    ADP_LOG(ERROR) << "Call DestroyAclDataset failed.";
+    return ACL_ERROR_RT_PARAM_INVALID;
+  }
+  return acl_status;
+}
+
+} // namespace tensorflow
\ No newline at end of file
diff --git a/tf_adapter/util/acl_channel.h b/tf_adapter/util/acl_channel.h
new file mode 100644
index 000000000..721824929
--- /dev/null
+++ b/tf_adapter/util/acl_channel.h
@@ -0,0 +1,46 @@
+/* Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_ACL_CHANNEL_H_
+#define TENSORFLOW_ACL_CHANNEL_H_
+
+#include "acl/acl_tdt.h"
+#include "tensorflow/core/framework/tensor.h"
+
+namespace tensorflow {
+
+Status MappingTfDtypeToAcl(const tensorflow::DataType tf_type, aclDataType &acl_type);
+
+Status MappingAclDtypeToTf(const aclDataType &acl_type, tensorflow::DataType &tf_type);
+
+Status AssembleAclTensor2Tensor(acltdtDataItem *item, std::vector<Tensor> &tensors, bool call_by_channel_receive);
+
+Status AssembleAclDataset2Tensors(acltdtDataset *acl_dataset, std::vector<Tensor> &out_tensors,
+                                  bool call_by_channel_receive);
+
+Status AssembleTensors2AclDataset(acltdtTensorType acl_type, const std::vector<Tensor> &tensors,
+                                  acltdtDataset **acl_dataset);
+
+Status AssembleTensors2AclDataset(acltdtTensorType acl_type, const std::vector<Tensor> &tensors,
+                                  acltdtDataset *acl_dataset);
+
+Status DestroyAclDataset(acltdtDataset *acl_dataset, bool include_data_item = true);
+
+Status RecvTensorByAcl(acltdtChannelHandle *acl_handle, std::vector<Tensor> &tensors);
+
+aclError SendTensorsByAcl(const acltdtChannelHandle *acl_handle, acltdtTensorType acl_type,
+                          const std::vector<Tensor> &tensors);
+
+} // namespace tensorflow
+
+#endif // TENSORFLOW_ACL_CHANNEL_H_
diff --git a/tf_adapter/util/ge_plugin.cc b/tf_adapter/util/ge_plugin.cc
index 35e5210de..3c64b261e 100644
--- a/tf_adapter/util/ge_plugin.cc
+++ b/tf_adapter/util/ge_plugin.cc
@@ -20,7 +20,6 @@
 #include "framework/omg/omg_inner_types.h"
 #include "ge/ge_api.h"
 #include "ge/ge_api_types.h"
-#include "tdt/tdt_host_interface.h"
 #include "tensorflow/core/util/env_var.h"
 #include "tf_adapter/common/adp_logger.h"
 #include "tf_adapter/common/common.h"
@@ -31,7 +30,6 @@
 using json = nlohmann::json;
 using namespace tensorflow;
-using namespace tdt;
 constexpr int kFatalSleepTime = 3000;
 namespace {
 inline string ToString(ge::Status status) { return ::ge::StatusFactory::Instance()->GetErrDesc(status); }
@@ -56,7 +54,6 @@ void GeFinalize() {
 } // namespace
 GePlugin::GePlugin()
-    : device_id_(0), isInit_(false), isGlobal_(false) {
   ADP_LOG(INFO) << "[GePlugin] new constructor";
 }
@@ -217,22 +214,6 @@ void GePlugin::Init(std::map<std::string, std::string> &init_options, bool is_global) {
   ADP_LOG(INFO) << "[GePlugin] optypelist_for_implmode :" << init_options[ge::OPTYPELIST_FOR_IMPLMODE];
-  const char *tdt_uninit_env = std::getenv("ASCEND_TDT_UNINIT");
-  bool tdt_init = true;
-  if (tdt_uninit_env != nullptr && std::atoi(tdt_uninit_env) == 1) {
-    tdt_init = false;
-  }
-  if (tdt_init) {
-    // Open TsdClient first, then call GEInitialize
-    ADP_LOG(INFO) << "[GePlugin] Open TsdClient and Init tdt host.";
-    int32_t ret = tdt::TdtOutFeedInit(static_cast<uint32_t>(device_id_));
-    if (ret != 0) {
-      std::this_thread::sleep_for(std::chrono::milliseconds(kFatalSleepTime));
-      ADP_LOG(FATAL) << "[GePlugin] Tdt host init failed, tdt error code : " << ret;
-      LOG(FATAL) << "[GePlugin] Tdt host init failed, tdt error code : " << ret;
-    }
-  }
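+  // TDT host init is intentionally gone here: infeed/outfeed now presumably
+  // rides on the ACL tdt channel (see tf_adapter/util/acl_channel.cc), so
+  // GEInitialize no longer depends on TsdClient being opened first.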
   // ge Initialize
   ge::Status status = ge::GEInitialize(init_options);
   if (status != ge::SUCCESS) {
@@ -290,20 +271,6 @@ void GePlugin::Finalize() {
   // ge finalize
   GeFinalize();
-
-  const char *tdt_uninit_env = std::getenv("ASCEND_TDT_UNINIT");
-  bool tdt_init = true;
-  if (tdt_uninit_env != nullptr && std::atoi(tdt_uninit_env) == 1) {
-    tdt_init = false;
-  }
-  if (tdt_init) {
-    ADP_LOG(INFO) << "[GePlugin] Close TsdClient and destroy tdt.";
-    int32_t ret = tdt::TdtOutFeedDestroy();
-    if (ret != 0) {
-      LOG(ERROR) << "[GePlugin] Close tdt host failed.";
-      ADP_LOG(ERROR) << "[GePlugin] Close tdt host failed.";
-    }
-  }
   isInit_ = false;
 }
@@ -338,19 +305,6 @@ void PluginFinalize() {
  */
 void NpuClose() {
   GeFinalize();
-  uint32_t device_id = 0;
-  (void)GetEnvDeviceID(device_id);
-  if (NpuAttrs::GetUseTdtStatus(device_id)) {
-    ADP_LOG(INFO) << "[GePlugin] the process has turned on TDT resource, finalize resource at exit.";
-    int32_t tdt_status = TdtInFeedDestroy(device_id);
-    if (tdt_status != 0) {
-      ADP_LOG(ERROR) << "[GePlugin] Tdt client close failed.";
-      LOG(ERROR) << "[GePlugin] Tdt client close failed.";
-    } else {
-      ADP_LOG(INFO) << "[GePlugin] Tdt client close success.";
-      NpuAttrs::SetUseTdtStatus(device_id, false);
-    }
-  }
   ADP_LOG(INFO) << "[GePlugin] npu finalize resource success";
 }
diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc
index 23b93672b..b77a2a31d 100644
--- a/tf_adapter/util/npu_attrs.cc
+++ b/tf_adapter/util/npu_attrs.cc
@@ -27,6 +27,7 @@
 namespace tensorflow {
 std::map<int32_t, bool> NpuAttrs::turn_on_tdt_info_;
 std::map<std::string, bool> NpuAttrs::use_adp_info_;
+std::map<std::string, bool> NpuAttrs::dataset_execute_info_;
 std::string GetDumpPath() {
   char *npu_collect_path = std::getenv("NPU_COLLECT_PATH");
@@ -262,15 +263,6 @@ inline Status checkEnableDp(bool enable_dp) {
   }
 }
-bool NpuAttrs::GetUseTdtStatus(int32_t device_id) {
-  if (turn_on_tdt_info_.count(device_id) > 0) {
-    ADP_LOG(INFO) << "get device: " << device_id << " turn_on_tdt_info_: " << turn_on_tdt_info_[device_id];
-    return turn_on_tdt_info_[device_id];
-  } else {
-    return false;
-  }
-}
-
 void NpuAttrs::SetUseTdtStatus(int32_t device_id, bool is_turn_on_tdt) {
   turn_on_tdt_info_[device_id] = is_turn_on_tdt;
   ADP_LOG(INFO) << "set device: " << device_id << " turn_on_tdt_info_: " << turn_on_tdt_info_[device_id];
@@ -290,6 +282,20 @@ void NpuAttrs::SetUseAdpStatus(std::string iterator_name, bool is_use_adp) {
   ADP_LOG(INFO) << "set iterator: " << iterator_name << " use_adp_info_: " << use_adp_info_[iterator_name];
 }
+bool NpuAttrs::IsDatasetExecuteInDevice(std::string iterator_name) {
+  if (dataset_execute_info_.count(iterator_name) > 0) {
+    ADP_LOG(INFO) << "get data pre-process graph: " << iterator_name
+                  << " dataset_execute_info_: " << dataset_execute_info_[iterator_name];
+    return dataset_execute_info_[iterator_name];
+  } else {
+    return false;
+  }
+}
+
+void NpuAttrs::SetDatasetExecuteInDeviceStatus(std::string iterator_name, bool is_dataset_execute_device) {
+  dataset_execute_info_[iterator_name] = is_dataset_execute_device;
+  ADP_LOG(INFO) << "data pre-process graph: " << iterator_name
+                << " dataset_execute_info_: " << dataset_execute_info_[iterator_name];
+}
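+// The two helpers above presumably let the DP conversion pass record, per
+// iterator, whether its pre-processing graph was sunk onto the device
+// (assumption: the call sites live in dp_tf_ge_conversion_pass.cc, outside
+// this hunk).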
+
 std::map<std::string, std::string> NpuAttrs::GetSessOptions(OpKernelConstruction *ctx) {
   std::map<std::string, std::string> sess_options;
   std::string variable_format_optimize = std::to_string(true);
diff --git a/tf_adapter/util/npu_attrs.h b/tf_adapter/util/npu_attrs.h
index 2ca3e4d4e..e57a21a95 100644
--- a/tf_adapter/util/npu_attrs.h
+++ b/tf_adapter/util/npu_attrs.h
@@ -44,13 +44,15 @@ class NpuAttrs {
   static std::map<std::string, std::string> GetDefaultPassOptions();
   static Status SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options, Node *node);
   static void LogOptions(const std::map<std::string, std::string> &options);
-  static bool GetUseTdtStatus(int32_t device_id);
   static void SetUseTdtStatus(int32_t device_id, bool is_turn_on_tdt);
   static bool GetUseAdpStatus(std::string iterator_name);
   static void SetUseAdpStatus(std::string iterator_name, bool is_use_adp);
+  static bool IsDatasetExecuteInDevice(std::string iterator_name);
+  static void SetDatasetExecuteInDeviceStatus(std::string iterator_name, bool is_dataset_execute_device);
  private:
   static std::map<int32_t, bool> turn_on_tdt_info_;
   static std::map<std::string, bool> use_adp_info_;
+  static std::map<std::string, bool> dataset_execute_info_;
 };
 } // namespace tensorflow
-- 
Gitee