From 909707a43c1c93a1f81a635739d91fa004f91fc3 Mon Sep 17 00:00:00 2001 From: medivh-x Date: Tue, 15 Jun 2021 16:08:18 +0800 Subject: [PATCH 01/21] compat with xla for tf 2.5 --- tf_adapter_2.x/python/npu_device/npu_device.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tf_adapter_2.x/python/npu_device/npu_device.py b/tf_adapter_2.x/python/npu_device/npu_device.py index 48724abc0..840e70c08 100644 --- a/tf_adapter_2.x/python/npu_device/npu_device.py +++ b/tf_adapter_2.x/python/npu_device/npu_device.py @@ -135,9 +135,10 @@ _thread_local = threading.local() def never_nested_function(func=None, *args, **kwargs): def never_nested_decorator(f): - if kwargs.get('experimental_compile'): + if kwargs.get('experimental_compile') or kwargs.get('jit_compile'): logging.info("Skip xla compile tf function %s on npu", f.__name__) kwargs['experimental_compile'] = False + kwargs['jit_compile'] = False tf_decorated_func = _hacked_tensorflow_function(*args, **kwargs)(f) def wrapper(*func_args, **func_kwargs): -- Gitee From 48fb974968aa36121f52a044b98680ada19e3899 Mon Sep 17 00:00:00 2001 From: wangkai Date: Fri, 18 Jun 2021 10:43:25 +0800 Subject: [PATCH 02/21] add link header targets Signed-off-by: wangkai --- CMakeLists.txt | 5 +++++ tf_adapter/kernels/geop_npu.h | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c31967f32..ba8fd5ffb 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,6 +23,7 @@ if (ENABLE_OPEN_SRC) include(${CMAKE_CURRENT_LIST_DIR}/cmake/tensorflow.cmake) include_directories(${CMAKE_CURRENT_LIST_DIR}) include_directories(${CMAKE_CURRENT_LIST_DIR}/inc) + include_directories(${CMAKE_CURRENT_LIST_DIR}/inc/toolchain) include_directories(${CMAKE_CURRENT_LIST_DIR}/inc/external) include_directories(${CMAKE_CURRENT_LIST_DIR}/inc/soft_dp) include_directories(${CMAKE_CURRENT_LIST_DIR}/inc/graphengine/inc) @@ -169,6 +170,10 @@ else() target_link_libraries(tf_adapter PUBLIC $ + $ + $ + $ + $ -Wl,--no-as-needed c_sec ge_runner diff --git a/tf_adapter/kernels/geop_npu.h b/tf_adapter/kernels/geop_npu.h index 730c30c16..e8f8eae70 100644 --- a/tf_adapter/kernels/geop_npu.h +++ b/tf_adapter/kernels/geop_npu.h @@ -38,7 +38,7 @@ limitations under the License. 
#include "ge/ge_api_types.h" #include "graph/tensor.h" #include "graph/utils/graph_utils.h" -#include "toolchain/tuning_tool/tune_api.h" +#include "tuning_tool/tune_api.h" #include namespace tensorflow { -- Gitee From 5c29860b512418b26aab3fa6fe09cf6f34703170 Mon Sep 17 00:00:00 2001 From: lianghuikang <505519763@qq.com> Date: Fri, 18 Jun 2021 14:29:38 +0800 Subject: [PATCH 03/21] fix miss run_config --- tf_adapter/python/npu_bridge/estimator/npu/keras_to_npu.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tf_adapter/python/npu_bridge/estimator/npu/keras_to_npu.py b/tf_adapter/python/npu_bridge/estimator/npu/keras_to_npu.py index 577d0e1e7..708277bcb 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/keras_to_npu.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/keras_to_npu.py @@ -42,6 +42,7 @@ from tensorflow.python.training import training_util from tensorflow.python.training.tracking import graph_view from tensorflow.python.training.tracking import util as trackable_util from tensorflow.python.util import compat_internal +from tensorflow_estimator.python.estimator import run_config from tensorflow_estimator.python.estimator import estimator as estimator_lib from tensorflow_estimator.python.estimator import model_fn as model_fn_lib from tensorflow_estimator.python.estimator.export import export_lib -- Gitee From 991178530c3a0e9035118c2b733c2174d4794220 Mon Sep 17 00:00:00 2001 From: medivh-x Date: Mon, 21 Jun 2021 14:09:11 +0800 Subject: [PATCH 04/21] add keep dtype scope as api npu.keep_dtype_scope --- tf_adapter_2.x/python/npu_device/__init__.py | 2 ++ tf_adapter_2.x/python/npu_device/npu_device.py | 9 ++++++--- tf_adapter_2.x/python/npu_device/utils/scope.py | 9 +++++++++ 3 files changed, 17 insertions(+), 3 deletions(-) create mode 100644 tf_adapter_2.x/python/npu_device/utils/scope.py diff --git a/tf_adapter_2.x/python/npu_device/__init__.py b/tf_adapter_2.x/python/npu_device/__init__.py index 1eaf9267f..747806540 100644 --- a/tf_adapter_2.x/python/npu_device/__init__.py +++ b/tf_adapter_2.x/python/npu_device/__init__.py @@ -3,6 +3,8 @@ from npu_device.npu_device import never_nested_function from npu_device.npu_device import gen_npu_ops from npu_device.npu_device import global_options +from npu_device.utils.scope import keep_dtype_scope + from npu_device._api import distribute from npu_device._api import train from npu_device._api import ops diff --git a/tf_adapter_2.x/python/npu_device/npu_device.py b/tf_adapter_2.x/python/npu_device/npu_device.py index 840e70c08..0c3f3a4be 100644 --- a/tf_adapter_2.x/python/npu_device/npu_device.py +++ b/tf_adapter_2.x/python/npu_device/npu_device.py @@ -135,10 +135,13 @@ _thread_local = threading.local() def never_nested_function(func=None, *args, **kwargs): def never_nested_decorator(f): - if kwargs.get('experimental_compile') or kwargs.get('jit_compile'): + if kwargs.get('experimental_compile'): logging.info("Skip xla compile tf function %s on npu", f.__name__) - kwargs['experimental_compile'] = False - kwargs['jit_compile'] = False + kwargs['experimental_compile'] = False + if kwargs.get('jit_compile'): + logging.info("Skip xla compile tf function %s on npu", f.__name__) + kwargs['jit_compile'] = False + tf_decorated_func = _hacked_tensorflow_function(*args, **kwargs)(f) def wrapper(*func_args, **func_kwargs): diff --git a/tf_adapter_2.x/python/npu_device/utils/scope.py b/tf_adapter_2.x/python/npu_device/utils/scope.py new file mode 100644 index 000000000..c175bb173 --- /dev/null +++ 
b/tf_adapter_2.x/python/npu_device/utils/scope.py @@ -0,0 +1,9 @@ +from tensorflow.python.framework import ops +from tensorflow.python.util import tf_contextlib +from tensorflow.core.framework import attr_value_pb2 + + +@tf_contextlib.contextmanager +def keep_dtype_scope(): + with ops.get_default_graph()._attr_scope({'_keep_dtype': attr_value_pb2.AttrValue(b=True)}): + yield -- Gitee From b3675900c5ee3e836b5ee195c5d50037a629da1e Mon Sep 17 00:00:00 2001 From: lianghuikang <505519763@qq.com> Date: Tue, 22 Jun 2021 13:41:32 +0800 Subject: [PATCH 05/21] convert save_summary_steps --- convert_tf2npu/ast_impl.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/convert_tf2npu/ast_impl.py b/convert_tf2npu/ast_impl.py index ee5d2d191..5618347b1 100644 --- a/convert_tf2npu/ast_impl.py +++ b/convert_tf2npu/ast_impl.py @@ -348,6 +348,17 @@ def ast_call(node): node.keywords = [] node.args = [] util_global.set_value('need_conver', True) + if (isinstance(node.func, ast.Attribute) and (node.func.attr == 'RunConfig')) and \ + (_call_name_match(node.func.value, 'estimator') or _call_name_match(node.func.value, 'tpu')): + save_summary_steps = None + for keyword in node.keywords: + if (keyword.arg == 'save_summary_steps'): + save_summary_steps = keyword + break + if len(node.args) < 3 and not save_summary_steps: + log_msg(getattr(node, 'lineno'), 'RunConfig() add save_summary_steps=0') + util_global.set_value('need_conver', True) + node.keywords.append(ast.keyword(arg='save_summary_steps', value=pasta.parse('0'))) if isinstance(node.func, ast.Attribute) and (node.func.attr == 'TPUEstimator') and \ ((isinstance(node.func.value, ast.Attribute) and (node.func.value.attr == 'tpu')) or \ (isinstance(node.func.value, ast.Name) and (node.func.value.id == 'tpu'))): -- Gitee From a51ad29ca340fe76292bf55c2108d8dd2b34c87d Mon Sep 17 00:00:00 2001 From: lianghuikang <505519763@qq.com> Date: Sat, 19 Jun 2021 14:31:09 +0800 Subject: [PATCH 06/21] add option op_precision_mode --- inc/graphengine/inc/external/ge/ge_api_types.h | 5 +++++ tf_adapter/interface_spec/api_npu_config.pyh | 2 +- .../optimizers/om_partition_subgraphs_pass.cc | 2 -- .../python/npu_bridge/estimator/npu/npu_config.py | 5 ++++- .../npu_bridge/estimator/npu/npu_estimator.py | 2 ++ tf_adapter/util/npu_attrs.cc | 13 +++++++++++++ 6 files changed, 25 insertions(+), 4 deletions(-) diff --git a/inc/graphengine/inc/external/ge/ge_api_types.h b/inc/graphengine/inc/external/ge/ge_api_types.h index fbd6c020e..6f5bbfbfe 100644 --- a/inc/graphengine/inc/external/ge/ge_api_types.h +++ b/inc/graphengine/inc/external/ge/ge_api_types.h @@ -113,6 +113,7 @@ const char *const INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16"; const char *const OP_DEBUG_LEVEL = "ge.opDebugLevel"; const char *const PERFORMANCE_MODE = "ge.performance_mode"; const char *const MODIFY_MIXLIST = "ge.exec.modify_mixlist"; +const char *const OP_PRECISION_MODE = "ge.exec.op_precision_mode"; } // namespace configure_option // Configure stream num by Session constructor options param, // its value should be int32_t type, default value is "1" @@ -326,6 +327,8 @@ const std::string PERFORMANCE_MODE = "ge.performance_mode"; const std::string MODIFY_MIXLIST = "ge.exec.modify_mixlist"; +const std::string OP_PRECISION_MODE = "ge.exec.op_precision_mode"; + // Graph run mode enum GraphRunMode { PREDICTION = 0, TRAIN }; @@ -405,6 +408,7 @@ static const char *const OP_BANK_UPDATE = ge::OP_BANK_UPDATE_FLAG.c_str(); static const char *const OP_DEBUG_LEVEL = ge::OP_DEBUG_LEVEL.c_str(); static const char 
*const PERFORMANCE_MODE = ge::PERFORMANCE_MODE.c_str(); static const char *const MODIFY_MIXLIST = ge::MODIFY_MIXLIST.c_str(); +static const char *const OP_PRECISION_MODE = ge::OP_PRECISION_MODE.c_str(); // for interface: aclgrphBuildModel #ifdef __GNUC__ @@ -416,6 +420,7 @@ const std::set ir_builder_suppported_options = {INPUT_FORMAT, DYNAMIC_IMAGE_SIZE, DYNAMIC_DIMS, INSERT_OP_FILE, + OP_PRECISION_MODE, PRECISION_MODE, TUNE_DEVICE_IDS, EXEC_DISABLE_REUSED_MEMORY, diff --git a/tf_adapter/interface_spec/api_npu_config.pyh b/tf_adapter/interface_spec/api_npu_config.pyh index 614a5a858..1674c3508 100644 --- a/tf_adapter/interface_spec/api_npu_config.pyh +++ b/tf_adapter/interface_spec/api_npu_config.pyh @@ -15,7 +15,7 @@ class NPURunConfig(run_config_lib.RunConfig): op_compiler_cache_mode=None, op_compiler_cache_dir=None, debug_dir=None, hcom_multi_mode=False, dynamic_input=False, dynamic_graph_execute_mode="dynamic_execute", dynamic_inputs_shape_range=None, train_distribute=None, eval_distribute=None, local_rank_id=None, local_device_list=None, session_device_id=None, - distribute_config=None, modify_mixlist=None): + distribute_config=None, modify_mixlist=None, op_precision_mode=None): class ProfilingConfig(): def __init__(self, enable_profiling=False, profiling_options=None): diff --git a/tf_adapter/optimizers/om_partition_subgraphs_pass.cc b/tf_adapter/optimizers/om_partition_subgraphs_pass.cc index e32965a09..634958502 100644 --- a/tf_adapter/optimizers/om_partition_subgraphs_pass.cc +++ b/tf_adapter/optimizers/om_partition_subgraphs_pass.cc @@ -2085,8 +2085,6 @@ Status OMPartitionSubgraphsPass::ProcessGraph(std::unique_ptr *graph, Fun break; } } - ADP_LOG(INFO) << "pass options:"; - NpuAttrs::LogOptions(pass_options); ADP_LOG(INFO) << "all options:"; NpuAttrs::LogOptions(all_options); diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py index bab6797c5..68dd28e12 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py @@ -87,7 +87,8 @@ class NPURunConfig(run_config_lib.RunConfig): local_device_list=None, session_device_id=None, distribute_config=None, - modify_mixlist=None + modify_mixlist=None, + op_precision_mode=None ): """ Constructs a NPUConfig. @@ -155,6 +156,7 @@ class NPURunConfig(run_config_lib.RunConfig): local_device_list: Available devices. distribute_config: Specify the NCA configuration file path modify_mixlist: Set the path of operator mixed precision configuration file. + op_precision_mode: Set the path of operator precision mode configuration file (.ini) """ # Check iterations_per_loop. 
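As a usage sketch for reviewers, the new option is consumed from user code roughly as below (the .ini path and the other arguments are placeholders, not part of this patch; assumes the npu_bridge package from this repository is installed):

from npu_bridge.estimator.npu.npu_config import NPURunConfig

# Hypothetical call site: op_precision_mode points to an operator precision
# mode configuration file (.ini); the path below is a placeholder.
config = NPURunConfig(
    model_dir="/tmp/npu_model",
    op_precision_mode="/path/to/op_precision_mode.ini")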
@@ -236,6 +238,7 @@ class NPURunConfig(run_config_lib.RunConfig): self._session_device_id = session_device_id self._distribute_config = distribute_config self._modify_mixlist = modify_mixlist + self._op_precision_mode = op_precision_mode super(NPURunConfig, self).__init__( model_dir=model_dir, diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py index eed5a44e4..2bbf1adc5 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py @@ -758,6 +758,8 @@ class NPUEstimator(estimator_lib.Estimator): custom_op.parameter_map["session_device_id"].i = config._session_device_id if config._modify_mixlist is not None: custom_op.parameter_map["modify_mixlist"].s = tf.compat.as_bytes(config._modify_mixlist) + if config._op_precision_mode is not None: + custom_op.parameter_map["op_precision_mode"].s = tf.compat.as_bytes(config._op_precision_mode) # add profiling options to custom_op self.__load_profiling_options(config, custom_op) diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc index 0e87e5d47..4fca7f5f0 100644 --- a/tf_adapter/util/npu_attrs.cc +++ b/tf_adapter/util/npu_attrs.cc @@ -317,6 +317,7 @@ std::map NpuAttrs::GetSessOptions(OpKernelConstruction std::string dynamic_node_type; std::string session_device_id; std::string modify_mixlist; + std::string op_precision_mode; if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) { ctx->GetAttr("_variable_format_optimize", &variable_format_optimize); @@ -367,6 +368,7 @@ std::map NpuAttrs::GetSessOptions(OpKernelConstruction ctx->GetAttr("_dynamic_node_type", &dynamic_node_type); ctx->GetAttr("_session_device_id", &session_device_id); ctx->GetAttr("_modify_mixlist", &modify_mixlist); + ctx->GetAttr("_op_precision_mode", &op_precision_mode); } // session options @@ -396,6 +398,7 @@ std::map NpuAttrs::GetSessOptions(OpKernelConstruction sess_options["ge.session_device_id"] = session_device_id; } sess_options[ge::MODIFY_MIXLIST] = modify_mixlist; + sess_options["ge.exec.op_precision_mode"] = op_precision_mode; return sess_options; } @@ -768,6 +771,7 @@ std::map NpuAttrs::GetAllAttrOptions(AttrSlice attrs) std::string hcom_multi_mode; std::string session_device_id; std::string modify_mixlist; + std::string op_precision_mode; if (attrs.Find("_NpuOptimizer") != nullptr) { do_npu_optimizer = std::to_string(true); @@ -904,6 +908,9 @@ std::map NpuAttrs::GetAllAttrOptions(AttrSlice attrs) if (attrs.Find("_modify_mixlist") != nullptr) { modify_mixlist = attrs.Find("_modify_mixlist")->s(); } + if (attrs.Find("_op_precision_mode") != nullptr) { + op_precision_mode = attrs.Find("_op_precision_mode")->s(); + } } all_options["variable_format_optimize"] = variable_format_optimize; @@ -960,6 +967,7 @@ std::map NpuAttrs::GetAllAttrOptions(AttrSlice attrs) all_options["hcom_multi_mode"] = hcom_multi_mode; all_options["session_device_id"] = session_device_id; all_options["modify_mixlist"] = modify_mixlist; + all_options["op_precision_mode"] = op_precision_mode; return all_options; } @@ -1037,6 +1045,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options bool hcom_multi_mode = false; int session_device_id = -1; std::string modify_mixlist; + std::string op_precision_mode; const RewriterConfig &rewrite_options = options.session_options->config.graph_options().rewrite_options(); for (const auto &custom_optimizer : 
rewrite_options.custom_optimizers()) { @@ -1293,6 +1302,9 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options return errors::Internal("modify_mixlist is assigned, please ensure that precision_mode is assigned to 'allow_mix_precision'."); } } + if (params.count("op_precision_mode")) { + op_precision_mode = params.at("op_precision_mode").s(); + } } } @@ -1323,6 +1335,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options sess_options["hcom_multi_mode"] = std::to_string(hcom_multi_mode); sess_options["session_device_id"] = std::to_string(session_device_id); sess_options["modify_mixlist"] = modify_mixlist; + sess_options["op_precision_mode"] = op_precision_mode; init_options["precision_mode"] = precision_mode; init_options["profiling_mode"] = std::to_string(profiling_mode); -- Gitee From 5722a74d47e5e5f4c8d9f62dadd4e5cdad03f3f8 Mon Sep 17 00:00:00 2001 From: medivh-x Date: Wed, 23 Jun 2021 11:52:25 +0800 Subject: [PATCH 07/21] support modify_mixlist --- tf_adapter_2.x/npu_device/core/npu_wrapper.cpp | 1 + tf_adapter_2.x/python/npu_device/configs/npu_config.py | 1 + 2 files changed, 2 insertions(+) diff --git a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp index 2fe4f3a5d..c4441d04d 100644 --- a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp +++ b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp @@ -72,6 +72,7 @@ const std::map kConfigurableOptions = { {"is_tailing_optimization", ge::OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION}, {"op_debug_level", ge::OP_DEBUG_LEVEL}, {"debug_dir", ge::DEBUG_DIR}, + {"modify_mixlist", ge::MODIFY_MIXLIST}, {"enable_exception_dump", ge::OPTION_EXEC_ENABLE_EXCEPTION_DUMP}, {"enable_dump", ge::OPTION_EXEC_ENABLE_DUMP}, {"dump_path", ge::OPTION_EXEC_DUMP_PATH}, diff --git a/tf_adapter_2.x/python/npu_device/configs/npu_config.py b/tf_adapter_2.x/python/npu_device/configs/npu_config.py index 35e286fe0..1e2b0c54d 100644 --- a/tf_adapter_2.x/python/npu_device/configs/npu_config.py +++ b/tf_adapter_2.x/python/npu_device/configs/npu_config.py @@ -27,6 +27,7 @@ class NpuConfig(NpuBaseConfig): self.is_tailing_optimization = OptionValue(False, [True, False]) self.op_debug_level = OptionValue(0, [0, 1, 2, 3]) self.debug_dir = OptionValue(None, None) + self.modify_mixlist = OptionValue(None, None) self.enable_exception_dump = OptionValue(0, [0, 1]) self.dump_config = NpuDumpConfig() self.profiling_config = NpuProfilingConfig() -- Gitee From bc4c9faf2739112b6ceb47f8600829e042ce24d1 Mon Sep 17 00:00:00 2001 From: huanruizhi Date: Wed, 23 Jun 2021 14:35:23 +0800 Subject: [PATCH 08/21] convert tool bugfix --- convert_tf2npu/ast_impl.py | 87 ++++++++++++++++++--------------- convert_tf2npu/conver_by_ast.py | 7 ++- convert_tf2npu/mappings/ast.py | 8 +-- convert_tf2npu/util.py | 2 +- 4 files changed, 55 insertions(+), 49 deletions(-) diff --git a/convert_tf2npu/ast_impl.py b/convert_tf2npu/ast_impl.py index ee5d2d191..91df4c194 100644 --- a/convert_tf2npu/ast_impl.py +++ b/convert_tf2npu/ast_impl.py @@ -81,19 +81,18 @@ def ast_if(node): args=[], keywords=[])) node.body = [keras_sess_assign] + node.body + [ast.Expr(value=close_sess_call)] util_global.set_value('need_conver', True) - if util_global.get_value("has_hccl_api", False): - log_msg(getattr(node, "lineno", "None"), " add npu resource init api") - close_sess_call = ast.Call(func=ast.Name(id="close_session", ctx=ast.Load()), - args=[ast.Name(id="npu_sess", ctx=ast.Load())], keywords=[]) - init_assign = 
ast.Assign(targets=[ast.Tuple(elts=[ast.Name(id="npu_sess", ctx=ast.Store()),
- ast.Name(id="npu_shutdown", ctx=ast.Store())],
- ctx=ast.Store())],
- value=ast.Call(func=ast.Name(id="init_resource", ctx=ast.Load()), args=[], keywords=[]))
- shutdown_call = ast.Call(func=ast.Name(id="shutdown_resource", ctx=ast.Load()),
- args=[ast.Name(id="npu_sess", ctx=ast.Load()), ast.Name(id="npu_shutdown", ctx=ast.Load())],
- keywords=[])
- node.body = [init_assign] + node.body + [ast.Expr(value=shutdown_call), ast.Expr(value=close_sess_call)]
- util_global.set_value('need_conver', True)
+ log_msg(getattr(node, "lineno", "None"), " add npu resource init api")
+ close_sess_call = ast.Call(func=ast.Name(id="close_session", ctx=ast.Load()),
+ args=[ast.Name(id="npu_sess", ctx=ast.Load())], keywords=[])
+ init_assign = ast.Assign(targets=[ast.Tuple(elts=[ast.Name(id="npu_sess", ctx=ast.Store()),
+ ast.Name(id="npu_shutdown", ctx=ast.Store())],
+ ctx=ast.Store())],
+ value=ast.Call(func=ast.Name(id="init_resource", ctx=ast.Load()), args=[], keywords=[]))
+ shutdown_call = ast.Call(func=ast.Name(id="shutdown_resource", ctx=ast.Load()),
+ args=[ast.Name(id="npu_sess", ctx=ast.Load()), ast.Name(id="npu_shutdown", ctx=ast.Load())],
+ keywords=[])
+ node.body = [init_assign] + node.body + [ast.Expr(value=shutdown_call), ast.Expr(value=close_sess_call)]
+ util_global.set_value('need_conver', True)
 return node

 def convert_loss_scale_api(node):
@@ -293,13 +292,20 @@ def ast_call(node):
 util_global.set_value('need_conver', True)
 return node
 if isinstance(node.func, ast.Attribute) and node.func.attr == "DistributedOptimizer":
- log_success_report(getattr(node, "lineno", "None"), 'DistributedOptimizer')
- return node.args[0]
+ log_msg(getattr(node, "lineno", "None"), 'change hvd.DistributedOptimizer to the input keyword optimizer')
+ opt_keyword = None
+ for keyword in node.keywords:
+ if keyword.arg == "optimizer":
+ opt_keyword = keyword
+ if opt_keyword is None:
+ return node.args[0]
+ else:
+ return opt_keyword.value
 if isinstance(node.func, ast.Attribute) and node.func.attr == 'shard':
 log_success_report(getattr(node, "lineno", "None"), 'shard')
- node.args = [ast.Call(func=ast.Name(id='get_rank_size', ctx=ast.Load()), args=[], keywords=[]),
- ast.Call(func=ast.Name(id='get_rank_id', ctx=ast.Load()), args=[], keywords=[])]
- util_global.set_value("has_hccl_api", True)
+ node.args = [pasta.parse("int(os.getenv('RANK_SIZE', '1'))"),
+ pasta.parse("int(os.getenv('RANK_ID', '0'))")]
+ node.keywords.clear()
 util_global.set_value('need_conver', True)
 if isinstance(node.func, ast.Attribute) and node.func.attr == 'dropout':
 if isinstance(node.func.value, ast.Attribute) and node.func.value.attr == 'nn':
@@ -315,6 +321,9 @@ def ast_call(node):
 for keyword in node.keywords:
 if keyword.arg != 'rate':
 keywords_new.append(keyword)
+ else:
+ keywords_new.append(ast.keyword(arg='keep_prob', value=ast.BinOp(left=ast.Num(n=1), op=ast.Sub(),
+ right=keyword.value)))
 node.keywords = keywords_new
 util_global.set_value('need_conver', True)
 if isinstance(node.func, ast.Attribute) and ((node.func.attr == 'map_and_batch') or (node.func.attr == 'batch' \
@@ -547,32 +556,30 @@ def remove_hvd_import(r_node):
 n = 0
 lenline = len(r_node.body)
- while n < lenline and not isinstance(r_node.body[n], ast.ImportFrom) and not isinstance(r_node.body[n], ast.Import):
- n += 1
- while n < lenline and (isinstance(r_node.body[n], ast.ImportFrom) or isinstance(r_node.body[n], ast.Import)):
- if isinstance(r_node.body[n], ast.ImportFrom):
- if
r_node.body[n].module != None: - values = r_node.body[n].module.split(".") - if "horovod" in values: - log_msg(getattr(r_node.body[n], "lineno", "None"), " remove hvd import.") - r_node.body.pop(n) - lenline -= 1 - for value in r_node.body[n].names: - if isinstance(value, ast.alias): - values = value.name.split(".") - if "horovod" in values: - log_msg(getattr(r_node.body[n], "lineno", "None"), " remove hvd import.") - r_node.body.pop(n) - lenline -= 1 - elif isinstance(r_node.body[n], ast.Import): - for value in r_node.body[n].names: - if isinstance(value, ast.alias): - values = value.name.split(".") + while n < lenline: + if isinstance(r_node.body[n], ast.ImportFrom) or isinstance(r_node.body[n], ast.Import): + if isinstance(r_node.body[n], ast.ImportFrom): + if r_node.body[n].module != None: + values = r_node.body[n].module.split(".") if "horovod" in values: log_msg(getattr(r_node.body[n], "lineno", "None"), " remove hvd import.") r_node.body.pop(n) lenline -= 1 + for value in r_node.body[n].names: + if isinstance(value, ast.alias): + values = value.name.split(".") + if "horovod" in values: + log_msg(getattr(r_node.body[n], "lineno", "None"), " remove hvd import.") + r_node.body.pop(n) + lenline -= 1 + elif isinstance(r_node.body[n], ast.Import): + for value in r_node.body[n].names: + if isinstance(value, ast.alias): + values = value.name.split(".") + if "horovod" in values: + log_msg(getattr(r_node.body[n], "lineno", "None"), " remove hvd import.") + r_node.body.pop(n) + lenline -= 1 n += 1 def insert_npu_import(r_node): diff --git a/convert_tf2npu/conver_by_ast.py b/convert_tf2npu/conver_by_ast.py index b8c2a1dde..0d70f904b 100644 --- a/convert_tf2npu/conver_by_ast.py +++ b/convert_tf2npu/conver_by_ast.py @@ -101,12 +101,11 @@ def conver_ast(path, out_path_dst, file_name): insert_npu_import(r_node) if not util_global.get_value('has_main_func', False) and (util_global.get_value('has_hccl_api', False) or util_global.get_value('is_keras_net', False)): - log_warning('the network of keras and horovod, or using dataset.shard script do not have main func, ' + log_warning('the network of keras and horovod script do not have main func, ' 'should set -m or --main parameter') - if util_global.get_value('has_main_func', False) and util_global.get_value('has_hccl_api', False): - remove_hvd_import(r_node) - if util_global.get_value('is_main_file', False) and util_global.get_value('has_hccl_api', False): + if util_global.get_value('has_hccl_api', False): remove_hvd_import(r_node) + if util_global.get_value('is_main_file', False): insert_npu_resource_init(r_node) insert_npu_resource_shutdown(r_node) if util_global.get_value('is_main_file', False) and util_global.get_value('is_keras_net', False): diff --git a/convert_tf2npu/mappings/ast.py b/convert_tf2npu/mappings/ast.py index f2864da5b..1d3160291 100644 --- a/convert_tf2npu/mappings/ast.py +++ b/convert_tf2npu/mappings/ast.py @@ -4,9 +4,9 @@ "dropout": ["npu_ops", "tf.nn.dropout", "npu_ops.dropout"], "init": ["print", "hvd.init", "None"], "DistributedOptimizer": ["NPUDistributedOptimizer", "hvd.DistributedOptimizer", "NPUDistributedOptimizer"], -"rank": ["get_npu_rank_id", "hvd.rank", "get_npu_rank_id"], -"local_rank": ["get_npu_local_rank_id", "hvd.local_rank", "get_npu_local_rank_id"], -"size": ["get_npu_rank_size", "hvd.size", "get_npu_rank_size"], +"rank": ["get_npu_rank_id", "hvd.rank", "get_npu_rank_id"], +"local_rank": ["get_npu_local_rank_id", "hvd.local_rank", "get_npu_local_rank_id"], +"size": ["get_npu_rank_size", "hvd.size", 
"get_npu_rank_size"], "BroadcastGlobalVariablesHook": ["print", "hvd.BroadcastGlobalVariablesHook", "None"], "shard": ["", "dataset.shard(xxx, xxx)", "dataset.shard(get_rank_size(), get_rank_id())"], "EstimatorSpec": ["NPUEstimatorSpec", "tf.estimator.EstimatorSpec", "NPUEstimatorSpec"], @@ -49,7 +49,7 @@ "DNNLinearCombinedClassifier", "DNNLinearCombinedEstimator", "DNNLinearCombinedRegressor", "LinearClassifier", "LinearEstimator", "LinearRegressor"], "EstimatorFunc": ["train"], -"Session()": ["", "*.Session()", "*.Session(config=npu_session_config_init())"], +"Session()": ["", "*.*Session()", "*.*Session(config=npu_session_config_init())"], "ConfigProto()": ["", "*.ConfigProto()", "npu_config_proto(config_proto=*.ConfigProto())"], "GraphOptions()": ["", "*.GraphOptions()", "npu_graph_options(graph_options=*.GraphOptions())"], "OptimizerOptions()": ["", "*.OptimizerOptions()", "npu_optimizer_options(optimizer_options=*.OptimizerOptions())"], diff --git a/convert_tf2npu/util.py b/convert_tf2npu/util.py index 1034e227a..190afe51e 100644 --- a/convert_tf2npu/util.py +++ b/convert_tf2npu/util.py @@ -28,7 +28,7 @@ def log_info(lineno, msg, file): write_conver_report(content, file) def log_warning(msg): - content = "************" + msg + "************" + content = "WARNING:" + msg print(content) write_conver_report(content, util_global.get_value('report_file')[0]) -- Gitee From 14b67d9ba455f1ff1a8bd19dd40b45791a4ba7ae Mon Sep 17 00:00:00 2001 From: panghongjun Date: Thu, 24 Jun 2021 09:48:36 +0800 Subject: [PATCH 09/21] error message --- tf_adapter/kernels/geop_npu.cc | 26 +- .../kernels/pbtxt/geop_dynamic_config.pbtxt | 527 +++++++++++++++++ .../ut/kernels/pbtxt/geop_output_error.pbtxt | 548 ++++++++++++++++++ .../ut/kernels/testcase/geop_npu_test.cc | 14 +- 4 files changed, 1104 insertions(+), 11 deletions(-) create mode 100644 tf_adapter/tests/ut/kernels/pbtxt/geop_dynamic_config.pbtxt create mode 100644 tf_adapter/tests/ut/kernels/pbtxt/geop_output_error.pbtxt diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc index f53502290..6ae91409f 100644 --- a/tf_adapter/kernels/geop_npu.cc +++ b/tf_adapter/kernels/geop_npu.cc @@ -528,7 +528,10 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { bool is_lazy_recompile_mode = dynamic_input_ == "1" && dynamic_graph_execute_mode_ == "lazy_recompile"; if (is_set_dynamic_config && is_tuning) { ADP_LOG(FATAL) << "dynamic input config can not use with mstuning."; - LOG(FATAL) << "dynamic input config can not use with mstuning."; + std::stringstream ss; + ss << "dynamic input config can not use with mstuning."; + OP_REQUIRES_ASYNC(ctx, false, errors::Internal(ss.str()), done); + return; } else if (is_set_dynamic_config && !is_tuning) { if (InitRebuildFlag(cache_graph_id) != 0) { OP_REQUIRES_ASYNC(ctx, false, errors::Internal("Failed to check rebuild flag"), done); @@ -731,7 +734,6 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { << ", graph id: " << cache_graph_id << std::endl << "Error Message is : " << std::endl << error_message; - LOG(FATAL) << ss.str(); OP_REQUIRES_ASYNC(ctx, status == ge::SUCCESS, errors::Unavailable(ss.str()), done); } else { add_graph_flag_ = true; @@ -781,21 +783,28 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { if (ge_status == ge::SUCCESS) { if (BuildOutputTensorInfo(ctx, outputs) != Status::OK()) { ADP_LOG(FATAL) << ctx->op_kernel().name() << " GEOP::DoRunAsync get output failed."; - LOG(FATAL) << ctx->op_kernel().name() << " GEOP::DoRunAsync 
get output failed."; + std::string error_message = ge::GEGetErrorMsg(); + std::stringstream ss; + ss << ctx->op_kernel().name() + << "GEOP::DoRunAsync get output failed." << std::endl + << "Error Message is : " << std::endl + << error_message; + OP_REQUIRES_ASYNC(ctx, false, errors::Internal(ss.str()), done); + return; } } else if (ge_status == ge::END_OF_SEQUENCE) { ctx->SetStatus(errors::OutOfRange("End of sequence")); ADP_LOG(WARNING) << "[GEOP] Out of range: End of sequence."; LOG(WARNING) << "[GEOP] Out of range: End of sequence."; } else if (ge_status != ge::SUCCESS) { - tensorflow::Status tfStatus = errors::Unavailable(ToString(ge_status)); - ctx->CtxFailureWithWarning(tfStatus); std::this_thread::sleep_for(std::chrono::milliseconds(kFatalSleepTime)); ADP_LOG(FATAL) << ctx->op_kernel().name() << "GEOP::::DoRunAsync Failed"; std::string error_message = ge::GEGetErrorMsg(); - LOG(FATAL) << ctx->op_kernel().name() << "GEOP::::DoRunAsync Failed" << std::endl - << "Error Message is : " << std::endl - << error_message; + std::stringstream ss; + ss << ctx->op_kernel().name() << "GEOP::::DoRunAsync Failed" << std::endl + << "Error Message is : " << std::endl << error_message; + OP_REQUIRES_ASYNC(ctx, false, errors::Internal(ss.str()), done); + return; } int64 run_end_time = InferShapeUtil::GetCurrentTimestap(); ADP_LOG(INFO) << "[GEOP] RunGraphAsync callback, status:" << ge_status << ", kernel_name:" @@ -818,7 +827,6 @@ void GeOp::ComputeAsync(OpKernelContext *ctx, DoneCallback done) { << ", graph id: " << cache_graph_id << std::endl << "Error Message is : " << std::endl << error_message; - LOG(FATAL) << ss.str(); OP_REQUIRES_ASYNC(ctx, status == ge::SUCCESS, errors::Unavailable(ss.str()), done); } diff --git a/tf_adapter/tests/ut/kernels/pbtxt/geop_dynamic_config.pbtxt b/tf_adapter/tests/ut/kernels/pbtxt/geop_dynamic_config.pbtxt new file mode 100644 index 000000000..b84ef13c0 --- /dev/null +++ b/tf_adapter/tests/ut/kernels/pbtxt/geop_dynamic_config.pbtxt @@ -0,0 +1,527 @@ +node { + name: "retval_Add1_0_0" + op: "_Retval" + input: "GeOp61_0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "index" + value { + i: 0 + } + } +} +node { + name: "GeOp61_0" + op: "GeOp" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "Tin" + value { + list { + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_FLOAT + } + } + } + attr { + key: "_NpuOptimizer" + value { + s: "NpuOptimizer" + } + } + attr { + key: "_auto_tune_mode" + value { + s: "" + } + } + attr { + key: "_buffer_optimize" + value { + s: "l2_optimize" + } + } + attr { + key: "_compress_weight_conf" + value { + s: "" + } + } + attr { + key: "_debug_dir" + value { + s: "" + } + } + attr { + key: "_distribute_config" + value { + s: "" + } + } + attr { + key: "_do_npu_optimizer" + value { + s: "1" + } + } + attr { + key: "_dump_debug_mode" + value { + s: "all" + } + } + attr { + key: "_dump_mode" + value { + s: "output" + } + } + attr { + key: "_dump_path" + value { + s: "./" + } + } + attr { + key: "_dump_step" + value { + s: "1" + } + } + attr { + key: "_dynamic_dims" + value { + s: "1,128;3,128;5,128" + } + } + attr { + key: "_dynamic_graph_execute_mode" + value { + s: "lazy_recompile" + } + } + attr { + key: "_dynamic_input" + value { + s: "1" + } + } + attr { + key: "_dynamic_node_type" + value { + s: "" + } + } + attr { + key: "_enable_compress_weight" + value { + s: "0" + } + } + attr { + key: "_enable_data_pre_proc" + value { + 
s: "0" + } + } + attr { + key: "_enable_dump" + value { + s: "1" + } + } + attr { + key: "_enable_dump_debug" + value { + s: "1" + } + } + attr { + key: "_enable_exception_dump" + value { + s: "" + } + } + attr { + key: "_enable_scope_fusion_passes" + value { + s: "" + } + } + attr { + key: "_enable_small_channel" + value { + s: "0" + } + } + attr { + key: "_fusion_switch_file" + value { + s: "" + } + } + attr { + key: "_graph_run_mode" + value { + s: "1" + } + } + attr { + key: "_hcom_multi_mode" + value { + s: "" + } + } + attr { + key: "_hcom_parallel" + value { + s: "0" + } + } + attr { + key: "_in_out_pair" + value { + s: "" + } + } + attr { + key: "_in_out_pair_flag" + value { + s: "1" + } + } + attr { + key: "_input_shape" + value { + s: "getnext:-1,-1" + } + } + attr { + key: "_is_tailing_optimization" + value { + s: "0" + } + } + attr { + key: "_iterations_per_loop" + value { + s: "1" + } + } + attr { + key: "_job" + value { + s: "localhost" + } + } + attr { + key: "_local_device_list" + value { + s: "" + } + } + attr { + key: "_local_rank_id" + value { + s: "-1" + } + } + attr { + key: "_lower_functional_ops" + value { + s: "0" + } + } + attr { + key: "_mix_compile_mode" + value { + s: "0" + } + } + attr { + key: "_mstune_mode" + value { + s: "" + } + } + attr { + key: "_op_compiler_cache_dir" + value { + s: "" + } + } + attr { + key: "_op_compiler_cache_mode" + value { + s: "" + } + } + attr { + key: "_op_debug_level" + value { + s: "0" + } + } + attr { + key: "_op_select_implmode" + value { + s: "" + } + } + attr { + key: "_op_tune_mode" + value { + s: "" + } + } + attr { + key: "_optypelist_for_implmode" + value { + s: "" + } + } + attr { + key: "_precision_mode" + value { + s: "" + } + } + attr { + key: "_profiling_mode" + value { + s: "0" + } + } + attr { + key: "_profiling_options" + value { + s: "" + } + } + attr { + key: "_session_device_id" + value { + s: "" + } + } + attr { + key: "_stream_max_parallel_num" + value { + s: "" + } + } + attr { + key: "_task_index" + value { + s: "0" + } + } + attr { + key: "_use_off_line" + value { + s: "1" + } + } + attr { + key: "_variable_format_optimize" + value { + s: "1" + } + } + attr { + key: "_work_path" + value { + s: "/home/ascend" + } + } + attr { + key: "_aoe_mode" + value { + s: "2" + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } + attr { + key: "function" + value { + func { + name: "GeOp61_0" + } + } + } +} +library { + function { + signature { + name: "GeOp61_0" + output_arg { + name: "Add1_0_retval" + type: DT_FLOAT + } + } + node_def { + name: "Const_1" + op: "Const" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_NpuOptimizer" + value { + s: "NpuOptimizer" + } + } + attr { + key: "_iterations_per_loop" + value { + s: "1" + } + } + attr { + key: "_job" + value { + s: "localhost" + } + } + attr { + key: "_mix_compile_mode" + value { + s: "0" + } + } + attr { + key: "_task_index" + value { + s: "0" + } + } + attr { + key: "_use_off_line" + value { + s: "1" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000 A\000\000 A" + } + } + } + } + node_def { + name: "Variable" + op: "VariableV2" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_class" + value { + list { + s: "loc:@Variable/read" + } + } + } + attr { + key: "_var_format" + value { + s: "4D" + } + } + attr { + key: "container" + value { + s: "" + } + 
} + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } + } + node_def { + name: "Variable/read" + op: "Identity" + input: "Variable:ref:0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_var_format" + value { + s: "4D" + } + } + } + node_def { + name: "Add1" + op: "Add" + input: "Const_1:output:0" + input: "Variable/read:output:0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + } + ret { + key: "Add1_0_retval" + value: "Add1:z:0" + } + } +} +versions { + producer: 134 +} diff --git a/tf_adapter/tests/ut/kernels/pbtxt/geop_output_error.pbtxt b/tf_adapter/tests/ut/kernels/pbtxt/geop_output_error.pbtxt new file mode 100644 index 000000000..da7b36e42 --- /dev/null +++ b/tf_adapter/tests/ut/kernels/pbtxt/geop_output_error.pbtxt @@ -0,0 +1,548 @@ +node { + name: "retval_Add_0_0" + op: "_Retval" + input: "GeOp51_0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "index" + value { + i: 0 + } + } +} +node { + name: "retval_Add_1_0" + op: "_Retval" + input: "GeOp51_0:1" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "index" + value { + i: 0 + } + } +} +node { + name: "GeOp51_0" + op: "GeOp" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "Tin" + value { + list { + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_FLOAT + type: DT_FLOAT + } + } + } + attr { + key: "_NpuOptimizer" + value { + s: "NpuOptimizer" + } + } + attr { + key: "_auto_tune_mode" + value { + s: "" + } + } + attr { + key: "_buffer_optimize" + value { + s: "l2_optimize" + } + } + attr { + key: "_compress_weight_conf" + value { + s: "" + } + } + attr { + key: "_debug_dir" + value { + s: "" + } + } + attr { + key: "_distribute_config" + value { + s: "" + } + } + attr { + key: "_do_npu_optimizer" + value { + s: "1" + } + } + attr { + key: "_dump_debug_mode" + value { + s: "all" + } + } + attr { + key: "_dump_mode" + value { + s: "output" + } + } + attr { + key: "_dump_path" + value { + s: "./" + } + } + attr { + key: "_dump_step" + value { + s: "1" + } + } + attr { + key: "_dynamic_dims" + value { + s: "" + } + } + attr { + key: "_dynamic_graph_execute_mode" + value { + s: "lazy_recompile" + } + } + attr { + key: "_dynamic_input" + value { + s: "0" + } + } + attr { + key: "_dynamic_node_type" + value { + s: "" + } + } + attr { + key: "_enable_compress_weight" + value { + s: "0" + } + } + attr { + key: "_enable_data_pre_proc" + value { + s: "0" + } + } + attr { + key: "_enable_dump" + value { + s: "1" + } + } + attr { + key: "_enable_dump_debug" + value { + s: "1" + } + } + attr { + key: "_enable_exception_dump" + value { + s: "" + } + } + attr { + key: "_enable_scope_fusion_passes" + value { + s: "" + } + } + attr { + key: "_enable_small_channel" + value { + s: "0" + } + } + attr { + key: "_fusion_switch_file" + value { + s: "" + } + } + attr { + key: "_graph_run_mode" + value { + s: "1" + } + } + attr { + key: "_hcom_multi_mode" + value { + s: "" + } + } + attr { + key: "_hcom_parallel" + value { + s: "0" + } + } + attr { + key: "_in_out_pair" + value { + s: "" + } + } + attr { + key: "_in_out_pair_flag" + value { + s: "1" + } + } + attr { + key: 
"_input_shape" + value { + s: "" + } + } + attr { + key: "_is_tailing_optimization" + value { + s: "0" + } + } + attr { + key: "_iterations_per_loop" + value { + s: "1" + } + } + attr { + key: "_job" + value { + s: "localhost" + } + } + attr { + key: "_local_device_list" + value { + s: "" + } + } + attr { + key: "_local_rank_id" + value { + s: "-1" + } + } + attr { + key: "_lower_functional_ops" + value { + s: "0" + } + } + attr { + key: "_mix_compile_mode" + value { + s: "0" + } + } + attr { + key: "_mstune_mode" + value { + s: "" + } + } + attr { + key: "_op_compiler_cache_dir" + value { + s: "" + } + } + attr { + key: "_op_compiler_cache_mode" + value { + s: "" + } + } + attr { + key: "_op_debug_level" + value { + s: "0" + } + } + attr { + key: "_op_select_implmode" + value { + s: "" + } + } + attr { + key: "_op_tune_mode" + value { + s: "" + } + } + attr { + key: "_optypelist_for_implmode" + value { + s: "" + } + } + attr { + key: "_precision_mode" + value { + s: "" + } + } + attr { + key: "_profiling_mode" + value { + s: "0" + } + } + attr { + key: "_profiling_options" + value { + s: "" + } + } + attr { + key: "_session_device_id" + value { + s: "" + } + } + attr { + key: "_stream_max_parallel_num" + value { + s: "" + } + } + attr { + key: "_task_index" + value { + s: "0" + } + } + attr { + key: "_use_off_line" + value { + s: "1" + } + } + attr { + key: "_variable_format_optimize" + value { + s: "1" + } + } + attr { + key: "_work_path" + value { + s: "" + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } + attr { + key: "function" + value { + func { + name: "GeOp51_0" + } + } + } +} +library { + function { + signature { + name: "GeOp51_0" + output_arg { + name: "Add_0_retval" + type: DT_FLOAT + } + output_arg { + name: "Add_1_retval" + type: DT_FLOAT + } + } + node_def { + name: "Const_1" + op: "Const" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_NpuOptimizer" + value { + s: "NpuOptimizer" + } + } + attr { + key: "_iterations_per_loop" + value { + s: "1" + } + } + attr { + key: "_job" + value { + s: "localhost" + } + } + attr { + key: "_mix_compile_mode" + value { + s: "0" + } + } + attr { + key: "_task_index" + value { + s: "0" + } + } + attr { + key: "_use_off_line" + value { + s: "1" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000 A\000\000 A" + } + } + } + } + node_def { + name: "Variable" + op: "VariableV2" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "_class" + value { + list { + s: "loc:@Variable/read" + } + } + } + attr { + key: "_var_format" + value { + s: "4D" + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } + } + node_def { + name: "Variable/read" + op: "Identity" + input: "Variable:ref:0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_var_format" + value { + s: "4D" + } + } + } + node_def { + name: "Add" + op: "Add" + input: "Const_1:output:0" + input: "Variable/read:output:0" + device: "/job:localhost/replica:0/task:0/device:CPU:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + } + ret { + key: "Add_0_retval" + value: "Add:z:0" + } + ret { + key: 
"Add_1_retval" + value: "Add:z:0" + } + } +} +versions { + producer: 134 +} diff --git a/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc b/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc index 727b71d3e..3d49a063c 100644 --- a/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc +++ b/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc @@ -88,11 +88,9 @@ Status GeOpRunGraphAsync(std::string example_path, gtl::InlinedVector(¶ms); AsyncOpKernel::DoneCallback done = []() { LOG(INFO) << "DONE DoneCallback"; }; async_op->ComputeAsync(ctx.get(), done); - EXPECT_EQ(ctx->status().ok(), true); if (!only_run_once) { auto ctx1 = absl::make_unique(¶ms); async_op->ComputeAsync(ctx1.get(), done); - EXPECT_EQ(ctx1->status().ok(), true); } } } @@ -105,6 +103,18 @@ TEST_F(GeOpTest, GeOpFuncTest) { gtl::InlinedVector inputs; EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp1_0").ok()); } +TEST_F(GeOpTest, GeDynamicConfigError) { + NodeDef node_def; + std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_dynamic_config.pbtxt"; + gtl::InlinedVector inputs; + EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp61_0").ok()); +} +TEST_F(GeOpTest, GeOpOutputError) { + NodeDef node_def; + std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_output_error.pbtxt"; + gtl::InlinedVector inputs; + EXPECT_TRUE(GeOpRunGraphAsync(graph_def_path, inputs, node_def, "GeOp51_0").ok()); +} TEST_F(GeOpTest, GeOpVarInitGraphTest) { NodeDef node_def; std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_var_init_graph.pbtxt"; -- Gitee From 4fed9336a76559417ad950aecd26c380ce3e943d Mon Sep 17 00:00:00 2001 From: yuxing Date: Thu, 24 Jun 2021 21:41:40 +0800 Subject: [PATCH 10/21] add fusion_switch_file to init --- tf_adapter/util/ge_plugin.cc | 2 ++ tf_adapter/util/npu_attrs.cc | 3 +++ 2 files changed, 5 insertions(+) diff --git a/tf_adapter/util/ge_plugin.cc b/tf_adapter/util/ge_plugin.cc index a369fbef1..a74f3be57 100644 --- a/tf_adapter/util/ge_plugin.cc +++ b/tf_adapter/util/ge_plugin.cc @@ -212,6 +212,8 @@ void GePlugin::Init(std::map &init_options, bool is_gl << ", work path : " << init_options["ge.tuningPath"] << ", distribute_config : " << init_options["distribute_config"]; + ADP_LOG(INFO) << "[GePlugin] fusion_switch_file :" << init_options["ge.fusionSwitchFile"]; + const char *tdt_uninit_env = std::getenv("ASCEND_TDT_UNINIT"); bool tdt_init = true; if (tdt_uninit_env != nullptr && std::atoi(tdt_uninit_env) == 1) { diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc index 4fca7f5f0..229f6e2db 100644 --- a/tf_adapter/util/npu_attrs.cc +++ b/tf_adapter/util/npu_attrs.cc @@ -438,6 +438,7 @@ std::map NpuAttrs::GetInitOptions(OpKernelConstruction std::string work_path; std::string distribute_config; std::string modify_mixlist; + std::string fusion_switch_file; if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) { ctx->GetAttr("_precision_mode", &precision_mode); @@ -456,6 +457,7 @@ std::map NpuAttrs::GetInitOptions(OpKernelConstruction ctx->GetAttr("_hcom_multi_mode", &hcom_multi_mode); ctx->GetAttr("_distribute_config", &distribute_config); ctx->GetAttr("_modify_mixlist", &modify_mixlist); + ctx->GetAttr("_fusion_switch_file", &fusion_switch_file); } @@ -479,6 +481,7 @@ std::map NpuAttrs::GetInitOptions(OpKernelConstruction init_options["ge.debugDir"] = debug_dir; init_options["ge.hcomMultiMode"] = hcom_multi_mode; init_options[ge::MODIFY_MIXLIST] = modify_mixlist; + 
init_options["ge.fusionSwitchFile"] = fusion_switch_file; return init_options; } -- Gitee From 43e7f7b926cb91a089b5fa2c45a38fdc78395884 Mon Sep 17 00:00:00 2001 From: medivh-x Date: Fri, 25 Jun 2021 10:58:45 +0800 Subject: [PATCH 11/21] add default value for hcom ops --- tf_adapter_2.x/python/npu_device/distribute/hccl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tf_adapter_2.x/python/npu_device/distribute/hccl.py b/tf_adapter_2.x/python/npu_device/distribute/hccl.py index 4b32864f9..63f44b0ca 100644 --- a/tf_adapter_2.x/python/npu_device/distribute/hccl.py +++ b/tf_adapter_2.x/python/npu_device/distribute/hccl.py @@ -46,7 +46,7 @@ def _all_reduce(values, reduction, fusion, fusion_id, group): return reduced_values -def all_reduce(values, reduction, fusion=1, fusion_id=-1, group="hccl_world_group"): +def all_reduce(values, reduction="mean", fusion=1, fusion_id=-1, group="hccl_world_group"): if global_npu_ctx() is None or not global_npu_ctx().is_cluster_worker(): logging.info("Skip all reduce as current process is not npu cluster worker") return values @@ -63,7 +63,7 @@ def _broadcast(values, root_rank, fusion, fusion_id, group): value.assign(hccl_ops.broadcast([value], root_rank, fusion, fusion_id, group)[0]) -def broadcast(values, root_rank, fusion=2, fusion_id=0, group="hccl_world_group"): +def broadcast(values, root_rank=0, fusion=2, fusion_id=0, group="hccl_world_group"): if global_npu_ctx() is None or not global_npu_ctx().is_cluster_worker(): logging.info("Skip broadcast as current process is not npu cluster worker") return -- Gitee From d4041b4917c8e76c98ac3af512f454e07b0f7638 Mon Sep 17 00:00:00 2001 From: wangkai Date: Fri, 25 Jun 2021 15:57:37 +0800 Subject: [PATCH 12/21] add soft_dp header target Signed-off-by: wangkai --- CMakeLists.txt | 2 +- tf_adapter/kernels/npu_sys_ctl_ops.cc | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ba8fd5ffb..c7054676f 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -140,7 +140,6 @@ else() ${TOP_DIR}/inc/ ${TOP_DIR}/inc/external/ ${TOP_DIR}/inc/common/ - ${TOP_DIR}/inc/soft_dp/ ${TOP_DIR}/soft_dp/ ${TOP_DIR}/ace/execfwk/soft_dp/ ${TOP_DIR}/graphengine/inc/ @@ -174,6 +173,7 @@ else() $ $ $ + $ -Wl,--no-as-needed c_sec ge_runner diff --git a/tf_adapter/kernels/npu_sys_ctl_ops.cc b/tf_adapter/kernels/npu_sys_ctl_ops.cc index 3fd031fdc..1d262d689 100644 --- a/tf_adapter/kernels/npu_sys_ctl_ops.cc +++ b/tf_adapter/kernels/npu_sys_ctl_ops.cc @@ -38,7 +38,6 @@ limitations under the License. 
#include "framework/omg/parser/parser_api.h" #include "ge/ge_api.h" #include "ge/ge_api_types.h" -#include "hccl/hcom.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/platform/mutex.h" #include "tf_adapter/common/adp_logger.h" -- Gitee From 0c4d920f8a9dea2d8100611578f9a5908776566b Mon Sep 17 00:00:00 2001 From: huanruizhi Date: Mon, 21 Jun 2021 19:58:44 +0800 Subject: [PATCH 13/21] add graph_run_mode session options --- tf_adapter/util/npu_attrs.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc index 4fca7f5f0..8e182a41f 100644 --- a/tf_adapter/util/npu_attrs.cc +++ b/tf_adapter/util/npu_attrs.cc @@ -318,6 +318,7 @@ std::map NpuAttrs::GetSessOptions(OpKernelConstruction std::string session_device_id; std::string modify_mixlist; std::string op_precision_mode; + std::string graph_run_mode = "1"; if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) { ctx->GetAttr("_variable_format_optimize", &variable_format_optimize); @@ -369,6 +370,7 @@ std::map NpuAttrs::GetSessOptions(OpKernelConstruction ctx->GetAttr("_session_device_id", &session_device_id); ctx->GetAttr("_modify_mixlist", &modify_mixlist); ctx->GetAttr("_op_precision_mode", &op_precision_mode); + ctx->GetAttr("_graph_run_mode", &graph_run_mode); } // session options @@ -399,6 +401,7 @@ std::map NpuAttrs::GetSessOptions(OpKernelConstruction } sess_options[ge::MODIFY_MIXLIST] = modify_mixlist; sess_options["ge.exec.op_precision_mode"] = op_precision_mode; + sess_options[ge::OPTION_GRAPH_RUN_MODE] = graph_run_mode; return sess_options; } -- Gitee From 4441d3c785d77d906f5e621167d7b74ffba2db43 Mon Sep 17 00:00:00 2001 From: huanruizhi Date: Sat, 26 Jun 2021 15:42:05 +0800 Subject: [PATCH 14/21] remove hvd import and tests discarded lines --- convert_tf2npu/ast_impl.py | 50 +++++++++++------------------- convert_tf2npu/conver_by_ast.py | 6 ++-- tf_adapter/tests/ut/CMakeLists.txt | 5 +-- 3 files changed, 21 insertions(+), 40 deletions(-) diff --git a/convert_tf2npu/ast_impl.py b/convert_tf2npu/ast_impl.py index 65e2381c3..de87b2445 100644 --- a/convert_tf2npu/ast_impl.py +++ b/convert_tf2npu/ast_impl.py @@ -32,16 +32,26 @@ def import_from(node): if "keras" in values: util_global.set_value('is_keras_net', True) if "horovod" in values: + log_msg(getattr(node, "lineno", "None"), "remove horovod import line to None") util_global.set_value('has_hccl_api', True) + new_node = ast.Expr(value=ast.NameConstant(value=None)) + ast.copy_location(new_node, node) + util_global.set_value('need_conver', True) + return new_node for value in node.names: if isinstance(value, ast.alias): values = value.name.split(".") if "keras" in values: util_global.set_value('is_keras_net', True) if "horovod" in values: + log_msg(getattr(node, "lineno", "None"), "remove horovod import line to None") util_global.set_value('has_hccl_api', True) + new_node = ast.Expr(value=ast.NameConstant(value=None)) + ast.copy_location(new_node, node) + util_global.set_value('need_conver', True) + return new_node util_global.set_value('need_conver', True) - + return node def ast_import(node): for value in node.names: @@ -50,8 +60,14 @@ def ast_import(node): if "keras" in values: util_global.set_value('is_keras_net', True) if "horovod" in values: + log_msg(getattr(node, "lineno", "None"), "remove horovod import line to None") util_global.set_value('has_hccl_api', True) - util_global.set_value('need_conver', True) + new_node = ast.Expr(value=ast.NameConstant(value=None)) 
+ ast.copy_location(new_node, node) + util_global.set_value('need_conver', True) + return new_node + util_global.set_value('need_conver', True) + return node def ast_function_def(node): log_success_report(getattr(node, "lineno", "None"), node.name) @@ -563,36 +579,6 @@ def _call_name_match(call_func, call_name): return (isinstance(call_func, ast.Attribute) and (call_func.attr == call_name)) or \ (isinstance(call_func, ast.Name) and (call_func.id) == call_name) -def remove_hvd_import(r_node): - n = 0 - lenline = len(r_node.body) - - while n < lenline: - if isinstance(r_node.body[n], ast.ImportFrom) or isinstance(r_node.body[n], ast.Import): - if isinstance(r_node.body[n], ast.ImportFrom): - if r_node.body[n].module != None: - values = r_node.body[n].module.split(".") - if "horovod" in values: - log_msg(getattr(r_node.body[n], "lineno", "None"), " remove hvd import.") - r_node.body.pop(n) - lenline -= 1 - for value in r_node.body[n].names: - if isinstance(value, ast.alias): - values = value.name.split(".") - if "horovod" in values: - log_msg(getattr(r_node.body[n], "lineno", "None"), " remove hvd import.") - r_node.body.pop(n) - lenline -= 1 - elif isinstance(r_node.body[n], ast.Import): - for value in r_node.body[n].names: - if isinstance(value, ast.alias): - values = value.name.split(".") - if "horovod" in values: - log_msg(getattr(r_node.body[n], "lineno", "None"), " remove hvd import.") - r_node.body.pop(n) - lenline -= 1 - n += 1 - def insert_npu_import(r_node): npu_alias = ast.alias(name='*', asname=None) npu_import = ast.ImportFrom(module='npu_bridge.npu_init', names=[npu_alias], level=0) diff --git a/convert_tf2npu/conver_by_ast.py b/convert_tf2npu/conver_by_ast.py index 0d70f904b..64f93b95c 100644 --- a/convert_tf2npu/conver_by_ast.py +++ b/convert_tf2npu/conver_by_ast.py @@ -53,13 +53,13 @@ class ConverByAst(ast.NodeTransformer): return node def visit_ImportFrom(self, node): - import_from(node) self.generic_visit(node) + node = import_from(node) return node def visit_Import(self, node): - ast_import(node) self.generic_visit(node) + node = ast_import(node) return node def visit_Assign(self, node): @@ -103,8 +103,6 @@ def conver_ast(path, out_path_dst, file_name): or util_global.get_value('is_keras_net', False)): log_warning('the network of keras and horovod script do not have main func, ' 'should set -m or --main parameter') - if util_global.get_value('has_hccl_api', False): - remove_hvd_import(r_node) if util_global.get_value('is_main_file', False): insert_npu_resource_init(r_node) insert_npu_resource_shutdown(r_node) diff --git a/tf_adapter/tests/ut/CMakeLists.txt b/tf_adapter/tests/ut/CMakeLists.txt index e6142f3a6..b84900524 100644 --- a/tf_adapter/tests/ut/CMakeLists.txt +++ b/tf_adapter/tests/ut/CMakeLists.txt @@ -31,9 +31,6 @@ file(GLOB_RECURSE UT_SOURCES add_executable(tfadapter_utest "main.cc" ${UT_SOURCES} - #${TFADAPTER_DIR}/tf_adapter/kernels/geop_npu.cc - #${TFADAPTER_DIR}/tf_adapter/kernels/infeed_outfeed_ops.cc - #${TFADAPTER_DIR}/tf_adapter/kernels/npu_sys_ctl_ops.cc ) target_include_directories(tfadapter_utest PRIVATE @@ -57,7 +54,7 @@ foreach (UT_LINK_FLAG ${UT_LINK_FLAGS}) endforeach (UT_LINK_FLAG) string(STRIP ${PYTHON_LIB_PATH} PYTHON_LIB_PATH) -message("hrz python lib path------------${PYTHON_LIB_PATH}") +message("python lib path ${PYTHON_LIB_PATH}") add_dependencies(tfadapter_utest aoe_tuning) -- Gitee From 5bf5742269e95d01e368c9311571a875199d4ce2 Mon Sep 17 00:00:00 2001 From: huanruizhi Date: Mon, 28 Jun 2021 21:07:50 +0800 Subject: [PATCH 15/21] fix 
From 5bf5742269e95d01e368c9311571a875199d4ce2 Mon Sep 17 00:00:00 2001
From: huanruizhi
Date: Mon, 28 Jun 2021 21:07:50 +0800
Subject: [PATCH 15/21] fix unique_ptr bug

---
 tf_adapter/kernels/geop_npu.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc
index 6ae91409f..de8513084 100644
--- a/tf_adapter/kernels/geop_npu.cc
+++ b/tf_adapter/kernels/geop_npu.cc
@@ -1077,8 +1077,8 @@ void GeOp::AnalyzeInputDesc(void *tensor_ptr, ge::Tensor &input, ge::DataType ty
   ge_tensor_desc.SetPlacement(output_info->placement_);
   input.SetTensorDesc(ge_tensor_desc);
 
-  uint8_t* data = output_info->data_.get();
-  input.SetData(output_info->data_.get(), output_info->output_size_, output_info->data_.get_deleter());
+  uint8_t* data = output_info->data_.release();
+  input.SetData(data, output_info->output_size_, output_info->data_.get_deleter());
 
   ADP_LOG(INFO) << "[GEOP] Get input shape:" << input_shape.DebugString()
                 << ", input placement:" << output_info->placement_
                 << ", input length:" << output_info->output_size_
--
Gitee
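
The bug fixed above: `data_.get()` handed GE a raw pointer while the `unique_ptr` kept ownership, so the buffer was freed once by the deleter passed to `SetData` and again when `data_` was destroyed. `release()` gives up ownership first, leaving exactly one owner. A minimal sketch of the failure mode and the fix, using standard types only; `ConsumeBuffer` is a hypothetical stand-in for a `SetData`-style consumer:

    #include <cstdint>
    #include <functional>
    #include <iostream>
    #include <memory>

    // Hypothetical consumer in the style of ge::Tensor::SetData: it takes a raw
    // pointer plus the deleter that should eventually free it.
    void ConsumeBuffer(uint8_t *data, const std::function<void(uint8_t *)> &deleter) {
      std::cout << "first byte: " << static_cast<int>(data[0]) << '\n';
      deleter(data);  // the consumer frees the buffer when it is done
    }

    int main() {
      std::unique_ptr<uint8_t[]> owner(new uint8_t[16]{42});

      // Buggy pattern: owner.get() leaves ownership with `owner`, so the buffer
      // would be freed here by the deleter and again when `owner` goes out of
      // scope (double free):
      // ConsumeBuffer(owner.get(), [](uint8_t *p) { delete[] p; });

      // Fixed, mirroring the patch: release() drops ownership before handing
      // the pointer (and the matching deleter) to the consumer.
      uint8_t *data = owner.release();
      ConsumeBuffer(data, [](uint8_t *p) { delete[] p; });
      return 0;
    }
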
&& git fetch --all --tags && git checkout tags/v1.15.5 + COMMAND echo "end cp tensorflow1.15 source" + DEPENDS ${TOP_DIR}/open_source/tensorflow) + add_library(tf_adapter SHARED ${SOURCES} ${BASE_DIR}/tf_adapter/util/ge_plugin_wrap.cxx ) + add_dependencies(tf_adapter tensorflow_source) + target_include_directories(tf_adapter PRIVATE ${BASE_DIR}/ ${TOP_DIR}/inc/ @@ -150,8 +163,9 @@ else() ${TOP_DIR}/abl/libc_sec/include/ ${TOP_DIR}/third_party/json/include/ ${TOP_DIR}/open_source/json/include/ - ${TOP_DIR}/third_party/tensorflow/tensorflow-1.15.0/ - ${TOP_DIR}/third_party/tensorflow/compile_deps/tf-1.15.0/include/ + ${BASE_DIR}/tensorflow_15/tensorflow + /opt/buildtools/tensorflow-1.15.5/tensorflow_core/include/ + /opt/buildtools/tensorflow-1.15.5/tensorflow-1.15.5.data/purelib/tensorflow_core/include/ ${HI_PYTHON_INC}/ ) @@ -226,6 +240,7 @@ else() && rm -rf ${BASE_DIR}/libpywrap_tensorflow_internal.so && rm -rf ${BASE_DIR}/libtensorflow_framework.so.1 && rm -rf ${BASE_DIR}/libtensorflow_framework.so + && rm -rf ${BASE_DIR}/tensorflow_15/ && echo "package whl end" ) ###################################### Tensorflow 2.x ########################################### diff --git a/tf_adapter_2.x/cmake/tensorflow/module.cmake b/tf_adapter_2.x/cmake/tensorflow/module.cmake index 10e1350f3..7949da518 100644 --- a/tf_adapter_2.x/cmake/tensorflow/module.cmake +++ b/tf_adapter_2.x/cmake/tensorflow/module.cmake @@ -19,7 +19,7 @@ else() add_library(pywrap_tensorflow_internal SHARED ${fake_sources}) set_target_properties(pywrap_tensorflow_internal PROPERTIES PREFIX _) - SET(TF_INCLUDE_DIR ${ASCEND_CI_BUILD_DIR}/third_party/tensorflow/compile_deps/tf-2.4.0/include/org) + SET(TF_INCLUDE_DIR /opt/buildtools/tensorflow-2.4.1/tensorflow/include/) target_link_libraries(tensorflow_libs INTERFACE tensorflow_framework pywrap_tensorflow_internal) -- Gitee From 506049c05603d3d6d7aa3fa79b5236d996bbb204 Mon Sep 17 00:00:00 2001 From: xuming Date: Thu, 1 Jul 2021 10:51:10 +0800 Subject: [PATCH 18/21] clear complie warning --- tf_adapter/kernels/data_item_deliver.h | 30 +++++++++++++------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tf_adapter/kernels/data_item_deliver.h b/tf_adapter/kernels/data_item_deliver.h index 39fd073e7..6fb2d9d37 100644 --- a/tf_adapter/kernels/data_item_deliver.h +++ b/tf_adapter/kernels/data_item_deliver.h @@ -45,8 +45,8 @@ limitations under the License. 
From 04c2f9da3f81727ee2864a378f1ce477649ad9c9 Mon Sep 17 00:00:00 2001
From: huanruizhi
Date: Wed, 30 Jun 2021 10:28:40 +0800
Subject: [PATCH 17/21] rm compile_deps

---
 CMakeLists.txt                               | 19 +++++++++++++++++--
 tf_adapter_2.x/cmake/tensorflow/module.cmake |  2 +-
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c7054676f..6517e9251 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -130,11 +130,24 @@ else()
         ${CMAKE_CURRENT_LIST_DIR}/tf_adapter/optimizers/*.cc
         ${CMAKE_CURRENT_LIST_DIR}/tf_adapter/util/*.cc
         )
+
+    add_custom_target(tensorflow_source ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/tensorflow_source.timestamp)
+
+    add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/tensorflow_source.timestamp
+            COMMAND echo "cp tensorflow1.15 source begin:"
+            COMMAND rm -rf ${BASE_DIR}/tensorflow_15 && mkdir -p ${BASE_DIR}/tensorflow_15
+            COMMAND cp -rfL ${TOP_DIR}/open_source/tensorflow ${BASE_DIR}/tensorflow_15 || echo skip
+            COMMAND cd ${BASE_DIR}/tensorflow_15/tensorflow && git checkout . && git fetch --all --tags && git checkout tags/v1.15.5
+            COMMAND echo "end cp tensorflow1.15 source"
+            DEPENDS ${TOP_DIR}/open_source/tensorflow)
+
     add_library(tf_adapter SHARED
         ${SOURCES}
         ${BASE_DIR}/tf_adapter/util/ge_plugin_wrap.cxx
         )
+    add_dependencies(tf_adapter tensorflow_source)
+
     target_include_directories(tf_adapter PRIVATE
         ${BASE_DIR}/
         ${TOP_DIR}/inc/
@@ -150,8 +163,9 @@ else()
         ${TOP_DIR}/abl/libc_sec/include/
         ${TOP_DIR}/third_party/json/include/
        ${TOP_DIR}/open_source/json/include/
-        ${TOP_DIR}/third_party/tensorflow/tensorflow-1.15.0/
-        ${TOP_DIR}/third_party/tensorflow/compile_deps/tf-1.15.0/include/
+        ${BASE_DIR}/tensorflow_15/tensorflow
+        /opt/buildtools/tensorflow-1.15.5/tensorflow_core/include/
+        /opt/buildtools/tensorflow-1.15.5/tensorflow-1.15.5.data/purelib/tensorflow_core/include/
         ${HI_PYTHON_INC}/
         )
@@ -226,6 +240,7 @@ else()
            && rm -rf ${BASE_DIR}/libpywrap_tensorflow_internal.so
            && rm -rf ${BASE_DIR}/libtensorflow_framework.so.1
            && rm -rf ${BASE_DIR}/libtensorflow_framework.so
+           && rm -rf ${BASE_DIR}/tensorflow_15/
            && echo "package whl end"
            )
 ###################################### Tensorflow 2.x ###########################################
diff --git a/tf_adapter_2.x/cmake/tensorflow/module.cmake b/tf_adapter_2.x/cmake/tensorflow/module.cmake
index 10e1350f3..7949da518 100644
--- a/tf_adapter_2.x/cmake/tensorflow/module.cmake
+++ b/tf_adapter_2.x/cmake/tensorflow/module.cmake
@@ -19,7 +19,7 @@ else()
     add_library(pywrap_tensorflow_internal SHARED ${fake_sources})
     set_target_properties(pywrap_tensorflow_internal PROPERTIES PREFIX _)
 
-    SET(TF_INCLUDE_DIR ${ASCEND_CI_BUILD_DIR}/third_party/tensorflow/compile_deps/tf-2.4.0/include/org)
+    SET(TF_INCLUDE_DIR /opt/buildtools/tensorflow-2.4.1/tensorflow/include/)
 
     target_link_libraries(tensorflow_libs INTERFACE tensorflow_framework pywrap_tensorflow_internal)
--
Gitee
size:" << data_len; return errors::Internal("Failed to reset buff memory."); } - int recvn = Recv(buff, data_len); + uint64_t recvn = Recv(buff, data_len); if (recvn != data_len) { free(buff); ADP_LOG(ERROR) << "Failed to receive data."; @@ -384,7 +384,7 @@ Status DataItemDeliver::GetTensorString(std::string &str) { LOG(ERROR) << "Failed to reset buff memory."; return errors::Internal("Failed to reset buff memory."); } - int recvn = Recv(buff, size); + uint64_t recvn = Recv(buff, size); if (recvn != size) { free(buff); ADP_LOG(ERROR) << "Failed to receive data."; @@ -494,4 +494,4 @@ void DataItemDeliver::SocketSend(struct iovec temp_items[], int vector_size, } } // namespace data } // namespace tensorflow -#endif \ No newline at end of file +#endif -- Gitee From 6bb12c432c4b1a61d2efa05b0535d69ac8943f0f Mon Sep 17 00:00:00 2001 From: xuming Date: Thu, 1 Jul 2021 17:08:54 +0800 Subject: [PATCH 19/21] init server_fd_ and solve compile warning --- tf_adapter/kernels/data_item_deliver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tf_adapter/kernels/data_item_deliver.h b/tf_adapter/kernels/data_item_deliver.h index 6fb2d9d37..b4bdb4309 100644 --- a/tf_adapter/kernels/data_item_deliver.h +++ b/tf_adapter/kernels/data_item_deliver.h @@ -421,7 +421,7 @@ Status DataItemDeliver::SendDataVec(std::vector &data_items, uint32_t head_size = (strlen(MESSAGE_HEAD) + 1) * CHAR_SIZE; head_info[0].iov_base = &head_size; head_info[0].iov_len = UINT32_SIZE; - head_info[1].iov_base = MESSAGE_HEAD; + head_info[1].iov_base = const_cast(MESSAGE_HEAD); head_info[1].iov_len = head_size; head_info[2].iov_base = &vector_size; head_info[2].iov_len = UINT32_SIZE; -- Gitee From 6e7293b472d2f7b14d1a5f50aa6da524f86a4012 Mon Sep 17 00:00:00 2001 From: xuming Date: Thu, 1 Jul 2021 20:53:02 +0800 Subject: [PATCH 20/21] init server_fd_ and solve compile warning --- tf_adapter/kernels/data_item_deliver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tf_adapter/kernels/data_item_deliver.h b/tf_adapter/kernels/data_item_deliver.h index b4bdb4309..c9cfc6528 100644 --- a/tf_adapter/kernels/data_item_deliver.h +++ b/tf_adapter/kernels/data_item_deliver.h @@ -470,7 +470,7 @@ Status DataItemDeliver::CreateSockAddr(struct sockaddr_un &sock_addr, sock_addr.sun_family = AF_UNIX; int len = 0; if (-1 == - (len = snprintf(sock_addr.sun_path, sizeof(sock_addr.sun_path), "%s%s%d", + (len = snprintf_s(sock_addr.sun_path, sizeof(sock_addr.sun_path), "%s%s%d", path, channel_name_.c_str(), device_id))) { ADP_LOG(ERROR) << "Set sun_path failed."; LOG(ERROR) << "Set sun_path failed."; -- Gitee From 1a99e4949d52276939d998375900f56908b544e7 Mon Sep 17 00:00:00 2001 From: xuming Date: Thu, 1 Jul 2021 21:09:59 +0800 Subject: [PATCH 21/21] init server_fd_ and solve compile warning --- tf_adapter/kernels/data_item_deliver.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tf_adapter/kernels/data_item_deliver.h b/tf_adapter/kernels/data_item_deliver.h index c9cfc6528..8ebabcfe6 100644 --- a/tf_adapter/kernels/data_item_deliver.h +++ b/tf_adapter/kernels/data_item_deliver.h @@ -469,9 +469,9 @@ Status DataItemDeliver::CreateSockAddr(struct sockaddr_un &sock_addr, const char *path, int device_id) { sock_addr.sun_family = AF_UNIX; int len = 0; - if (-1 == - (len = snprintf_s(sock_addr.sun_path, sizeof(sock_addr.sun_path), "%s%s%d", - path, channel_name_.c_str(), device_id))) { + if (-1 == (len = snprintf_s(sock_addr.sun_path, sizeof(sock_addr.sun_path), + sizeof(sock_addr.sun_path) - 1, "%s%s%d", 
From 6bb12c432c4b1a61d2efa05b0535d69ac8943f0f Mon Sep 17 00:00:00 2001
From: xuming
Date: Thu, 1 Jul 2021 17:08:54 +0800
Subject: [PATCH 19/21] init server_fd_ and solve compile warning

---
 tf_adapter/kernels/data_item_deliver.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tf_adapter/kernels/data_item_deliver.h b/tf_adapter/kernels/data_item_deliver.h
index 6fb2d9d37..b4bdb4309 100644
--- a/tf_adapter/kernels/data_item_deliver.h
+++ b/tf_adapter/kernels/data_item_deliver.h
@@ -421,7 +421,7 @@ Status DataItemDeliver::SendDataVec(std::vector &data_items,
   uint32_t head_size = (strlen(MESSAGE_HEAD) + 1) * CHAR_SIZE;
   head_info[0].iov_base = &head_size;
   head_info[0].iov_len = UINT32_SIZE;
-  head_info[1].iov_base = MESSAGE_HEAD;
+  head_info[1].iov_base = const_cast<char *>(MESSAGE_HEAD);
   head_info[1].iov_len = head_size;
   head_info[2].iov_base = &vector_size;
   head_info[2].iov_len = UINT32_SIZE;
--
Gitee

From 6e7293b472d2f7b14d1a5f50aa6da524f86a4012 Mon Sep 17 00:00:00 2001
From: xuming
Date: Thu, 1 Jul 2021 20:53:02 +0800
Subject: [PATCH 20/21] init server_fd_ and solve compile warning

---
 tf_adapter/kernels/data_item_deliver.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tf_adapter/kernels/data_item_deliver.h b/tf_adapter/kernels/data_item_deliver.h
index b4bdb4309..c9cfc6528 100644
--- a/tf_adapter/kernels/data_item_deliver.h
+++ b/tf_adapter/kernels/data_item_deliver.h
@@ -470,7 +470,7 @@ Status DataItemDeliver::CreateSockAddr(struct sockaddr_un &sock_addr,
   sock_addr.sun_family = AF_UNIX;
   int len = 0;
   if (-1 ==
-      (len = snprintf(sock_addr.sun_path, sizeof(sock_addr.sun_path), "%s%s%d",
+      (len = snprintf_s(sock_addr.sun_path, sizeof(sock_addr.sun_path), "%s%s%d",
                    path, channel_name_.c_str(), device_id))) {
     ADP_LOG(ERROR) << "Set sun_path failed.";
     LOG(ERROR) << "Set sun_path failed.";
--
Gitee

From 1a99e4949d52276939d998375900f56908b544e7 Mon Sep 17 00:00:00 2001
From: xuming
Date: Thu, 1 Jul 2021 21:09:59 +0800
Subject: [PATCH 21/21] init server_fd_ and solve compile warning

---
 tf_adapter/kernels/data_item_deliver.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tf_adapter/kernels/data_item_deliver.h b/tf_adapter/kernels/data_item_deliver.h
index c9cfc6528..8ebabcfe6 100644
--- a/tf_adapter/kernels/data_item_deliver.h
+++ b/tf_adapter/kernels/data_item_deliver.h
@@ -469,9 +469,9 @@ Status DataItemDeliver::CreateSockAddr(struct sockaddr_un &sock_addr,
                                        const char *path, int device_id) {
   sock_addr.sun_family = AF_UNIX;
   int len = 0;
-  if (-1 ==
-      (len = snprintf_s(sock_addr.sun_path, sizeof(sock_addr.sun_path), "%s%s%d",
-                   path, channel_name_.c_str(), device_id))) {
+  if (-1 == (len = snprintf_s(sock_addr.sun_path, sizeof(sock_addr.sun_path),
+                              sizeof(sock_addr.sun_path) - 1, "%s%s%d", path,
+                              channel_name_.c_str(), device_id))) {
     ADP_LOG(ERROR) << "Set sun_path failed.";
     LOG(ERROR) << "Set sun_path failed.";
     return errors::Internal("Set sun_path failed.");
--
Gitee
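
Patches 20 and 21 move from `snprintf` to `snprintf_s` from the bundled libc_sec (securec) library. Its signature adds a format-length bound on top of the destination capacity, `snprintf_s(dest, destMax, count, fmt, ...)`, and it returns -1 on failure; patch 21 supplies `sizeof(sun_path) - 1` as the bound so a terminating NUL always fits. A usage sketch, assuming the securec header name and its documented return convention:

    #include <sys/un.h>
    #include "securec.h"  // Huawei libc_sec; assumed to provide snprintf_s

    // Sketch of the patched call: destMax bounds the buffer, count bounds the
    // formatted length, and -1 signals a formatting or bounds failure
    // (including a result that would not fit within `count` characters).
    bool FillSunPath(struct sockaddr_un &addr, const char *path,
                     const char *channel, int device_id) {
      addr.sun_family = AF_UNIX;
      int len = snprintf_s(addr.sun_path, sizeof(addr.sun_path),
                           sizeof(addr.sun_path) - 1, "%s%s%d",
                           path, channel, device_id);
      return len != -1;
    }
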