From 7c72ac692935c368f321e89d20a12d4e60ed3f2e Mon Sep 17 00:00:00 2001 From: dengtao Date: Tue, 20 Sep 2022 22:12:04 +0800 Subject: [PATCH] =?UTF-8?q?=E5=BC=80=E5=85=B3=E9=BB=98=E8=AE=A4=E5=80=BC?= =?UTF-8?q?=E6=94=B9=E4=B8=BAtrue?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tf_adapter/interface_spec/api_npu_config.pyh | 4 +-- .../npu_bridge/estimator/npu/npu_config.py | 7 +++- .../npu_bridge/estimator/npu/npu_estimator.py | 4 +++ tf_adapter/util/npu_attrs.cc | 34 +++++++++++++++++++ .../npu_device/core/npu_wrapper.cpp | 2 ++ .../python/npu_device/configs/dump_config.py | 2 ++ 6 files changed, 50 insertions(+), 3 deletions(-) diff --git a/tf_adapter/interface_spec/api_npu_config.pyh b/tf_adapter/interface_spec/api_npu_config.pyh index b24b4e140..db3061b21 100644 --- a/tf_adapter/interface_spec/api_npu_config.pyh +++ b/tf_adapter/interface_spec/api_npu_config.pyh @@ -5,7 +5,7 @@ class NPURunConfig(run_config_lib.RunConfig): save_summary_steps=0, save_checkpoints_steps=None, save_checkpoints_secs=None, session_config=None, keep_checkpoint_max=5, keep_checkpoint_every_n_hours=10000, log_step_count_steps=100, distribute=None, enable_data_pre_proc=True, precision_mode=None, enable_reduce_precision=False, - variable_format_optimize=True, mix_compile_mode=False, hcom_parallel=False, + variable_format_optimize=True, mix_compile_mode=False, hcom_parallel=True, graph_memory_max_size=None, variable_memory_max_size=None, auto_tune_mode=None, dump_config=None, stream_max_parallel_num=None, is_tailing_optimization=False, horovod_mode=False, graph_run_mode=1, op_debug_level=0, enable_scope_fusion_passes=None, @@ -31,4 +31,4 @@ class DynamicInputConfig(): def __init__(self, input_shape, dynamic_dims, dynamic_node_type): class MemoryConfig(): - def __init__(self, atomic_clean_policy=0, static_memory_policy=None): \ No newline at end of file + def __init__(self, atomic_clean_policy=0, static_memory_policy=None): diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py index 7aff4571b..d65858a4c 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py @@ -101,7 +101,9 @@ class NPURunConfig(run_config_lib.RunConfig): memory_config=None, experimental_config=None, jit_compile=True, - topo_sorting_mode=None + topo_sorting_mode=None, + dump_data="tensor", + dump_layer=None ): """ Constructs a NPUConfig. @@ -242,6 +244,9 @@ class NPURunConfig(run_config_lib.RunConfig): self._experimental_config = self._get_experimental_config(experimental_config) self._jit_compile = jit_compile self.topo_sorting_mode = topo_sorting_mode + self.dump_data = dump_data + self.dump_layer = dump_layer + super(NPURunConfig, self).__init__( model_dir=model_dir, diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py index 7dab26e7d..888d351d5 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py @@ -754,6 +754,10 @@ class NPUEstimator(estimator_lib.Estimator): if config.topo_sorting_mode is not None: custom_op.parameter_map["topo_sorting_mode"].i = config.topo_sorting_mode custom_op.parameter_map["jit_compile"].b = config._jit_compile + if config.dump_data is not None: + custom_op.parameter_map["dump_data"].s = tf.compat.as_bytes(config.dump_data) + if config.dump_layer is not None: + custom_op.parameter_map["dump_layer"].s = tf.compat.as_bytes(config.dump_layer) self.__load_session_device_id(config, custom_op) self.__load_modify_mixlist(config, custom_op) diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc index 37f5aa98a..748718112 100644 --- a/tf_adapter/util/npu_attrs.cc +++ b/tf_adapter/util/npu_attrs.cc @@ -370,6 +370,7 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr std::string dump_step; std::string dump_mode = "output"; std::string dump_debug_mode = "all"; + std::string dump_layer; std::string stream_max_parallel_num; std::string npuOptimizer; std::string is_tailing_optimization = "0"; @@ -453,6 +454,7 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr (void) ctx->GetAttr("_jit_compile", &jit_compile); (void) ctx->GetAttr("_topo_sorting_mode", &topo_sorting_mode); (void) ctx->GetAttr("_resource_config_path", &resource_config_path); + (void) ctx->GetAttr("_dump_layer", &dump_layer); } // session options @@ -469,6 +471,8 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr sess_options[ge::OPTION_EXEC_DUMP_PATH] = dump_path; sess_options[ge::OPTION_EXEC_DUMP_STEP] = dump_step; sess_options[ge::OPTION_EXEC_DUMP_MODE] = dump_mode; + sess_options["dump_layer"] = dump_layer; + sess_options["ge.exec.dumpLayer"] = dump_layer; sess_options[ge::OPTION_EXEC_ENABLE_DUMP_DEBUG] = enable_dump_debug; sess_options[ge::OPTION_EXEC_DUMP_DEBUG_MODE] = dump_debug_mode; sess_options["ge.exec.isTailingOptimization"] = is_tailing_optimization; @@ -551,6 +555,7 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr std::string graph_exec_timeout; std::string logical_device_cluster_deploy_mode = "LB"; std::string logical_device_id; + std::string dump_data = "tensor"; if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) { (void) ctx->GetAttr("_precision_mode", &precision_mode); @@ -583,6 +588,7 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr (void) ctx->GetAttr("_static_memory_policy", &static_memory_policy); (void) ctx->GetAttr("_logical_device_cluster_deploy_mode", &logical_device_cluster_deploy_mode); (void) ctx->GetAttr("_logical_device_id", &logical_device_id); + (void) ctx->GetAttr("_dump_data", &dump_data); } if (precision_mode.empty()) { @@ -625,6 +631,8 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr init_options_["ge.exec.graphExecTimeout"] = graph_exec_timeout; init_options_["ge.exec.logicalDeviceClusterDeployMode"] = logical_device_cluster_deploy_mode; init_options_["ge.exec.logicalDeviceId"] = logical_device_id; + init_options_["dump_data"] = dump_data; + init_options_["ge.exec.dumpData"] = dump_data; return init_options_; } @@ -940,6 +948,8 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & std::string dump_step; std::string dump_mode = "output"; std::string dump_debug_mode = "all"; + std::string dump_data = "tensor"; + std::string dump_layer; std::string stream_max_parallel_num; std::string soc_config; @@ -1010,6 +1020,8 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & auto dump_path_value = attrs.Find("_dump_path"); auto dump_step_value = attrs.Find("_dump_step"); auto dump_mode_value = attrs.Find("_dump_mode"); + auto dump_data_value = attrs.Find("_dump_data"); + auto dump_layer_value = attrs.Find("_dump_layer"); auto dump_debug_mode_value = attrs.Find("_dump_debug_mode"); auto stream_max_parallel_num_value = attrs.Find("_stream_max_parallel_num"); auto soc_config_value = attrs.Find("_soc_config"); @@ -1295,6 +1307,12 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & if (topo_sorting_mode_value != nullptr) { topo_sorting_mode = topo_sorting_mode_value->s(); } + if (dump_data_value != nullptr) { + dump_data = dump_data_value->s(); + } + if (dump_layer_value != nullptr) { + dump_layer = dump_layer_value->s(); + } } all_options["variable_format_optimize"] = variable_format_optimize; @@ -1313,6 +1331,10 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & all_options["dump_mode"] = dump_mode; all_options["enable_dump_debug"] = enable_dump_debug; all_options["dump_debug_mode"] = dump_debug_mode; + all_options["dump_data"] = dump_data; + all_options["ge.exec.dumpData"] = dump_data; + all_options["dump_layer"] = dump_layer; + all_options["ge.exec.dumpLayer"] = dump_layer; all_options["soc_config"] = soc_config; all_options["is_tailing_optimization"] = is_tailing_optimization; @@ -1404,6 +1426,8 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options std::string dump_step; std::string dump_mode = "output"; std::string dump_debug_mode = "all"; + std::string dump_data = "tensor"; + std::string dump_layer; std::string stream_max_parallel_num; std::string soc_config; std::string hccl_timeout; @@ -1837,6 +1861,12 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options if (params.count("resource_config_path") > 0) { resource_config_path = params.at("resource_config_path").s(); } + if (params.count("dump_data") > 0) { + dump_data = params.at("dump_data").s(); + } + if (params.count("dump_layer") > 0) { + dump_layer = params.at("dump_layer").s(); + } } } @@ -1855,6 +1885,8 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options sess_options["dump_path"] = dump_path; sess_options["dump_step"] = dump_step; sess_options["dump_mode"] = dump_mode; + sess_options["dump_layer"] = dump_layer; + sess_options["ge.exec.dumpLayer"] = dump_layer; sess_options["enable_dump_debug"] = std::to_string(static_cast(enable_dump_debug)); sess_options["dump_debug_mode"] = dump_debug_mode; sess_options["is_tailing_optimization"] = std::to_string(static_cast(is_tailing_optimization)); @@ -1944,6 +1976,8 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options init_options_["ge.exec.logicalDeviceClusterDeployMode"] = logical_device_cluster_deploy_mode; init_options_["logical_device_id"] = logical_device_id; init_options_["ge.exec.logicalDeviceId"] = logical_device_id; + init_options_["dump_data"] = dump_data; + init_options_["ge.exec.dumpData"] = dump_data; pass_options["do_npu_optimizer"] = std::to_string(static_cast(do_npu_optimizer)); pass_options["enable_data_pre_proc"] = std::to_string(static_cast(enable_dp)); diff --git a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp index 43c335bde..610d906ac 100644 --- a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp +++ b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp @@ -104,6 +104,8 @@ const std::map kConfigurableOptions = { {"jit_compile", "ge.jit_compile"}, {"topo_sorting_mode", "ge.topoSortingMode"}, {"customize_dtypes", "ge.customizeDtypes"}, + {"dump_data", "ge.exec.dumpData"}, + {"dump_layer", "ge.exec.dumpLayer"}, // private options {"_distribute.rank_id", ge::OPTION_EXEC_RANK_ID}, {"_distribute.rank_table", ge::OPTION_EXEC_RANK_TABLE_FILE}, diff --git a/tf_adapter_2.x/python/npu_device/configs/dump_config.py b/tf_adapter_2.x/python/npu_device/configs/dump_config.py index 398f2ad64..db91a1715 100644 --- a/tf_adapter_2.x/python/npu_device/configs/dump_config.py +++ b/tf_adapter_2.x/python/npu_device/configs/dump_config.py @@ -29,5 +29,7 @@ class NpuDumpConfig(NpuBaseConfig): self.dump_mode = OptionValue('output', ['input', 'output', 'all']) self.enable_dump_debug = OptionValue(False, [True, False]) self.dump_debug_mode = OptionValue('all', ['aicore_overflow', 'atomic_overflow', 'all']) + self.dump_data = OptionValue('tensor', ['tensor', 'stats']) + self.dump_layer = OptionValue(None, None) super(NpuDumpConfig, self).__init__() -- Gitee