From 6f86a95e5afdc93c8132209569207c1be0683c86 Mon Sep 17 00:00:00 2001 From: gengchao Date: Tue, 24 Dec 2024 15:21:44 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E8=87=AA=E5=8A=A8=E8=9E=8D?= =?UTF-8?q?=E5=90=88=E9=80=89=E9=A1=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tf_adapter/interface_spec/api_npu_config.pyh | 2 +- tf_adapter/kernels/geop_npu.cc | 5 ++++- tf_adapter/kernels/geop_npu.h | 1 + .../npu_bridge/estimator/npu/npu_config.py | 4 +++- .../npu_bridge/estimator/npu/npu_estimator.py | 1 + tf_adapter/util/ge_plugin.cc | 1 + tf_adapter/util/npu_attrs.cc | 19 +++++++++++++++++++ .../npu_device/core/npu_wrapper.cpp | 3 ++- .../python/npu_device/configs/npu_config.py | 2 +- 9 files changed, 33 insertions(+), 5 deletions(-) diff --git a/tf_adapter/interface_spec/api_npu_config.pyh b/tf_adapter/interface_spec/api_npu_config.pyh index cafdb3f24..bf687c385 100644 --- a/tf_adapter/interface_spec/api_npu_config.pyh +++ b/tf_adapter/interface_spec/api_npu_config.pyh @@ -22,7 +22,7 @@ class NPURunConfig(run_config_lib.RunConfig): event_sync_timeout=-1, external_weight=False, es_cluster_config=None, deterministic=0, frozen_variable=False, variable_placement="Device", jit_compile="auto", precision_mode_v2=None, ac_parallel_enable=None, quant_dumpable=None, input_fusion_size=131072, compile_dynamic_mode=None, - execute_times=-1, graph_max_parallel_model_num=1, export_compile_stat=1): + execute_times=-1, graph_max_parallel_model_num=1, export_compile_stat=1, ge_autofuse=False): class ProfilingConfig(): def __init__(self, enable_profiling=False, profiling_options=None): diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc index 80fa15a3b..bcca9cefe 100644 --- a/tf_adapter/kernels/geop_npu.cc +++ b/tf_adapter/kernels/geop_npu.cc @@ -414,6 +414,7 @@ void GeOp::Initialize(OpKernelConstruction *ctx) { (void) ctx->GetAttr("_embedding_flags", &embedding_flags_); (void) 
ctx->GetAttr("_dynamic_input", &dynamic_input_); (void) ctx->GetAttr("_jit_compile", &jit_compile_); + (void) ctx->GetAttr("_ge_autofuse", &ge_autofuse_); if (!dynamic_input_.empty() && dynamic_input_ == "1") { jit_compile_ = "1"; is_dynamic_input_ = true; @@ -435,7 +436,8 @@ void GeOp::Initialize(OpKernelConstruction *ctx) { << ", is_var_init_graph: " << is_var_init_graph_ << ", use_counter_filter: " << use_counter_filter_ << ", max_key_num: " << max_key_num_ << ", embedding_dim: " << embedding_dim_ << ", padding_key: " << padding_key_ << ", embedding_flags: " << embedding_flags_ - << ", compile_dynamic_mode: " << compile_dynamic_mode_; + << ", compile_dynamic_mode: " << compile_dynamic_mode_ + << ", ge_autofuse: " << ge_autofuse_; // global environment Initialize, invoke once for each process std::string sess_config = ""; @@ -1239,6 +1241,7 @@ Status GeOp::SetGraphOptions(OpKernelContext *ctx) { graph_options_["ge.jit_compile"] = jit_compile_; graph_options_["ge.exec.overflow"] = "1"; graph_options_["ge.graphLevelSat"] = (mix_compile_mode_ == "0") ? "1" : "0"; + graph_options_["ge.autofuse"] = ge_autofuse_ ? 
"true" : "false"; return DoAccelerateTrain(); } diff --git a/tf_adapter/kernels/geop_npu.h b/tf_adapter/kernels/geop_npu.h index fc44c4022..d0e7c906f 100644 --- a/tf_adapter/kernels/geop_npu.h +++ b/tf_adapter/kernels/geop_npu.h @@ -279,6 +279,7 @@ public: AccelerateInfo accelerate_info_; GraphHandler graph_handler_; bool need_compile_graph_first_; + bool ge_autofuse_ = false; }; } // namespace tensorflow #endif // TENSORFLOW_KERNELS_GEOP_NPU_H_ diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py index a1789fe25..50000b41d 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py @@ -117,7 +117,8 @@ class NPURunConfig(run_config_lib.RunConfig): compile_dynamic_mode=None, execute_times=-1, graph_max_parallel_model_num=1, - export_compile_stat=1 + export_compile_stat=1, + ge_autofuse=False ): """ Constructs a NPUConfig. @@ -286,6 +287,7 @@ class NPURunConfig(run_config_lib.RunConfig): self._graph_max_parallel_model_num = graph_max_parallel_model_num self.execute_times = execute_times self._export_compile_stat = export_compile_stat + self._ge_autofuse = ge_autofuse super(NPURunConfig, self).__init__( model_dir=model_dir, tf_random_seed=tf_random_seed, diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py index 7b372f621..fe7e462d7 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py @@ -821,6 +821,7 @@ class NPUEstimator(estimator_lib.Estimator): custom_op.parameter_map["frozen_variable"].b = config._frozen_variable custom_op.parameter_map["variable_placement"].s = tf.compat.as_bytes(config._variable_placement) custom_op.parameter_map["execute_times"].i = config.execute_times + custom_op.parameter_map["ge_autofuse"].b = config._ge_autofuse 
self.__load_session_device_id(config, custom_op) self.__load_modify_mixlist(config, custom_op) diff --git a/tf_adapter/util/ge_plugin.cc b/tf_adapter/util/ge_plugin.cc index 443743f51..4eb14fce7 100644 --- a/tf_adapter/util/ge_plugin.cc +++ b/tf_adapter/util/ge_plugin.cc @@ -127,6 +127,7 @@ void SetOptionNameMap(json &option_name_map) { option_name_map.emplace("ge.executeTimes", "execute_times"); option_name_map.emplace(ge::OPTION_EXEC_DYNAMIC_EXECUTE_MODE, "dynamic_graph_execute_mode"); option_name_map.emplace(ge::OPTION_EXEC_DYNAMIC_INPUT, "dynamic_input"); + option_name_map.emplace("ge.autofuse", "ge_autofuse"); } } // namespace diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc index d89e01ebc..765277a87 100644 --- a/tf_adapter/util/npu_attrs.cc +++ b/tf_adapter/util/npu_attrs.cc @@ -689,6 +689,7 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr std::string es_cluster_config; std::string execute_times = "-1"; std::string export_compile_stat; + std::string ge_autofuse = "0"; if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) { (void) ctx->GetAttr("_precision_mode", &precision_mode); @@ -731,6 +732,7 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr (void) ctx->GetAttr("_es_cluster_config", &es_cluster_config); (void) ctx->GetAttr("_execute_times", &execute_times); (void) ctx->GetAttr("_export_compile_stat", &export_compile_stat); + (void) ctx->GetAttr("_ge_autofuse", &ge_autofuse); } std::lock_guard lock(mutex_); @@ -785,6 +787,8 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr init_options_["ge.esClusterConfig"] = es_cluster_config; init_options_["execute_times"] = execute_times; init_options_["ge.executeTimes"] = execute_times; + init_options_["ge.autofuse"] = ge_autofuse; + init_options_["ge_autofuse"] = ge_autofuse; if (!export_compile_stat.empty()) { init_options_["export_compile_stat"] = export_compile_stat; init_options_["ge.exportCompileStat"] = export_compile_stat; @@ 
-1213,6 +1217,7 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & std::string compile_dynamic_mode; std::string execute_times = "-1"; std::string export_compile_stat; + std::string ge_autofuse="0"; auto NpuOptimizer_value = attrs.Find("_NpuOptimizer"); auto enable_data_pre_proc_value = attrs.Find("_enable_data_pre_proc"); @@ -1310,6 +1315,7 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & auto compile_dynamic_mode_value = attrs.Find("_compile_dynamic_mode"); auto execute_times_value = attrs.Find("_execute_times"); auto export_compile_stat_value = attrs.Find("_export_compile_stat"); + auto ge_autofuse_value = attrs.Find("_ge_autofuse"); if (NpuOptimizer_value != nullptr) { do_npu_optimizer = "1"; if (enable_data_pre_proc_value != nullptr) { @@ -1622,6 +1628,9 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & if (export_compile_stat_value != nullptr) { export_compile_stat = export_compile_stat_value->s(); } + if (ge_autofuse_value != nullptr) { + ge_autofuse = ge_autofuse_value->s(); + } } all_options["variable_format_optimize"] = variable_format_optimize; @@ -1734,6 +1743,8 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & all_options["compile_dynamic_mode"] = compile_dynamic_mode; all_options["execute_times"] = execute_times; all_options["ge.executeTimes"] = execute_times; + all_options["ge.autofuse"] = ge_autofuse; + all_options["ge_autofuse"] = ge_autofuse; if (!export_compile_stat.empty()) { all_options["export_compile_stat"] = export_compile_stat; all_options["ge.exportCompileStat"] = export_compile_stat; @@ -1862,6 +1873,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options std::string accelerate_train_mode; int32_t execute_times = -1; int32_t export_compile_stat = 1; + bool ge_autofuse = false; const RewriterConfig &rewrite_options = options.session_options->config.graph_options().rewrite_options(); for (const auto &custom_optimizer : rewrite_options.custom_optimizers()) { @@ -2404,6 +2416,10 @@ 
Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options if (params.count("execute_times") > 0) { execute_times = params.at("execute_times").i(); } + + if (params.count("ge_autofuse") > 0) { + ge_autofuse = params.at("ge_autofuse").b(); + } if (params.count("frozen_variable") > 0) { frozen_variable = params.at("frozen_variable").b(); } @@ -2563,6 +2579,9 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options init_options_["ge.esClusterConfig"] = es_cluster_config; init_options_["execute_times"] = std::to_string(execute_times); init_options_["ge.executeTimes"] = std::to_string(execute_times); + + init_options_["ge_autofuse"] = std::to_string(ge_autofuse); + init_options_["ge.autofuse"] = std::to_string(ge_autofuse); for (const auto &option : init_options_) { std::string attr_name = std::string("_") + option.first; node->AddAttr(attr_name, option.second); diff --git a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp index 73dec5535..567179770 100644 --- a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp +++ b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp @@ -102,7 +102,8 @@ const std::map kGlobalConfigOptions = { {"_distribute.cm_chief_port", ge::OPTION_EXEC_CM_CHIEF_PORT}, {"_distribute.cm_chief_worker_device", ge::OPTION_EXEC_CM_CHIEF_DEVICE}, {"_distribute.cm_worker_ip", ge::OPTION_EXEC_CM_WORKER_IP}, - {"_distribute.cm_worker_size", ge::OPTION_EXEC_CM_WORKER_SIZE} + {"_distribute.cm_worker_size", ge::OPTION_EXEC_CM_WORKER_SIZE}, + {"ge_autofuse", "ge.autofuse"} }; const std::map kSessionConfigOptions = { diff --git a/tf_adapter_2.x/python/npu_device/configs/npu_config.py b/tf_adapter_2.x/python/npu_device/configs/npu_config.py index 21abadeca..eb853a8dd 100644 --- a/tf_adapter_2.x/python/npu_device/configs/npu_config.py +++ b/tf_adapter_2.x/python/npu_device/configs/npu_config.py @@ -80,5 +80,5 @@ class NpuConfig(NpuBaseConfig): ['fp16', 'origin', 
'cube_fp16in_fp32out', 'mixed_float16', 'mixed_bfloat16', 'cube_hif8', 'mixed_hif8']) self.export_compile_stat = OptionValue(1, [0, 1, 2]) - + self.ge_autofuse = OptionValue(False, [True, False]) super(NpuConfig, self).__init__() -- Gitee