diff --git a/tf_adapter/interface_spec/api_npu_config.pyh b/tf_adapter/interface_spec/api_npu_config.pyh index 02a6d4395ecb7f7bd511532cee6c141369bc602d..5b20313abb871a634b920ec1d399c0a8cff24f46 100644 --- a/tf_adapter/interface_spec/api_npu_config.pyh +++ b/tf_adapter/interface_spec/api_npu_config.pyh @@ -21,7 +21,8 @@ class NPURunConfig(run_config_lib.RunConfig): topo_sorting_mode=None, aoe_config_file=None, insert_op_file=None, stream_sync_timeout=-1, event_sync_timeout=-1, external_weight=False, es_cluster_config=None, deterministic=0, frozen_variable=False, variable_placement="Device", jit_compile="auto", precision_mode_v2=None, - ac_parallel_enable=None, quant_dumpable=None, input_fusion_size=131072, compile_dynamic_mode=None): + ac_parallel_enable=None, quant_dumpable=None, input_fusion_size=131072, compile_dynamic_mode=None, + execute_times=None): class ProfilingConfig(): def __init__(self, enable_profiling=False, profiling_options=None): diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py index 2b9f41b33a5664c0fe36463586f6f3a9740b3551..cb05a7a79a74312ed5d7000b165b1bab6ef4b0dd 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py @@ -114,7 +114,8 @@ class NPURunConfig(run_config_lib.RunConfig): ac_parallel_enable=None, quant_dumpable=None, input_fusion_size=131072, - compile_dynamic_mode=None + compile_dynamic_mode=None, + execute_times=None ): """ Constructs a NPUConfig. 
@@ -277,6 +278,7 @@ class NPURunConfig(run_config_lib.RunConfig): self._jit_compile = jit_compile self._input_fusion_size = input_fusion_size self._compile_dynamic_mode = compile_dynamic_mode + self.execute_times = execute_times super(NPURunConfig, self).__init__( model_dir=model_dir, tf_random_seed=tf_random_seed, diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py index 4542ef2ffbfd6b4f3c41a59afa54abf7e802b3ae..6e938ac1126fd0ef2901337b90eac16f8b573d46 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py @@ -802,6 +802,8 @@ class NPUEstimator(estimator_lib.Estimator): custom_op.parameter_map["es_cluster_config"].s = tf.compat.as_bytes(config.es_cluster_config) if config._compile_dynamic_mode is not None: custom_op.parameter_map["compile_dynamic_mode"].b = config._compile_dynamic_mode + if config.execute_times is not None: + custom_op.parameter_map["execute_times"].s = tf.compat.as_bytes(config.execute_times) custom_op.parameter_map["jit_compile"].s = tf.compat.as_bytes(config._jit_compile) custom_op.parameter_map["input_fusion_size"].i = config._input_fusion_size custom_op.parameter_map["stream_sync_timeout"].i = config.stream_sync_timeout diff --git a/tf_adapter/util/ge_plugin.cc b/tf_adapter/util/ge_plugin.cc index 3cde8932b53cbc49d4fb3d7a0f2250b4a926d389..a618c1e0dcbe70381c13866f85f74098fcc1b059 100644 --- a/tf_adapter/util/ge_plugin.cc +++ b/tf_adapter/util/ge_plugin.cc @@ -123,6 +123,7 @@ void SetOptionNameMap(json &option_name_map) { option_name_map.emplace("ge.esClusterConfig", "es_cluster_config"); option_name_map.emplace(ge::OPTION_EXEC_DYNAMIC_EXECUTE_MODE, "dynamic_graph_execute_mode"); option_name_map.emplace(ge::OPTION_EXEC_DYNAMIC_INPUT, "dynamic_input"); + option_name_map.emplace("ge.executeTimes", "execute_times"); } } // namespace diff --git a/tf_adapter/util/npu_attrs.cc 
b/tf_adapter/util/npu_attrs.cc index 48340040bdaec44c43c856e61a77f6487d109029..0ab462d2b403c397f4800a624872be51c3dd5bc9 100644 --- a/tf_adapter/util/npu_attrs.cc +++ b/tf_adapter/util/npu_attrs.cc @@ -684,6 +684,7 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr std::string stream_sync_timeout = "-1"; std::string event_sync_timeout = "-1"; std::string es_cluster_config; + std::string execute_times; if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) { (void) ctx->GetAttr("_precision_mode", &precision_mode); @@ -724,6 +725,7 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr (void) ctx->GetAttr("_stream_sync_timeout", &stream_sync_timeout); (void) ctx->GetAttr("_event_sync_timeout", &event_sync_timeout); (void) ctx->GetAttr("_es_cluster_config", &es_cluster_config); + (void) ctx->GetAttr("_execute_times", &execute_times); } std::lock_guard lock(mutex_); @@ -776,6 +778,7 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr init_options_["stream_sync_timeout"] = stream_sync_timeout; init_options_["event_sync_timeout"] = event_sync_timeout; init_options_["ge.esClusterConfig"] = es_cluster_config; + init_options_["ge.executeTimes"] = execute_times; return init_options_; } @@ -1195,6 +1198,7 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & std::string accelerate_train_mode; std::string input_fusion_size; std::string compile_dynamic_mode; + std::string execute_times; auto NpuOptimizer_value = attrs.Find("_NpuOptimizer"); auto enable_data_pre_proc_value = attrs.Find("_enable_data_pre_proc"); @@ -1289,6 +1293,7 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & auto accelerate_train_mode_value = attrs.Find("_accelerate_train_mode"); auto input_fusion_size_value = attrs.Find("_input_fusion_size"); auto compile_dynamic_mode_value = attrs.Find("_compile_dynamic_mode"); + auto execute_times_value = attrs.Find("_execute_times"); if (NpuOptimizer_value != nullptr) { do_npu_optimizer = "1"; if 
(enable_data_pre_proc_value != nullptr) { @@ -1581,6 +1586,9 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & if (es_cluster_config_value != nullptr) { es_cluster_config = es_cluster_config_value->s(); } + if (execute_times_value != nullptr) { + execute_times = execute_times_value->s(); + } if (jit_compile_value != nullptr) { std::string jit_compile = jit_compile_value->s(); all_options["jit_compile"] = jit_compile; @@ -1701,6 +1709,8 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & all_options["frozen_variable"] = frozen_variable; all_options["variable_location"] = variable_location; all_options["compile_dynamic_mode"] = compile_dynamic_mode; + all_options["execute_times"] = execute_times; + all_options["ge.executeTimes"] = execute_times; return all_options; } @@ -1821,6 +1831,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options std::string jit_compile; int64_t input_fusion_size = 131072L; // default 128KB std::string accelerate_train_mode; + std::string execute_times; const RewriterConfig &rewrite_options = options.session_options->config.graph_options().rewrite_options(); for (const auto &custom_optimizer : rewrite_options.custom_optimizers()) { @@ -2355,6 +2366,9 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options if (params.count("es_cluster_config") > 0) { es_cluster_config = params.at("es_cluster_config").s(); } + if (params.count("execute_times") > 0) { + execute_times = params.at("execute_times").s(); + } if (params.count("frozen_variable") > 0) { frozen_variable = params.at("frozen_variable").b(); } @@ -2504,6 +2518,8 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options init_options_["event_sync_timeout"] = std::to_string(event_sync_timeout); init_options_["es_cluster_config"] = es_cluster_config; init_options_["ge.esClusterConfig"] = es_cluster_config; + init_options_["execute_times"] = execute_times; + init_options_["ge.executeTimes"] = 
execute_times; for (const auto &option : init_options_) { std::string attr_name = std::string("_") + option.first; node->AddAttr(attr_name, option.second);