diff --git a/tf_adapter/interface_spec/api_npu_config.pyh b/tf_adapter/interface_spec/api_npu_config.pyh index 185a2d9707f1bedcb30b1a98b81b392e785258ee..de7b76e6faa259a312b33ff04bced68d6eaa0a44 100644 --- a/tf_adapter/interface_spec/api_npu_config.pyh +++ b/tf_adapter/interface_spec/api_npu_config.pyh @@ -24,7 +24,7 @@ class NPURunConfig(run_config_lib.RunConfig): ac_parallel_enable=None, quant_dumpable=None, input_fusion_size=131072, compile_dynamic_mode=None, graph_max_parallel_model_num=1, export_compile_stat=1, aicore_num=None, oo_constant_folding=True, input_batch_cpy=False, shape_generalization_mode="STRICT", all_tensor_not_empty=False, - auto_multistream_parallel_mode=None): + auto_multistream_parallel_mode=None, oo_level="O3", optimization_switch=None): class ProfilingConfig(): def __init__(self, enable_profiling=False, profiling_options=None): diff --git a/tf_adapter/interface_spec/api_npu_plugin.pyh b/tf_adapter/interface_spec/api_npu_plugin.pyh index 2cd0a160876d7579de984b0205aa57257e246731..f71758154268a53af7023e9955a5448c0e3fe039 100644 --- a/tf_adapter/interface_spec/api_npu_plugin.pyh +++ b/tf_adapter/interface_spec/api_npu_plugin.pyh @@ -7,6 +7,6 @@ def npu_resource_init(graph_run_mode=1, op_debug_level=0, enable_profiling=False op_compiler_cache_mode=None, op_compiler_cache_dir=None, debug_dir=None, hcom_multi_mode=False, distribute_config=None, aoe_config_file=None, precision_mode_v2=None, export_compile_stat=1, aicore_num=None, oo_constant_folding=True, - input_batch_cpy=False): + input_batch_cpy=False, oo_level="O3", optimization_switch=None): def npu_resource_shutdown(): diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py index 645b1849dd0b2892b270ae943c894d5444ef4e56..674a372e4bab7edf184a0331d5844cab0c5665f1 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py @@ -121,7 +121,9 @@ class 
NPURunConfig(run_config_lib.RunConfig): input_batch_cpy=False, shape_generalization_mode="STRICT", all_tensor_not_empty=False, - auto_multistream_parallel_mode=None + auto_multistream_parallel_mode=None, + oo_level="O3", + optimization_switch=None ): """ Constructs a NPUConfig. @@ -199,6 +201,8 @@ class NPURunConfig(run_config_lib.RunConfig): ADAPTIVE: generalizes the varying axes. all_tensor_not_empty: default is: False. auto_multistream_parallel_mode: default is None; cv: cube vector parallel. + oo_level: The graph optimization level, default is "O3". + optimization_switch: The switches of individual optimization passes, default is None. """ # Check iterations_per_loop. @@ -302,6 +306,8 @@ class NPURunConfig(run_config_lib.RunConfig): self._shape_generalization_mode = shape_generalization_mode self._all_tensor_not_empty = all_tensor_not_empty self._auto_multistream_parallel_mode = auto_multistream_parallel_mode + self._oo_level = oo_level + self._optimization_switch = optimization_switch super(NPURunConfig, self).__init__( model_dir=model_dir, tf_random_seed=tf_random_seed, diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py index 86bd6a2c7b4b5c6796ffc57cd98ad190b109567a..3dec41e41ccf276c6540a1eec33ef28c3a2dcfb0 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py @@ -752,6 +752,24 @@ class NPUEstimator(estimator_lib.Estimator): if config._input_batch_cpy is not None: custom_op.parameter_map["input_batch_cpy"].b = config._input_batch_cpy + def __load_oo_level(self, config, custom_op): + """Load oo_level config, and add to custom_optimizers + Args: + config: NPURunConfig. + custom_op: Customer optimizers. 
+ """ + if config._oo_level is not None: + custom_op.parameter_map["oo_level"].s = tf.compat.as_bytes(config._oo_level) + + def __load_optimization_switch(self, config, custom_op): + """Load optimization_switch config, and add to custom_optimizers + Args: + config: NPURunConfig. + custom_op: Customer optimizers. + """ + if config._optimization_switch is not None: + custom_op.parameter_map["optimization_switch"].s = tf.compat.as_bytes(config._optimization_switch) + def __load_graph_optimizers(self, config): """ Change the session config and load the graph optimizers: @@ -903,6 +921,10 @@ class NPUEstimator(estimator_lib.Estimator): self.__load_input_batch_cpy(config, custom_op) + self.__load_oo_level(config, custom_op) + + self.__load_optimization_switch(config, custom_op) + return config def __load_job_info(self, job_start_file): diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py index 97571455e647332275e55d7d699f2f480a742682..e59a7f0b772fa891f32fddde228663cb3bca03f4 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py @@ -76,7 +76,9 @@ def npu_resource_init(graph_run_mode=1, export_compile_stat=1, aicore_num=None, oo_constant_folding=True, - input_batch_cpy=False): + input_batch_cpy=False, + oo_level="O3", + optimization_switch=None): """Initialize NPU resource""" util.check_nonnegative_integer(graph_run_mode, "graph_run_mode") check_graph_run_mode(graph_run_mode) @@ -129,10 +131,13 @@ def npu_resource_init(graph_run_mode=1, if oo_constant_folding is not None: util.check_bool_type(oo_constant_folding, "oo_constant_folding") init["ge.oo.constantFolding"] = "true" if oo_constant_folding is True else "false" - # input_batch_cpy if input_batch_cpy is not None: util.check_bool_type(input_batch_cpy, "input_batch_cpy") init["ge.inputBatchCpy"] = "true" if input_batch_cpy is True else "false" + if oo_level is not 
None: + init["ge.oo.level"] = str(oo_level) + if optimization_switch is not None: + init["ge.optimizationSwitch"] = str(optimization_switch) init_options = tf_adapter.map_string_string(init) tf_adapter.PluginInit(init_options) diff --git a/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc b/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc index fbb165193d89ba5ca615673fe2435b574400b3e1..b4d628c64a56feca75b153ad8f8b18386484a206 100644 --- a/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc +++ b/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc @@ -229,5 +229,21 @@ TEST_F(GePluginTest, PluginInitTest_input_batch_cpy) { ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty()); NpuClose(); } + +TEST_F(GePluginTest, PluginInitTest_oo_level) { + std::map<std::string, std::string> init_options; + init_options["ge.oo.level"] = "O3"; + PluginInit(init_options); + ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty()); + NpuClose(); +} + +TEST_F(GePluginTest, PluginInitTest_optimization_switch) { + std::map<std::string, std::string> init_options; + init_options["ge.optimizationSwitch"] = "pass1:on"; + PluginInit(init_options); + ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty()); + NpuClose(); +} } } // end tensorflow diff --git a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc index d3503e35ec8d7fe8f5440c77b731b98b77d3c690..8a46d0094f54b46001c4cac245aa47d388bfd452 100644 --- a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc +++ b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc @@ -601,5 +601,37 @@ TEST_F(NpuAttrTest, SetNpuOptimizerAttr_compile_hybrid_mode) { EXPECT_EQ(s.ok(), false); } + +TEST_F(NpuAttrTest, GetAllAttrOptions_oo_level) { + AttrValueMap attr_map; + + AttrValue npu_optimizer = AttrValue(); + npu_optimizer.set_s("NpuOptimizer"); + attr_map["_NpuOptimizer"] = npu_optimizer; + + AttrValue oo_level = AttrValue(); + oo_level.set_s("O3"); + attr_map["_oo_level"] = oo_level; + + AttrSlice attrs(&attr_map); + const 
auto &all_options = NpuAttrs::GetAllAttrOptions(attrs); + EXPECT_NE(all_options.find("ge.oo.level"), all_options.cend()); +} + +TEST_F(NpuAttrTest, GetAllAttrOptions_optimization_switch) { + AttrValueMap attr_map; + + AttrValue npu_optimizer = AttrValue(); + npu_optimizer.set_s("NpuOptimizer"); + attr_map["_NpuOptimizer"] = npu_optimizer; + + AttrValue optimization_switch = AttrValue(); + optimization_switch.set_s("pass1:on"); + attr_map["_optimization_switch"] = optimization_switch; + + AttrSlice attrs(&attr_map); + const auto &all_options = NpuAttrs::GetAllAttrOptions(attrs); + EXPECT_NE(all_options.find("ge.optimizationSwitch"), all_options.cend()); +} + } } // end tensorflow diff --git a/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc b/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc index fac80fc57529d18c25c1ed4fd2afbbd28a2ff703..5bd524bf92c7b4c51fec66c6af76dfd363f0785b 100644 --- a/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc +++ b/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc @@ -219,5 +219,22 @@ TEST_F(GePluginTest, PluginInitTest_input_batch_cpy) { ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty()); NpuClose(); } + +TEST_F(GePluginTest, PluginInitTest_oo_level) { + std::map<std::string, std::string> init_options; + init_options["ge.oo.level"] = "O3"; + PluginInit(init_options); + ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty()); + NpuClose(); +} + +TEST_F(GePluginTest, PluginInitTest_optimization_switch) { + std::map<std::string, std::string> init_options; + init_options["ge.optimizationSwitch"] = "pass1:on"; + PluginInit(init_options); + ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty()); + NpuClose(); +} + } } // end tensorflow diff --git a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc index b9cdce5324159b1e6fe1068d2b397b5e00bc4a93..6a4e614a6b8c8db7a2c48fbb0bbf424c3c49a7f9 100644 --- a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc +++ b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc 
@@ -739,5 +739,41 @@ TEST_F(NpuAttrTest, SetNpuOptimizerAttr_compile_hybrid_mode) { (*custom_config->mutable_parameter_map())["compile_hybrid_mode"] = compile_hybrid_mode_value; EXPECT_EQ(s.ok(), false); } + +TEST_F(NpuAttrTest, SetNpuOptimizerAttr_oo_level) { + GraphOptimizationPassOptions options; + SessionOptions session_options; + session_options.config.mutable_graph_options()->mutable_optimizer_options()->set_do_function_inlining(true); + auto *custom_config = + session_options.config.mutable_graph_options()->mutable_rewrite_options()->add_custom_optimizers(); + custom_config->set_name("NpuOptimizer"); + options.session_options = &session_options; + + AttrValue oo_level = AttrValue(); + oo_level.set_s("O3"); + (*custom_config->mutable_parameter_map())["oo_level"] = oo_level; + + AttrValue jit_compile = AttrValue(); + jit_compile.set_s("2"); + (*custom_config->mutable_parameter_map())["jit_compile"] = jit_compile; + Status s = NpuAttrs::SetNpuOptimizerAttr(options, reinterpret_cast(1)); + EXPECT_EQ(s.ok(), false); +} + +TEST_F(NpuAttrTest, GetAllAttrOptions_optimization_switch) { + AttrValueMap attr_map; + + AttrValue npu_optimizer = AttrValue(); + npu_optimizer.set_s("NpuOptimizer"); + attr_map["_NpuOptimizer"] = npu_optimizer; + + AttrValue optimization_switch = AttrValue(); + optimization_switch.set_s("pass1:on"); + attr_map["_optimization_switch"] = optimization_switch; + + AttrSlice attrs(&attr_map); + const auto &all_options = NpuAttrs::GetAllAttrOptions(attrs); + EXPECT_NE(all_options.find("ge.optimizationSwitch"), all_options.cend()); +} } } // end tensorflow diff --git a/tf_adapter/util/ge_plugin.cc b/tf_adapter/util/ge_plugin.cc index 2ae110fd7a70a701f21fa2cc7446b757ad6caed9..c82d050baee560f5bafa048642e7eaabcd1fd9a2 100644 --- a/tf_adapter/util/ge_plugin.cc +++ b/tf_adapter/util/ge_plugin.cc @@ -131,6 +131,8 @@ void SetOptionNameMap(json &option_name_map) { option_name_map.emplace("ge.inputBatchCpy", "input_batch_cpy"); 
option_name_map.emplace(ge::OPTION_ALL_TENSOR_NOT_EMPTY, "all_tensor_not_empty"); option_name_map.emplace("ge.autoMultistreamParallelMode", "auto_multistream_parallel_mode"); + option_name_map.emplace("ge.oo.level", "oo_level"); + option_name_map.emplace("ge.optimizationSwitch", "optimization_switch"); } } // namespace @@ -311,6 +313,14 @@ void GePlugin::Init(std::map &init_options, const bool ADP_LOG(INFO) << "[GePlugin] input_batch_cpy : " << init_options["ge.inputBatchCpy"]; } + if (init_options.find("ge.oo.level") != init_options.end()) { + ADP_LOG(INFO) << "[GePlugin] oo_level : " << init_options["ge.oo.level"]; + } + + if (init_options.find("ge.optimizationSwitch") != init_options.end()) { + ADP_LOG(INFO) << "[GePlugin] optimization_switch : " << init_options["ge.optimizationSwitch"]; + } + bool tdt_uninit_env = false; (void) ReadBoolFromEnvVar("ASCEND_TDT_UNINIT", false, &tdt_uninit_env); if (!kIsHeterogeneous && !tdt_uninit_env) { diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc index 6ee28bafe327e9d92f0c28be527ee9f4766fdb79..67406594f34c6bc283889478d43b0d26395cbda4 100644 --- a/tf_adapter/util/npu_attrs.cc +++ b/tf_adapter/util/npu_attrs.cc @@ -516,6 +516,8 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr std::string aicore_num; std::string all_tensor_not_empty; std::string auto_multistream_parallel_mode; + std::string oo_level; + std::string optimization_switch; const bool is_npu_optimizer_valid = (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()); if (is_npu_optimizer_valid) { (void) ctx->GetAttr("_variable_format_optimize", &variable_format_optimize); @@ -595,6 +597,8 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr (void) ctx->GetAttr("_aicore_num", &aicore_num); (void) ctx->GetAttr("_all_tensor_not_empty", &all_tensor_not_empty); (void) ctx->GetAttr("_auto_multistream_parallel_mode", &auto_multistream_parallel_mode); + (void) ctx->GetAttr("_oo_level", &oo_level); + (void) 
ctx->GetAttr("_optimization_switch", &optimization_switch); } // session options @@ -669,9 +673,13 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr sess_options["all_tensor_not_empty"] = all_tensor_not_empty; sess_options["auto_multistream_parallel_mode"] = auto_multistream_parallel_mode; sess_options["ge.autoMultistreamParallelMode"] = auto_multistream_parallel_mode; - SetForbiddenClosePassOn(sess_options); sess_options["aicore_num"] = aicore_num; sess_options["ge.aicoreNum"] = aicore_num; + sess_options["ge.oo.level"] = oo_level; + sess_options["oo_level"] = oo_level; + sess_options["ge.optimizationSwitch"] = optimization_switch; + sess_options["optimization_switch"] = optimization_switch; + SetForbiddenClosePassOn(sess_options); return sess_options; } @@ -735,6 +743,8 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr std::string aicore_num; std::string oo_constant_folding; std::string input_batch_cpy; + std::string oo_level; + std::string optimization_switch; if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) { (void) ctx->GetAttr("_precision_mode", &precision_mode); @@ -779,6 +789,8 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr (void) ctx->GetAttr("_aicore_num", &aicore_num); (void) ctx->GetAttr("_oo_constant_folding", &oo_constant_folding); (void) ctx->GetAttr("_input_batch_cpy", &input_batch_cpy); + (void) ctx->GetAttr("_oo_level", &oo_level); + (void) ctx->GetAttr("_optimization_switch", &optimization_switch); } std::lock_guard lock(mutex_); @@ -847,6 +859,10 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr init_options_["input_batch_cpy"] = input_batch_cpy; init_options_["ge.inputBatchCpy"] = input_batch_cpy; init_options_["ge.inputPlacement"] = "DeviceHbm"; + init_options_["oo_level"] = oo_level; + init_options_["ge.oo.level"] = oo_level; + init_options_["optimization_switch"] = optimization_switch; + init_options_["ge.optimizationSwitch"] = optimization_switch; 
SetForbiddenClosePassOn(init_options_); return init_options_; } @@ -1286,6 +1302,8 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & std::string all_tensor_not_empty; std::string auto_multistream_parallel_mode; std::string compile_hybrid_mode; + std::string oo_level; + std::string optimization_switch; auto NpuOptimizer_value = attrs.Find("_NpuOptimizer"); auto enable_data_pre_proc_value = attrs.Find("_enable_data_pre_proc"); @@ -1389,6 +1407,8 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & auto all_tensor_not_empty_value = attrs.Find("_all_tensor_not_empty"); auto auto_multistream_parallel_mode_value = attrs.Find("_auto_multistream_parallel_mode"); auto compile_hybrid_mode_value = attrs.Find("_compile_hybrid_mode"); + auto oo_level_value = attrs.Find("_oo_level"); + auto optimization_switch_value = attrs.Find("_optimization_switch"); if (NpuOptimizer_value != nullptr) { do_npu_optimizer = "1"; if (enable_data_pre_proc_value != nullptr) { @@ -1718,6 +1738,12 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & if (auto_multistream_parallel_mode_value != nullptr) { auto_multistream_parallel_mode = auto_multistream_parallel_mode_value->s(); } + if (oo_level_value != nullptr) { + oo_level = oo_level_value->s(); + } + if (optimization_switch_value != nullptr) { + optimization_switch = optimization_switch_value->s(); + } } all_options["variable_format_optimize"] = variable_format_optimize; @@ -1847,6 +1873,10 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & all_options["ge.inputBatchCpy"] = input_batch_cpy; all_options["shape_generalization_mode"] = shape_generalization_mode; all_options["compile_hybrid_mode"] = compile_hybrid_mode; + all_options["oo_level"] = oo_level; + all_options["ge.oo.level"] = oo_level; + all_options["optimization_switch"] = optimization_switch; + all_options["ge.optimizationSwitch"] = optimization_switch; return all_options; } @@ -1978,6 +2008,8 @@ Status NpuAttrs::SetNpuOptimizerAttr(const 
GraphOptimizationPassOptions &options bool all_tensor_not_empty = false; std::string auto_multistream_parallel_mode; std::string compile_hybrid_mode; + std::string oo_level = "O3"; + std::string optimization_switch; const RewriterConfig &rewrite_options = options.session_options->config.graph_options().rewrite_options(); for (const auto &custom_optimizer : rewrite_options.custom_optimizers()) { if (custom_optimizer.name() == "NpuOptimizer") { @@ -2573,6 +2605,17 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options init_options_["ge.inputBatchCpy"] = input_batch_cpy_str; init_options_["ge.inputPlacement"] = "DeviceHbm"; } + if (params.count("oo_level") > 0) { + oo_level = params.at("oo_level").s(); + init_options_["oo_level"] = oo_level; + init_options_["ge.oo.level"] = oo_level; + } + if (params.count("optimization_switch") > 0) { + optimization_switch = params.at("optimization_switch").s(); + init_options_["optimization_switch"] = optimization_switch; + init_options_["ge.optimizationSwitch"] = optimization_switch; + } + if (params.count("jit_compile") > 0) { const static std::vector kJitCompileList = {"true", "false", @@ -2671,6 +2714,11 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options sess_options["auto_multistream_parallel_mode"] = auto_multistream_parallel_mode; sess_options["ge.autoMultistreamParallelMode"] = auto_multistream_parallel_mode; graph_options["compile_hybrid_mode"] = compile_hybrid_mode; + sess_options["oo_level"] = oo_level; + sess_options["ge.oo.level"] = oo_level; + sess_options["optimization_switch"] = optimization_switch; + sess_options["ge.optimizationSwitch"] = optimization_switch; + init_options_["profiling_mode"] = std::to_string(static_cast(profiling_mode)); init_options_[ge::OPTION_EXEC_PROFILING_MODE] = std::to_string(static_cast(profiling_mode)); init_options_["profiling_options"] = profiling_options; @@ -2792,6 +2840,10 @@ void NpuAttrs::LogOptions(const std::map 
&options) { // tf场景存在某些不可关闭pass,因此需要默认设置forbidden_close_pass为on,即开启这些不可关闭的pass void NpuAttrs::SetForbiddenClosePassOn(std::map &option) { - option["ge.optimizationSwitch"].append("forbidden_close_pass:on"); + if (option["ge.optimizationSwitch"].empty()) { + option["ge.optimizationSwitch"] = "forbidden_close_pass:on"; + } else { + option["ge.optimizationSwitch"].append(";forbidden_close_pass:on"); + } } } // namespace tensorflow diff --git a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp index 2671a05795a60610450629d031485f5d64104d9f..d1ed19793fb109242e08adfad8e0da82e53d018c 100644 --- a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp +++ b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp @@ -98,6 +98,8 @@ const std::map kGlobalConfigOptions = { {"oo_constant_folding", "ge.oo.constantFolding"}, {"input_batch_cpy", "ge.inputBatchCpy"}, {"shape_generalization_mode", "shape_generalization_mode"}, + {"oo_level", "ge.oo.level"}, + {"optimization_switch", "ge.optimizationSwitch"}, // private options {"_distribute.rank_id", ge::OPTION_EXEC_RANK_ID}, {"_distribute.rank_table", ge::OPTION_EXEC_RANK_TABLE_FILE}, diff --git a/tf_adapter_2.x/python/npu_device/configs/npu_config.py b/tf_adapter_2.x/python/npu_device/configs/npu_config.py index b18bf56d9ac7f425ce7e7de028f85b708312b028..da66c4eac6585145ad76082634db12f02159b631 100644 --- a/tf_adapter_2.x/python/npu_device/configs/npu_config.py +++ b/tf_adapter_2.x/python/npu_device/configs/npu_config.py @@ -86,5 +86,7 @@ class NpuConfig(NpuBaseConfig): self.shape_generalization_mode = OptionValue("STRICT", ["STRICT", "FULL", "ADAPTIVE"]) self.all_tensor_not_empty = OptionValue(False, [True, False]) self.auto_multistream_parallel_mode = OptionValue(None, ['cv']) + self.oo_level = OptionValue("O3", ["O0", "O1", "O2", "O3"]) + self.optimization_switch = OptionValue(None, None) super(NpuConfig, self).__init__()