From bce7b63dfcd1ac66846d03f0689e2a60efe62cf5 Mon Sep 17 00:00:00 2001
From: yangyongqiang
Date: Tue, 18 Feb 2025 13:52:21 +0000
Subject: [PATCH] !2925 add option oo_constant_folding

Merge pull request !2925 from yangyongqiang/feature_0211
---
 tf_adapter/interface_spec/api_npu_config.pyh  |  3 +-
 tf_adapter/interface_spec/api_npu_plugin.pyh  |  2 +-
 .../npu_bridge/estimator/npu/npu_config.py    |  5 ++-
 .../npu_bridge/estimator/npu/npu_estimator.py | 11 ++++++
 .../npu_bridge/estimator/npu/npu_plugin.py    |  6 +++-
 .../tests/st/util/testcase/ge_plugin_test.cc  |  8 +++++
 .../tests/st/util/testcase/npu_attrs_test.cc  | 35 +++++++++++++++++++
 .../tests/ut/util/testcase/ge_plugin_test.cc  |  8 +++++
 .../tests/ut/util/testcase/npu_attrs_test.cc  | 35 +++++++++++++++++++
 tf_adapter/util/ge_plugin.cc                  |  4 +++
 tf_adapter/util/npu_attrs.cc                  | 23 ++++++++++++
 .../npu_device/core/npu_wrapper.cpp           |  1 +
 .../python/npu_device/configs/npu_config.py   |  1 +
 .../tests/stub/include/stub/defines.h         |  2 ++
 14 files changed, 140 insertions(+), 4 deletions(-)

diff --git a/tf_adapter/interface_spec/api_npu_config.pyh b/tf_adapter/interface_spec/api_npu_config.pyh
index 892230504..492d996e9 100644
--- a/tf_adapter/interface_spec/api_npu_config.pyh
+++ b/tf_adapter/interface_spec/api_npu_config.pyh
@@ -22,7 +22,8 @@ class NPURunConfig(run_config_lib.RunConfig):
                event_sync_timeout=-1, external_weight=False, es_cluster_config=None, deterministic=0,
                frozen_variable=False, variable_placement="Device", jit_compile="auto", precision_mode_v2=None,
                ac_parallel_enable=None, quant_dumpable=None, input_fusion_size=131072, compile_dynamic_mode=None,
-               execute_times=-1, graph_max_parallel_model_num=1, export_compile_stat=1, aicore_num=None):
+               execute_times=-1, graph_max_parallel_model_num=1, export_compile_stat=1, aicore_num=None,
+               oo_constant_folding=True):

 class ProfilingConfig():
   def __init__(self, enable_profiling=False, profiling_options=None):
diff --git a/tf_adapter/interface_spec/api_npu_plugin.pyh b/tf_adapter/interface_spec/api_npu_plugin.pyh
index 59d0ea0f8..a65a3272e 100644
--- a/tf_adapter/interface_spec/api_npu_plugin.pyh
+++ b/tf_adapter/interface_spec/api_npu_plugin.pyh
@@ -6,6 +6,6 @@ def npu_resource_init(graph_run_mode=1, op_debug_level=0, enable_profiling=False
                       enable_exception_dump=2, aoe_mode=None, work_path=None, op_compiler_cache_mode=None,
                       op_compiler_cache_dir=None, debug_dir=None, hcom_multi_mode=False,
                       distribute_config=None, aoe_config_file=None,
-                      precision_mode_v2=None, export_compile_stat=1, aicore_num=None):
+                      precision_mode_v2=None, export_compile_stat=1, aicore_num=None, oo_constant_folding=True):

 def npu_resource_shutdown():
diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
index aa678d02a..b8607ff2c 100644
--- a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
+++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
@@ -118,7 +118,8 @@ class NPURunConfig(run_config_lib.RunConfig):
                  execute_times=-1,
                  graph_max_parallel_model_num=1,
                  export_compile_stat=1,
-                 aicore_num=None
+                 aicore_num=None,
+                 oo_constant_folding=True
                  ):
     """
     Constructs a NPUConfig.
@@ -189,6 +190,7 @@ class NPURunConfig(run_config_lib.RunConfig):
         export_compile_stat: configure statistics of the graph compiler, 0: Not Generate; 1: Generated when the program
         aicore_num: default is: ''.
         exits (default); 2: Generated when graph compilation complete.
+        oo_constant_folding: The switch of constant folding. False: disable; True (default): enable.
     """

     # Check iterations_per_loop.
@@ -289,6 +291,7 @@ class NPURunConfig(run_config_lib.RunConfig):
         self.execute_times = execute_times
         self._export_compile_stat = export_compile_stat
         self._aicore_num = aicore_num
+        self._oo_constant_folding = oo_constant_folding
         super(NPURunConfig, self).__init__(
             model_dir=model_dir,
             tf_random_seed=tf_random_seed,
diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py
index 738eba1ea..67bd1244a 100644
--- a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py
+++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py
@@ -729,6 +729,15 @@ class NPUEstimator(estimator_lib.Estimator):
         if config._aicore_num is not None:
             custom_op.parameter_map["aicore_num"].s = tf.compat.as_bytes(config._aicore_num)

+    def __oo_constant_folding(self, config, custom_op):
+        """Load oo_constant_folding config, and add to custom_optimizers
+        Args:
+            config: NPURunConfig.
+            custom_op: Customer optimizers.
+        """
+        if config._oo_constant_folding is not None:
+            custom_op.parameter_map["oo_constant_folding"].b = config._oo_constant_folding
+
     def __load_graph_optimizers(self, config):
         """
         Change the session config and load the graph optimizers:
@@ -871,6 +880,8 @@ class NPUEstimator(estimator_lib.Estimator):

         self.__load_aicore_num(config, custom_op)

+        self.__oo_constant_folding(config, custom_op)
+
         return config

     def __load_job_info(self, job_start_file):
diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py
index fa4e69f0d..bba68219b 100644
--- a/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py
+++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py
@@ -74,7 +74,8 @@ def npu_resource_init(graph_run_mode=1,
                       aoe_config_file=None,
                       precision_mode_v2=None,
                       export_compile_stat=1,
-                      aicore_num=None):
+                      aicore_num=None,
+                      oo_constant_folding=True):
     """Initialize NPU resource"""
     util.check_nonnegative_integer(graph_run_mode, "graph_run_mode")
     check_graph_run_mode(graph_run_mode)
@@ -124,6 +125,9 @@ def npu_resource_init(graph_run_mode=1,
         init["ge.exportCompileStat"] = str(export_compile_stat)
     if aicore_num is not None:
         init["ge.aicoreNum"] = str(aicore_num)
+    if oo_constant_folding is not None:
+        util.check_bool_type(oo_constant_folding, "oo_constant_folding")
+        init["ge.oo.constantFolding"] = "true" if oo_constant_folding is True else "false"
     init_options = tf_adapter.map_string_string(init)
     tf_adapter.PluginInit(init_options)

diff --git a/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc b/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc
index 10f0f4085..144e0b9d8 100644
--- a/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc
+++ b/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc
@@ -213,5 +213,13 @@ TEST_F(GePluginTest, PluginInitTest_aicore_num) {
   ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty());
   NpuClose();
 }
+
+TEST_F(GePluginTest, PluginInitTest_oo_constant_folding) {
+  std::map<std::string, std::string> init_options;
+  init_options["ge.oo.constantFolding"] = "true";
+  PluginInit(init_options);
+  ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty());
+  NpuClose();
+}
 }
 }  // end tensorflow
diff --git a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc
index dea78f7eb..408580c90 100644
--- a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc
+++ b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc
@@ -409,5 +409,40 @@ TEST_F(NpuAttrTest, GetAllAttrOptions_aicore_num) {
   EXPECT_NE(all_options.find("aicore_num"), all_options.cend());
 }

+TEST_F(NpuAttrTest, SetNpuOptimizerAttr_oo_constant_folding) {
+  GraphOptimizationPassOptions options;
+  SessionOptions session_options;
+  session_options.config.mutable_graph_options()->mutable_optimizer_options()->set_do_function_inlining(true);
+  auto *custom_config =
+      session_options.config.mutable_graph_options()->mutable_rewrite_options()->add_custom_optimizers();
+  custom_config->set_name("NpuOptimizer");
+  options.session_options = &session_options;
+
+  AttrValue oo_constant_folding = AttrValue();
+  oo_constant_folding.set_b(true);
+  (*custom_config->mutable_parameter_map())["oo_constant_folding"] = oo_constant_folding;
+
+  AttrValue jit_compile = AttrValue();
+  jit_compile.set_s("2");
+  (*custom_config->mutable_parameter_map())["jit_compile"] = jit_compile;
+  Status s = NpuAttrs::SetNpuOptimizerAttr(options, reinterpret_cast(1));
+  EXPECT_EQ(s.ok(), false);
+}
+
+TEST_F(NpuAttrTest, GetAllAttrOptions_oo_constant_folding) {
+  AttrValueMap attr_map;
+
+  AttrValue npu_optimizer = AttrValue();
+  npu_optimizer.set_s("NpuOptimizer");
+  attr_map["_NpuOptimizer"] = npu_optimizer;
+
+  AttrValue oo_constant_folding = AttrValue();
+  oo_constant_folding.set_s("true");
+  attr_map["_oo_constant_folding"] = oo_constant_folding;
+
+  AttrSlice attrs(&attr_map);
+  const auto &all_options = NpuAttrs::GetAllAttrOptions(attrs);
+  EXPECT_NE(all_options.find("oo_constant_folding"), all_options.cend());
+}
 }
 }  // end tensorflow
diff --git a/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc b/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc
index f39fe3afc..2a54c6e05 100644
--- a/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc
+++ b/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc
@@ -203,5 +203,13 @@ TEST_F(GePluginTest, PluginInitTest_aicore_num) {
   ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty());
   NpuClose();
 }
+
+TEST_F(GePluginTest, PluginInitTest_oo_constant_folding) {
+  std::map<std::string, std::string> init_options;
+  init_options["ge.oo.constantFolding"] = "true";
+  PluginInit(init_options);
+  ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty());
+  NpuClose();
+}
 }
 }  // end tensorflow
diff --git a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc
index ede01ab83..58494d316 100644
--- a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc
+++ b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc
@@ -548,5 +548,40 @@ TEST_F(NpuAttrTest, GetAllAttrOptions_aicore_num) {
   EXPECT_NE(all_options.find("aicore_num"), all_options.cend());
 }

+TEST_F(NpuAttrTest, SetNpuOptimizerAttr_oo_constant_folding) {
+  GraphOptimizationPassOptions options;
+  SessionOptions session_options;
+  session_options.config.mutable_graph_options()->mutable_optimizer_options()->set_do_function_inlining(true);
+  auto *custom_config =
+      session_options.config.mutable_graph_options()->mutable_rewrite_options()->add_custom_optimizers();
+  custom_config->set_name("NpuOptimizer");
+  options.session_options = &session_options;
+
+  AttrValue oo_constant_folding = AttrValue();
+  oo_constant_folding.set_b(true);
+  (*custom_config->mutable_parameter_map())["oo_constant_folding"] = oo_constant_folding;
+
+  AttrValue jit_compile = AttrValue();
+  jit_compile.set_s("2");
+  (*custom_config->mutable_parameter_map())["jit_compile"] = jit_compile;
+  Status s = NpuAttrs::SetNpuOptimizerAttr(options, reinterpret_cast(1));
+  EXPECT_EQ(s.ok(), false);
+}
+
+TEST_F(NpuAttrTest, GetAllAttrOptions_oo_constant_folding) {
+  AttrValueMap attr_map;
+
+  AttrValue npu_optimizer = AttrValue();
+  npu_optimizer.set_s("NpuOptimizer");
+  attr_map["_NpuOptimizer"] = npu_optimizer;
+
+  AttrValue oo_constant_folding = AttrValue();
+  oo_constant_folding.set_s("true");
+  attr_map["_oo_constant_folding"] = oo_constant_folding;
+
+  AttrSlice attrs(&attr_map);
+  const auto &all_options = NpuAttrs::GetAllAttrOptions(attrs);
+  EXPECT_NE(all_options.find("oo_constant_folding"), all_options.cend());
+}
 }
 }  // end tensorflow
diff --git a/tf_adapter/util/ge_plugin.cc b/tf_adapter/util/ge_plugin.cc
index 29dca4467..5c9525c98 100644
--- a/tf_adapter/util/ge_plugin.cc
+++ b/tf_adapter/util/ge_plugin.cc
@@ -299,6 +299,10 @@ void GePlugin::Init(std::map<std::string, std::string> &init_options, const bool
     ADP_LOG(INFO) << "[GePlugin] aicoreNum : " << init_options["ge.aicoreNum"];
   }

+  if (init_options.find("ge.oo.constantFolding") != init_options.end()) {
+    ADP_LOG(INFO) << "[GePlugin] oo_constant_folding : " << init_options["ge.oo.constantFolding"];
+  }
+
   bool tdt_uninit_env = false;
   (void) ReadBoolFromEnvVar("ASCEND_TDT_UNINIT", false, &tdt_uninit_env);
   if (!kIsHeterogeneous && !tdt_uninit_env) {
diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc
index ad49fa7e5..f28fec892 100644
--- a/tf_adapter/util/npu_attrs.cc
+++ b/tf_adapter/util/npu_attrs.cc
@@ -690,6 +690,7 @@ std::map<std::string, std::string> NpuAttrs::GetInitOptions(const OpKernelConstr
   std::string execute_times = "-1";
   std::string export_compile_stat;
   std::string aicore_num;
+  std::string oo_constant_folding;

   if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) {
     (void) ctx->GetAttr("_precision_mode", &precision_mode);
@@ -733,6 +734,7 @@ std::map<std::string, std::string> NpuAttrs::GetInitOptions(const OpKernelConstr
     (void) ctx->GetAttr("_execute_times", &execute_times);
     (void) ctx->GetAttr("_export_compile_stat", &export_compile_stat);
     (void) ctx->GetAttr("_aicore_num", &aicore_num);
+    (void) ctx->GetAttr("_oo_constant_folding", &oo_constant_folding);
   }

   std::lock_guard<std::mutex> lock(mutex_);
@@ -793,6 +795,10 @@ std::map<std::string, std::string> NpuAttrs::GetInitOptions(const OpKernelConstr
   }
   init_options_["aicore_num"] = aicore_num;
   init_options_["ge.aicoreNum"] = aicore_num;
+  if (!oo_constant_folding.empty()) {
+    init_options_["oo_constant_folding"] = oo_constant_folding;
+    init_options_["ge.oo.constantFolding"] = oo_constant_folding;
+  }

   return init_options_;
 }
@@ -1218,6 +1224,7 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice &
   std::string execute_times = "-1";
   std::string export_compile_stat;
   std::string aicore_num;
+  std::string oo_constant_folding;

   auto NpuOptimizer_value = attrs.Find("_NpuOptimizer");
   auto enable_data_pre_proc_value = attrs.Find("_enable_data_pre_proc");
@@ -1316,6 +1323,7 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice &
   auto execute_times_value = attrs.Find("_execute_times");
   auto export_compile_stat_value = attrs.Find("_export_compile_stat");
   auto aicore_num_value = attrs.Find("_aicore_num");
+  auto oo_constant_folding_value = attrs.Find("_oo_constant_folding");
   if (NpuOptimizer_value != nullptr) {
     do_npu_optimizer = "1";
     if (enable_data_pre_proc_value != nullptr) {
@@ -1631,6 +1639,9 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice &
     if (aicore_num_value != nullptr) {
       aicore_num = aicore_num_value->s();
     }
+    if (oo_constant_folding_value != nullptr) {
+      oo_constant_folding = oo_constant_folding_value->s();
+    }
   }

   all_options["variable_format_optimize"] = variable_format_optimize;
@@ -1749,6 +1760,10 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice &
   }
   all_options["aicore_num"] = aicore_num;
   all_options["ge.aicoreNum"] = aicore_num;
+  if (!oo_constant_folding.empty()) {
+    all_options["oo_constant_folding"] = oo_constant_folding;
+    all_options["ge.oo.constantFolding"] = oo_constant_folding;
+  }

   return all_options;
 }
@@ -1874,6 +1889,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options
   int32_t execute_times = -1;
   int32_t export_compile_stat = 1;
   std::string aicore_num;
+  bool oo_constant_folding = true;

   const RewriterConfig &rewrite_options = options.session_options->config.graph_options().rewrite_options();
   for (const auto &custom_optimizer : rewrite_options.custom_optimizers()) {
@@ -2430,6 +2446,13 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options
     init_options_["export_compile_stat"] = std::to_string(export_compile_stat);
     init_options_["ge.exportCompileStat"] = std::to_string(export_compile_stat);
   }
+  if ((params.count("oo_constant_folding") > 0) &&
+      (params.at("oo_constant_folding").value_case() == AttrValue::ValueCase::kB)) {
+    oo_constant_folding = params.at("oo_constant_folding").b();
+    const auto oo_constant_folding_str = oo_constant_folding ? "true" : "false";
+    init_options_["oo_constant_folding"] = oo_constant_folding_str;
+    init_options_["ge.oo.constantFolding"] = oo_constant_folding_str;
+  }
   if ((params.count("aicore_num") > 0)) {
     aicore_num = params.at("aicore_num").s();
     init_options_["aicore_num"] = aicore_num;
diff --git a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp
index c6dd9bb34..038309e1b 100644
--- a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp
+++ b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp
@@ -95,6 +95,7 @@ const std::map<std::string, std::string> kGlobalConfigOptions = {
     {"execute_times", "execute_times"},
     {"export_compile_stat", "ge.exportCompileStat"},
     {"aicore_num", "ge.aicoreNum"},
+    {"oo_constant_folding", "ge.oo.constantFolding"},
     // private options
     {"_distribute.rank_id", ge::OPTION_EXEC_RANK_ID},
     {"_distribute.rank_table", ge::OPTION_EXEC_RANK_TABLE_FILE},
diff --git a/tf_adapter_2.x/python/npu_device/configs/npu_config.py b/tf_adapter_2.x/python/npu_device/configs/npu_config.py
index cab34f04a..7e3b7fa7b 100644
--- a/tf_adapter_2.x/python/npu_device/configs/npu_config.py
+++ b/tf_adapter_2.x/python/npu_device/configs/npu_config.py
@@ -81,5 +81,6 @@ class NpuConfig(NpuBaseConfig):
                                                            'mixed_bfloat16', 'cube_hif8', 'mixed_hif8'])
         self.export_compile_stat = OptionValue(1, [0, 1, 2])
         self.aicore_num = OptionValue(None, None)
+        self.oo_constant_folding = OptionValue(True, [True, False])

         super(NpuConfig, self).__init__()
diff --git a/tf_adapter_2.x/tests/stub/include/stub/defines.h b/tf_adapter_2.x/tests/stub/include/stub/defines.h
index fe6676d70..51b8cfd7b 100644
--- a/tf_adapter_2.x/tests/stub/include/stub/defines.h
+++ b/tf_adapter_2.x/tests/stub/include/stub/defines.h
@@ -312,6 +312,8 @@ const char *const OPTION_EXPORT_COMPILE_STAT = "ge.exportCompileStat";

 const char *const OPTION_AICORE_NUM = "ge.aicoreNum";

+const char *const OO_CONSTANT_FOLDING = "ge.oo.constantFolding";
+
 // Graph run mode
 enum GraphRunMode { PREDICTION = 0, TRAIN };
 // Topo sorting mode
--
Gitee
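
Usage sketch for the new option. The npu_bridge import paths, the NPURunConfig and
npu_resource_init parameters, and the "ge.oo.constantFolding" key all come from the diff
above; the TF2 npu_device.global_options()/npu_device.open() entry points are assumed from
the usual tf_adapter_2.x flow and are not touched by this patch. The three snippets are
alternative entry points, not one script.

    # TF1 estimator flow: NPURunConfig stores the flag and NPUEstimator copies it into
    # custom_op.parameter_map["oo_constant_folding"].b for the NpuOptimizer.
    from npu_bridge.estimator.npu.npu_config import NPURunConfig

    run_config = NPURunConfig(oo_constant_folding=False)  # True (default): enable; False: disable
    # run_config is then passed to NPUEstimator(model_fn=..., config=run_config).

    # Bare plugin initialization: forwarded to GE as init["ge.oo.constantFolding"] = "false".
    from npu_bridge.estimator.npu.npu_plugin import npu_resource_init, npu_resource_shutdown

    npu_resource_init(oo_constant_folding=False)
    npu_resource_shutdown()

    # TF2 adapter: NpuConfig now carries oo_constant_folding, mapped to ge.oo.constantFolding
    # through kGlobalConfigOptions (global_options()/open() assumed, not part of this patch).
    import npu_device
    npu_device.global_options().oo_constant_folding = False
    npu_device.open().as_default()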