From 47eec40d48e2c2fe4c32dc5d95c235a5433a9389 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B0=A2=E6=AD=A3=E6=81=BA?= Date: Mon, 29 Sep 2025 10:06:42 +0800 Subject: [PATCH] all tensor not empty --- tf_adapter/interface_spec/api_npu_config.pyh | 2 +- .../npu_bridge/estimator/npu/npu_config.py | 5 ++- .../npu_bridge/estimator/npu/npu_estimator.py | 2 ++ .../tests/st/util/testcase/npu_attrs_test.cc | 36 +++++++++++++++++++ .../tests/ut/util/testcase/npu_attrs_test.cc | 36 +++++++++++++++++++ tf_adapter/util/ge_plugin.cc | 1 + tf_adapter/util/npu_attrs.cc | 18 +++++++++- .../npu_device/core/npu_wrapper.cpp | 3 +- .../python/npu_device/configs/npu_config.py | 1 + .../tests/stub/include/stub/defines.h | 1 + 10 files changed, 101 insertions(+), 4 deletions(-) diff --git a/tf_adapter/interface_spec/api_npu_config.pyh b/tf_adapter/interface_spec/api_npu_config.pyh index 1263432bc..3d36a2245 100644 --- a/tf_adapter/interface_spec/api_npu_config.pyh +++ b/tf_adapter/interface_spec/api_npu_config.pyh @@ -23,7 +23,7 @@ class NPURunConfig(run_config_lib.RunConfig): frozen_variable=False, variable_placement="Device", jit_compile="auto", precision_mode_v2=None, ac_parallel_enable=None, quant_dumpable=None, input_fusion_size=131072, compile_dynamic_mode=None, graph_max_parallel_model_num=1, export_compile_stat=1, aicore_num=None, - oo_constant_folding=True, input_batch_cpy=False, shape_generalization_mode="STRICT"): + oo_constant_folding=True, input_batch_cpy=False, shape_generalization_mode="STRICT", all_tensor_not_empty=False): class ProfilingConfig(): def __init__(self, enable_profiling=False, profiling_options=None): diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py index 8d5fe1739..a38214b55 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py @@ -119,7 +119,8 @@ class NPURunConfig(run_config_lib.RunConfig): 
aicore_num=None, oo_constant_folding=True, input_batch_cpy=False, - shape_generalization_mode="STRICT" + shape_generalization_mode="STRICT", + all_tensor_not_empty=False ): """ Constructs a NPUConfig. @@ -195,6 +196,7 @@ class NPURunConfig(run_config_lib.RunConfig): STRICT: default, use the input shape; FULL: full generalization; ADAPTIVE: generalizes the varying axes. + all_tensor_not_empty: bool, passed to GE as the session option ge.exec.allTensorNotEmpty. Default is False. """ # Check iterations_per_loop. @@ -296,6 +298,7 @@ class NPURunConfig(run_config_lib.RunConfig): self._oo_constant_folding = oo_constant_folding self._input_batch_cpy = input_batch_cpy self._shape_generalization_mode = shape_generalization_mode + self._all_tensor_not_empty = all_tensor_not_empty super(NPURunConfig, self).__init__( model_dir=model_dir, tf_random_seed=tf_random_seed, diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py index 3f1ffa863..52763a15a 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py @@ -851,6 +851,8 @@ class NPUEstimator(estimator_lib.Estimator): if config._shape_generalization_mode is not None: custom_op.parameter_map["shape_generalization_mode"].s = tf.compat.as_bytes( config._shape_generalization_mode) + if config._all_tensor_not_empty is not None: + custom_op.parameter_map["all_tensor_not_empty"].b = config._all_tensor_not_empty self.__load_session_device_id(config, custom_op) self.__load_modify_mixlist(config, custom_op) diff --git a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc index 4f20143f4..fd47759ae 100644 --- a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc +++ b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc @@ -497,5 +497,41 @@ TEST_F(NpuAttrTest, GetAllAttrOptions_input_batch_cpy) { EXPECT_NE(all_options.find("ge.inputBatchCpy"), all_options.cend()); } +TEST_F(NpuAttrTest, 
SetNpuOptimizerAttr_all_tensor_not_empty) { + GraphOptimizationPassOptions options; + SessionOptions session_options; + session_options.config.mutable_graph_options()->mutable_optimizer_options()->set_do_function_inlining(true); + auto *custom_config = + session_options.config.mutable_graph_options()->mutable_rewrite_options()->add_custom_optimizers(); + custom_config->set_name("NpuOptimizer"); + options.session_options = &session_options; + + AttrValue all_tensor_not_empty = AttrValue(); + all_tensor_not_empty.set_b(true); + (*custom_config->mutable_parameter_map())["all_tensor_not_empty"] = all_tensor_not_empty; + + AttrValue jit_compile = AttrValue(); + jit_compile.set_s("2"); + (*custom_config->mutable_parameter_map())["jit_compile"] = jit_compile; + Status s = NpuAttrs::SetNpuOptimizerAttr(options, reinterpret_cast(1)); + EXPECT_EQ(s.ok(), false); +} + +TEST_F(NpuAttrTest, GetAllAttrOptions_all_tensor_not_empty) { + AttrValueMap attr_map; + + AttrValue npu_optimizer = AttrValue(); + npu_optimizer.set_s("NpuOptimizer"); + attr_map["_NpuOptimizer"] = npu_optimizer; + + AttrValue all_tensor_not_empty = AttrValue(); + all_tensor_not_empty.set_s("1");  // GetAllAttrOptions reads this attr with s(), so it must be stored as a string + attr_map["_all_tensor_not_empty"] = all_tensor_not_empty; + + AttrSlice attrs(&attr_map); + const auto &all_options = NpuAttrs::GetAllAttrOptions(attrs); + EXPECT_EQ(all_options.at("ge.exec.allTensorNotEmpty"), "1");  // the key is always written, so check the propagated value +} + } } // end tensorflow diff --git a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc index 85ba473ef..006141158 100644 --- a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc +++ b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc @@ -635,5 +635,41 @@ TEST_F(NpuAttrTest, GetAllAttrOptions_input_batch_cpy) { const auto &all_options = NpuAttrs::GetAllAttrOptions(attrs); EXPECT_NE(all_options.find("ge.inputBatchCpy"), all_options.cend()); } + +TEST_F(NpuAttrTest, SetNpuOptimizerAttr_all_tensor_not_empty) { + 
GraphOptimizationPassOptions options; + SessionOptions session_options; + session_options.config.mutable_graph_options()->mutable_optimizer_options()->set_do_function_inlining(true); + auto *custom_config = + session_options.config.mutable_graph_options()->mutable_rewrite_options()->add_custom_optimizers(); + custom_config->set_name("NpuOptimizer"); + options.session_options = &session_options; + + AttrValue all_tensor_not_empty = AttrValue(); + all_tensor_not_empty.set_b(true); + (*custom_config->mutable_parameter_map())["all_tensor_not_empty"] = all_tensor_not_empty; + + AttrValue jit_compile = AttrValue(); + jit_compile.set_s("2"); + (*custom_config->mutable_parameter_map())["jit_compile"] = jit_compile; + Status s = NpuAttrs::SetNpuOptimizerAttr(options, reinterpret_cast(1)); + EXPECT_EQ(s.ok(), false); +} + +TEST_F(NpuAttrTest, GetAllAttrOptions_all_tensor_not_empty) { + AttrValueMap attr_map; + + AttrValue npu_optimizer = AttrValue(); + npu_optimizer.set_s("NpuOptimizer"); + attr_map["_NpuOptimizer"] = npu_optimizer; + + AttrValue all_tensor_not_empty = AttrValue(); + all_tensor_not_empty.set_s("1");  // GetAllAttrOptions reads this attr with s(), so it must be stored as a string + attr_map["_all_tensor_not_empty"] = all_tensor_not_empty; + + AttrSlice attrs(&attr_map); + const auto &all_options = NpuAttrs::GetAllAttrOptions(attrs); + EXPECT_EQ(all_options.at("ge.exec.allTensorNotEmpty"), "1");  // the key is always written, so check the propagated value +} } } // end tensorflow diff --git a/tf_adapter/util/ge_plugin.cc b/tf_adapter/util/ge_plugin.cc index f11b008cf..62ced776a 100644 --- a/tf_adapter/util/ge_plugin.cc +++ b/tf_adapter/util/ge_plugin.cc @@ -129,6 +129,7 @@ void SetOptionNameMap(json &option_name_map) { option_name_map.emplace(ge::OPTION_EXEC_DYNAMIC_INPUT, "dynamic_input"); option_name_map.emplace(ge::AICORE_NUM, "aicore_num"); option_name_map.emplace("ge.inputBatchCpy", "input_batch_cpy"); + option_name_map.emplace(ge::OPTION_ALL_TENSOR_NOT_EMPTY, "all_tensor_not_empty"); } } // namespace diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc 
index 4741ecc56..77d9fbcf1 100644 --- a/tf_adapter/util/npu_attrs.cc +++ b/tf_adapter/util/npu_attrs.cc @@ -496,6 +496,7 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr std::string input_batch_cpy; std::string jit_compile; std::string aicore_num; + std::string all_tensor_not_empty; if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) { (void) ctx->GetAttr("_variable_format_optimize", &variable_format_optimize); (void) ctx->GetAttr("_hcom_parallel", &hcom_parallel); @@ -574,6 +575,7 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr (void) ctx->GetAttr("_graph_compiler_cache_dir", &graph_compiler_cache_dir); (void) ctx->GetAttr("_input_batch_cpy", &input_batch_cpy); (void) ctx->GetAttr("_aicore_num", &aicore_num); + (void) ctx->GetAttr("_all_tensor_not_empty", &all_tensor_not_empty); } // session options @@ -645,6 +647,8 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr } sess_options["ge.inputBatchCpy"] = input_batch_cpy; sess_options["input_batch_cpy"] = input_batch_cpy; + sess_options[ge::OPTION_ALL_TENSOR_NOT_EMPTY] = all_tensor_not_empty; + sess_options["all_tensor_not_empty"] = all_tensor_not_empty; SetForbiddenClosePassOn(sess_options); sess_options["aicore_num"] = aicore_num; sess_options["ge.aicoreNum"] = aicore_num; @@ -1258,6 +1262,7 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & std::string oo_constant_folding; std::string input_batch_cpy; std::string shape_generalization_mode = "STRICT"; + std::string all_tensor_not_empty; auto NpuOptimizer_value = attrs.Find("_NpuOptimizer"); auto enable_data_pre_proc_value = attrs.Find("_enable_data_pre_proc"); @@ -1358,6 +1363,7 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & auto oo_constant_folding_value = attrs.Find("_oo_constant_folding"); auto input_batch_cpy_value = attrs.Find("_input_batch_cpy"); auto shape_generalization_mode_value = attrs.Find("_shape_generalization_mode"); + auto all_tensor_not_empty_value = 
attrs.Find("_all_tensor_not_empty"); if (NpuOptimizer_value != nullptr) { do_npu_optimizer = "1"; if (enable_data_pre_proc_value != nullptr) { @@ -1678,6 +1684,9 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & if (shape_generalization_mode_value != nullptr) { shape_generalization_mode = shape_generalization_mode_value->s(); } + if (all_tensor_not_empty_value != nullptr) { + all_tensor_not_empty = all_tensor_not_empty_value->s(); + } } all_options["variable_format_optimize"] = variable_format_optimize; @@ -1793,6 +1802,8 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & } all_options["aicore_num"] = aicore_num; all_options["ge.aicoreNum"] = aicore_num; + all_options[ge::OPTION_ALL_TENSOR_NOT_EMPTY] = all_tensor_not_empty; + all_options["all_tensor_not_empty"] = all_tensor_not_empty; if (!oo_constant_folding.empty()) { all_options["oo_constant_folding"] = oo_constant_folding; all_options["ge.oo.constantFolding"] = oo_constant_folding; @@ -1928,6 +1939,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options bool oo_constant_folding = true; bool input_batch_cpy = false; std::string shape_generalization_mode = "STRICT"; + bool all_tensor_not_empty = false; const RewriterConfig &rewrite_options = options.session_options->config.graph_options().rewrite_options(); for (const auto &custom_optimizer : rewrite_options.custom_optimizers()) { if (custom_optimizer.name() == "NpuOptimizer") { @@ -2496,6 +2508,9 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options init_options_["aicore_num"] = aicore_num; init_options_["ge.aicoreNum"] = aicore_num; } + if (params.count("all_tensor_not_empty") > 0) { + all_tensor_not_empty = params.at("all_tensor_not_empty").b(); + } // input_batch_cpy if (params.count("input_batch_cpy") > 0) { input_batch_cpy = params.at("input_batch_cpy").b(); @@ -2595,7 +2610,8 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options 
sess_options["ge.inputBatchCpy"] = std::to_string(input_batch_cpy); sess_options["aicore_num"] = aicore_num; sess_options["ge.aicoreNum"] = aicore_num; - + sess_options["all_tensor_not_empty"] = std::to_string(all_tensor_not_empty); + sess_options[ge::OPTION_ALL_TENSOR_NOT_EMPTY] = std::to_string(all_tensor_not_empty); init_options_["profiling_mode"] = std::to_string(static_cast(profiling_mode)); init_options_[ge::OPTION_EXEC_PROFILING_MODE] = std::to_string(static_cast(profiling_mode)); init_options_["profiling_options"] = profiling_options; diff --git a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp index 14536b87f..553569f0d 100644 --- a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp +++ b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp @@ -150,7 +150,8 @@ const std::map kSessionConfigOptions = { {"graph_compiler_cache_dir", "ge.graph_compiler_cache_dir"}, {"graph_slice", "ge.graphSliceMode"}, {"input_fusion_size", "ge.exec.input_fusion_size"}, - {"compile_dynamic_mode", "ge.compile_dynamic_mode"} + {"compile_dynamic_mode", "ge.compile_dynamic_mode"}, + {"all_tensor_not_empty", ge::OPTION_ALL_TENSOR_NOT_EMPTY} }; } // namespace diff --git a/tf_adapter_2.x/python/npu_device/configs/npu_config.py b/tf_adapter_2.x/python/npu_device/configs/npu_config.py index 72a6c9f0e..3293e3c32 100644 --- a/tf_adapter_2.x/python/npu_device/configs/npu_config.py +++ b/tf_adapter_2.x/python/npu_device/configs/npu_config.py @@ -84,5 +84,6 @@ class NpuConfig(NpuBaseConfig): self.oo_constant_folding = OptionValue(True, [True, False]) self.input_batch_cpy = OptionValue(False, [True, False]) self.shape_generalization_mode = OptionValue("STRICT", ["STRICT", "FULL", "ADAPTIVE"]) + self.all_tensor_not_empty = OptionValue(False, [True, False]) super(NpuConfig, self).__init__() diff --git a/tf_adapter_2.x/tests/stub/include/stub/defines.h b/tf_adapter_2.x/tests/stub/include/stub/defines.h index 51b8cfd7b..45dc1da2a 100644 --- 
a/tf_adapter_2.x/tests/stub/include/stub/defines.h +++ b/tf_adapter_2.x/tests/stub/include/stub/defines.h @@ -65,6 +65,7 @@ const char *const OPTION_EXEC_LOGICAL_DEVICE_CLUSTER_DEPLOY_MODE = "ge.exec.logi const char *const OPTION_EXEC_LOGICAL_DEVICE_ID = "ge.exec.logicalDeviceId"; const char *const OPTION_EXEC_MODEL_DEPLOY_MODE = "ge.exec.modelDeployMode"; const char *const OPTION_EXEC_MODEL_DEPLOY_DEVICELIST = "ge.exec.modelDeployDevicelist"; +const char *const OPTION_ALL_TENSOR_NOT_EMPTY = "ge.exec.allTensorNotEmpty"; // Option key: memory init const char *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize"; -- Gitee