diff --git a/tf_adapter/interface_spec/api_npu_config.pyh b/tf_adapter/interface_spec/api_npu_config.pyh
index 4bb2268e23d97b02b9d9673debd40382ab8dd0fe..185a2d9707f1bedcb30b1a98b81b392e785258ee 100644
--- a/tf_adapter/interface_spec/api_npu_config.pyh
+++ b/tf_adapter/interface_spec/api_npu_config.pyh
@@ -34,7 +34,7 @@ class DumpConfig():
                  dump_mode="output", enable_dump_debug=False, dump_debug_mode="all", dump_data="tensor",
                  dump_layer=None):
 
 class DynamicInputConfig():
-    def __init__(self, input_shape, dynamic_dims, dynamic_node_type):
+    def __init__(self, input_shape, dynamic_dims, dynamic_node_type, compile_hybrid_mode=0):
 
 class MemoryConfig():
     def __init__(self, atomic_clean_policy=0, static_memory_policy=0, memory_optimization_policy=None,
diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
index 4523153abe5a8ca45cf8e24149dac28510b2ae03..645b1849dd0b2892b270ae943c894d5444ef4e56 100644
--- a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
+++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
@@ -497,7 +497,7 @@ class NpuExecutePlacement(Enum):
 class DynamicInputConfig():
     """dynamic dims and input shape config with npu support"""
 
-    def __init__(self, input_shape, dynamic_dims, dynamic_node_type):
+    def __init__(self, input_shape, dynamic_dims, dynamic_node_type, compile_hybrid_mode=0):
         """
         Constructs a DynamicInputConfig.
 
@@ -506,10 +506,12 @@ class DynamicInputConfig():
           dynamic_dims: This parameter corresponds to input_shape.
                         The dim value in dims corresponds to the parameter "-1" in input_shape.
          dynamic_node_type: Dataset or placeholder is dynamic input. type: 0 or 1.
+          compile_hybrid_mode: Whether to compile the dynamic gears in hybrid mode. type: 0 or 1. Takes effect only when dynamic_node_type is 1 (placeholder).
""" self._input_shape = input_shape self._dynamic_dims = dynamic_dims self._dynamic_node_type = dynamic_node_type + self._compile_hybrid_mode = compile_hybrid_mode def set_npu_default_config(func, args, kwargs): diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py index 5b8f23af227751370b096ad334c4e797c4883c79..86bd6a2c7b4b5c6796ffc57cd98ad190b109567a 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py @@ -692,10 +692,12 @@ class NPUEstimator(estimator_lib.Estimator): if (config._dynamic_input_config is not None and config._dynamic_input_config._input_shape is not None and config._dynamic_input_config._dynamic_dims is not None and - config._dynamic_input_config._dynamic_node_type is not None): + config._dynamic_input_config._dynamic_node_type is not None and + config._dynamic_input_config._compile_hybrid_mode is not None): custom_op.parameter_map["input_shape"].s = tf.compat.as_bytes(config._dynamic_input_config._input_shape) custom_op.parameter_map["dynamic_dims"].s = tf.compat.as_bytes(config._dynamic_input_config._dynamic_dims) custom_op.parameter_map["dynamic_node_type"].i = config._dynamic_input_config._dynamic_node_type + custom_op.parameter_map["compile_hybrid_mode"].i = config._dynamic_input_config._compile_hybrid_mode def __load_mstune_config(self, config, custom_op): """Load mstune config ,and add to custom_optimizers diff --git a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc index 9c60554e78f76930e6631e8e93ca162a3260ee4c..d3503e35ec8d7fe8f5440c77b731b98b77d3c690 100644 --- a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc +++ b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc @@ -568,6 +568,38 @@ TEST_F(NpuAttrTest, GetAllAttrOptions_auto_multistream_parallel_mode) { const auto &all_options = NpuAttrs::GetAllAttrOptions(attrs); EXPECT_NE(all_options.find("ge.autoMultistreamParallelMode"), all_options.cend()); } +TEST_F(NpuAttrTest, SetNpuOptimizerAttr_compile_hybrid_mode) { + GraphOptimizationPassOptions options; + SessionOptions session_options; + session_options.config.mutable_graph_options()->mutable_optimizer_options()->set_do_function_inlining(true); + auto *custom_config = + session_options.config.mutable_graph_options()->mutable_rewrite_options()->add_custom_optimizers(); + custom_config->set_name("NpuOptimizer"); + options.session_options = &session_options; + + AttrValue input_shape_value = AttrValue(); + AttrValue dynamic_dims_value = AttrValue(); + AttrValue dynamic_node_type_value = AttrValue(); + AttrValue compile_hybrid_mode_value = AttrValue(); + input_shape_value.set_s("data:-1,1,30,1"); + dynamic_dims_value.set_s("50;30"); + dynamic_node_type_value.set_i(0); + compile_hybrid_mode_value.set_i(1); + + (*custom_config->mutable_parameter_map())["input_shape"] = input_shape_value; + (*custom_config->mutable_parameter_map())["dynamic_dims"] = dynamic_dims_value; + (*custom_config->mutable_parameter_map())["dynamic_node_type"] = dynamic_node_type_value; + (*custom_config->mutable_parameter_map())["compile_hybrid_mode"] = compile_hybrid_mode_value; + + Status s = NpuAttrs::SetNpuOptimizerAttr(options, reinterpret_cast(1)); + EXPECT_EQ(s.ok(), false); + const int illegal_value = 2; + dynamic_node_type_value.set_i(1); + compile_hybrid_mode_value.set_i(illegal_value); + (*custom_config->mutable_parameter_map())["dynamic_node_type"] = 
+  (*custom_config->mutable_parameter_map())["compile_hybrid_mode"] = compile_hybrid_mode_value;
+  s = NpuAttrs::SetNpuOptimizerAttr(options, reinterpret_cast<Node *>(1));
+  EXPECT_EQ(s.ok(), false);
+}
 }
 }  // end tensorflow
diff --git a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc
index 8ecc3801df1d7805bc6e0e7838726b38a408ce19..b9cdce5324159b1e6fe1068d2b397b5e00bc4a93 100644
--- a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc
+++ b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc
@@ -707,5 +707,38 @@ TEST_F(NpuAttrTest, GetAllAttrOptions_auto_multistream_parallel_mode) {
   const auto &all_options = NpuAttrs::GetAllAttrOptions(attrs);
   EXPECT_NE(all_options.find("ge.autoMultistreamParallelMode"), all_options.cend());
 }
+TEST_F(NpuAttrTest, SetNpuOptimizerAttr_compile_hybrid_mode) {
+  GraphOptimizationPassOptions options;
+  SessionOptions session_options;
+  session_options.config.mutable_graph_options()->mutable_optimizer_options()->set_do_function_inlining(true);
+  auto *custom_config =
+      session_options.config.mutable_graph_options()->mutable_rewrite_options()->add_custom_optimizers();
+  custom_config->set_name("NpuOptimizer");
+  options.session_options = &session_options;
+
+  AttrValue input_shape_value = AttrValue();
+  AttrValue dynamic_dims_value = AttrValue();
+  AttrValue dynamic_node_type_value = AttrValue();
+  AttrValue compile_hybrid_mode_value = AttrValue();
+  input_shape_value.set_s("data:-1,1,30,1");
+  dynamic_dims_value.set_s("50;30");
+  dynamic_node_type_value.set_i(0);
+  compile_hybrid_mode_value.set_i(1);
+
+  (*custom_config->mutable_parameter_map())["input_shape"] = input_shape_value;
+  (*custom_config->mutable_parameter_map())["dynamic_dims"] = dynamic_dims_value;
+  (*custom_config->mutable_parameter_map())["dynamic_node_type"] = dynamic_node_type_value;
+  (*custom_config->mutable_parameter_map())["compile_hybrid_mode"] = compile_hybrid_mode_value;
+
+  Status s = NpuAttrs::SetNpuOptimizerAttr(options, reinterpret_cast<Node *>(1));
+  EXPECT_EQ(s.ok(), false);
+  const int illegal_value = 2;
+  dynamic_node_type_value.set_i(1);
+  compile_hybrid_mode_value.set_i(illegal_value);
+  (*custom_config->mutable_parameter_map())["dynamic_node_type"] = dynamic_node_type_value;
+  (*custom_config->mutable_parameter_map())["compile_hybrid_mode"] = compile_hybrid_mode_value;
+  s = NpuAttrs::SetNpuOptimizerAttr(options, reinterpret_cast<Node *>(1));
+  EXPECT_EQ(s.ok(), false);
+}
 }
 }  // end tensorflow
diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc
index 7a2942ff59543a3c3b775369ecfd991ad53ffee4..76db019551237c7a2cc5aa28e09882d7772aa6a9 100644
--- a/tf_adapter/util/npu_attrs.cc
+++ b/tf_adapter/util/npu_attrs.cc
@@ -448,12 +448,19 @@ std::map<std::string, std::string> NpuAttrs::GetGraphOptions(const OpKernelConst
   std::string input_shape;
   std::string dynamic_dims;
   std::string dynamic_node_type;
+  std::string compile_hybrid_mode;
   (void) ctx->GetAttr("_input_shape", &input_shape);
   (void) ctx->GetAttr("_dynamic_dims", &dynamic_dims);
   (void) ctx->GetAttr("_dynamic_node_type", &dynamic_node_type);
+  (void) ctx->GetAttr("_compile_hybrid_mode", &compile_hybrid_mode);
   graph_options["ge.inputShape"] = input_shape;
   graph_options["ge.dynamicDims"] = dynamic_dims;
   graph_options["ge.dynamicNodeType"] = dynamic_node_type;
+  graph_options["ge.compileHybridMode"] = compile_hybrid_mode;
+  ADP_LOG(INFO) << "[GEOP] ge.inputShape:" << graph_options["ge.inputShape"]
+                << ", ge.dynamicDims:" << graph_options["ge.dynamicDims"]
+                << ", ge.dynamicNodeType:" << graph_options["ge.dynamicNodeType"]
+                << ", ge.compileHybridMode:" << graph_options["ge.compileHybridMode"];
   return graph_options;
 }
@@ -495,7 +502,7 @@ std::map<std::string, std::string> NpuAttrs::GetSessOptions(const OpKernelConstr
   std::string topo_sorting_mode;
   std::string insert_op_file;
   std::string resource_config_path;
-  std::string external_weight = "0";
+  std::string external_weight;
   std::string graph_parallel_option_path;
   std::string enable_graph_parallel;
   std::string graph_compiler_cache_dir;
@@ -1258,7 +1265,7 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice &
   std::string aoe_config_file;
   std::string stream_sync_timeout = "-1";
   std::string event_sync_timeout = "-1";
-  std::string external_weight = "0";
+  std::string external_weight;
   std::string graph_parallel_option_path;
   std::string enable_graph_parallel;
   std::string graph_compiler_cache_dir;
@@ -1273,6 +1280,7 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice &
   std::string shape_generalization_mode = "STRICT";
   std::string all_tensor_not_empty;
   std::string auto_multistream_parallel_mode;
+  std::string compile_hybrid_mode;
 
   auto NpuOptimizer_value = attrs.Find("_NpuOptimizer");
   auto enable_data_pre_proc_value = attrs.Find("_enable_data_pre_proc");
@@ -1375,6 +1383,7 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice &
   auto shape_generalization_mode_value = attrs.Find("_shape_generalization_mode");
   auto all_tensor_not_empty_value = attrs.Find("_all_tensor_not_empty");
   auto auto_multistream_parallel_mode_value = attrs.Find("_auto_multistream_parallel_mode");
+  auto compile_hybrid_mode_value = attrs.Find("_compile_hybrid_mode");
   if (NpuOptimizer_value != nullptr) {
     do_npu_optimizer = "1";
     if (enable_data_pre_proc_value != nullptr) {
@@ -1392,6 +1401,9 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice &
     if (graph_max_parallel_model_num_value != nullptr) {
       graph_max_parallel_model_num = graph_max_parallel_model_num_value->s();
     }
+    if (compile_hybrid_mode_value != nullptr) {
+      compile_hybrid_mode = compile_hybrid_mode_value->s();
+    }
     if (lower_functional_ops_value != nullptr) {
       lower_functional_ops = lower_functional_ops_value->s();
     }
@@ -1828,6 +1840,7 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice &
   all_options["input_batch_cpy"] = input_batch_cpy;
   all_options["ge.inputBatchCpy"] = input_batch_cpy;
   all_options["shape_generalization_mode"] = shape_generalization_mode;
+  all_options["compile_hybrid_mode"] = compile_hybrid_mode;
 
   return all_options;
 }
@@ -1944,7 +1957,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options
   std::string graph_compiler_cache_dir;
   int32_t stream_sync_timeout = -1;
   int32_t event_sync_timeout = -1;
-  bool external_weight = false;
+  std::string external_weight;
   bool frozen_variable = false;
   std::string variable_location = "Device";
   std::string graph_slice_mode;
@@ -1958,6 +1971,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options
   std::string shape_generalization_mode = "STRICT";
   bool all_tensor_not_empty = false;
   std::string auto_multistream_parallel_mode;
+  std::string compile_hybrid_mode;
   const RewriterConfig &rewrite_options = options.session_options->config.graph_options().rewrite_options();
   for (const auto &custom_optimizer : rewrite_options.custom_optimizers()) {
     if (custom_optimizer.name() == "NpuOptimizer") {
@@ -2273,6 +2287,19 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options
          ADP_LOG(FATAL) << "dynamic_node_type should be 0 or 1.";
          LOG(FATAL) << "dynamic_node_type should be 0 or 1.";
        }
+      if (params.count("compile_hybrid_mode") > 0) {
+        compile_hybrid_mode = std::to_string(params.at("compile_hybrid_mode").i());
if (compile_hybrid_mode != "0" && compile_hybrid_mode != "1") { + ADP_LOG(ERROR) << "compile_hybrid_mode should be 0 or 1"; + LOG(ERROR) << "compile_hybrid_mode should be 0 or 1"; + return errors::Internal("compile_hybrid_mode should be 0 or 1"); + } + if (compile_hybrid_mode == "1" && dynamic_node_type != 1) { + ADP_LOG(ERROR) << "When compile_hybrid_mode set, dynamic_node_type should be 1."; + LOG(ERROR) << "When compile_hybrid_mode set, dynamic_node_type should be 1."; + return errors::Internal("When compile_hybrid_mode set, dynamic_node_type should be 1."); + } + } } else if (params.count("input_shape") == 0 && params.count("dynamic_dims") == 0 && params.count("dynamic_node_type") == 0) { // the three parameters are not set normally. @@ -2497,7 +2524,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options event_sync_timeout = params.at("event_sync_timeout").i(); } if (params.count("external_weight") > 0) { - external_weight = params.at("external_weight").b(); + external_weight = std::to_string(params.at("external_weight").b()); } if (params.count("frozen_variable") > 0) { frozen_variable = params.at("frozen_variable").b(); @@ -2619,8 +2646,8 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options sess_options["ge.exec.memoryOptimizationPolicy"] = memory_optimization_policy; sess_options["variable_use_1g_huge_page"] = variable_use_1g_huge_page; sess_options["ge.variableUse1gHugePage"] = variable_use_1g_huge_page; - sess_options["external_weight"] = std::to_string(static_cast(external_weight)); - sess_options["ge.externalWeight"] = std::to_string(static_cast(external_weight)); + sess_options["external_weight"] = external_weight; + sess_options["ge.externalWeight"] = external_weight; sess_options["jit_compile"] = jit_compile; sess_options["ge.jit_compile"] = jit_compile; sess_options["input_fusion_size"] = std::to_string(input_fusion_size); @@ -2635,6 +2662,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options graph_options["dynamic_node_type"] = std::to_string(dynamic_node_type); sess_options["auto_multistream_parallel_mode"] = auto_multistream_parallel_mode; sess_options["ge.autoMultistreamParallelMode"] = auto_multistream_parallel_mode; + graph_options["compile_hybrid_mode"] = compile_hybrid_mode; init_options_["profiling_mode"] = std::to_string(static_cast(profiling_mode)); init_options_[ge::OPTION_EXEC_PROFILING_MODE] = std::to_string(static_cast(profiling_mode)); init_options_["profiling_options"] = profiling_options;