diff --git a/tf_adapter/interface_spec/api_npu_config.pyh b/tf_adapter/interface_spec/api_npu_config.pyh
index 3d36a2245917975cbbf36ea6e05c3a8c427e2f18..d8d87caf9dd465a9f1b309c9ac66cb59e046b95a 100644
--- a/tf_adapter/interface_spec/api_npu_config.pyh
+++ b/tf_adapter/interface_spec/api_npu_config.pyh
@@ -23,7 +23,8 @@ class NPURunConfig(run_config_lib.RunConfig):
                frozen_variable=False, variable_placement="Device", jit_compile="auto", precision_mode_v2=None,
                ac_parallel_enable=None, quant_dumpable=None, input_fusion_size=131072, compile_dynamic_mode=None,
                graph_max_parallel_model_num=1, export_compile_stat=1, aicore_num=None,
-               oo_constant_folding=True, input_batch_cpy=False, shape_generalization_mode="STRICT", all_tensor_not_empty=False):
+               oo_constant_folding=True, input_batch_cpy=False, shape_generalization_mode="STRICT", all_tensor_not_empty=False,
+               multi_stream_mode=None):
 
 class ProfilingConfig():
   def __init__(self, enable_profiling=False, profiling_options=None):
diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
index a38214b557b8e8e8d3c91e480729456ef3757b0f..d0a93afd60ec7a9365f7042ba7b669497afeeb57 100644
--- a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
+++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py
@@ -120,7 +120,8 @@ class NPURunConfig(run_config_lib.RunConfig):
                  oo_constant_folding=True,
                  input_batch_cpy=False,
                  shape_generalization_mode="STRICT",
-                 all_tensor_not_empty=False
+                 all_tensor_not_empty=False,
+                 multi_stream_mode=None
                  ):
         """
         Constructs a NPUConfig.
@@ -197,6 +198,7 @@ class NPURunConfig(run_config_lib.RunConfig):
             FULL: full generalization;
             ADAPTIVE: generalizes the varying axes.
         all_tensor_not_empty: default is: False.
+        multi_stream_mode: default is None. "CV": enable aic/aiv parallel execution.
         """
 
         # Check iterations_per_loop.
@@ -299,6 +301,7 @@ class NPURunConfig(run_config_lib.RunConfig):
         self._input_batch_cpy = input_batch_cpy
         self._shape_generalization_mode = shape_generalization_mode
         self._all_tensor_not_empty = all_tensor_not_empty
+        self._multi_stream_mode = multi_stream_mode
         super(NPURunConfig, self).__init__(
             model_dir=model_dir,
             tf_random_seed=tf_random_seed,
diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py
index 52763a15a213fbad5ccbc485e17c6d92ecb5e0b6..3dde5feb02a74fce821f9c425244d1b1968eadf8 100644
--- a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py
+++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py
@@ -853,6 +853,8 @@ class NPUEstimator(estimator_lib.Estimator):
                 config._shape_generalization_mode)
         if config._all_tensor_not_empty is not None:
             custom_op.parameter_map["all_tensor_not_empty"].b = config._all_tensor_not_empty
+        if config._multi_stream_mode is not None:
+            custom_op.parameter_map["multi_stream_mode"].s = tf.compat.as_bytes(config._multi_stream_mode)
 
         self.__load_session_device_id(config, custom_op)
         self.__load_modify_mixlist(config, custom_op)
diff --git a/tf_adapter/util/ge_plugin.cc b/tf_adapter/util/ge_plugin.cc
index 62ced776af7d686be43f69242377ab2ac945148c..a2baa3a88bfdb9720e1010a23297d777aa9ee871 100644
--- a/tf_adapter/util/ge_plugin.cc
+++ b/tf_adapter/util/ge_plugin.cc
@@ -130,6 +130,7 @@ void SetOptionNameMap(json &option_name_map) {
   option_name_map.emplace(ge::AICORE_NUM, "aicore_num");
   option_name_map.emplace("ge.inputBatchCpy", "input_batch_cpy");
   option_name_map.emplace(ge::OPTION_ALL_TENSOR_NOT_EMPTY, "all_tensor_not_empty");
+  option_name_map.emplace(ge::OPTION_MULTI_STREAM_MODE, "multi_stream_mode");
 }
 }  // namespace
 
diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc
index 77d9fbcf145cf9708341c1de095a907ee39355b7..162cddb1dfe62fa69eee3c7feeaa006de1c6f755 100644
--- a/tf_adapter/util/npu_attrs.cc
+++ b/tf_adapter/util/npu_attrs.cc
@@ -497,6 +497,7 @@ std::map<std::string, std::string> NpuAttrs::GetSessOptions(const OpKernelConst
   std::string jit_compile;
   std::string aicore_num;
   std::string all_tensor_not_empty;
+  std::string multi_stream_mode;
   if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) {
     (void) ctx->GetAttr("_variable_format_optimize", &variable_format_optimize);
     (void) ctx->GetAttr("_hcom_parallel", &hcom_parallel);
@@ -576,6 +577,7 @@ std::map<std::string, std::string> NpuAttrs::GetSessOptions(const OpKernelConst
     (void) ctx->GetAttr("_input_batch_cpy", &input_batch_cpy);
     (void) ctx->GetAttr("_aicore_num", &aicore_num);
     (void) ctx->GetAttr("_all_tensor_not_empty", &all_tensor_not_empty);
+    (void) ctx->GetAttr("_multi_stream_mode", &multi_stream_mode);
   }
 
   // session options
@@ -649,6 +651,8 @@ std::map<std::string, std::string> NpuAttrs::GetSessOptions(const OpKernelConst
   sess_options["input_batch_cpy"] = input_batch_cpy;
   sess_options[ge::OPTION_ALL_TENSOR_NOT_EMPTY] = all_tensor_not_empty;
   sess_options["all_tensor_not_empty"] = all_tensor_not_empty;
+  sess_options["multi_stream_mode"] = multi_stream_mode;
+  sess_options[ge::OPTION_MULTI_STREAM_MODE] = multi_stream_mode;
   SetForbiddenClosePassOn(sess_options);
   sess_options["aicore_num"] = aicore_num;
   sess_options["ge.aicoreNum"] = aicore_num;
@@ -1263,6 +1267,7 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice
   std::string input_batch_cpy;
   std::string shape_generalization_mode = "STRICT";
   std::string all_tensor_not_empty;
+  std::string multi_stream_mode;
   auto NpuOptimizer_value = attrs.Find("_NpuOptimizer");
   auto enable_data_pre_proc_value = attrs.Find("_enable_data_pre_proc");
@@ -1364,6 +1369,7 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice
   auto input_batch_cpy_value = attrs.Find("_input_batch_cpy");
   auto shape_generalization_mode_value = attrs.Find("_shape_generalization_mode");
   auto all_tensor_not_empty_value = attrs.Find("_all_tensor_not_empty");
+  auto multi_stream_mode_value = attrs.Find("_multi_stream_mode");
   if (NpuOptimizer_value != nullptr) {
     do_npu_optimizer = "1";
     if (enable_data_pre_proc_value != nullptr) {
@@ -1687,6 +1693,9 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice
     if (all_tensor_not_empty_value != nullptr) {
       all_tensor_not_empty = all_tensor_not_empty_value->s();
     }
+    if (multi_stream_mode_value != nullptr) {
+      multi_stream_mode = multi_stream_mode_value->s();
+    }
   }
 
   all_options["variable_format_optimize"] = variable_format_optimize;
@@ -1804,6 +1813,8 @@ std::map<std::string, std::string> NpuAttrs::GetAllAttrOptions(const AttrSlice
   all_options["ge.aicoreNum"] = aicore_num;
   all_options[ge::OPTION_ALL_TENSOR_NOT_EMPTY] = all_tensor_not_empty;
   all_options["all_tensor_not_empty"] = all_tensor_not_empty;
+  all_options["multi_stream_mode"] = multi_stream_mode;
+  all_options[ge::OPTION_MULTI_STREAM_MODE] = multi_stream_mode;
   if (!oo_constant_folding.empty()) {
     all_options["oo_constant_folding"] = oo_constant_folding;
     all_options["ge.oo.constantFolding"] = oo_constant_folding;
@@ -1940,6 +1951,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options
   bool input_batch_cpy = false;
   std::string shape_generalization_mode = "STRICT";
   bool all_tensor_not_empty = false;
+  std::string multi_stream_mode;
   const RewriterConfig &rewrite_options = options.session_options->config.graph_options().rewrite_options();
   for (const auto &custom_optimizer : rewrite_options.custom_optimizers()) {
     if (custom_optimizer.name() == "NpuOptimizer") {
@@ -2511,6 +2523,9 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options
       if (params.count("all_tensor_not_empty") > 0) {
         all_tensor_not_empty = params.at("all_tensor_not_empty").b();
       }
+      if (params.count("multi_stream_mode") > 0) {
+        multi_stream_mode = params.at("multi_stream_mode").s();
+      }
       // input_batch_cpy
       if (params.count("input_batch_cpy") > 0) {
         input_batch_cpy = params.at("input_batch_cpy").b();
@@ -2612,6 +2627,8 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options
   sess_options["ge.aicoreNum"] = aicore_num;
   sess_options["all_tensor_not_empty"] = std::to_string(all_tensor_not_empty);
   sess_options[ge::OPTION_ALL_TENSOR_NOT_EMPTY] = std::to_string(all_tensor_not_empty);
+  sess_options["multi_stream_mode"] = multi_stream_mode;
+  sess_options[ge::OPTION_MULTI_STREAM_MODE] = multi_stream_mode;
   init_options_["profiling_mode"] = std::to_string(static_cast<int>(profiling_mode));
   init_options_[ge::OPTION_EXEC_PROFILING_MODE] = std::to_string(static_cast<int>(profiling_mode));
   init_options_["profiling_options"] = profiling_options;
diff --git a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp
index 553569f0dd134abeabd3b50d26bb2bce77d94e8f..1964707c1058e62164bebc43ab55ecd18fa785cd 100644
--- a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp
+++ b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp
@@ -151,7 +151,8 @@ const std::map<std::string, std::string> kSessionConfigOptions = {
   {"graph_slice", "ge.graphSliceMode"},
   {"input_fusion_size", "ge.exec.input_fusion_size"},
   {"compile_dynamic_mode", "ge.compile_dynamic_mode"},
-  {"all_tensor_not_empty", ge::OPTION_ALL_TENSOR_NOT_EMPTY}
+  {"all_tensor_not_empty", ge::OPTION_ALL_TENSOR_NOT_EMPTY},
+  {"multi_stream_mode", ge::OPTION_MULTI_STREAM_MODE}
 };
 }  // namespace
 
diff --git a/tf_adapter_2.x/python/npu_device/configs/npu_config.py b/tf_adapter_2.x/python/npu_device/configs/npu_config.py
index 3293e3c32419120bea37ea200e610eec41d00978..341e6d70e5cfd679737f0dc05062271c61fea748 100644
--- a/tf_adapter_2.x/python/npu_device/configs/npu_config.py
+++ b/tf_adapter_2.x/python/npu_device/configs/npu_config.py
@@ -85,5 +85,6 @@ class NpuConfig(NpuBaseConfig):
         self.input_batch_cpy = OptionValue(False, [True, False])
         self.shape_generalization_mode = OptionValue("STRICT", ["STRICT", "FULL", "ADAPTIVE"])
         self.all_tensor_not_empty = OptionValue(False, [True, False])
+        self.multi_stream_mode = OptionValue(None, ['CV'])
 
         super(NpuConfig, self).__init__()
diff --git a/tf_adapter_2.x/tests/stub/include/stub/defines.h b/tf_adapter_2.x/tests/stub/include/stub/defines.h
index 45dc1da2a165308d6ec69b165bba9a5b5f40ef23..bc158950e81459a753ab0d4e66873fc35a130b8f 100644
--- a/tf_adapter_2.x/tests/stub/include/stub/defines.h
+++ b/tf_adapter_2.x/tests/stub/include/stub/defines.h
@@ -66,6 +66,7 @@ const char *const OPTION_EXEC_LOGICAL_DEVICE_ID = "ge.exec.logicalDeviceId";
 const char *const OPTION_EXEC_MODEL_DEPLOY_MODE = "ge.exec.modelDeployMode";
 const char *const OPTION_EXEC_MODEL_DEPLOY_DEVICELIST = "ge.exec.modelDeployDevicelist";
 const char *const OPTION_ALL_TENSOR_NOT_EMPTY = "ge.exec.allTensorNotEmpty";
+const char *const OPTION_MULTI_STREAM_MODE = "ge.multiStreamMode";
 
 // Option key: memory init
 const char *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize";
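
Reviewer note: a minimal usage sketch of the new option, illustrative only and not part of the patch. It assumes the standard entry points of the two adapters touched above (npu_bridge for the TF 1.x estimator path, npu_device.global_options() for tf_adapter_2.x); my_model_fn is a hypothetical placeholder.

# Pick the variant matching your adapter generation.

# --- tf_adapter (TF 1.x estimator) ---
# NPURunConfig now accepts multi_stream_mode; NPUEstimator forwards it into
# custom_op.parameter_map["multi_stream_mode"] (see npu_estimator.py above).
from npu_bridge.estimator.npu.npu_config import NPURunConfig
from npu_bridge.estimator.npu.npu_estimator import NPUEstimator

def my_model_fn(features, labels, mode):  # hypothetical user model_fn
    ...

run_config = NPURunConfig(multi_stream_mode="CV")  # "CV": aic/aiv parallel
estimator = NPUEstimator(model_fn=my_model_fn, config=run_config)

# --- tf_adapter_2.x (TF 2.x) ---
# The same option on NpuConfig is validated against ['CV'] and mapped to the
# GE session option ge::OPTION_MULTI_STREAM_MODE ("ge.multiStreamMode").
import npu_device
npu_device.global_options().multi_stream_mode = "CV"
npu_device.open().as_default()

As with all_tensor_not_empty, the value is written under both the user-facing key "multi_stream_mode" and the ge.* key, mirroring the dual-key pattern the session-option maps already use.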