diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py index ba4cfce10bfdce7ac0d59d74aee08f63debc37b6..17b3ab61da8471bb2595519663e34851966753d8 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py @@ -105,7 +105,8 @@ class NPURunConfig(run_config_lib.RunConfig): insert_op_file=None, stream_sync_timeout=-1, event_sync_timeout=-1, - external_weight=False + external_weight=False, + es_cluster_config=None ): """ Constructs a NPUConfig. @@ -148,6 +149,7 @@ class NPURunConfig(run_config_lib.RunConfig): work_path: Stores temporary files generated during optimization, default is current path. buffer_optimize: Whether to enable buffer optimization. enable_small_channel: Whether to enable small channel optimization. + deterministic: Whether to enable deterministic calculation. fusion_switch_file: Fusion switch configuration file path. enable_compress_weight: Whether to enable global weight compression. compress_weight_conf:Path and file name of the node list configuration file to be compressed. @@ -162,6 +164,7 @@ class NPURunConfig(run_config_lib.RunConfig): experimental_config: The experimental configuration. topo_sorting_mode: Provides an interface for users to customize topology sorting. external_weight: Whether convert const to fileconstant and save weight to file. + es_cluster_config: esClusterConfig from user input in embedding service. """ # Check iterations_per_loop. @@ -217,6 +220,7 @@ class NPURunConfig(run_config_lib.RunConfig): self._work_path = work_path self._buffer_optimize = buffer_optimize self._enable_small_channel = enable_small_channel + self._deterministic = deterministic self._fusion_switch_file = fusion_switch_file self._enable_compress_weight = enable_compress_weight self._compress_weight_conf = compress_weight_conf @@ -250,6 +254,7 @@ class NPURunConfig(run_config_lib.RunConfig): self.stream_sync_timeout = stream_sync_timeout self.event_sync_timeout = event_sync_timeout self._external_weight = external_weight + self.es_cluster_config = es_cluster_config super(NPURunConfig, self).__init__( model_dir=model_dir, diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py index 024febd95fc49b7cabe04eec7846d879d3312831..14716c7b029e38c13179ca5bf44261a70b42d318 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py @@ -718,6 +718,7 @@ class NPUEstimator(estimator_lib.Estimator): if config._buffer_optimize is not None: custom_op.parameter_map["buffer_optimize"].s = tf.compat.as_bytes(config._buffer_optimize) custom_op.parameter_map["enable_small_channel"].i = config._enable_small_channel + custom_op.parameter_map["deterministic"].i = config._deterministic if config._fusion_switch_file is not None: custom_op.parameter_map["fusion_switch_file"].s = tf.compat.as_bytes(config._fusion_switch_file) custom_op.parameter_map["enable_compress_weight"].b = config._enable_compress_weight diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc index 03de204b732257d9da2e921d21aff0ca30f0c982..49855064fbb90bda5f831cd55f32e9cd86c4419d 100644 --- a/tf_adapter/util/npu_attrs.cc +++ b/tf_adapter/util/npu_attrs.cc @@ -378,6 +378,7 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr std::string optypelist_for_implmode; std::string buffer_optimize = "l2_optimize"; std::string enable_small_channel = "0"; + std::string deterministic = "0"; std::string fusion_switch_file; std::string enable_compress_weight = "0"; std::string compress_weight_conf; @@ -441,6 +442,7 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr (void) ctx->GetAttr("_dynamic_dims", &dynamic_dims); (void) ctx->GetAttr("_buffer_optimize", &buffer_optimize); (void) ctx->GetAttr("_enable_small_channel", &enable_small_channel); + (void) ctx->GetAttr("_deterministic", &deterministic); (void) ctx->GetAttr("_fusion_switch_file", &fusion_switch_file); (void) ctx->GetAttr("_enable_compress_weight", &enable_compress_weight); (void) ctx->GetAttr("_compress_weight_conf", &compress_weight_conf); @@ -486,6 +488,7 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr sess_options["ge.dynamicDims"] = dynamic_dims; sess_options["ge.bufferOptimize"] = buffer_optimize; sess_options["ge.enableSmallChannel"] = enable_small_channel; + sess_options["ge.deterministic"] = deterministic; sess_options["ge.fusionSwitchFile"] = fusion_switch_file; sess_options["ge.enableCompressWeight"] = enable_compress_weight; sess_options["compress_weight_conf"] = compress_weight_conf; @@ -993,6 +996,7 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & std::string distribute_config; std::string buffer_optimize = "l2_optimize"; std::string enable_small_channel = "0"; + std::string deterministic = "0"; std::string fusion_switch_file; std::string enable_compress_weight = "0"; std::string compress_weight_conf; @@ -1072,6 +1076,7 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & auto distribute_config_value = attrs.Find("_distribute_config"); auto buffer_optimize_value = attrs.Find("_buffer_optimize"); auto enable_small_channel_value = attrs.Find("_enable_small_channel"); + auto deterministic_value = attrs.Find("_deterministic"); auto fusion_switch_file_value = attrs.Find("_fusion_switch_file"); auto enable_compress_weight_value = attrs.Find("_enable_compress_weight"); auto compress_weight_conf_value = attrs.Find("_compress_weight_conf"); @@ -1264,6 +1269,9 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & if (enable_small_channel_value != nullptr) { enable_small_channel = enable_small_channel_value->s(); } + if (deterministic_value != nullptr) { + deterministic = deterministic_value->s(); + } if (fusion_switch_file_value != nullptr) { fusion_switch_file = fusion_switch_file_value->s(); } @@ -1422,6 +1430,7 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & all_options["distribute_config"] = distribute_config; all_options["buffer_optimize"] = buffer_optimize; all_options["enable_small_channel"] = enable_small_channel; + all_options["deterministic"] = deterministic; all_options["fusion_switch_file"] = fusion_switch_file; all_options["enable_compress_weight"] = enable_compress_weight; all_options["compress_weight_conf"] = compress_weight_conf; @@ -1531,6 +1540,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options std::string distribute_config; std::string buffer_optimize = "l2_optimize"; int64_t enable_small_channel = 0L; + int64_t deterministic = 0L; std::string fusion_switch_file; bool enable_compress_weight = false; std::string compress_weight_conf; @@ -1827,6 +1837,9 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options if (graph_run_mode == 0L) { enable_small_channel = 1L; } + if (params.count("deterministic") > 0) { + deterministic = params.at("deterministic").i(); + } if (params.count("fusion_switch_file") > 0) { fusion_switch_file = params.at("fusion_switch_file").s(); } @@ -1981,6 +1994,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options sess_options["dynamic_node_type"] = std::to_string(dynamic_node_type); sess_options["buffer_optimize"] = buffer_optimize; sess_options["enable_small_channel"] = std::to_string(enable_small_channel); + sess_options["deterministic"] = std::to_string(deterministic); sess_options["fusion_switch_file"] = fusion_switch_file; sess_options["enable_compress_weight"] = std::to_string(static_cast(enable_compress_weight)); sess_options["compress_weight_conf"] = compress_weight_conf; diff --git a/tf_adapter/util/session_manager.cc b/tf_adapter/util/session_manager.cc index a9fb0481f4da09e83bf4ef3176a8805703277eee..17fe6d6e0ef7588d9dcd0143b011ded4257951f2 100644 --- a/tf_adapter/util/session_manager.cc +++ b/tf_adapter/util/session_manager.cc @@ -147,6 +147,8 @@ void SessionManager::PrintGeSessionOptions(std::map &s ADP_LOG(INFO) << "[GEOP] enable_small_channel :" << sess_options["ge.enableSmallChannel"]; + ADP_LOG(INFO) << "[GEOP] deterministic :" << sess_options["ge.deterministic"]; + ADP_LOG(INFO) << "[GEOP] fusion_switch_file :" << sess_options["ge.fusionSwitchFile"]; ADP_LOG(INFO) << "[GEOP] enable_compress_weight :" << sess_options["ge.enableCompressWeight"]; diff --git a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp index 9d50a7687d4529b076e0e44b4156823cb48f4cf9..dd2f7b60a32422735842d8f8db381adbb3a3f2af 100644 --- a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp +++ b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp @@ -92,6 +92,7 @@ const std::map kConfigurableOptions = { {"dynamic_node_type", ge::DYNAMIC_NODE_TYPE}, {"dynamic_dims", ge::kDynamicDims}, {"enable_small_channel", ge::ENABLE_SMALL_CHANNEL}, + {"deterministic", "ge.deterministic"}, {"graph_exec_timeout", "ge.exec.graphExecTimeout"}, {"logical_device_cluster_deploy_mode", ge::OPTION_EXEC_LOGICAL_DEVICE_CLUSTER_DEPLOY_MODE}, {"logical_device_id", ge::OPTION_EXEC_LOGICAL_DEVICE_ID}, diff --git a/tf_adapter_2.x/python/npu_device/configs/npu_config.py b/tf_adapter_2.x/python/npu_device/configs/npu_config.py index 225fc546962c5ddc40c788c12f8aa0dbb8af0150..31f9aa902933b1b57bb0b338347c7152c9cd725d 100644 --- a/tf_adapter_2.x/python/npu_device/configs/npu_config.py +++ b/tf_adapter_2.x/python/npu_device/configs/npu_config.py @@ -53,6 +53,7 @@ class NpuConfig(NpuBaseConfig): self.aoe_config = NpuAoeConfig() self.profiling_config = NpuProfilingConfig() self.enable_small_channel = OptionValue(False, [True, False]) + self.deterministic = OptionValue(False, [True, False]) self.graph_exec_timeout = OptionValue(None, None) self.jit_compile = OptionValue(False, [True, False]) self.topo_sorting_mode = OptionValue(None, [0, 1, None])