From 81dfa09b75a8792482f9bac9abd81384b7f2bb04 Mon Sep 17 00:00:00 2001 From: zhangfan Date: Tue, 5 Nov 2024 13:28:36 +0000 Subject: [PATCH 1/2] !2861 logic optimize Merge pull request !2861 from zhangfan/ge_dev --- tf_adapter/kernels/aicpu/host_queue_dataset_op.cc | 8 ++++++-- tf_adapter/kernels/geop_npu.cc | 7 +++++-- tf_adapter/optimizers/frozen_variable_pass.cc | 6 ++++-- tf_adapter/optimizers/mark_start_node_pass.cc | 7 +++++-- .../npu_device/core/optimizers/runtime/node_placer.cpp | 8 ++++---- 5 files changed, 24 insertions(+), 12 deletions(-) diff --git a/tf_adapter/kernels/aicpu/host_queue_dataset_op.cc b/tf_adapter/kernels/aicpu/host_queue_dataset_op.cc index 5d8bc7b14..f3440cd46 100644 --- a/tf_adapter/kernels/aicpu/host_queue_dataset_op.cc +++ b/tf_adapter/kernels/aicpu/host_queue_dataset_op.cc @@ -600,7 +600,9 @@ class HostQueueDatasetOp : public DatasetOpKernel { auto start = std::chrono::system_clock::now(); buffer_element.status = input_impls_[1]->GetNext(ctx.get(), &args, &end_of_sequence); auto end = std::chrono::system_clock::now(); - if ((!buffer_element.status.ok()) || (buffer_element.status.ok() && end_of_sequence)) { + bool is_finished = + ((!buffer_element.status.ok()) || (buffer_element.status.ok() && end_of_sequence)); + if (is_finished) { HandleGetNextStatus(buffer_element.status, end_of_sequence); mutex_lock lck(mu_); buffer_element.host_thread_finished = true; @@ -621,7 +623,9 @@ class HostQueueDatasetOp : public DatasetOpKernel { { mutex_lock lck(mu_); for (auto &tensor : args) { - if ((!is_string) && (from_npu_dataset != NPU_ALLOCATOR_NPU) && (tensor.dtype() == DT_STRING)) { + bool not_use_mem_pool = + ((!is_string) && (from_npu_dataset != NPU_ALLOCATOR_NPU) && (tensor.dtype() == DT_STRING)); + if (not_use_mem_pool) { ADP_LOG(INFO) << "Data type is string, do not use memory pool"; is_string = true; } diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc index e92da4d66..a4ed173da 100644 --- a/tf_adapter/kernels/geop_npu.cc +++ b/tf_adapter/kernels/geop_npu.cc @@ -1596,16 +1596,19 @@ void GeOp::HandleDpOpAndGetNextNodes(Graph &graph) { std::vector remove_nodes; for (Node *node : graph.nodes()) { CHECK_NOT_NULL(node); + bool is_GetNext = (node->type_string() == "IteratorGetNext" || node->type_string() == "GetNext"); if (node->type_string() == "DPOP") { ProcessDpOpFuncDef(*node); - } else if (node->type_string() == "IteratorGetNext" || node->type_string() == "GetNext") { + } else if (is_GetNext) { Node *iterator_node = nullptr; std::string iterator_name; NodeDef &node_def = const_cast(node->def()); for (auto in_edge : node->in_edges()) { CHECK_NOT_NULL(in_edge); CHECK_NOT_NULL(in_edge->src()); - if (in_edge->src()->type_string() == "IteratorV2" || in_edge->src()->type_string() == "Iterator") { + bool isIterator = + (in_edge->src()->type_string() == "IteratorV2" || in_edge->src()->type_string() == "Iterator"); + if (isIterator) { iterator_name = in_edge->src()->name(); iterator_node = in_edge->src(); } diff --git a/tf_adapter/optimizers/frozen_variable_pass.cc b/tf_adapter/optimizers/frozen_variable_pass.cc index c0ef3bbc1..f63c7c460 100644 --- a/tf_adapter/optimizers/frozen_variable_pass.cc +++ b/tf_adapter/optimizers/frozen_variable_pass.cc @@ -152,7 +152,8 @@ Status FrozenVariablePass::DoConstantFolding(const GraphOptimizationPassOptions Status FrozenVariablePass::Run(const GraphOptimizationPassOptions &options) { ADP_LOG(INFO) << "FrozenVariablePass Run"; - if (options.graph == nullptr || options.session_options == nullptr) { + bool not_need_process = (options.graph == nullptr || options.session_options == nullptr); + if (not_need_process) { return Status::OK(); } @@ -176,7 +177,8 @@ Status FrozenVariablePass::Run(const GraphOptimizationPassOptions &options) { std::vector remove_nodes; bool generate_partitioned_call = false; for (Node *node : graph_in->op_nodes()) { - if ((node != nullptr) && (IsNeedBuildPartitionedCall(node))) { + bool need_build = ((node != nullptr) && (IsNeedBuildPartitionedCall(node))); + if (need_build) { std::vector cluster_nodes = {node}; std::vector cluster_out_nodes; for (auto out_node : node->out_nodes()) { diff --git a/tf_adapter/optimizers/mark_start_node_pass.cc b/tf_adapter/optimizers/mark_start_node_pass.cc index f2223ae6e..cdeaa2c94 100644 --- a/tf_adapter/optimizers/mark_start_node_pass.cc +++ b/tf_adapter/optimizers/mark_start_node_pass.cc @@ -59,13 +59,16 @@ class MarkStartNodePass : public GraphOptimizationPass { Status MarkStartNodePass::Run(const GraphOptimizationPassOptions &options) { int graph_num = graph_run_num++; - if (options.graph == nullptr || options.flib_def == nullptr || options.session_options == nullptr) { + bool not_need_process = + (options.graph == nullptr || options.flib_def == nullptr || options.session_options == nullptr); + if (not_need_process) { return Status::OK(); } std::map pass_options = NpuAttrs::GetPassOptions(options); std::string job = pass_options["job"]; - if (job == "ps" || job == "default" || job == "localhost") { + bool skip_flag = (job == "ps" || job == "default" || job == "localhost") ; + if (skip_flag) { ADP_LOG(INFO) << "job is " << job << " Skip the optimizer : MarkStartNodePass."; return Status::OK(); } diff --git a/tf_adapter_2.x/npu_device/core/optimizers/runtime/node_placer.cpp b/tf_adapter_2.x/npu_device/core/optimizers/runtime/node_placer.cpp index 5549135b7..805f86960 100644 --- a/tf_adapter_2.x/npu_device/core/optimizers/runtime/node_placer.cpp +++ b/tf_adapter_2.x/npu_device/core/optimizers/runtime/node_placer.cpp @@ -215,11 +215,11 @@ tensorflow::Status NodePlacer::BuildNpuOp() { if (edge->IsControlEdge()) { DLOG() << "Collect control output " << edge->src()->name() << " of cluster " << cluster->name; (void)control_outputs.insert(edge->dst()); - } else { - DLOG() << "Collect output edge " << edge->DebugString() << " of cluster " << cluster->name; - output_edges.push_back(edge); - output_types.emplace_back(EdgeDataType(*edge)); + continue; } + DLOG() << "Collect output edge " << edge->DebugString() << " of cluster " << cluster->name; + output_edges.push_back(edge); + output_types.emplace_back(EdgeDataType(*edge)); } } } -- Gitee From 10bd0b0a39076dc39387045fc030c5b678e50fe9 Mon Sep 17 00:00:00 2001 From: guopeian Date: Tue, 8 Oct 2024 11:21:51 +0800 Subject: [PATCH 2/2] =?UTF-8?q?hif8=20=EF=BC=88cherry=20picked=20commit=20?= =?UTF-8?q?from=20?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tf_adapter/util/npu_attrs.cc | 2 +- tf_adapter_2.x/python/npu_device/configs/npu_config.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc index 1d619cd59..28111ff74 100644 --- a/tf_adapter/util/npu_attrs.cc +++ b/tf_adapter/util/npu_attrs.cc @@ -2022,7 +2022,7 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options if (params.count("precision_mode_v2") > 0) { precision_mode_v2 = params.at("precision_mode_v2").s(); const static std::vector kPrecisionModeV2List = {"fp16", "origin", "cube_fp16in_fp32out", - "mixed_float16", "mixed_bfloat16"}; + "mixed_float16", "mixed_bfloat16", "cube_hif8", "mixed_hif8"}; NPU_REQUIRES_OK(CheckValueAllowed(precision_mode_v2, kPrecisionModeV2List)); init_options_["precision_mode_v2"] = precision_mode_v2; init_options_["ge.exec.precision_mode_v2"] = precision_mode_v2; diff --git a/tf_adapter_2.x/python/npu_device/configs/npu_config.py b/tf_adapter_2.x/python/npu_device/configs/npu_config.py index 1371ec002..c7834a873 100644 --- a/tf_adapter_2.x/python/npu_device/configs/npu_config.py +++ b/tf_adapter_2.x/python/npu_device/configs/npu_config.py @@ -78,6 +78,6 @@ class NpuConfig(NpuBaseConfig): self.experimental = NpuExperimentalConfig() self.precision_mode_v2 = OptionValue(None, ['fp16', 'origin', 'cube_fp16in_fp32out', 'mixed_float16', - 'mixed_bfloat16']) + 'mixed_bfloat16', 'cube_hif8', 'mixed_hif8']) super(NpuConfig, self).__init__() -- Gitee