diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc
index 82c27e49f9a25c5c8b6da8ec6d02dec0f8cfaa36..bd66ca6a263804b61c6f92b127eeaba5a6f45d60 100644
--- a/tf_adapter/kernels/geop_npu.cc
+++ b/tf_adapter/kernels/geop_npu.cc
@@ -2125,8 +2125,11 @@ int GeOp::RunTuning(std::vector<Tensor> &input_vec, std::vector<ge::Tensor> &inp
   SetDynamicInput();
   graph_options_["ge.exec.overflow"] = "1";
   graph_options_["ge.graphLevelSat"] = (mix_compile_mode_ == "0") ? "1" : "0";
 
-  // run aoe tuning
+  return ExecuteAoeTuning(ge_graph, is_allreduce, inputs);
+}
+
+int GeOp::ExecuteAoeTuning(ge::Graph &ge_graph, bool is_allreduce, std::vector<ge::Tensor> &inputs) {
   if ((init_options_["ge.jobType"] == "1") || (init_options_["ge.jobType"] == "2") ||
       ((init_options_["ge.jobType"] == "4") && is_allreduce)) {
     std::function<void()> callback = [this]() {
diff --git a/tf_adapter/kernels/geop_npu.h b/tf_adapter/kernels/geop_npu.h
index 7fc55199ccf494e682c92d4f1c4dd112212c25b7..c0a0b8028174e20d32c6ec29079a291be91ed3b9 100644
--- a/tf_adapter/kernels/geop_npu.h
+++ b/tf_adapter/kernels/geop_npu.h
@@ -170,6 +170,8 @@ public:
   int RunTuning(std::vector<Tensor> &input_vec, std::vector<ge::Tensor> &inputs,
                 const OpKernelContext *const ctx);
 
+  int ExecuteAoeTuning(ge::Graph &ge_graph, bool is_allreduce, std::vector<ge::Tensor> &inputs);
+
   std::string BuildSubGraph(FunctionLibraryDefinition *flib_def, const std::string &graph);
 
   void SetDynamicInput();
diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc
index 781677d43805a8bec4853802e97888fad451e551..6ee28bafe327e9d92f0c28be527ee9f4766fdb79 100644
--- a/tf_adapter/util/npu_attrs.cc
+++ b/tf_adapter/util/npu_attrs.cc
@@ -516,7 +516,8 @@ std::map<std::string, std::string> NpuAttrs::GetSessOptions(const OpKernelConstr
   std::string aicore_num;
   std::string all_tensor_not_empty;
   std::string auto_multistream_parallel_mode;
-  if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) {
+  const bool is_npu_optimizer_valid = (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK());
+  if (is_npu_optimizer_valid) {
     (void) ctx->GetAttr("_variable_format_optimize", &variable_format_optimize);
     (void) ctx->GetAttr("_hcom_parallel", &hcom_parallel);
     (void) ctx->GetAttr("_graph_memory_max_size", &graph_memory_max_size);
@@ -524,12 +525,13 @@ std::map<std::string, std::string> NpuAttrs::GetSessOptions(const OpKernelConstr
     (void) ctx->GetAttr("_enable_dump", &enable_dump);
     (void) ctx->GetAttr("_enable_dump_debug", &enable_dump_debug);
     (void) ctx->GetAttr("_input_fusion_size", &input_fusion_size);
-
-    if (enable_dump != "0" || enable_dump_debug != "0") {
+    const bool need_dump_path = enable_dump != "0" || enable_dump_debug != "0";
+    if (need_dump_path) {
       (void) ctx->GetAttr("_dump_path", &dump_path);
     }
     if (enable_dump != "0") {
-      if (ctx->GetAttr("_dump_step", &dump_step) == Status::OK() && !dump_step.empty()) {
+      const bool is_valid_dump_step = ctx->GetAttr("_dump_step", &dump_step) == Status::OK() && !dump_step.empty();
+      if (is_valid_dump_step) {
         Status s = checkDumpStep(dump_step);
         if (!s.ok()) {
           ADP_LOG(FATAL) << s.error_message();
@@ -622,8 +624,9 @@ std::map<std::string, std::string> NpuAttrs::GetSessOptions(const OpKernelConstr
   sess_options[ge::OPTYPELIST_FOR_IMPLMODE] = optypelist_for_implmode;
   sess_options[ge::GRAPH_MAX_PARALLEL_MODEL_NUM] = graph_max_parallel_model_num;
   // If compile_dynamic_mode is 0 and jit_compile != 1, or compile_dynamic_mode is 0 with jit_compile = 1 and shape_generalization_mode != STRICT, set ge.compile_dynamic_mode to 1
-  if ((compile_dynamic_mode == "0" && jit_compile != "1") ||
-      (compile_dynamic_mode == "0" && jit_compile == "1" && shape_generalization_mode != "STRICT")) {
+  const bool need_set_1 = (compile_dynamic_mode == "0" && jit_compile != "1") ||
+                          (compile_dynamic_mode == "0" && jit_compile == "1" && shape_generalization_mode != "STRICT");
+  if (need_set_1) {
     sess_options["ge.compile_dynamic_mode"] = "1";
   } else {
     sess_options["ge.compile_dynamic_mode"] = compile_dynamic_mode;
diff --git a/tf_adapter_2.x/npu_device/core/optimizers/runtime/node_placer.cpp b/tf_adapter_2.x/npu_device/core/optimizers/runtime/node_placer.cpp
index 805f86960848bc0fd18993dc1c35ec2ff685d71e..a63aa3e842922edabf2ca5dcc6e1cceb22fb5d69 100644
--- a/tf_adapter_2.x/npu_device/core/optimizers/runtime/node_placer.cpp
+++ b/tf_adapter_2.x/npu_device/core/optimizers/runtime/node_placer.cpp
@@ -438,10 +438,12 @@ tensorflow::Status NodePlacer::BuildConcreteCluster() {
     if (IsNodePlacedOn(node, Placement::CPU)) {
       continue;
     }
-    if (std::any_of(node->out_edges().begin(), node->out_edges().end(),
-                    [this](const tensorflow::Edge *edge) { return !IsSupportedNpuBound(*edge); }) &&
-        std::all_of(node->in_edges().begin(), node->in_edges().end(),
-                    [this](const tensorflow::Edge *edge) { return IsSupportedNpuBound(*edge); })) {
+    const bool is_start_node =
+        std::any_of(node->out_edges().begin(), node->out_edges().end(),
+                    [this](const tensorflow::Edge *edge) { return !IsSupportedNpuBound(*edge); }) &&
+        std::all_of(node->in_edges().begin(), node->in_edges().end(),
+                    [this](const tensorflow::Edge *edge) { return IsSupportedNpuBound(*edge); });
+    if (is_start_node) {
       DLOG() << "Need concrete for start node " << node->name();
       starts.push_back(node);
     }
@@ -468,7 +470,8 @@ tensorflow::Status NodePlacer::BuildConcreteCluster() {
     std::queue<std::shared_ptr<Cluster>> q;
     for (auto &node : cluster->nodes) {
       auto iter = concrete_clusters_.find(node);
-      if ((iter != concrete_clusters_.end()) && (iter->second != cluster)) {
+      const bool need_push = (iter != concrete_clusters_.end()) && (iter->second != cluster);
+      if (need_push) {
         q.push(iter->second);
       }
     }
@@ -483,7 +486,8 @@ tensorflow::Status NodePlacer::BuildConcreteCluster() {
       for (auto &node : path_cluster->nodes) {
         (void)cluster->Merge(node);
         auto iter = concrete_clusters_.find(node);
-        if (iter != concrete_clusters_.end() && iter->second != path_cluster) {
+        const bool need_push = iter != concrete_clusters_.end() && iter->second != path_cluster;
+        if (need_push) {
          q.push(iter->second);
        }
      }
diff --git a/tf_adapter_2.x/npu_device/core/optimizers/runtime/npu_trans_resource_input_to_node_optimizer.cpp b/tf_adapter_2.x/npu_device/core/optimizers/runtime/npu_trans_resource_input_to_node_optimizer.cpp
index 6cb0a2bc50730936039a863ad82f596033690ea1..6b6c6eaa2a5e6f7044170c3d08664c3c65755e61 100644
--- a/tf_adapter_2.x/npu_device/core/optimizers/runtime/npu_trans_resource_input_to_node_optimizer.cpp
+++ b/tf_adapter_2.x/npu_device/core/optimizers/runtime/npu_trans_resource_input_to_node_optimizer.cpp
@@ -289,6 +289,26 @@ tensorflow::Status TransResourceInput2Node(TFE_Context *context, tensorflow::Gra
   return tensorflow::Status::OK();
 }
 
+
+void GetRetvalNodes(tensorflow::Graph *mutable_graph, std::map<int, tensorflow::Node *> &indexed_retvals) {
+  for (auto node : mutable_graph->op_nodes()) {
+    if (!node->IsRetval()) {
+      continue;
+    }
+    indexed_retvals[node->attrs().Find("index")->i()] = node;
+  }
+}
+
+void GetConsumeInAndProduceOut(npu::NpuMutableConcreteGraph *graph, std::set<int> &consumed_inputs,
+                               std::set<int> &produced_outputs) {
+  for (auto node : graph->Graph()->nodes()) {
+    if (node->IsArg()) {
+      (void)consumed_inputs.insert(node->attrs().Find("index")->i());
+    } else if (node->IsRetval()) {
+      (void)produced_outputs.insert(node->attrs().Find("index")->i());
+    }
+  }
+}
 }  // namespace
 
 namespace npu {
@@ -308,12 +328,7 @@ tensorflow::Status TransResourceInput2NodeOptimize(TFE_Context *context, NpuMuta
   std::map<int, int> bypass_outputs;
   std::map<int, tensorflow::Node *> indexed_retvals;
 
-  for (auto node : mutable_graph->op_nodes()) {
-    if (!node->IsRetval()) {
-      continue;
-    }
-    indexed_retvals[node->attrs().Find("index")->i()] = node;
-  }
+  GetRetvalNodes(mutable_graph, indexed_retvals);
 
   for (auto item : indexed_retvals) {
     const tensorflow::Edge *edge;
@@ -394,13 +409,7 @@ tensorflow::Status TransResourceInput2NodeOptimize(TFE_Context *context, NpuMuta
 
   std::set<int> consumed_inputs;
   std::set<int> produced_outputs;
-  for (auto node : graph->Graph()->nodes()) {
-    if (node->IsArg()) {
-      (void)consumed_inputs.insert(node->attrs().Find("index")->i());
-    } else if (node->IsRetval()) {
-      (void)produced_outputs.insert(node->attrs().Find("index")->i());
-    }
-  }
+  GetConsumeInAndProduceOut(graph, consumed_inputs, produced_outputs);
 
   FixGraphArgRetvalIndex(*mutable_graph);
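
Note: every hunk above applies one of two mechanical refactorings: hoisting a compound condition into a named const bool (is_npu_optimizer_valid, need_dump_path, is_valid_dump_step, need_set_1, is_start_node, need_push), or extracting a repeated loop into a named helper (ExecuteAoeTuning, GetRetvalNodes, GetConsumeInAndProduceOut). As a standalone sketch of the first pattern, the ge.compile_dynamic_mode rule from npu_attrs.cc can be packaged and checked in isolation; ResolveCompileDynamicMode below is a hypothetical wrapper written for illustration, not part of the patch:

#include <cassert>
#include <string>

// Hypothetical helper (not in the patch): returns the value the patch would
// write to sess_options["ge.compile_dynamic_mode"] for the given attributes.
std::string ResolveCompileDynamicMode(const std::string &compile_dynamic_mode,
                                      const std::string &jit_compile,
                                      const std::string &shape_generalization_mode) {
  // Same named-condition shape as the patch: force "1" when
  // compile_dynamic_mode is "0" and either jit_compile is not "1", or
  // jit_compile is "1" but shape generalization is not STRICT.
  const bool need_set_1 =
      (compile_dynamic_mode == "0" && jit_compile != "1") ||
      (compile_dynamic_mode == "0" && jit_compile == "1" && shape_generalization_mode != "STRICT");
  return need_set_1 ? "1" : compile_dynamic_mode;
}

int main() {
  assert(ResolveCompileDynamicMode("0", "0", "STRICT") == "1");  // jit_compile off: forced to "1"
  assert(ResolveCompileDynamicMode("0", "1", "FULL") == "1");    // non-STRICT generalization: forced to "1"
  assert(ResolveCompileDynamicMode("0", "1", "STRICT") == "0");  // STRICT: keep original "0"
  assert(ResolveCompileDynamicMode("1", "1", "STRICT") == "1");  // already "1": passed through
  return 0;
}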